From 727b5761549f342c6ccfc0685319aa29a9a460f6 Mon Sep 17 00:00:00 2001
From: Ward from fusion-voyager-3 <hotrod.master@hotmail.com>
Date: Wed, 24 Jul 2024 05:09:14 +0300
Subject: [PATCH] initial commit

---
 .github/build-canary-v3                       |     1 +
 .github/build-nest-v3                         |     1 +
 .github/release-canary-v3                     |     1 +
 .github/release-nest-v3                       |     1 +
 .github/workflows/build-canaryv3.yml          |    34 +
 .github/workflows/build-nestv3.yml            |    34 +
 .github/workflows/build.yml                   |    24 -
 .github/workflows/release-canaryv3.yml        |    37 +
 .github/workflows/release-nestv3.yml          |    37 +
 .github/workflows/release.yml                 |    37 -
 VERSION                                       |     2 +-
 main.sh                                       |     7 -
 mainv3.sh                                     |    12 +
 output/key.gpg                                |    30 -
 ...add-support-for-2024-ROG-Mini-.patch.patch |   151 -
 ...s-wmi-add-support-for-Vivobook-GPU-M.patch |   100 -
 ...s-wmi-add-support-variant-of-TUF-RGB.patch |    74 -
 ...asus-wmi-support-toggling-POST-sound.patch |   139 -
 ...s-wmi-store-a-min-default-for-ppt-op.patch |   342 -
 patches/cachyos/0001-bore-cachy.patch         |   929 -
 patches/cachyos/0001-cachyos-base-all.patch   | 53760 ----------------
 patches/cachyos/0003-nvidia.patch             |   761 -
 ...tom-USB-pollrate-for-specific-device.patch |   258 -
 ...-REBAR-size-quirk-for-Sapphire-RX-56.patch |    34 -
 ...drop-redundant-pci_enable_pcie_error.patch |   108 -
 ....ppfeaturemask-0xffffffff-as-default.patch |    25 -
 .../0001-acpi-proc-idle-skip-dummy-wait.patch |   125 -
 patches/nobara/0001-add-acpi_call.patch       |   506 -
 patches/nobara/0001-amd-hdr.patch             |  2042 -
 ...disable-async-flipping-on-specific-d.patch |    48 -
 .../0001-hid-asus-nero-patches-rogue.patch    |   972 -
 ...nel-parameter-to-disable-async-page-.patch |    54 -
 patches/nobara/OpenRGB.patch                  |   703 -
 patches/nobara/amdgpu-si-cik-default.patch    |    70 -
 patches/nobara/lenovo-legion-laptop.patch     |  6143 --
 patches/nobara/linux-surface.patch            |  9117 ---
 ...isable-powersave-features-by-default.patch |    42 -
 .../nobara/set-ps4-bt-poll-rate-1000hz.patch  |    27 -
 patches/nobara/steam-deck.patch               |  2497 -
 patches/nobara/uinput.patch                   |   133 -
 patches/series                                |    15 -
 release.sh                                    |     2 +-
 {scripts => scripts-v3}/build.sh              |     0
 config => scripts-v3/config                   |     0
 scripts-v3/config.sh                          |    49 +
 {scripts => scripts-v3}/output.sh             |     0
 scripts-v3/patch.sh                           |     8 +
 {scripts => scripts-v3}/source.sh             |     0
 scripts/config.sh                             |    51 -
 scripts/patch.sh                              |     5 -
 50 files changed, 217 insertions(+), 79331 deletions(-)
 create mode 100644 .github/build-canary-v3
 create mode 100644 .github/build-nest-v3
 create mode 100644 .github/release-canary-v3
 create mode 100644 .github/release-nest-v3
 create mode 100644 .github/workflows/build-canaryv3.yml
 create mode 100644 .github/workflows/build-nestv3.yml
 delete mode 100644 .github/workflows/build.yml
 create mode 100644 .github/workflows/release-canaryv3.yml
 create mode 100644 .github/workflows/release-nestv3.yml
 delete mode 100644 .github/workflows/release.yml
 delete mode 100755 main.sh
 create mode 100755 mainv3.sh
 delete mode 100644 output/key.gpg
 delete mode 100644 patches/asuslinux/0001-platform-x86-asus-wmi-add-support-for-2024-ROG-Mini-.patch.patch
 delete mode 100644 patches/asuslinux/0002-platform-x86-asus-wmi-add-support-for-Vivobook-GPU-M.patch
 delete mode 100644 patches/asuslinux/0003-platform-x86-asus-wmi-add-support-variant-of-TUF-RGB.patch
 delete mode 100644 patches/asuslinux/0004-platform-x86-asus-wmi-support-toggling-POST-sound.patch
 delete mode 100644 patches/asuslinux/0005-platform-x86-asus-wmi-store-a-min-default-for-ppt-op.patch
 delete mode 100644 patches/cachyos/0001-bore-cachy.patch
 delete mode 100644 patches/cachyos/0001-cachyos-base-all.patch
 delete mode 100644 patches/cachyos/0003-nvidia.patch
 delete mode 100644 patches/nobara/0001-Allow-to-set-custom-USB-pollrate-for-specific-device.patch
 delete mode 100644 patches/nobara/0001-Revert-PCI-Add-a-REBAR-size-quirk-for-Sapphire-RX-56.patch
 delete mode 100644 patches/nobara/0001-Revert-nvme-pci-drop-redundant-pci_enable_pcie_error.patch
 delete mode 100644 patches/nobara/0001-Set-amdgpu.ppfeaturemask-0xffffffff-as-default.patch
 delete mode 100644 patches/nobara/0001-acpi-proc-idle-skip-dummy-wait.patch
 delete mode 100644 patches/nobara/0001-add-acpi_call.patch
 delete mode 100644 patches/nobara/0001-amd-hdr.patch
 delete mode 100644 patches/nobara/0001-drm-i915-quirks-disable-async-flipping-on-specific-d.patch
 delete mode 100644 patches/nobara/0001-hid-asus-nero-patches-rogue.patch
 delete mode 100644 patches/nobara/0002-drm-i915-add-kernel-parameter-to-disable-async-page-.patch
 delete mode 100644 patches/nobara/OpenRGB.patch
 delete mode 100644 patches/nobara/amdgpu-si-cik-default.patch
 delete mode 100644 patches/nobara/lenovo-legion-laptop.patch
 delete mode 100644 patches/nobara/linux-surface.patch
 delete mode 100644 patches/nobara/mt76:-mt7921:-Disable-powersave-features-by-default.patch
 delete mode 100644 patches/nobara/set-ps4-bt-poll-rate-1000hz.patch
 delete mode 100644 patches/nobara/steam-deck.patch
 delete mode 100644 patches/nobara/uinput.patch
 delete mode 100644 patches/series
 rename {scripts => scripts-v3}/build.sh (100%)
 rename config => scripts-v3/config (100%)
 create mode 100755 scripts-v3/config.sh
 rename {scripts => scripts-v3}/output.sh (100%)
 create mode 100755 scripts-v3/patch.sh
 rename {scripts => scripts-v3}/source.sh (100%)
 delete mode 100755 scripts/config.sh
 delete mode 100755 scripts/patch.sh

diff --git a/.github/build-canary-v3 b/.github/build-canary-v3
new file mode 100644
index 0000000..56a6051
--- /dev/null
+++ b/.github/build-canary-v3
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/.github/build-nest-v3 b/.github/build-nest-v3
new file mode 100644
index 0000000..56a6051
--- /dev/null
+++ b/.github/build-nest-v3
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/.github/release-canary-v3 b/.github/release-canary-v3
new file mode 100644
index 0000000..b8626c4
--- /dev/null
+++ b/.github/release-canary-v3
@@ -0,0 +1 @@
+4
diff --git a/.github/release-nest-v3 b/.github/release-nest-v3
new file mode 100644
index 0000000..56a6051
--- /dev/null
+++ b/.github/release-nest-v3
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/.github/workflows/build-canaryv3.yml b/.github/workflows/build-canaryv3.yml
new file mode 100644
index 0000000..72adde7
--- /dev/null
+++ b/.github/workflows/build-canaryv3.yml
@@ -0,0 +1,34 @@
+name: PikaOS Package Build Only (Canary) (amd64-v3)
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - '.github/build-canary-v3'  # run only when this trigger file changes on main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    container:
+      image: ghcr.io/pikaos-linux/pikaos-builder:canaryv3
+      volumes:
+        - /proc:/proc
+      options: --privileged -it
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Install SSH key
+      uses: shimataro/ssh-key-action@v2
+      with:
+        key: ${{ secrets.SSH_KEY }}  # private key: read from encrypted secrets, not plain-text vars
+        name: id_rsa
+        known_hosts: ${{ vars.KNOWN_HOSTS }}
+        if_key_exists: replace
+    
+    - name: Update APT Cache
+      run: apt-get update -y
+
+    - name: Build Package
+      run: ./mainv3.sh
diff --git a/.github/workflows/build-nestv3.yml b/.github/workflows/build-nestv3.yml
new file mode 100644
index 0000000..3098818
--- /dev/null
+++ b/.github/workflows/build-nestv3.yml
@@ -0,0 +1,34 @@
+name: PikaOS Package Build Only (amd64-v3)
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - '.github/build-nest-v3'  # run only when this trigger file changes on main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    container:
+      image: ghcr.io/pikaos-linux/pikaos-builder:nestv3
+      volumes:
+        - /proc:/proc
+      options: --privileged -it
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Install SSH key
+      uses: shimataro/ssh-key-action@v2
+      with:
+        key: ${{ secrets.SSH_KEY }}  # private key: read from encrypted secrets, not plain-text vars
+        name: id_rsa
+        known_hosts: ${{ vars.KNOWN_HOSTS }}
+        if_key_exists: replace
+    
+    - name: Update APT Cache
+      run: apt-get update -y
+
+    - name: Build Package
+      run: ./mainv3.sh
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
deleted file mode 100644
index cbe6947..0000000
--- a/.github/workflows/build.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-name: PikaOS Kernel Build Only
-
-on:
-  workflow_dispatch
-
-jobs:
-  build:
-    runs-on: self-hosted
-    container:
-      image: ghcr.io/pikaos-linux/pikaos-builder:canary
-      volumes:
-        - /proc:/proc
-      options: --privileged -it
-
-    steps:
-    - uses: actions/checkout@v3
-
-    - name: Build Kernel
-      run: ./main.sh
-      
-    - uses: actions/upload-artifact@v3
-      with:
-        name: PikaOS Kernel
-        path: output/
diff --git a/.github/workflows/release-canaryv3.yml b/.github/workflows/release-canaryv3.yml
new file mode 100644
index 0000000..3e837ff
--- /dev/null
+++ b/.github/workflows/release-canaryv3.yml
@@ -0,0 +1,37 @@
+name: PikaOS Package Build & Release (Canary) (amd64-v3)
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - '.github/release-canary-v3'  # run only when this trigger file changes on main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    container:
+      image: ghcr.io/pikaos-linux/pikaos-builder:canaryv3
+      volumes:
+        - /proc:/proc
+      options: --privileged -it
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Install SSH key
+      uses: shimataro/ssh-key-action@v2
+      with:
+        key: ${{ secrets.SSH_KEY }}  # private key: read from encrypted secrets, not plain-text vars
+        name: id_rsa
+        known_hosts: ${{ vars.KNOWN_HOSTS }}
+        if_key_exists: replace
+    
+    - name: Update APT Cache
+      run: apt-get update -y
+
+    - name: Build Package
+      run: ./mainv3.sh
+
+    - name: Release Package
+      run: ./release.sh
diff --git a/.github/workflows/release-nestv3.yml b/.github/workflows/release-nestv3.yml
new file mode 100644
index 0000000..bb9261e
--- /dev/null
+++ b/.github/workflows/release-nestv3.yml
@@ -0,0 +1,37 @@
+name: PikaOS Package Build & Release (amd64-v3)
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - '.github/release-nest-v3'  # run only when this trigger file changes on main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    container:
+      image: ghcr.io/pikaos-linux/pikaos-builder:nestv3
+      volumes:
+        - /proc:/proc
+      options: --privileged -it
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Install SSH key
+      uses: shimataro/ssh-key-action@v2
+      with:
+        key: ${{ secrets.SSH_KEY }}  # private key: read from encrypted secrets, not plain-text vars
+        name: id_rsa
+        known_hosts: ${{ vars.KNOWN_HOSTS }}
+        if_key_exists: replace
+    
+    - name: Update APT Cache
+      run: apt-get update -y
+
+    - name: Build Package
+      run: ./mainv3.sh
+
+    - name: Release Package
+      run: ./release.sh
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
deleted file mode 100644
index 9b21726..0000000
--- a/.github/workflows/release.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-name: PikaOS Kernel Build And Release
-
-on:
-  workflow_dispatch
-
-jobs:
-  build:
-    runs-on: self-hosted
-    container:
-      image: ghcr.io/pikaos-linux/pikaos-builder:canary
-      volumes:
-        - /proc:/proc
-      options: --privileged -it
-
-    steps:
-    - uses: actions/checkout@v3
-    
-    - name: Import GPG key
-      id: import_gpg
-      uses: crazy-max/ghaction-import-gpg@v5
-      with:
-        gpg_private_key: ${{ secrets.GPG_PRIVATE_KEY }}
-        passphrase: ${{ secrets.PASSPHRASE }}
-
-    - name: Install SSH key
-      uses: shimataro/ssh-key-action@v2
-      with:
-        key: ${{ secrets.SSH_KEY }}
-        name: id_rsa
-        known_hosts: ${{ secrets.KNOWN_HOSTS }}
-        if_key_exists: replace
-    
-    - name: Build Kernel
-      run: ./main.sh
-
-    - name: Release Kernel
-      run: ./release.sh
diff --git a/VERSION b/VERSION
index 5a33ecb..ad3cafd 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-6.10
+#PUT LINUX UPSTREAM VERSION HERE#
diff --git a/main.sh b/main.sh
deleted file mode 100755
index baba420..0000000
--- a/main.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-
-. ./scripts/source.sh
-. ../scripts/patch.sh
-. ../scripts/config.sh
-. ../scripts/build.sh
-. ../scripts/output.sh
diff --git a/mainv3.sh b/mainv3.sh
new file mode 100755
index 0000000..9f1654e
--- /dev/null
+++ b/mainv3.sh
@@ -0,0 +1,12 @@
+#! /bin/bash
+
+set -e
+
+# Make sure ./output exists before any build stage is sourced
+mkdir -p ./output
+
+. ./scripts-v3/source.sh
+. ../scripts-v3/patch.sh  # NOTE(review): '../' presumably relies on source.sh having changed directory - confirm
+. ../scripts-v3/config.sh
+. ../scripts-v3/build.sh
+. ../scripts-v3/output.sh
diff --git a/output/key.gpg b/output/key.gpg
deleted file mode 100644
index 9b5a79f..0000000
--- a/output/key.gpg
+++ /dev/null
@@ -1,30 +0,0 @@
------BEGIN PGP PUBLIC KEY BLOCK-----
-
-mQENBGPJoigBCADZ8tDzkO2LlWIzXZLLyRLIaRnaNHG6P9xx0ABSFsqU+X+p9qDS
-eQW6SmeCN+PauqAHlzrJ7p3XZi07E+h69PEk5R5n7qhVECW35Y1sB9EfC2nqVRxd
-RcWtwQsipEHQmjvWIsD4hR5uhq62p7grSkQxv13SGLqyJkKIpkic2vZEgqubfZd4
-KLPFvaQZar6QWa3urfYnUZzc1TNkEYxghr/dQuCFSfYPM+yHT70MXrlPOgfslGgL
-YtoN1YauF04wzAg1RFfrWX2AdHE792fVHrkHRsvQg1Pvw4KjPnM6jX2V8W8n7C++
-yxpiMUU2h9FqBWfHrqNLWtKdn6+lgHUq2Oj3ABEBAAG0IWZlcnJlbyA8aGFyZGVy
-dGhhbmZpcmVAZ21haWwuY29tPokBTgQTAQoAOBYhBIvETfAmQkhf8fPMBKt4xg37
-WBYDBQJjyaIoAhsDBQsJCAcCBhUKCQgLAgQWAgMBAh4BAheAAAoJEKt4xg37WBYD
-4/oH/2LRW4FwLHCsWeJfRx5Z7BwKrGqWIF2VujkvEjlFOGYO7aN5HxeX/QKeN+Wy
-901hv4CO7T7aSye0qjaYz0I6ZUmr9CaINdXTH7fok3CXQYBfluaLiyxMPSm+Fe5o
-vfiUiSMZ488uaUkFSww/TEP8wi5H02yqGJcx3yB54OTsVb8eUHLPXno0T4tooWvX
-EOMUKkpj3tEylJoqL5d2iz2ZrkMdX9tVXOkKY3iJD2El0TPITrTIuRuurqzc4CWU
-laV7bmZ1Mq5r21S7ISOhhzvEMwsiWylIFXmXNPvbU7DC43uT3+nKhBca8VESzvmu
-r7zC6CcQAR5IVHMjd8weFfrnGXm5AQ0EY8miKAEIALnnC+U4gx0m0yLEVOHBoccb
-T7CvhmBYer2shxe5o7zUZ5V4y1iJdzSSJksbQkZH4+JDwi7Hp3/lqI2EsxQ9TR+A
-OdRvETfz88aK/e2vJ0j7Bt3Dr0u0mgoo9kSx6rLq1oH9Nha9ReOljmEfDtuINR86
-QGEd8PyvNDcUap+6QQa6/RBEDiH1zYBYtxv4rbuciKsh+e6r6C8TJb43nKr3YBGu
-/GE1aDlGaKvFgUOZmaapgoQVdpXcg7ZtTpI8sNKdnLVEChIKk35n52XfQDZPVvAt
-bsUIr77B4hi+GsjGli7ihr+JJEiHwOyCMZvV95ZWq2ThrXxRWA8mHqCLhz7oTV8A
-EQEAAYkBNgQYAQoAIBYhBIvETfAmQkhf8fPMBKt4xg37WBYDBQJjyaIoAhsMAAoJ
-EKt4xg37WBYDdwAIAI3yJwOa6P6wz3ddLt/4FTlCSnlJ8C904RDwtJEO/C/y9qZv
-yE0qitUi7mntzYE6G7SES3Zn6b9HhdTS9kQv6VUg75TjD/WGPVju5cB11mte95Z9
-6iW5u65kxpawxiTUhaO+O4RO6fZ29rZyCQDfa7ESudkVE/yktAA5umnAbGpgxGa6
-8egCGiZ0LKUqcHxMAsoUUhlOTk3LR4yS6nKE1Q8Dr6E7NYlrWcoGDSQzKvXLqf8e
-9eJLGckePwHDzhgO9LKGW3meTV6ldLehTsxm/ycHqXL7/wYjYy6ZXj/5Px3CGLPg
-DH9mVj8ERsz096eQA+53gmcTsNtq/FLWS2MhtCc=
-=+26V
------END PGP PUBLIC KEY BLOCK-----
\ No newline at end of file
diff --git a/patches/asuslinux/0001-platform-x86-asus-wmi-add-support-for-2024-ROG-Mini-.patch.patch b/patches/asuslinux/0001-platform-x86-asus-wmi-add-support-for-2024-ROG-Mini-.patch.patch
deleted file mode 100644
index fff1b38..0000000
--- a/patches/asuslinux/0001-platform-x86-asus-wmi-add-support-for-2024-ROG-Mini-.patch.patch
+++ /dev/null
@@ -1,151 +0,0 @@
-From 55426abb60d99efed912d8309498c0c365e8dcec Mon Sep 17 00:00:00 2001
-From: "Luke D. Jones" <luke@ljones.dev>
-Date: Sun, 10 Mar 2024 15:14:37 +1300
-Subject: [PATCH 1/5] platform/x86: asus-wmi: add support for 2024 ROG Mini-LED
-
-Support the 2024 mini-led backlight and adjust the related functions
-to select the relevant dev-id. Also add `available_mini_led_mode` to the
-platform sysfs since the available mini-led levels can be different.
-
-Signed-off-by: Luke D. Jones <luke@ljones.dev>
----
- .../ABI/testing/sysfs-platform-asus-wmi       |  8 ++++
- drivers/platform/x86/asus-wmi.c               | 48 ++++++++++++++++---
- include/linux/platform_data/x86/asus-wmi.h    |  1 +
- 3 files changed, 51 insertions(+), 6 deletions(-)
-
-diff --git a/Documentation/ABI/testing/sysfs-platform-asus-wmi b/Documentation/ABI/testing/sysfs-platform-asus-wmi
-index 8a7e25bde085..e32b4f0ae15f 100644
---- a/Documentation/ABI/testing/sysfs-platform-asus-wmi
-+++ b/Documentation/ABI/testing/sysfs-platform-asus-wmi
-@@ -126,6 +126,14 @@ Description:
- 		Change the mini-LED mode:
- 			* 0 - Single-zone,
- 			* 1 - Multi-zone
-+			* 2 - Multi-zone strong (available on newer generation mini-led)
-+
-+What:		/sys/devices/platform/<platform>/avilable_mini_led_mode
-+Date:		Jun 2023
-+KernelVersion:	6.9
-+Contact:	"Luke Jones" <luke@ljones.dev>
-+Description:
-+		List the available mini-led modes.
- 
- What:		/sys/devices/platform/<platform>/ppt_pl1_spl
- Date:		Jun 2023
-diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
-index 18be35fdb381..a56152ccfbe7 100644
---- a/drivers/platform/x86/asus-wmi.c
-+++ b/drivers/platform/x86/asus-wmi.c
-@@ -297,6 +297,7 @@ struct asus_wmi {
- 
- 	bool panel_overdrive_available;
- 	bool mini_led_mode_available;
-+	u32 mini_led_dev_id;
- 
- 	struct hotplug_slot hotplug_slot;
- 	struct mutex hotplug_lock;
-@@ -2109,10 +2110,17 @@ static ssize_t mini_led_mode_show(struct device *dev,
- 	struct asus_wmi *asus = dev_get_drvdata(dev);
- 	int result;
- 
--	result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_MINI_LED_MODE);
--	if (result < 0)
--		return result;
-+	result = asus_wmi_get_devstate_simple(asus, asus->mini_led_dev_id);
- 
-+	// Remap the mode values to match previous generation mini-led including
-+	// if errored -19 since some of these bios return a bad result if set to "2"
-+	// which is mini-led off
-+	if (asus->mini_led_dev_id == ASUS_WMI_DEVID_MINI_LED_MODE2) {
-+		if (result >= 0 || result == -19)
-+			result = result == 1 ? 2 : result == 0 ? 1 : 0;
-+	} else if (result < 0) {
-+		return result;
-+	}
- 	return sysfs_emit(buf, "%d\n", result);
- }
- 
-@@ -2129,10 +2137,15 @@ static ssize_t mini_led_mode_store(struct device *dev,
- 	if (result)
- 		return result;
- 
--	if (mode > 1)
-+	if (mode > 1 && asus->mini_led_dev_id == ASUS_WMI_DEVID_MINI_LED_MODE)
- 		return -EINVAL;
-+	if (mode > 2 && asus->mini_led_dev_id == ASUS_WMI_DEVID_MINI_LED_MODE2)
-+		return -EINVAL;
-+	// Remap the mode values to match previous generation mini-led
-+	if (asus->mini_led_dev_id == ASUS_WMI_DEVID_MINI_LED_MODE2)
-+		mode = mode == 2 ? 1 : mode == 0 ? 2 : 0;
- 
--	err = asus_wmi_set_devstate(ASUS_WMI_DEVID_MINI_LED_MODE, mode, &result);
-+	err = asus_wmi_set_devstate(asus->mini_led_dev_id, mode, &result);
- 
- 	if (err) {
- 		pr_warn("Failed to set mini-LED: %d\n", err);
-@@ -2150,6 +2163,21 @@ static ssize_t mini_led_mode_store(struct device *dev,
- }
- static DEVICE_ATTR_RW(mini_led_mode);
- 
-+static ssize_t available_mini_led_mode_show(struct device *dev,
-+				  struct device_attribute *attr, char *buf)
-+{
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
-+
-+	if (asus->mini_led_dev_id == ASUS_WMI_DEVID_MINI_LED_MODE)
-+		return sysfs_emit(buf, "0 1\n");
-+	if (asus->mini_led_dev_id == ASUS_WMI_DEVID_MINI_LED_MODE2)
-+		return sysfs_emit(buf, "0 1 2\n");
-+
-+	return sysfs_emit(buf, "0\n");
-+}
-+
-+static DEVICE_ATTR_RO(available_mini_led_mode);
-+
- /* Quirks *********************************************************************/
- 
- static void asus_wmi_set_xusb2pr(struct asus_wmi *asus)
-@@ -4174,6 +4202,7 @@ static struct attribute *platform_attributes[] = {
- 	&dev_attr_nv_temp_target.attr,
- 	&dev_attr_panel_od.attr,
- 	&dev_attr_mini_led_mode.attr,
-+	&dev_attr_available_mini_led_mode.attr,
- 	NULL
- };
- 
-@@ -4496,10 +4525,17 @@ static int asus_wmi_add(struct platform_device *pdev)
- 	asus->nv_dyn_boost_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_NV_DYN_BOOST);
- 	asus->nv_temp_tgt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_NV_THERM_TARGET);
- 	asus->panel_overdrive_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PANEL_OD);
--	asus->mini_led_mode_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE);
- 	asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE)
- 						&& dmi_match(DMI_BOARD_NAME, "RC71L");
- 
-+	if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE)) {
-+		asus->mini_led_mode_available = true;
-+		asus->mini_led_dev_id = ASUS_WMI_DEVID_MINI_LED_MODE;
-+	} else if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE2)) {
-+		asus->mini_led_mode_available = true;
-+		asus->mini_led_dev_id = ASUS_WMI_DEVID_MINI_LED_MODE2;
-+	}
-+
- 	err = fan_boost_mode_check_present(asus);
- 	if (err)
- 		goto fail_fan_boost_mode;
-diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
-index ab1c7deff118..9cadce10ad9a 100644
---- a/include/linux/platform_data/x86/asus-wmi.h
-+++ b/include/linux/platform_data/x86/asus-wmi.h
-@@ -71,6 +71,7 @@
- #define ASUS_WMI_DEVID_LID_FLIP		0x00060062
- #define ASUS_WMI_DEVID_LID_FLIP_ROG	0x00060077
- #define ASUS_WMI_DEVID_MINI_LED_MODE	0x0005001E
-+#define ASUS_WMI_DEVID_MINI_LED_MODE2	0x0005002E
- 
- /* Storage */
- #define ASUS_WMI_DEVID_CARDREADER	0x00080013
--- 
-2.44.0
-
-
diff --git a/patches/asuslinux/0002-platform-x86-asus-wmi-add-support-for-Vivobook-GPU-M.patch b/patches/asuslinux/0002-platform-x86-asus-wmi-add-support-for-Vivobook-GPU-M.patch
deleted file mode 100644
index dbd8ee9..0000000
--- a/patches/asuslinux/0002-platform-x86-asus-wmi-add-support-for-Vivobook-GPU-M.patch
+++ /dev/null
@@ -1,100 +0,0 @@
-From 06d5a9b83548d99b70764166d723489cc8336b1d Mon Sep 17 00:00:00 2001
-From: "Luke D. Jones" <luke@ljones.dev>
-Date: Sun, 10 Mar 2024 17:10:05 +1300
-Subject: [PATCH 2/5] platform/x86: asus-wmi: add support for Vivobook GPU MUX
-
-Adjust existing MUX support to select whichever MUX support is available
-so that ASUS Vivobook MUX can also be used if detected.
-
-Signed-off-by: Luke D. Jones <luke@ljones.dev>
----
- drivers/platform/x86/asus-wmi.c            | 18 +++++++++++++-----
- include/linux/platform_data/x86/asus-wmi.h |  1 +
- 2 files changed, 14 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
-index a56152ccfbe7..b9a2fb8007c0 100644
---- a/drivers/platform/x86/asus-wmi.c
-+++ b/drivers/platform/x86/asus-wmi.c
-@@ -268,6 +268,7 @@ struct asus_wmi {
- 	bool egpu_connect_available;
- 	bool dgpu_disable_available;
- 	bool gpu_mux_mode_available;
-+	u32 gpu_mux_dev;
- 
- 	/* Tunables provided by ASUS for gaming laptops */
- 	bool ppt_pl2_sppt_available;
-@@ -682,7 +683,7 @@ static ssize_t dgpu_disable_store(struct device *dev,
- 		return -EINVAL;
- 
- 	if (asus->gpu_mux_mode_available) {
--		result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_GPU_MUX);
-+		result = asus_wmi_get_devstate_simple(asus, asus->gpu_mux_dev);
- 		if (result < 0)
- 			/* An error here may signal greater failure of GPU handling */
- 			return result;
-@@ -748,7 +749,7 @@ static ssize_t egpu_enable_store(struct device *dev,
- 	}
- 
- 	if (asus->gpu_mux_mode_available) {
--		result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_GPU_MUX);
-+		result = asus_wmi_get_devstate_simple(asus, asus->gpu_mux_dev);
- 		if (result < 0) {
- 			/* An error here may signal greater failure of GPU handling */
- 			pr_warn("Failed to get gpu mux status: %d\n", result);
-@@ -801,7 +802,7 @@ static ssize_t gpu_mux_mode_show(struct device *dev,
- 	struct asus_wmi *asus = dev_get_drvdata(dev);
- 	int result;
- 
--	result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_GPU_MUX);
-+	result = asus_wmi_get_devstate_simple(asus, asus->gpu_mux_dev);
- 	if (result < 0)
- 		return result;
- 
-@@ -847,7 +848,7 @@ static ssize_t gpu_mux_mode_store(struct device *dev,
- 		}
- 	}
- 
--	err = asus_wmi_set_devstate(ASUS_WMI_DEVID_GPU_MUX, optimus, &result);
-+	err = asus_wmi_set_devstate(asus->gpu_mux_dev, optimus, &result);
- 	if (err) {
- 		dev_err(dev, "Failed to set GPU MUX mode: %d\n", err);
- 		return err;
-@@ -4514,7 +4515,6 @@ static int asus_wmi_add(struct platform_device *pdev)
- 	asus->egpu_enable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_EGPU);
- 	asus->egpu_connect_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_EGPU_CONNECTED);
- 	asus->dgpu_disable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_DGPU);
--	asus->gpu_mux_mode_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_GPU_MUX);
- 	asus->kbd_rgb_mode_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_MODE);
- 	asus->kbd_rgb_state_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_STATE);
- 	asus->ppt_pl2_sppt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PPT_PL2_SPPT);
-@@ -4536,6 +4536,14 @@ static int asus_wmi_add(struct platform_device *pdev)
- 		asus->mini_led_dev_id = ASUS_WMI_DEVID_MINI_LED_MODE2;
- 	}
- 
-+	if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_GPU_MUX)) {
-+		asus->gpu_mux_mode_available = true;
-+		asus->gpu_mux_dev = ASUS_WMI_DEVID_GPU_MUX;
-+	} else if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_GPU_MUX_VIVO)) {
-+		asus->gpu_mux_mode_available = true;
-+		asus->gpu_mux_dev = ASUS_WMI_DEVID_GPU_MUX_VIVO;
-+	}
-+
- 	err = fan_boost_mode_check_present(asus);
- 	if (err)
- 		goto fail_fan_boost_mode;
-diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
-index 9cadce10ad9a..b48b024dd844 100644
---- a/include/linux/platform_data/x86/asus-wmi.h
-+++ b/include/linux/platform_data/x86/asus-wmi.h
-@@ -128,6 +128,7 @@
- 
- /* gpu mux switch, 0 = dGPU, 1 = Optimus */
- #define ASUS_WMI_DEVID_GPU_MUX		0x00090016
-+#define ASUS_WMI_DEVID_GPU_MUX_VIVO	0x00090026
- 
- /* TUF laptop RGB modes/colours */
- #define ASUS_WMI_DEVID_TUF_RGB_MODE	0x00100056
--- 
-2.44.0
-
diff --git a/patches/asuslinux/0003-platform-x86-asus-wmi-add-support-variant-of-TUF-RGB.patch b/patches/asuslinux/0003-platform-x86-asus-wmi-add-support-variant-of-TUF-RGB.patch
deleted file mode 100644
index 1fd2ce7..0000000
--- a/patches/asuslinux/0003-platform-x86-asus-wmi-add-support-variant-of-TUF-RGB.patch
+++ /dev/null
@@ -1,74 +0,0 @@
-From 9b038d6db81b457738cf65e43f401ccb8bf505e6 Mon Sep 17 00:00:00 2001
-From: "Luke D. Jones" <luke@ljones.dev>
-Date: Sun, 10 Mar 2024 17:20:02 +1300
-Subject: [PATCH 3/5] platform/x86: asus-wmi: add support variant of TUF RGB
-
-Adds support for a second TUF RGB wmi call that some versions of the TUF
-laptop come with. Also adjusts existing support to select whichever is
-available.
-
-Signed-off-by: Luke D. Jones <luke@ljones.dev>
----
- drivers/platform/x86/asus-wmi.c            | 12 +++++++++++-
- include/linux/platform_data/x86/asus-wmi.h |  1 +
- 2 files changed, 12 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
-index b9a2fb8007c0..e1100726de53 100644
---- a/drivers/platform/x86/asus-wmi.c
-+++ b/drivers/platform/x86/asus-wmi.c
-@@ -280,6 +280,7 @@ struct asus_wmi {
- 	bool nv_temp_tgt_available;
- 
- 	bool kbd_rgb_mode_available;
-+	u32 kbd_rgb_dev;
- 	bool kbd_rgb_state_available;
- 
- 	bool throttle_thermal_policy_available;
-@@ -870,6 +871,7 @@ static ssize_t kbd_rgb_mode_store(struct device *dev,
- 				 struct device_attribute *attr,
- 				 const char *buf, size_t count)
- {
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
- 	u32 cmd, mode, r, g, b, speed;
- 	int err;
- 
-@@ -906,7 +908,7 @@ static ssize_t kbd_rgb_mode_store(struct device *dev,
- 		speed = 0xeb;
- 	}
- 
--	err = asus_wmi_evaluate_method3(ASUS_WMI_METHODID_DEVS, ASUS_WMI_DEVID_TUF_RGB_MODE,
-+	err = asus_wmi_evaluate_method3(ASUS_WMI_METHODID_DEVS, asus->kbd_rgb_dev,
- 			cmd | (mode << 8) | (r << 16) | (g << 24), b | (speed << 8), NULL);
- 	if (err)
- 		return err;
-@@ -4544,6 +4546,14 @@ static int asus_wmi_add(struct platform_device *pdev)
- 		asus->gpu_mux_dev = ASUS_WMI_DEVID_GPU_MUX_VIVO;
- 	}
- 
-+	if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_MODE)) {
-+		asus->kbd_rgb_mode_available = true;
-+		asus->kbd_rgb_dev = ASUS_WMI_DEVID_TUF_RGB_MODE;
-+	} else if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_MODE2)) {
-+		asus->kbd_rgb_mode_available = true;
-+		asus->kbd_rgb_dev = ASUS_WMI_DEVID_TUF_RGB_MODE2;
-+	}
-+
- 	err = fan_boost_mode_check_present(asus);
- 	if (err)
- 		goto fail_fan_boost_mode;
-diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
-index b48b024dd844..3e9a01467c67 100644
---- a/include/linux/platform_data/x86/asus-wmi.h
-+++ b/include/linux/platform_data/x86/asus-wmi.h
-@@ -132,6 +132,7 @@
- 
- /* TUF laptop RGB modes/colours */
- #define ASUS_WMI_DEVID_TUF_RGB_MODE	0x00100056
-+#define ASUS_WMI_DEVID_TUF_RGB_MODE2	0x0010005A
- 
- /* TUF laptop RGB power/state */
- #define ASUS_WMI_DEVID_TUF_RGB_STATE	0x00100057
--- 
-2.44.0
-
diff --git a/patches/asuslinux/0004-platform-x86-asus-wmi-support-toggling-POST-sound.patch b/patches/asuslinux/0004-platform-x86-asus-wmi-support-toggling-POST-sound.patch
deleted file mode 100644
index 2b0f7cf..0000000
--- a/patches/asuslinux/0004-platform-x86-asus-wmi-support-toggling-POST-sound.patch
+++ /dev/null
@@ -1,139 +0,0 @@
-From 1c0f375634b3ddbcf479c4ddb81639e397795802 Mon Sep 17 00:00:00 2001
-From: "Luke D. Jones" <luke@ljones.dev>
-Date: Sun, 10 Mar 2024 19:03:11 +1300
-Subject: [PATCH 4/5] platform/x86: asus-wmi: support toggling POST sound
-
-Add support for toggling the BIOS POST sound on some ASUS laptops.
-
-Signed-off-by: Luke D. Jones <luke@ljones.dev>
----
- .../ABI/testing/sysfs-platform-asus-wmi       |  7 +++
- drivers/platform/x86/asus-wmi.c               | 54 +++++++++++++++++++
- include/linux/platform_data/x86/asus-wmi.h    |  3 ++
- 3 files changed, 64 insertions(+)
-
-diff --git a/Documentation/ABI/testing/sysfs-platform-asus-wmi b/Documentation/ABI/testing/sysfs-platform-asus-wmi
-index e32b4f0ae15f..f3c53b7453f0 100644
---- a/Documentation/ABI/testing/sysfs-platform-asus-wmi
-+++ b/Documentation/ABI/testing/sysfs-platform-asus-wmi
-@@ -194,3 +194,10 @@ Contact:	"Luke Jones" <luke@ljones.dev>
- Description:
- 		Set the target temperature limit of the Nvidia dGPU:
- 			* min=75, max=87
-+
-+What:		/sys/devices/platform/<platform>/boot_sound
-+Date:		Jun 2023
-+KernelVersion:	6.9
-+Contact:	"Luke Jones" <luke@ljones.dev>
-+Description:
-+		Set if the BIOS POST sound is played on boot.
-diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
-index e1100726de53..e4341abb71e0 100644
---- a/drivers/platform/x86/asus-wmi.c
-+++ b/drivers/platform/x86/asus-wmi.c
-@@ -297,6 +297,7 @@ struct asus_wmi {
- 	// The RSOC controls the maximum charging percentage.
- 	bool battery_rsoc_available;
- 
-+	bool boot_sound_available;
- 	bool panel_overdrive_available;
- 	bool mini_led_mode_available;
- 	u32 mini_led_dev_id;
-@@ -2106,6 +2107,55 @@ static ssize_t panel_od_store(struct device *dev,
- }
- static DEVICE_ATTR_RW(panel_od);
- 
-+/* Bootup sound ***************************************************************/
-+
-+static ssize_t boot_sound_show(struct device *dev,
-+			     struct device_attribute *attr, char *buf)
-+{
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
-+	int result;
-+
-+	result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_BOOT_SOUND);
-+	if (result < 0)
-+		return result;
-+
-+	return sysfs_emit(buf, "%d\n", result);
-+}
-+
-+static ssize_t boot_sound_store(struct device *dev,
-+			      struct device_attribute *attr,
-+			      const char *buf, size_t count)
-+{
-+	int result, err;
-+	u32 snd;
-+
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
-+
-+	result = kstrtou32(buf, 10, &snd);
-+	if (result)
-+		return result;
-+
-+	if (snd > 1)
-+		return -EINVAL;
-+
-+	err = asus_wmi_set_devstate(ASUS_WMI_DEVID_BOOT_SOUND, snd, &result);
-+
-+	if (err) {
-+		pr_warn("Failed to set boot sound: %d\n", err);
-+		return err;
-+	}
-+
-+	if (result > 1) {
-+		pr_warn("Failed to set panel boot sound (result): 0x%x\n", result);
-+		return -EIO;
-+	}
-+
-+	sysfs_notify(&asus->platform_device->dev.kobj, NULL, "boot_sound");
-+
-+	return count;
-+}
-+static DEVICE_ATTR_RW(boot_sound);
-+
- /* Mini-LED mode **************************************************************/
- static ssize_t mini_led_mode_show(struct device *dev,
- 				   struct device_attribute *attr, char *buf)
-@@ -4203,6 +4253,7 @@ static struct attribute *platform_attributes[] = {
- 	&dev_attr_ppt_platform_sppt.attr,
- 	&dev_attr_nv_dynamic_boost.attr,
- 	&dev_attr_nv_temp_target.attr,
-+	&dev_attr_boot_sound.attr,
- 	&dev_attr_panel_od.attr,
- 	&dev_attr_mini_led_mode.attr,
- 	&dev_attr_available_mini_led_mode.attr,
-@@ -4255,6 +4306,8 @@ static umode_t asus_sysfs_is_visible(struct kobject *kobj,
- 		ok = asus->nv_dyn_boost_available;
- 	else if (attr == &dev_attr_nv_temp_target.attr)
- 		ok = asus->nv_temp_tgt_available;
-+	else if (attr == &dev_attr_boot_sound.attr)
-+		ok = asus->boot_sound_available;
- 	else if (attr == &dev_attr_panel_od.attr)
- 		ok = asus->panel_overdrive_available;
- 	else if (attr == &dev_attr_mini_led_mode.attr)
-@@ -4526,6 +4579,7 @@ static int asus_wmi_add(struct platform_device *pdev)
- 	asus->ppt_plat_sppt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PPT_PLAT_SPPT);
- 	asus->nv_dyn_boost_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_NV_DYN_BOOST);
- 	asus->nv_temp_tgt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_NV_THERM_TARGET);
-+	asus->boot_sound_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_BOOT_SOUND);
- 	asus->panel_overdrive_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PANEL_OD);
- 	asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE)
- 						&& dmi_match(DMI_BOARD_NAME, "RC71L");
-diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
-index 3e9a01467c67..3eb5cd6773ad 100644
---- a/include/linux/platform_data/x86/asus-wmi.h
-+++ b/include/linux/platform_data/x86/asus-wmi.h
-@@ -137,6 +137,9 @@
- /* TUF laptop RGB power/state */
- #define ASUS_WMI_DEVID_TUF_RGB_STATE	0x00100057
- 
-+/* Bootup sound control */
-+#define ASUS_WMI_DEVID_BOOT_SOUND	0x00130022
-+
- /* DSTS masks */
- #define ASUS_WMI_DSTS_STATUS_BIT	0x00000001
- #define ASUS_WMI_DSTS_UNKNOWN_BIT	0x00000002
--- 
-2.44.0
-
diff --git a/patches/asuslinux/0005-platform-x86-asus-wmi-store-a-min-default-for-ppt-op.patch b/patches/asuslinux/0005-platform-x86-asus-wmi-store-a-min-default-for-ppt-op.patch
deleted file mode 100644
index 54402f0..0000000
--- a/patches/asuslinux/0005-platform-x86-asus-wmi-store-a-min-default-for-ppt-op.patch
+++ /dev/null
@@ -1,342 +0,0 @@
-From 6045f385154a2c0a4aaa692d13bb0fa14bbe1d12 Mon Sep 17 00:00:00 2001
-From: "Luke D. Jones" <luke@ljones.dev>
-Date: Mon, 11 Mar 2024 12:15:46 +1300
-Subject: [PATCH 5/5] platform/x86: asus-wmi: store a min default for ppt
- options
-
-Laptops with any of the ppt or nv tunables default to the minimum setting
-on boot so we can safely assume a stored value is correct.
-
-This patch adds storing of those values in the local struct, and enables
-reading of those values back.
-
-Secondary to the above it renames some internal variables to be more
-consistent (which makes code grepping show all related parts)
-
-Signed-off-by: Luke D. Jones <luke@ljones.dev>
----
- drivers/platform/x86/asus-wmi.c | 141 +++++++++++++++++++++++++-------
- 1 file changed, 111 insertions(+), 30 deletions(-)
-
-diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
-index e4341abb71e0..482e23b55e1e 100644
---- a/drivers/platform/x86/asus-wmi.c
-+++ b/drivers/platform/x86/asus-wmi.c
-@@ -272,12 +272,19 @@ struct asus_wmi {
- 
- 	/* Tunables provided by ASUS for gaming laptops */
- 	bool ppt_pl2_sppt_available;
-+	u32 ppt_pl2_sppt;
- 	bool ppt_pl1_spl_available;
-+	u32 ppt_pl1_spl;
- 	bool ppt_apu_sppt_available;
--	bool ppt_plat_sppt_available;
-+	u32 ppt_apu_sppt;
-+	bool ppt_platform_sppt_available;
-+	u32 ppt_platform_sppt;
- 	bool ppt_fppt_available;
--	bool nv_dyn_boost_available;
--	bool nv_temp_tgt_available;
-+	u32 ppt_fppt;
-+	bool nv_dynamic_boost_available;
-+	u32 nv_dynamic_boost;
-+	bool nv_temp_target_available;
-+	u32 nv_temp_target;
- 
- 	bool kbd_rgb_mode_available;
- 	u32 kbd_rgb_dev;
-@@ -999,11 +1006,10 @@ static ssize_t ppt_pl2_sppt_store(struct device *dev,
- 				    struct device_attribute *attr,
- 				    const char *buf, size_t count)
- {
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
- 	int result, err;
- 	u32 value;
- 
--	struct asus_wmi *asus = dev_get_drvdata(dev);
--
- 	result = kstrtou32(buf, 10, &value);
- 	if (result)
- 		return result;
-@@ -1022,22 +1028,31 @@ static ssize_t ppt_pl2_sppt_store(struct device *dev,
- 		return -EIO;
- 	}
- 
-+	asus->ppt_pl2_sppt = value;
- 	sysfs_notify(&asus->platform_device->dev.kobj, NULL, "ppt_pl2_sppt");
- 
- 	return count;
- }
--static DEVICE_ATTR_WO(ppt_pl2_sppt);
-+
-+static ssize_t ppt_pl2_sppt_show(struct device *dev,
-+				       struct device_attribute *attr,
-+				       char *buf)
-+{
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
-+
-+	return sysfs_emit(buf, "%d\n", asus->ppt_pl2_sppt);
-+}
-+static DEVICE_ATTR_RW(ppt_pl2_sppt);
- 
- /* Tunable: PPT, Intel=PL1, AMD=SPL ******************************************/
- static ssize_t ppt_pl1_spl_store(struct device *dev,
- 				    struct device_attribute *attr,
- 				    const char *buf, size_t count)
- {
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
- 	int result, err;
- 	u32 value;
- 
--	struct asus_wmi *asus = dev_get_drvdata(dev);
--
- 	result = kstrtou32(buf, 10, &value);
- 	if (result)
- 		return result;
-@@ -1056,22 +1071,30 @@ static ssize_t ppt_pl1_spl_store(struct device *dev,
- 		return -EIO;
- 	}
- 
-+	asus->ppt_pl1_spl = value;
- 	sysfs_notify(&asus->platform_device->dev.kobj, NULL, "ppt_pl1_spl");
- 
- 	return count;
- }
--static DEVICE_ATTR_WO(ppt_pl1_spl);
-+static ssize_t ppt_pl1_spl_show(struct device *dev,
-+				 struct device_attribute *attr,
-+				 char *buf)
-+{
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
-+
-+	return sysfs_emit(buf, "%d\n", asus->ppt_pl1_spl);
-+}
-+static DEVICE_ATTR_RW(ppt_pl1_spl);
- 
- /* Tunable: PPT APU FPPT ******************************************************/
- static ssize_t ppt_fppt_store(struct device *dev,
- 				    struct device_attribute *attr,
- 				    const char *buf, size_t count)
- {
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
- 	int result, err;
- 	u32 value;
- 
--	struct asus_wmi *asus = dev_get_drvdata(dev);
--
- 	result = kstrtou32(buf, 10, &value);
- 	if (result)
- 		return result;
-@@ -1090,22 +1113,31 @@ static ssize_t ppt_fppt_store(struct device *dev,
- 		return -EIO;
- 	}
- 
-+	asus->ppt_fppt = value;
- 	sysfs_notify(&asus->platform_device->dev.kobj, NULL, "ppt_fpu_sppt");
- 
- 	return count;
- }
--static DEVICE_ATTR_WO(ppt_fppt);
-+
-+static ssize_t ppt_fppt_show(struct device *dev,
-+				struct device_attribute *attr,
-+				char *buf)
-+{
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
-+
-+	return sysfs_emit(buf, "%d\n", asus->ppt_fppt);
-+}
-+static DEVICE_ATTR_RW(ppt_fppt);
- 
- /* Tunable: PPT APU SPPT *****************************************************/
- static ssize_t ppt_apu_sppt_store(struct device *dev,
- 				    struct device_attribute *attr,
- 				    const char *buf, size_t count)
- {
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
- 	int result, err;
- 	u32 value;
- 
--	struct asus_wmi *asus = dev_get_drvdata(dev);
--
- 	result = kstrtou32(buf, 10, &value);
- 	if (result)
- 		return result;
-@@ -1124,22 +1156,31 @@ static ssize_t ppt_apu_sppt_store(struct device *dev,
- 		return -EIO;
- 	}
- 
-+	asus->ppt_apu_sppt = value;
- 	sysfs_notify(&asus->platform_device->dev.kobj, NULL, "ppt_apu_sppt");
- 
- 	return count;
- }
--static DEVICE_ATTR_WO(ppt_apu_sppt);
-+
-+static ssize_t ppt_apu_sppt_show(struct device *dev,
-+			     struct device_attribute *attr,
-+			     char *buf)
-+{
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
-+
-+	return sysfs_emit(buf, "%d\n", asus->ppt_apu_sppt);
-+}
-+static DEVICE_ATTR_RW(ppt_apu_sppt);
- 
- /* Tunable: PPT platform SPPT ************************************************/
- static ssize_t ppt_platform_sppt_store(struct device *dev,
- 				    struct device_attribute *attr,
- 				    const char *buf, size_t count)
- {
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
- 	int result, err;
- 	u32 value;
- 
--	struct asus_wmi *asus = dev_get_drvdata(dev);
--
- 	result = kstrtou32(buf, 10, &value);
- 	if (result)
- 		return result;
-@@ -1158,22 +1199,31 @@ static ssize_t ppt_platform_sppt_store(struct device *dev,
- 		return -EIO;
- 	}
- 
-+	asus->ppt_platform_sppt = value;
- 	sysfs_notify(&asus->platform_device->dev.kobj, NULL, "ppt_platform_sppt");
- 
- 	return count;
- }
--static DEVICE_ATTR_WO(ppt_platform_sppt);
-+
-+static ssize_t ppt_platform_sppt_show(struct device *dev,
-+				 struct device_attribute *attr,
-+				 char *buf)
-+{
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
-+
-+	return sysfs_emit(buf, "%d\n", asus->ppt_platform_sppt);
-+}
-+static DEVICE_ATTR_RW(ppt_platform_sppt);
- 
- /* Tunable: NVIDIA dynamic boost *********************************************/
- static ssize_t nv_dynamic_boost_store(struct device *dev,
- 				    struct device_attribute *attr,
- 				    const char *buf, size_t count)
- {
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
- 	int result, err;
- 	u32 value;
- 
--	struct asus_wmi *asus = dev_get_drvdata(dev);
--
- 	result = kstrtou32(buf, 10, &value);
- 	if (result)
- 		return result;
-@@ -1192,22 +1242,31 @@ static ssize_t nv_dynamic_boost_store(struct device *dev,
- 		return -EIO;
- 	}
- 
-+	asus->nv_dynamic_boost = value;
- 	sysfs_notify(&asus->platform_device->dev.kobj, NULL, "nv_dynamic_boost");
- 
- 	return count;
- }
--static DEVICE_ATTR_WO(nv_dynamic_boost);
-+
-+static ssize_t nv_dynamic_boost_show(struct device *dev,
-+				      struct device_attribute *attr,
-+				      char *buf)
-+{
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
-+
-+	return sysfs_emit(buf, "%d\n", asus->nv_dynamic_boost);
-+}
-+static DEVICE_ATTR_RW(nv_dynamic_boost);
- 
- /* Tunable: NVIDIA temperature target ****************************************/
- static ssize_t nv_temp_target_store(struct device *dev,
- 				    struct device_attribute *attr,
- 				    const char *buf, size_t count)
- {
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
- 	int result, err;
- 	u32 value;
- 
--	struct asus_wmi *asus = dev_get_drvdata(dev);
--
- 	result = kstrtou32(buf, 10, &value);
- 	if (result)
- 		return result;
-@@ -1226,11 +1285,21 @@ static ssize_t nv_temp_target_store(struct device *dev,
- 		return -EIO;
- 	}
- 
-+	asus->nv_temp_target = value;
- 	sysfs_notify(&asus->platform_device->dev.kobj, NULL, "nv_temp_target");
- 
- 	return count;
- }
--static DEVICE_ATTR_WO(nv_temp_target);
-+
-+static ssize_t nv_temp_target_show(struct device *dev,
-+				     struct device_attribute *attr,
-+				     char *buf)
-+{
-+	struct asus_wmi *asus = dev_get_drvdata(dev);
-+
-+	return sysfs_emit(buf, "%d\n", asus->nv_temp_target);
-+}
-+static DEVICE_ATTR_RW(nv_temp_target);
- 
- /* Battery ********************************************************************/
- 
-@@ -4301,11 +4370,11 @@ static umode_t asus_sysfs_is_visible(struct kobject *kobj,
- 	else if (attr == &dev_attr_ppt_apu_sppt.attr)
- 		ok = asus->ppt_apu_sppt_available;
- 	else if (attr == &dev_attr_ppt_platform_sppt.attr)
--		ok = asus->ppt_plat_sppt_available;
-+		ok = asus->ppt_platform_sppt_available;
- 	else if (attr == &dev_attr_nv_dynamic_boost.attr)
--		ok = asus->nv_dyn_boost_available;
-+		ok = asus->nv_dynamic_boost_available;
- 	else if (attr == &dev_attr_nv_temp_target.attr)
--		ok = asus->nv_temp_tgt_available;
-+		ok = asus->nv_temp_target_available;
- 	else if (attr == &dev_attr_boot_sound.attr)
- 		ok = asus->boot_sound_available;
- 	else if (attr == &dev_attr_panel_od.attr)
-@@ -4566,6 +4635,15 @@ static int asus_wmi_add(struct platform_device *pdev)
- 	if (err)
- 		goto fail_platform;
- 
-+	/* ensure defaults for tunables */
-+	asus->ppt_pl2_sppt = 5;
-+	asus->ppt_pl1_spl = 5;
-+	asus->ppt_apu_sppt = 5;
-+	asus->ppt_platform_sppt = 5;
-+	asus->ppt_fppt = 5;
-+	asus->nv_dynamic_boost = 5;
-+	asus->nv_temp_target = 75;
-+
- 	asus->charge_mode_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_CHARGE_MODE);
- 	asus->egpu_enable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_EGPU);
- 	asus->egpu_connect_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_EGPU_CONNECTED);
-@@ -4576,9 +4654,12 @@ static int asus_wmi_add(struct platform_device *pdev)
- 	asus->ppt_pl1_spl_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PPT_PL1_SPL);
- 	asus->ppt_fppt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PPT_FPPT);
- 	asus->ppt_apu_sppt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PPT_APU_SPPT);
--	asus->ppt_plat_sppt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PPT_PLAT_SPPT);
--	asus->nv_dyn_boost_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_NV_DYN_BOOST);
--	asus->nv_temp_tgt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_NV_THERM_TARGET);
-+	asus->ppt_platform_sppt_available = asus_wmi_dev_is_present(asus,
-+								    ASUS_WMI_DEVID_PPT_PLAT_SPPT);
-+	asus->nv_dynamic_boost_available = asus_wmi_dev_is_present(asus,
-+								   ASUS_WMI_DEVID_NV_DYN_BOOST);
-+	asus->nv_temp_target_available = asus_wmi_dev_is_present(asus,
-+								 ASUS_WMI_DEVID_NV_THERM_TARGET);
- 	asus->boot_sound_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_BOOT_SOUND);
- 	asus->panel_overdrive_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PANEL_OD);
- 	asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE)
--- 
-2.44.0
-
diff --git a/patches/cachyos/0001-bore-cachy.patch b/patches/cachyos/0001-bore-cachy.patch
deleted file mode 100644
index a49989b..0000000
--- a/patches/cachyos/0001-bore-cachy.patch
+++ /dev/null
@@ -1,929 +0,0 @@
-From fea4a499d6783faff756fe852c645f90aa73ccf7 Mon Sep 17 00:00:00 2001
-From: Peter Jung <admin@ptr1337.dev>
-Date: Mon, 15 Jul 2024 13:57:19 +0200
-Subject: [PATCH] bore-cachy
-
-Signed-off-by: Peter Jung <admin@ptr1337.dev>
----
- include/linux/sched.h   |  10 ++
- init/Kconfig            |  17 +++
- kernel/Kconfig.hz       |  16 +++
- kernel/sched/core.c     | 143 ++++++++++++++++++
- kernel/sched/debug.c    |  60 +++++++-
- kernel/sched/fair.c     | 310 ++++++++++++++++++++++++++++++++++++----
- kernel/sched/features.h |  22 ++-
- kernel/sched/sched.h    |   7 +
- 8 files changed, 555 insertions(+), 30 deletions(-)
-
-diff --git a/include/linux/sched.h b/include/linux/sched.h
-index a5f4b48fca18..df62c56b13ae 100644
---- a/include/linux/sched.h
-+++ b/include/linux/sched.h
-@@ -547,6 +547,16 @@ struct sched_entity {
- 	u64				sum_exec_runtime;
- 	u64				prev_sum_exec_runtime;
- 	u64				vruntime;
-+#ifdef CONFIG_SCHED_BORE
-+	u64				burst_time;
-+	u8				prev_burst_penalty;
-+	u8				curr_burst_penalty;
-+	u8				burst_penalty;
-+	u8				burst_score;
-+	u8				child_burst;
-+	u32				child_burst_cnt;
-+	u64				child_burst_last_cached;
-+#endif // CONFIG_SCHED_BORE
- 	s64				vlag;
- 	u64				slice;
- 
-diff --git a/init/Kconfig b/init/Kconfig
-index 3ba6142f2f42..2966dec64df7 100644
---- a/init/Kconfig
-+++ b/init/Kconfig
-@@ -1303,6 +1303,23 @@ config CHECKPOINT_RESTORE
- 
- 	  If unsure, say N here.
- 
-+config SCHED_BORE
-+	bool "Burst-Oriented Response Enhancer"
-+	default y
-+	help
-+	  In Desktop and Mobile computing, one might prefer interactive
-+	  tasks to keep responsive no matter what they run in the background.
-+
-+	  Enabling this kernel feature modifies the scheduler to discriminate
-+	  tasks by their burst time (runtime since it last went sleeping or
-+	  yielding state) and prioritize those that run less bursty.
-+	  Such tasks usually include window compositor, widgets backend,
-+	  terminal emulator, video playback, games and so on.
-+	  With a little impact to scheduling fairness, it may improve
-+	  responsiveness especially under heavy background workload.
-+
-+	  If unsure, say Y here.
-+
- config SCHED_AUTOGROUP
- 	bool "Automatic process group scheduling"
- 	select CGROUPS
-diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
-index 0f78364efd4f..b50189ee5b93 100644
---- a/kernel/Kconfig.hz
-+++ b/kernel/Kconfig.hz
-@@ -79,5 +79,21 @@ config HZ
- 	default 750 if HZ_750
- 	default 1000 if HZ_1000
- 
-+config MIN_BASE_SLICE_NS
-+	int "Default value for min_base_slice_ns"
-+	default 2000000
-+	help
-+	 The BORE Scheduler automatically calculates the optimal base
-+	 slice for the configured HZ using the following equation:
-+	 
-+	 base_slice_ns = max(min_base_slice_ns, 1000000000/HZ)
-+	 
-+	 This option sets the default lower bound limit of the base slice
-+	 to prevent the loss of task throughput due to overscheduling.
-+	 
-+	 Setting this value too high can cause the system to boot with
-+	 an unnecessarily large base slice, resulting in high scheduling
-+	 latency and poor system responsiveness.
-+
- config SCHED_HRTICK
- 	def_bool HIGH_RES_TIMERS
-diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index 59ce0841eb1f..c5d10b464779 100644
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -4515,6 +4515,138 @@ int wake_up_state(struct task_struct *p, unsigned int state)
- 	return try_to_wake_up(p, state, 0);
- }
- 
-+#ifdef CONFIG_SCHED_BORE
-+extern u8   sched_burst_fork_atavistic;
-+extern uint sched_burst_cache_lifetime;
-+
-+static void __init sched_init_bore(void) {
-+	init_task.se.burst_time = 0;
-+	init_task.se.prev_burst_penalty = 0;
-+	init_task.se.curr_burst_penalty = 0;
-+	init_task.se.burst_penalty = 0;
-+	init_task.se.burst_score = 0;
-+	init_task.se.child_burst_last_cached = 0;
-+}
-+
-+inline void sched_fork_bore(struct task_struct *p) {
-+	p->se.burst_time = 0;
-+	p->se.curr_burst_penalty = 0;
-+	p->se.burst_score = 0;
-+	p->se.child_burst_last_cached = 0;
-+}
-+
-+static u32 count_child_tasks(struct task_struct *p) {
-+	struct task_struct *child;
-+	u32 cnt = 0;
-+	list_for_each_entry(child, &p->children, sibling) {cnt++;}
-+	return cnt;
-+}
-+
-+static inline bool task_is_inheritable(struct task_struct *p) {
-+	return (p->sched_class == &fair_sched_class);
-+}
-+
-+static inline bool child_burst_cache_expired(struct task_struct *p, u64 now) {
-+	u64 expiration_time =
-+		p->se.child_burst_last_cached + sched_burst_cache_lifetime;
-+	return ((s64)(expiration_time - now) < 0);
-+}
-+
-+static void __update_child_burst_cache(
-+	struct task_struct *p, u32 cnt, u32 sum, u64 now) {
-+	u8 avg = 0;
-+	if (cnt) avg = sum / cnt;
-+	p->se.child_burst = max(avg, p->se.burst_penalty);
-+	p->se.child_burst_cnt = cnt;
-+	p->se.child_burst_last_cached = now;
-+}
-+
-+static inline void update_child_burst_direct(struct task_struct *p, u64 now) {
-+	struct task_struct *child;
-+	u32 cnt = 0;
-+	u32 sum = 0;
-+
-+	list_for_each_entry(child, &p->children, sibling) {
-+		if (!task_is_inheritable(child)) continue;
-+		cnt++;
-+		sum += child->se.burst_penalty;
-+	}
-+
-+	__update_child_burst_cache(p, cnt, sum, now);
-+}
-+
-+static inline u8 __inherit_burst_direct(struct task_struct *p, u64 now) {
-+	struct task_struct *parent = p->real_parent;
-+	if (child_burst_cache_expired(parent, now))
-+		update_child_burst_direct(parent, now);
-+
-+	return parent->se.child_burst;
-+}
-+
-+static void update_child_burst_topological(
-+	struct task_struct *p, u64 now, u32 depth, u32 *acnt, u32 *asum) {
-+	struct task_struct *child, *dec;
-+	u32 cnt = 0, dcnt = 0;
-+	u32 sum = 0;
-+
-+	list_for_each_entry(child, &p->children, sibling) {
-+		dec = child;
-+		while ((dcnt = count_child_tasks(dec)) == 1)
-+			dec = list_first_entry(&dec->children, struct task_struct, sibling);
-+		
-+		if (!dcnt || !depth) {
-+			if (!task_is_inheritable(dec)) continue;
-+			cnt++;
-+			sum += dec->se.burst_penalty;
-+			continue;
-+		}
-+		if (!child_burst_cache_expired(dec, now)) {
-+			cnt += dec->se.child_burst_cnt;
-+			sum += (u32)dec->se.child_burst * dec->se.child_burst_cnt;
-+			continue;
-+		}
-+		update_child_burst_topological(dec, now, depth - 1, &cnt, &sum);
-+	}
-+
-+	__update_child_burst_cache(p, cnt, sum, now);
-+	*acnt += cnt;
-+	*asum += sum;
-+}
-+
-+static inline u8 __inherit_burst_topological(struct task_struct *p, u64 now) {
-+	struct task_struct *anc = p->real_parent;
-+	u32 cnt = 0, sum = 0;
-+
-+	while (anc->real_parent != anc && count_child_tasks(anc) == 1)
-+		anc = anc->real_parent;
-+
-+	if (child_burst_cache_expired(anc, now))
-+		update_child_burst_topological(
-+			anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum);
-+
-+	return anc->se.child_burst;
-+}
-+
-+static inline void inherit_burst(struct task_struct *p) {
-+	u8 burst_cache;
-+	u64 now = ktime_get_ns();
-+
-+	read_lock(&tasklist_lock);
-+	burst_cache = likely(sched_burst_fork_atavistic)?
-+		__inherit_burst_topological(p, now):
-+		__inherit_burst_direct(p, now);
-+	read_unlock(&tasklist_lock);
-+
-+	p->se.prev_burst_penalty = max(p->se.prev_burst_penalty, burst_cache);
-+}
-+
-+static void sched_post_fork_bore(struct task_struct *p) {
-+	if (p->sched_class == &fair_sched_class)
-+		inherit_burst(p);
-+	p->se.burst_penalty = p->se.prev_burst_penalty;
-+}
-+#endif // CONFIG_SCHED_BORE
-+
- /*
-  * Perform scheduler related setup for a newly forked process p.
-  * p is forked by current.
-@@ -4531,6 +4663,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
- 	p->se.prev_sum_exec_runtime	= 0;
- 	p->se.nr_migrations		= 0;
- 	p->se.vruntime			= 0;
-+#ifdef CONFIG_SCHED_BORE
-+	sched_fork_bore(p);
-+#endif // CONFIG_SCHED_BORE
- 	p->se.vlag			= 0;
- 	p->se.slice			= sysctl_sched_base_slice;
- 	INIT_LIST_HEAD(&p->se.group_node);
-@@ -4846,6 +4981,9 @@ void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
- 
- void sched_post_fork(struct task_struct *p)
- {
-+#ifdef CONFIG_SCHED_BORE
-+	sched_post_fork_bore(p);
-+#endif // CONFIG_SCHED_BORE
- 	uclamp_post_fork(p);
- }
- 
-@@ -9933,6 +10071,11 @@ void __init sched_init(void)
- 	BUG_ON(&dl_sched_class != &stop_sched_class + 1);
- #endif
- 
-+#ifdef CONFIG_SCHED_BORE
-+	sched_init_bore();
-+	printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 5.2.5 by Masahito Suzuki");
-+#endif // CONFIG_SCHED_BORE
-+
- 	wait_bit_init();
- 
- #ifdef CONFIG_FAIR_GROUP_SCHED
-diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
-index c1eb9a1afd13..e2da8d773877 100644
---- a/kernel/sched/debug.c
-+++ b/kernel/sched/debug.c
-@@ -167,7 +167,52 @@ static const struct file_operations sched_feat_fops = {
- };
- 
- #ifdef CONFIG_SMP
-+#ifdef CONFIG_SCHED_BORE
-+static ssize_t sched_min_base_slice_write(struct file *filp, const char __user *ubuf,
-+				   size_t cnt, loff_t *ppos)
-+{
-+	char buf[16];
-+	unsigned int value;
-+
-+	if (cnt > 15)
-+		cnt = 15;
-+
-+	if (copy_from_user(&buf, ubuf, cnt))
-+		return -EFAULT;
-+	buf[cnt] = '\0';
-+
-+	if (kstrtouint(buf, 10, &value))
-+		return -EINVAL;
- 
-+	if (!value)
-+		return -EINVAL;
-+
-+	sysctl_sched_min_base_slice = value;
-+	sched_update_min_base_slice();
-+
-+	*ppos += cnt;
-+	return cnt;
-+}
-+
-+static int sched_min_base_slice_show(struct seq_file *m, void *v)
-+{
-+	seq_printf(m, "%d\n", sysctl_sched_min_base_slice);
-+	return 0;
-+}
-+
-+static int sched_min_base_slice_open(struct inode *inode, struct file *filp)
-+{
-+	return single_open(filp, sched_min_base_slice_show, NULL);
-+}
-+
-+static const struct file_operations sched_min_base_slice_fops = {
-+	.open		= sched_min_base_slice_open,
-+	.write		= sched_min_base_slice_write,
-+	.read		= seq_read,
-+	.llseek		= seq_lseek,
-+	.release	= single_release,
-+};
-+#else // !CONFIG_SCHED_BORE
- static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
- 				   size_t cnt, loff_t *ppos)
- {
-@@ -213,7 +258,7 @@ static const struct file_operations sched_scaling_fops = {
- 	.llseek		= seq_lseek,
- 	.release	= single_release,
- };
--
-+#endif // CONFIG_SCHED_BORE
- #endif /* SMP */
- 
- #ifdef CONFIG_PREEMPT_DYNAMIC
-@@ -347,13 +392,20 @@ static __init int sched_init_debug(void)
- 	debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
- #endif
- 
-+#ifdef CONFIG_SCHED_BORE
-+	debugfs_create_file("min_base_slice_ns", 0644, debugfs_sched, NULL, &sched_min_base_slice_fops);
-+	debugfs_create_u32("base_slice_ns", 0400, debugfs_sched, &sysctl_sched_base_slice);
-+#else // !CONFIG_SCHED_BORE
- 	debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);
-+#endif // CONFIG_SCHED_BORE
- 
- 	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
- 	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
- 
- #ifdef CONFIG_SMP
-+#if !defined(CONFIG_SCHED_BORE)
- 	debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
-+#endif // CONFIG_SCHED_BORE
- 	debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
- 	debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
- 
-@@ -596,6 +648,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
- 		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
- 		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));
- 
-+#ifdef CONFIG_SCHED_BORE
-+	SEQ_printf(m, " %2d", p->se.burst_score);
-+#endif // CONFIG_SCHED_BORE
- #ifdef CONFIG_NUMA_BALANCING
- 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
- #endif
-@@ -1069,6 +1124,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
- 
- 	P(se.load.weight);
- #ifdef CONFIG_SMP
-+#ifdef CONFIG_SCHED_BORE
-+	P(se.burst_score);
-+#endif // CONFIG_SCHED_BORE
- 	P(se.avg.load_sum);
- 	P(se.avg.runnable_sum);
- 	P(se.avg.util_sum);
-diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
-index c2bb8eb1d6ba..9e8b220f27e6 100644
---- a/kernel/sched/fair.c
-+++ b/kernel/sched/fair.c
-@@ -19,6 +19,9 @@
-  *
-  *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
-  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
-+ *
-+ *  Burst-Oriented Response Enhancer (BORE) CPU Scheduler
-+ *  Copyright (C) 2021-2024 Masahito Suzuki <firelzrd@gmail.com>
-  */
- #include <linux/energy_model.h>
- #include <linux/mmap_lock.h>
-@@ -64,28 +67,126 @@
-  *   SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
-  *   SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
-  *
-- * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
-+ * (BORE  default SCHED_TUNABLESCALING_NONE = *1 constant)
-+ * (EEVDF default SCHED_TUNABLESCALING_LOG  = *(1+ilog(ncpus))
-  */
-+#ifdef CONFIG_SCHED_BORE
-+unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
-+#else // !CONFIG_SCHED_BORE
- unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
-+#endif // CONFIG_SCHED_BORE
- 
- /*
-  * Minimal preemption granularity for CPU-bound tasks:
-  *
-- * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
-+ * (BORE  default: max(1 sec / HZ, min_base_slice) constant, units: nanoseconds)
-+ * (EEVDF default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
-  */
--#ifdef CONFIG_CACHY
--unsigned int sysctl_sched_base_slice			= 350000ULL;
--static unsigned int normalized_sysctl_sched_base_slice	= 350000ULL;
--#else
-+#ifdef CONFIG_SCHED_BORE
-+unsigned int            sysctl_sched_base_slice = 1000000000ULL / HZ;
-+static unsigned int configured_sched_base_slice = 1000000000ULL / HZ;
-+unsigned int        sysctl_sched_min_base_slice = CONFIG_MIN_BASE_SLICE_NS;
-+#else // !CONFIG_SCHED_BORE
- unsigned int sysctl_sched_base_slice			= 750000ULL;
- static unsigned int normalized_sysctl_sched_base_slice	= 750000ULL;
--#endif
-+#endif // CONFIG_SCHED_BORE
- 
--#ifdef CONFIG_CACHY
--const_debug unsigned int sysctl_sched_migration_cost	= 300000UL;
--#else
- const_debug unsigned int sysctl_sched_migration_cost	= 500000UL;
--#endif
-+
-+#ifdef CONFIG_SCHED_BORE
-+u8   __read_mostly sched_bore                   = 1;
-+u8   __read_mostly sched_burst_smoothness_long  = 1;
-+u8   __read_mostly sched_burst_smoothness_short = 0;
-+u8   __read_mostly sched_burst_fork_atavistic   = 2;
-+u8   __read_mostly sched_burst_penalty_offset   = 22;
-+uint __read_mostly sched_burst_penalty_scale    = 1280;
-+uint __read_mostly sched_burst_cache_lifetime   = 60000000;
-+uint __read_mostly sched_deadline_boost_mask    = 0x81; // ENQUEUE_INITIAL | ENQUEUE_WAKEUP
-+uint __read_mostly sched_deadline_preserve_mask = 0x42; // ENQUEUE_RESTORE | ENQUEUE_MIGRATED
-+static int __maybe_unused sixty_four     = 64;
-+static int __maybe_unused maxval_12_bits = 4095;
-+
-+#define MAX_BURST_PENALTY (39U <<2)
-+
-+static inline u32 log2plus1_u64_u32f8(u64 v) {
-+	u32 msb = fls64(v);
-+	s32 excess_bits = msb - 9;
-+    u8 fractional = (0 <= excess_bits)? v >> excess_bits: v << -excess_bits;
-+	return msb << 8 | fractional;
-+}
-+
-+static inline u32 calc_burst_penalty(u64 burst_time) {
-+	u32 greed, tolerance, penalty, scaled_penalty;
-+	
-+	greed = log2plus1_u64_u32f8(burst_time);
-+	tolerance = sched_burst_penalty_offset << 8;
-+	penalty = max(0, (s32)greed - (s32)tolerance);
-+	scaled_penalty = penalty * sched_burst_penalty_scale >> 16;
-+
-+	return min(MAX_BURST_PENALTY, scaled_penalty);
-+}
-+
-+static inline u64 scale_slice(u64 delta, struct sched_entity *se) {
-+	return mul_u64_u32_shr(delta, sched_prio_to_wmult[se->burst_score], 22);
-+}
-+
-+static inline u64 __unscale_slice(u64 delta, u8 score) {
-+	return mul_u64_u32_shr(delta, sched_prio_to_weight[score], 10);
-+}
-+
-+static inline u64 unscale_slice(u64 delta, struct sched_entity *se) {
-+	return __unscale_slice(delta, se->burst_score);
-+}
-+
-+void reweight_task(struct task_struct *p, int prio);
-+
-+static void update_burst_score(struct sched_entity *se) {
-+	if (!entity_is_task(se)) return;
-+	struct task_struct *p = task_of(se);
-+	u8 prio = p->static_prio - MAX_RT_PRIO;
-+	u8 prev_prio = min(39, prio + se->burst_score);
-+
-+	se->burst_score = se->burst_penalty >> 2;
-+
-+	u8 new_prio = min(39, prio + se->burst_score);
-+	if (new_prio != prev_prio)
-+		reweight_task(p, new_prio);
-+}
-+
-+static void update_burst_penalty(struct sched_entity *se) {
-+	se->curr_burst_penalty = calc_burst_penalty(se->burst_time);
-+	se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty);
-+	update_burst_score(se);
-+}
-+
-+static inline u32 binary_smooth(u32 new, u32 old) {
-+  int increment = new - old;
-+  return (0 <= increment)?
-+    old + ( increment >> (int)sched_burst_smoothness_long):
-+    old - (-increment >> (int)sched_burst_smoothness_short);
-+}
-+
-+static void restart_burst(struct sched_entity *se) {
-+	se->burst_penalty = se->prev_burst_penalty =
-+		binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty);
-+	se->curr_burst_penalty = 0;
-+	se->burst_time = 0;
-+	update_burst_score(se);
-+}
-+
-+static void restart_burst_rescale_deadline(struct sched_entity *se) {
-+	s64 vscaled, wremain, vremain = se->deadline - se->vruntime;
-+	u8 prev_score = se->burst_score;
-+	restart_burst(se);
-+	if (prev_score > se->burst_score) {
-+		wremain = __unscale_slice(abs(vremain), prev_score);
-+		vscaled = scale_slice(wremain, se);
-+		if (unlikely(vremain < 0))
-+			vscaled = -vscaled;
-+		se->deadline = se->vruntime + vscaled;
-+	}
-+}
-+#endif // CONFIG_SCHED_BORE
- 
- static int __init setup_sched_thermal_decay_shift(char *str)
- {
-@@ -130,12 +231,8 @@ int __weak arch_asym_cpu_priority(int cpu)
-  *
-  * (default: 5 msec, units: microseconds)
-  */
--#ifdef CONFIG_CACHY
--static unsigned int sysctl_sched_cfs_bandwidth_slice		= 3000UL;
--#else
- static unsigned int sysctl_sched_cfs_bandwidth_slice		= 5000UL;
- #endif
--#endif
- 
- #ifdef CONFIG_NUMA_BALANCING
- /* Restrict the NUMA promotion throughput (MB/s) for each target node. */
-@@ -144,6 +241,83 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536;
- 
- #ifdef CONFIG_SYSCTL
- static struct ctl_table sched_fair_sysctls[] = {
-+#ifdef CONFIG_SCHED_BORE
-+	{
-+		.procname	= "sched_bore",
-+		.data		= &sched_bore,
-+		.maxlen		= sizeof(u8),
-+		.mode		= 0644,
-+		.proc_handler = proc_dou8vec_minmax,
-+		.extra1		= SYSCTL_ONE,
-+		.extra2		= SYSCTL_ONE,
-+	},
-+	{
-+		.procname	= "sched_burst_smoothness_long",
-+		.data		= &sched_burst_smoothness_long,
-+		.maxlen		= sizeof(u8),
-+		.mode		= 0644,
-+		.proc_handler = proc_dou8vec_minmax,
-+		.extra1		= SYSCTL_ZERO,
-+		.extra2		= SYSCTL_ONE,
-+	},
-+	{
-+		.procname	= "sched_burst_smoothness_short",
-+		.data		= &sched_burst_smoothness_short,
-+		.maxlen		= sizeof(u8),
-+		.mode		= 0644,
-+		.proc_handler = proc_dou8vec_minmax,
-+		.extra1		= SYSCTL_ZERO,
-+		.extra2		= SYSCTL_ONE,
-+	},
-+	{
-+		.procname	= "sched_burst_fork_atavistic",
-+		.data		= &sched_burst_fork_atavistic,
-+		.maxlen		= sizeof(u8),
-+		.mode		= 0644,
-+		.proc_handler = proc_dou8vec_minmax,
-+		.extra1		= SYSCTL_ZERO,
-+		.extra2		= SYSCTL_THREE,
-+	},
-+	{
-+		.procname	= "sched_burst_penalty_offset",
-+		.data		= &sched_burst_penalty_offset,
-+		.maxlen		= sizeof(u8),
-+		.mode		= 0644,
-+		.proc_handler = proc_dou8vec_minmax,
-+		.extra1		= SYSCTL_ZERO,
-+		.extra2		= &sixty_four,
-+	},
-+	{
-+		.procname	= "sched_burst_penalty_scale",
-+		.data		= &sched_burst_penalty_scale,
-+		.maxlen		= sizeof(uint),
-+		.mode		= 0644,
-+		.proc_handler = proc_douintvec_minmax,
-+		.extra1		= SYSCTL_ZERO,
-+		.extra2		= &maxval_12_bits,
-+	},
-+	{
-+		.procname	= "sched_burst_cache_lifetime",
-+		.data		= &sched_burst_cache_lifetime,
-+		.maxlen		= sizeof(uint),
-+		.mode		= 0644,
-+		.proc_handler = proc_douintvec,
-+	},
-+	{
-+		.procname	= "sched_deadline_boost_mask",
-+		.data		= &sched_deadline_boost_mask,
-+		.maxlen		= sizeof(uint),
-+		.mode		= 0644,
-+		.proc_handler = proc_douintvec,
-+	},
-+	{
-+		.procname	= "sched_deadline_preserve_mask",
-+		.data		= &sched_deadline_preserve_mask,
-+		.maxlen		= sizeof(uint),
-+		.mode		= 0644,
-+		.proc_handler = proc_douintvec,
-+	},
-+#endif // CONFIG_SCHED_BORE
- #ifdef CONFIG_CFS_BANDWIDTH
- 	{
- 		.procname       = "sched_cfs_bandwidth_slice_us",
-@@ -201,6 +375,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w)
-  *
-  * This idea comes from the SD scheduler of Con Kolivas:
-  */
-+#ifdef CONFIG_SCHED_BORE
-+static void update_sysctl(void) {
-+	sysctl_sched_base_slice =
-+		max(sysctl_sched_min_base_slice, configured_sched_base_slice);
-+}
-+void sched_update_min_base_slice(void) { update_sysctl(); }
-+#else // !CONFIG_SCHED_BORE
- static unsigned int get_update_sysctl_factor(void)
- {
- 	unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8);
-@@ -231,6 +412,7 @@ static void update_sysctl(void)
- 	SET_SYSCTL(sched_base_slice);
- #undef SET_SYSCTL
- }
-+#endif // CONFIG_SCHED_BORE
- 
- void __init sched_init_granularity(void)
- {
-@@ -708,6 +890,9 @@ static s64 entity_lag(u64 avruntime, struct sched_entity *se)
- 
- 	vlag = avruntime - se->vruntime;
- 	limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
-+#ifdef CONFIG_SCHED_BORE
-+	limit >>= 1;
-+#endif // CONFIG_SCHED_BORE
- 
- 	return clamp(vlag, -limit, limit);
- }
-@@ -868,6 +1053,39 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
- 	return __node_2_se(left);
- }
- 
-+static inline bool pick_curr(struct cfs_rq *cfs_rq,
-+			     struct sched_entity *curr, struct sched_entity *wakee)
-+{
-+	/*
-+	 * Nothing to preserve...
-+	 */
-+	if (!curr || !sched_feat(RESPECT_SLICE))
-+		return false;
-+
-+	/*
-+	 * Allow preemption at the 0-lag point -- even if not all of the slice
-+	 * is consumed. Note: placement of positive lag can push V left and render
-+	 * @curr instantly ineligible irrespective the time on-cpu.
-+	 */
-+	if (sched_feat(RUN_TO_PARITY) && !entity_eligible(cfs_rq, curr))
-+		return false;
-+
-+	/*
-+	 * Don't preserve @curr when the @wakee has a shorter slice and earlier
-+	 * deadline. IOW, explicitly allow preemption.
-+	 */
-+	if (sched_feat(PREEMPT_SHORT) && wakee &&
-+	    wakee->slice < curr->slice &&
-+	    (s64)(wakee->deadline - curr->deadline) < 0)
-+		return false;
-+
-+	/*
-+	 * Preserve @curr to allow it to finish its first slice.
-+	 * See the HACK in set_next_entity().
-+	 */
-+	return curr->vlag == curr->deadline;
-+}
-+
- /*
-  * Earliest Eligible Virtual Deadline First
-  *
-@@ -887,28 +1105,27 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
-  *
-  * Which allows tree pruning through eligibility.
-  */
--static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
-+static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *wakee)
- {
- 	struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node;
- 	struct sched_entity *se = __pick_first_entity(cfs_rq);
- 	struct sched_entity *curr = cfs_rq->curr;
- 	struct sched_entity *best = NULL;
- 
-+	if (curr && !curr->on_rq)
-+		curr = NULL;
-+
- 	/*
- 	 * We can safely skip eligibility check if there is only one entity
- 	 * in this cfs_rq, saving some cycles.
- 	 */
- 	if (cfs_rq->nr_running == 1)
--		return curr && curr->on_rq ? curr : se;
--
--	if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
--		curr = NULL;
-+		return curr ?: se;
- 
- 	/*
--	 * Once selected, run a task until it either becomes non-eligible or
--	 * until it gets a new slice. See the HACK in set_next_entity().
-+	 * Preserve @curr to let it finish its slice.
- 	 */
--	if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline)
-+	if (pick_curr(cfs_rq, curr, wakee))
- 		return curr;
- 
- 	/* Pick the leftmost entity if it's eligible */
-@@ -967,6 +1184,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
-  * Scheduling class statistics methods:
-  */
- #ifdef CONFIG_SMP
-+#if !defined(CONFIG_SCHED_BORE)
- int sched_update_scaling(void)
- {
- 	unsigned int factor = get_update_sysctl_factor();
-@@ -978,6 +1196,7 @@ int sched_update_scaling(void)
- 
- 	return 0;
- }
-+#endif // CONFIG_SCHED_BORE
- #endif
- #endif
- 
-@@ -1178,7 +1397,13 @@ static void update_curr(struct cfs_rq *cfs_rq)
- 	if (unlikely(delta_exec <= 0))
- 		return;
- 
-+#ifdef CONFIG_SCHED_BORE
-+	curr->burst_time += delta_exec;
-+	update_burst_penalty(curr);
-+	curr->vruntime += max(1ULL, calc_delta_fair(delta_exec, curr));
-+#else // !CONFIG_SCHED_BORE
- 	curr->vruntime += calc_delta_fair(delta_exec, curr);
-+#endif // CONFIG_SCHED_BORE
- 	update_deadline(cfs_rq, curr);
- 	update_min_vruntime(cfs_rq);
- 
-@@ -5193,6 +5418,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
- 	s64 lag = 0;
- 
- 	se->slice = sysctl_sched_base_slice;
-+#ifdef CONFIG_SCHED_BORE
-+	if (flags & ~sched_deadline_boost_mask & sched_deadline_preserve_mask)
-+		vslice = se->deadline - se->vruntime;
-+	else
-+#endif // CONFIG_SCHED_BORE
- 	vslice = calc_delta_fair(se->slice, se);
- 
- 	/*
-@@ -5203,6 +5433,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
- 	 *
- 	 * EEVDF: placement strategy #1 / #2
- 	 */
-+#ifdef CONFIG_SCHED_BORE
-+	if (se->vlag)
-+#endif // CONFIG_SCHED_BORE
- 	if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) {
- 		struct sched_entity *curr = cfs_rq->curr;
- 		unsigned long load;
-@@ -5278,7 +5511,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
- 	 * on average, halfway through their slice, as such start tasks
- 	 * off with half a slice to ease into the competition.
- 	 */
-+#if !defined(CONFIG_SCHED_BORE)
- 	if (sched_feat(PLACE_DEADLINE_INITIAL) && (flags & ENQUEUE_INITIAL))
-+#else // CONFIG_SCHED_BORE
-+	if (flags & sched_deadline_boost_mask)
-+#endif // CONFIG_SCHED_BORE
- 		vslice /= 2;
- 
- 	/*
-@@ -5492,7 +5729,7 @@ pick_next_entity(struct cfs_rq *cfs_rq)
- 	    cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next))
- 		return cfs_rq->next;
- 
--	return pick_eevdf(cfs_rq);
-+	return pick_eevdf(cfs_rq, NULL);
- }
- 
- static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
-@@ -6860,6 +7097,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
- 	bool was_sched_idle = sched_idle_rq(rq);
- 
- 	util_est_dequeue(&rq->cfs, p);
-+#ifdef CONFIG_SCHED_BORE
-+	if (task_sleep) {
-+		cfs_rq = cfs_rq_of(se);
-+		if (cfs_rq->curr == se)
-+			update_curr(cfs_rq);
-+		restart_burst(se);
-+	}
-+#endif // CONFIG_SCHED_BORE
- 
- 	for_each_sched_entity(se) {
- 		cfs_rq = cfs_rq_of(se);
-@@ -8425,10 +8670,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
- 	cfs_rq = cfs_rq_of(se);
- 	update_curr(cfs_rq);
- 
--	/*
--	 * XXX pick_eevdf(cfs_rq) != se ?
--	 */
--	if (pick_eevdf(cfs_rq) == pse)
-+	if (pick_eevdf(cfs_rq, pse) == pse)
- 		goto preempt;
- 
- 	return;
-@@ -8646,16 +8888,25 @@ static void yield_task_fair(struct rq *rq)
- 	/*
- 	 * Are we the only task in the tree?
- 	 */
-+#if !defined(CONFIG_SCHED_BORE)
- 	if (unlikely(rq->nr_running == 1))
- 		return;
- 
- 	clear_buddies(cfs_rq, se);
-+#endif // CONFIG_SCHED_BORE
- 
- 	update_rq_clock(rq);
- 	/*
- 	 * Update run-time statistics of the 'current'.
- 	 */
- 	update_curr(cfs_rq);
-+#ifdef CONFIG_SCHED_BORE
-+	restart_burst_rescale_deadline(se);
-+	if (unlikely(rq->nr_running == 1))
-+		return;
-+
-+	clear_buddies(cfs_rq, se);
-+#endif // CONFIG_SCHED_BORE
- 	/*
- 	 * Tell update_rq_clock() that we've just updated,
- 	 * so we don't do microscopic update in schedule()
-@@ -12723,6 +12974,9 @@ static void task_fork_fair(struct task_struct *p)
- 	curr = cfs_rq->curr;
- 	if (curr)
- 		update_curr(cfs_rq);
-+#ifdef CONFIG_SCHED_BORE
-+	update_burst_score(se);
-+#endif // CONFIG_SCHED_BORE
- 	place_entity(cfs_rq, se, ENQUEUE_INITIAL);
- 	rq_unlock(rq, &rf);
- }
-diff --git a/kernel/sched/features.h b/kernel/sched/features.h
-index 143f55df890b..3aad8900c35e 100644
---- a/kernel/sched/features.h
-+++ b/kernel/sched/features.h
-@@ -5,8 +5,28 @@
-  * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
-  */
- SCHED_FEAT(PLACE_LAG, true)
-+/*
-+ * Give new tasks half a slice to ease into the competition.
-+ */
-+#if !defined(CONFIG_SCHED_BORE)
- SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
--SCHED_FEAT(RUN_TO_PARITY, true)
-+#endif // CONFIG_SCHED_BORE
-+/*
-+ * Inhibit (wakeup) preemption until the current task has exhausted its slice.
-+ */
-+#ifdef CONFIG_SCHED_BORE
-+SCHED_FEAT(RESPECT_SLICE, false)
-+#else // !CONFIG_SCHED_BORE
-+SCHED_FEAT(RESPECT_SLICE, true)
-+#endif // CONFIG_SCHED_BORE
-+/*
-+ * Relax RESPECT_SLICE to allow preemption once current has reached 0-lag.
-+ */
-+SCHED_FEAT(RUN_TO_PARITY, false)
-+/*
-+ * Allow tasks with a shorter slice to disregard RESPECT_SLICE
-+ */
-+SCHED_FEAT(PREEMPT_SHORT, true)
- 
- /*
-  * Prefer to schedule the task we woke last (assuming it failed
-diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
-index 10c1caff5e06..5d845dbd0cf9 100644
---- a/kernel/sched/sched.h
-+++ b/kernel/sched/sched.h
-@@ -1969,7 +1969,11 @@ static inline void dirty_sched_domain_sysctl(int cpu)
- }
- #endif
- 
-+#ifdef CONFIG_SCHED_BORE
-+extern void sched_update_min_base_slice(void);
-+#else // !CONFIG_SCHED_BORE
- extern int sched_update_scaling(void);
-+#endif // CONFIG_SCHED_BORE
- 
- static inline const struct cpumask *task_user_cpus(struct task_struct *p)
- {
-@@ -2554,6 +2558,9 @@ extern const_debug unsigned int sysctl_sched_nr_migrate;
- extern const_debug unsigned int sysctl_sched_migration_cost;
- 
- extern unsigned int sysctl_sched_base_slice;
-+#ifdef CONFIG_SCHED_BORE
-+extern unsigned int sysctl_sched_min_base_slice;
-+#endif // CONFIG_SCHED_BORE
- 
- #ifdef CONFIG_SCHED_DEBUG
- extern int sysctl_resched_latency_warn_ms;
--- 
-2.46.0.rc0
diff --git a/patches/cachyos/0001-cachyos-base-all.patch b/patches/cachyos/0001-cachyos-base-all.patch
deleted file mode 100644
index b5b57c9..0000000
--- a/patches/cachyos/0001-cachyos-base-all.patch
+++ /dev/null
@@ -1,53760 +0,0 @@
-From 35b09dfe053ff6308ab58d44175727d0d20f4ce0 Mon Sep 17 00:00:00 2001
-From: Peter Jung <admin@ptr1337.dev>
-Date: Mon, 15 Jul 2024 13:23:07 +0200
-Subject: [PATCH 01/11] amd-pstate
-
-Signed-off-by: Peter Jung <admin@ptr1337.dev>
----
- Documentation/admin-guide/pm/amd-pstate.rst |  18 +-
- arch/x86/include/asm/cpufeatures.h          |   1 +
- arch/x86/include/asm/msr-index.h            |   2 +
- arch/x86/kernel/cpu/scattered.c             |   1 +
- drivers/cpufreq/Kconfig.x86                 |   1 +
- drivers/cpufreq/acpi-cpufreq.c              |   3 +-
- drivers/cpufreq/amd-pstate-ut.c             |  12 +-
- drivers/cpufreq/amd-pstate.c                | 350 ++++++++++++++------
- drivers/cpufreq/amd-pstate.h                |   2 +
- drivers/cpufreq/cpufreq.c                   |  11 +-
- 10 files changed, 281 insertions(+), 120 deletions(-)
-
-diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
-index 1e0d101b020a..d0324d44f548 100644
---- a/Documentation/admin-guide/pm/amd-pstate.rst
-+++ b/Documentation/admin-guide/pm/amd-pstate.rst
-@@ -281,6 +281,22 @@ integer values defined between 0 to 255 when EPP feature is enabled by platform
- firmware, if EPP feature is disabled, driver will ignore the written value
- This attribute is read-write.
- 
-+``boost``
-+The `boost` sysfs attribute provides control over the CPU core
-+performance boost, allowing users to manage the maximum frequency limitation
-+of the CPU. This attribute can be used to enable or disable the boost feature
-+on individual CPUs.
-+
-+When the boost feature is enabled, the CPU can dynamically increase its frequency
-+beyond the base frequency, providing enhanced performance for demanding workloads.
-+On the other hand, disabling the boost feature restricts the CPU to operate at the
-+base frequency, which may be desirable in certain scenarios to prioritize power
-+efficiency or manage temperature.
-+
-+To manipulate the `boost` attribute, users can write a value of `0` to disable the
-+boost or `1` to enable it, for the respective CPU using the sysfs path
-+`/sys/devices/system/cpu/cpuX/cpufreq/boost`, where `X` represents the CPU number.
-+
- Other performance and frequency values can be read back from
- ``/sys/devices/system/cpu/cpuX/acpi_cppc/``, see :ref:`cppc_sysfs`.
- 
-@@ -406,7 +422,7 @@ control its functionality at the system level.  They are located in the
- ``/sys/devices/system/cpu/amd_pstate/`` directory and affect all CPUs.
- 
- ``status``
--	Operation mode of the driver: "active", "passive" or "disable".
-+	Operation mode of the driver: "active", "passive", "guided" or "disable".
- 
- 	"active"
- 		The driver is functional and in the ``active mode``
-diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
-index 3c7434329661..6c128d463a14 100644
---- a/arch/x86/include/asm/cpufeatures.h
-+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -470,6 +470,7 @@
- #define X86_FEATURE_BHI_CTRL		(21*32+ 2) /* "" BHI_DIS_S HW control available */
- #define X86_FEATURE_CLEAR_BHB_HW	(21*32+ 3) /* "" BHI_DIS_S HW control enabled */
- #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
-+#define X86_FEATURE_FAST_CPPC		(21*32 + 5) /* "" AMD Fast CPPC */
- 
- /*
-  * BUG word(s)
-diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
-index e022e6eb766c..384739d592af 100644
---- a/arch/x86/include/asm/msr-index.h
-+++ b/arch/x86/include/asm/msr-index.h
-@@ -781,6 +781,8 @@
- #define MSR_K7_HWCR_IRPERF_EN		BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
- #define MSR_K7_FID_VID_CTL		0xc0010041
- #define MSR_K7_FID_VID_STATUS		0xc0010042
-+#define MSR_K7_HWCR_CPB_DIS_BIT		25
-+#define MSR_K7_HWCR_CPB_DIS		BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT)
- 
- /* K6 MSRs */
- #define MSR_K6_WHCR			0xc0000082
-diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
-index af5aa2c754c2..c84c30188fdf 100644
---- a/arch/x86/kernel/cpu/scattered.c
-+++ b/arch/x86/kernel/cpu/scattered.c
-@@ -45,6 +45,7 @@ static const struct cpuid_bit cpuid_bits[] = {
- 	{ X86_FEATURE_HW_PSTATE,	CPUID_EDX,  7, 0x80000007, 0 },
- 	{ X86_FEATURE_CPB,		CPUID_EDX,  9, 0x80000007, 0 },
- 	{ X86_FEATURE_PROC_FEEDBACK,    CPUID_EDX, 11, 0x80000007, 0 },
-+	{ X86_FEATURE_FAST_CPPC, 	CPUID_EDX, 15, 0x80000007, 0 },
- 	{ X86_FEATURE_MBA,		CPUID_EBX,  6, 0x80000008, 0 },
- 	{ X86_FEATURE_SMBA,		CPUID_EBX,  2, 0x80000020, 0 },
- 	{ X86_FEATURE_BMEC,		CPUID_EBX,  3, 0x80000020, 0 },
-diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
-index 438c9e75a04d..97c2d4f15d76 100644
---- a/drivers/cpufreq/Kconfig.x86
-+++ b/drivers/cpufreq/Kconfig.x86
-@@ -71,6 +71,7 @@ config X86_AMD_PSTATE_DEFAULT_MODE
- config X86_AMD_PSTATE_UT
- 	tristate "selftest for AMD Processor P-State driver"
- 	depends on X86 && ACPI_PROCESSOR
-+	depends on X86_AMD_PSTATE
- 	default n
- 	help
- 	  This kernel module is used for testing. It's safe to say M here.
-diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
-index 4ac3a35dcd98..f4f8587c4ea0 100644
---- a/drivers/cpufreq/acpi-cpufreq.c
-+++ b/drivers/cpufreq/acpi-cpufreq.c
-@@ -50,8 +50,6 @@ enum {
- #define AMD_MSR_RANGE		(0x7)
- #define HYGON_MSR_RANGE		(0x7)
- 
--#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)
--
- struct acpi_cpufreq_data {
- 	unsigned int resume;
- 	unsigned int cpu_feature;
-@@ -139,6 +137,7 @@ static int set_boost(struct cpufreq_policy *policy, int val)
- 			 (void *)(long)val, 1);
- 	pr_debug("CPU %*pbl: Core Boosting %s.\n",
- 		 cpumask_pr_args(policy->cpus), str_enabled_disabled(val));
-+	policy->boost_enabled = val;
- 
- 	return 0;
- }
-diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c
-index fc275d41d51e..66b73c308ce6 100644
---- a/drivers/cpufreq/amd-pstate-ut.c
-+++ b/drivers/cpufreq/amd-pstate-ut.c
-@@ -202,6 +202,7 @@ static void amd_pstate_ut_check_freq(u32 index)
- 	int cpu = 0;
- 	struct cpufreq_policy *policy = NULL;
- 	struct amd_cpudata *cpudata = NULL;
-+	u32 nominal_freq_khz;
- 
- 	for_each_possible_cpu(cpu) {
- 		policy = cpufreq_cpu_get(cpu);
-@@ -209,13 +210,14 @@ static void amd_pstate_ut_check_freq(u32 index)
- 			break;
- 		cpudata = policy->driver_data;
- 
--		if (!((cpudata->max_freq >= cpudata->nominal_freq) &&
--			(cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) &&
-+		nominal_freq_khz = cpudata->nominal_freq*1000;
-+		if (!((cpudata->max_freq >= nominal_freq_khz) &&
-+			(nominal_freq_khz > cpudata->lowest_nonlinear_freq) &&
- 			(cpudata->lowest_nonlinear_freq > cpudata->min_freq) &&
- 			(cpudata->min_freq > 0))) {
- 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
- 			pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
--				__func__, cpu, cpudata->max_freq, cpudata->nominal_freq,
-+				__func__, cpu, cpudata->max_freq, nominal_freq_khz,
- 				cpudata->lowest_nonlinear_freq, cpudata->min_freq);
- 			goto skip_test;
- 		}
-@@ -229,13 +231,13 @@ static void amd_pstate_ut_check_freq(u32 index)
- 
- 		if (cpudata->boost_supported) {
- 			if ((policy->max == cpudata->max_freq) ||
--					(policy->max == cpudata->nominal_freq))
-+					(policy->max == nominal_freq_khz))
- 				amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
- 			else {
- 				amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
- 				pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
- 					__func__, cpu, policy->max, cpudata->max_freq,
--					cpudata->nominal_freq);
-+					nominal_freq_khz);
- 				goto skip_test;
- 			}
- 		} else {
-diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
-index 9ad62dbe8bfb..804fab4ebb26 100644
---- a/drivers/cpufreq/amd-pstate.c
-+++ b/drivers/cpufreq/amd-pstate.c
-@@ -51,6 +51,7 @@
- 
- #define AMD_PSTATE_TRANSITION_LATENCY	20000
- #define AMD_PSTATE_TRANSITION_DELAY	1000
-+#define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY 600
- #define CPPC_HIGHEST_PERF_PERFORMANCE	196
- #define CPPC_HIGHEST_PERF_DEFAULT	166
- 
-@@ -85,15 +86,6 @@ struct quirk_entry {
- 	u32 lowest_freq;
- };
- 
--/*
-- * TODO: We need more time to fine tune processors with shared memory solution
-- * with community together.
-- *
-- * There are some performance drops on the CPU benchmarks which reports from
-- * Suse. We are co-working with them to fine tune the shared memory solution. So
-- * we disable it by default to go acpi-cpufreq on these processors and add a
-- * module parameter to be able to enable it manually for debugging.
-- */
- static struct cpufreq_driver *current_pstate_driver;
- static struct cpufreq_driver amd_pstate_driver;
- static struct cpufreq_driver amd_pstate_epp_driver;
-@@ -157,7 +149,7 @@ static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
- 	 * broken BIOS lack of nominal_freq and lowest_freq capabilities
- 	 * definition in ACPI tables
- 	 */
--	if (boot_cpu_has(X86_FEATURE_ZEN2)) {
-+	if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
- 		quirks = dmi->driver_data;
- 		pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident);
- 		return 1;
-@@ -199,7 +191,7 @@ static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
- 	u64 epp;
- 	int ret;
- 
--	if (boot_cpu_has(X86_FEATURE_CPPC)) {
-+	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
- 		if (!cppc_req_cached) {
- 			epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
- 					&cppc_req_cached);
-@@ -247,12 +239,32 @@ static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
- 	return index;
- }
- 
-+static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
-+			       u32 des_perf, u32 max_perf, bool fast_switch)
-+{
-+	if (fast_switch)
-+		wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached));
-+	else
-+		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
-+			      READ_ONCE(cpudata->cppc_req_cached));
-+}
-+
-+DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf);
-+
-+static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
-+					  u32 min_perf, u32 des_perf,
-+					  u32 max_perf, bool fast_switch)
-+{
-+	static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
-+					    max_perf, fast_switch);
-+}
-+
- static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
- {
- 	int ret;
- 	struct cppc_perf_ctrls perf_ctrls;
- 
--	if (boot_cpu_has(X86_FEATURE_CPPC)) {
-+	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
- 		u64 value = READ_ONCE(cpudata->cppc_req_cached);
- 
- 		value &= ~GENMASK_ULL(31, 24);
-@@ -263,6 +275,9 @@ static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
- 		if (!ret)
- 			cpudata->epp_cached = epp;
- 	} else {
-+		amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
-+					     cpudata->max_limit_perf, false);
-+
- 		perf_ctrls.energy_perf = epp;
- 		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
- 		if (ret) {
-@@ -281,10 +296,8 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
- 	int epp = -EINVAL;
- 	int ret;
- 
--	if (!pref_index) {
--		pr_debug("EPP pref_index is invalid\n");
--		return -EINVAL;
--	}
-+	if (!pref_index)
-+		epp = cpudata->epp_default;
- 
- 	if (epp == -EINVAL)
- 		epp = epp_values[pref_index];
-@@ -452,16 +465,6 @@ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
- 	return static_call(amd_pstate_init_perf)(cpudata);
- }
- 
--static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
--			       u32 des_perf, u32 max_perf, bool fast_switch)
--{
--	if (fast_switch)
--		wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached));
--	else
--		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
--			      READ_ONCE(cpudata->cppc_req_cached));
--}
--
- static void cppc_update_perf(struct amd_cpudata *cpudata,
- 			     u32 min_perf, u32 des_perf,
- 			     u32 max_perf, bool fast_switch)
-@@ -475,16 +478,6 @@ static void cppc_update_perf(struct amd_cpudata *cpudata,
- 	cppc_set_perf(cpudata->cpu, &perf_ctrls);
- }
- 
--DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf);
--
--static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
--					  u32 min_perf, u32 des_perf,
--					  u32 max_perf, bool fast_switch)
--{
--	static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
--					    max_perf, fast_switch);
--}
--
- static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
- {
- 	u64 aperf, mperf, tsc;
-@@ -521,7 +514,10 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
- static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
- 			      u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags)
- {
-+	unsigned long max_freq;
-+	struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
- 	u64 prev = READ_ONCE(cpudata->cppc_req_cached);
-+	u32 nominal_perf = READ_ONCE(cpudata->nominal_perf);
- 	u64 value = prev;
- 
- 	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
-@@ -530,6 +526,9 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
- 			cpudata->max_limit_perf);
- 	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
- 
-+	max_freq = READ_ONCE(cpudata->max_limit_freq);
-+	policy->cur = div_u64(des_perf * max_freq, max_perf);
-+
- 	if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
- 		min_perf = des_perf;
- 		des_perf = 0;
-@@ -541,6 +540,10 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
- 	value &= ~AMD_CPPC_DES_PERF(~0L);
- 	value |= AMD_CPPC_DES_PERF(des_perf);
- 
-+	/* limit the max perf when core performance boost feature is disabled */
-+	if (!cpudata->boost_supported)
-+		max_perf = min_t(unsigned long, nominal_perf, max_perf);
-+
- 	value &= ~AMD_CPPC_MAX_PERF(~0L);
- 	value |= AMD_CPPC_MAX_PERF(max_perf);
- 
-@@ -651,10 +654,9 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
- 				   unsigned long capacity)
- {
- 	unsigned long max_perf, min_perf, des_perf,
--		      cap_perf, lowest_nonlinear_perf, max_freq;
-+		      cap_perf, lowest_nonlinear_perf;
- 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
- 	struct amd_cpudata *cpudata = policy->driver_data;
--	unsigned int target_freq;
- 
- 	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
- 		amd_pstate_update_min_max_limit(policy);
-@@ -662,7 +664,6 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
- 
- 	cap_perf = READ_ONCE(cpudata->highest_perf);
- 	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
--	max_freq = READ_ONCE(cpudata->max_freq);
- 
- 	des_perf = cap_perf;
- 	if (target_perf < capacity)
-@@ -680,51 +681,111 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
- 		max_perf = min_perf;
- 
- 	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
--	target_freq = div_u64(des_perf * max_freq, max_perf);
--	policy->cur = target_freq;
- 
- 	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
- 			policy->governor->flags);
- 	cpufreq_cpu_put(policy);
- }
- 
--static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
-+static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
- {
- 	struct amd_cpudata *cpudata = policy->driver_data;
-+	struct cppc_perf_ctrls perf_ctrls;
-+	u32 highest_perf, nominal_perf, nominal_freq, max_freq;
- 	int ret;
- 
--	if (!cpudata->boost_supported) {
--		pr_err("Boost mode is not supported by this processor or SBIOS\n");
--		return -EINVAL;
-+	highest_perf = READ_ONCE(cpudata->highest_perf);
-+	nominal_perf = READ_ONCE(cpudata->nominal_perf);
-+	nominal_freq = READ_ONCE(cpudata->nominal_freq);
-+	max_freq = READ_ONCE(cpudata->max_freq);
-+
-+	if (boot_cpu_has(X86_FEATURE_CPPC)) {
-+		u64 value = READ_ONCE(cpudata->cppc_req_cached);
-+
-+		value &= ~GENMASK_ULL(7, 0);
-+		value |= on ? highest_perf : nominal_perf;
-+		WRITE_ONCE(cpudata->cppc_req_cached, value);
-+
-+		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
-+	} else {
-+		perf_ctrls.max_perf = on ? highest_perf : nominal_perf;
-+		ret = cppc_set_perf(cpudata->cpu, &perf_ctrls);
-+		if (ret) {
-+			cpufreq_cpu_release(policy);
-+			pr_debug("Failed to set max perf on CPU:%d. ret:%d\n",
-+				cpudata->cpu, ret);
-+			return ret;
-+		}
- 	}
- 
--	if (state)
--		policy->cpuinfo.max_freq = cpudata->max_freq;
--	else
--		policy->cpuinfo.max_freq = cpudata->nominal_freq * 1000;
-+	if (on)
-+		policy->cpuinfo.max_freq = max_freq;
-+	else if (policy->cpuinfo.max_freq > nominal_freq * 1000)
-+		policy->cpuinfo.max_freq = nominal_freq * 1000;
- 
- 	policy->max = policy->cpuinfo.max_freq;
- 
--	ret = freq_qos_update_request(&cpudata->req[1],
--				      policy->cpuinfo.max_freq);
--	if (ret < 0)
--		return ret;
-+	if (cppc_state == AMD_PSTATE_PASSIVE) {
-+		ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq);
-+		if (ret < 0)
-+			pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu);
-+	}
- 
--	return 0;
-+	return ret < 0 ? ret : 0;
- }
- 
--static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
-+static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
- {
--	u32 highest_perf, nominal_perf;
-+	struct amd_cpudata *cpudata = policy->driver_data;
-+	int ret;
- 
--	highest_perf = READ_ONCE(cpudata->highest_perf);
--	nominal_perf = READ_ONCE(cpudata->nominal_perf);
-+	if (!cpudata->boost_supported) {
-+		pr_err("Boost mode is not supported by this processor or SBIOS\n");
-+		return -EOPNOTSUPP;
-+	}
-+	mutex_lock(&amd_pstate_driver_lock);
-+	ret = amd_pstate_cpu_boost_update(policy, state);
-+	WRITE_ONCE(cpudata->boost_state, !ret ? state : false);
-+	policy->boost_enabled = !ret ? state : false;
-+	refresh_frequency_limits(policy);
-+	mutex_unlock(&amd_pstate_driver_lock);
- 
--	if (highest_perf <= nominal_perf)
--		return;
-+	return ret;
-+}
-+
-+static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata)
-+{
-+	u64 boost_val;
-+	int ret = -1;
-+
-+	/*
-+	 * If platform has no CPB support or disable it, initialize current driver
-+	 * boost_enabled state to be false, it is not an error for cpufreq core to handle.
-+	 */
-+	if (!cpu_feature_enabled(X86_FEATURE_CPB)) {
-+		pr_debug_once("Boost CPB capabilities not present in the processor\n");
-+		ret = 0;
-+		goto exit_err;
-+	}
- 
--	cpudata->boost_supported = true;
-+	/* at least one CPU supports CPB, even if others fail later on to set up */
- 	current_pstate_driver->boost_enabled = true;
-+
-+	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val);
-+	if (ret) {
-+		pr_err_once("failed to read initial CPU boost state!\n");
-+		ret = -EIO;
-+		goto exit_err;
-+	}
-+
-+	if (!(boost_val & MSR_K7_HWCR_CPB_DIS))
-+		cpudata->boost_supported = true;
-+
-+	return 0;
-+
-+exit_err:
-+	cpudata->boost_supported = false;
-+	return ret;
- }
- 
- static void amd_perf_ctl_reset(unsigned int cpu)
-@@ -753,7 +814,7 @@ static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf)
- {
- 	int ret;
- 
--	if (boot_cpu_has(X86_FEATURE_CPPC)) {
-+	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
- 		u64 cap1;
- 
- 		ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
-@@ -849,8 +910,12 @@ static u32 amd_pstate_get_transition_delay_us(unsigned int cpu)
- 	u32 transition_delay_ns;
- 
- 	transition_delay_ns = cppc_get_transition_latency(cpu);
--	if (transition_delay_ns == CPUFREQ_ETERNAL)
--		return AMD_PSTATE_TRANSITION_DELAY;
-+	if (transition_delay_ns == CPUFREQ_ETERNAL) {
-+		if (cpu_feature_enabled(X86_FEATURE_FAST_CPPC))
-+			return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY;
-+		else
-+			return AMD_PSTATE_TRANSITION_DELAY;
-+	}
- 
- 	return transition_delay_ns / NSEC_PER_USEC;
- }
-@@ -921,12 +986,30 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
- 	WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
- 	WRITE_ONCE(cpudata->max_freq, max_freq);
- 
-+	/**
-+	 * Below values need to be initialized correctly, otherwise driver will fail to load
-+	 * max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf
-+	 * lowest_nonlinear_freq is a value between [min_freq, nominal_freq]
-+	 * Check _CPC in ACPI table objects if any values are incorrect
-+	 */
-+	if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) {
-+		pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n",
-+			min_freq, max_freq, nominal_freq * 1000);
-+		return -EINVAL;
-+	}
-+
-+	if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq * 1000) {
-+		pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n",
-+			lowest_nonlinear_freq, min_freq, nominal_freq * 1000);
-+		return -EINVAL;
-+	}
-+
- 	return 0;
- }
- 
- static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
- {
--	int min_freq, max_freq, nominal_freq, ret;
-+	int min_freq, max_freq, ret;
- 	struct device *dev;
- 	struct amd_cpudata *cpudata;
- 
-@@ -955,18 +1038,12 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
- 	if (ret)
- 		goto free_cpudata1;
- 
-+	ret = amd_pstate_init_boost_support(cpudata);
-+	if (ret)
-+		goto free_cpudata1;
-+
- 	min_freq = READ_ONCE(cpudata->min_freq);
- 	max_freq = READ_ONCE(cpudata->max_freq);
--	nominal_freq = READ_ONCE(cpudata->nominal_freq);
--
--	if (min_freq <= 0 || max_freq <= 0 ||
--	    nominal_freq <= 0 || min_freq > max_freq) {
--		dev_err(dev,
--			"min_freq(%d) or max_freq(%d) or nominal_freq (%d) value is incorrect, check _CPC in ACPI tables\n",
--			min_freq, max_freq, nominal_freq);
--		ret = -EINVAL;
--		goto free_cpudata1;
--	}
- 
- 	policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu);
- 	policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu);
-@@ -977,10 +1054,12 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
- 	policy->cpuinfo.min_freq = min_freq;
- 	policy->cpuinfo.max_freq = max_freq;
- 
-+	policy->boost_enabled = READ_ONCE(cpudata->boost_supported);
-+
- 	/* It will be updated by governor */
- 	policy->cur = policy->cpuinfo.min_freq;
- 
--	if (boot_cpu_has(X86_FEATURE_CPPC))
-+	if (cpu_feature_enabled(X86_FEATURE_CPPC))
- 		policy->fast_switch_possible = true;
- 
- 	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
-@@ -1002,7 +1081,6 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
- 
- 	policy->driver_data = cpudata;
- 
--	amd_pstate_boost_init(cpudata);
- 	if (!current_pstate_driver->adjust_perf)
- 		current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
- 
-@@ -1213,7 +1291,7 @@ static int amd_pstate_change_mode_without_dvr_change(int mode)
- 
- 	cppc_state = mode;
- 
--	if (boot_cpu_has(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE)
-+	if (cpu_feature_enabled(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE)
- 		return 0;
- 
- 	for_each_present_cpu(cpu) {
-@@ -1386,7 +1464,7 @@ static bool amd_pstate_acpi_pm_profile_undefined(void)
- 
- static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
- {
--	int min_freq, max_freq, nominal_freq, ret;
-+	int min_freq, max_freq, ret;
- 	struct amd_cpudata *cpudata;
- 	struct device *dev;
- 	u64 value;
-@@ -1417,17 +1495,12 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
- 	if (ret)
- 		goto free_cpudata1;
- 
-+	ret = amd_pstate_init_boost_support(cpudata);
-+	if (ret)
-+		goto free_cpudata1;
-+
- 	min_freq = READ_ONCE(cpudata->min_freq);
- 	max_freq = READ_ONCE(cpudata->max_freq);
--	nominal_freq = READ_ONCE(cpudata->nominal_freq);
--	if (min_freq <= 0 || max_freq <= 0 ||
--	    nominal_freq <= 0 || min_freq > max_freq) {
--		dev_err(dev,
--			"min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect, check _CPC in ACPI tables\n",
--			min_freq, max_freq, nominal_freq);
--		ret = -EINVAL;
--		goto free_cpudata1;
--	}
- 
- 	policy->cpuinfo.min_freq = min_freq;
- 	policy->cpuinfo.max_freq = max_freq;
-@@ -1436,11 +1509,13 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
- 
- 	policy->driver_data = cpudata;
- 
--	cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0);
-+	cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata, 0);
- 
- 	policy->min = policy->cpuinfo.min_freq;
- 	policy->max = policy->cpuinfo.max_freq;
- 
-+	policy->boost_enabled = READ_ONCE(cpudata->boost_supported);
-+
- 	/*
- 	 * Set the policy to provide a valid fallback value in case
- 	 * the default cpufreq governor is neither powersave nor performance.
-@@ -1451,7 +1526,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
- 	else
- 		policy->policy = CPUFREQ_POLICY_POWERSAVE;
- 
--	if (boot_cpu_has(X86_FEATURE_CPPC)) {
-+	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
- 		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
- 		if (ret)
- 			return ret;
-@@ -1462,7 +1537,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
- 			return ret;
- 		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
- 	}
--	amd_pstate_boost_init(cpudata);
- 
- 	return 0;
- 
-@@ -1541,7 +1615,7 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
- 		epp = 0;
- 
- 	/* Set initial EPP value */
--	if (boot_cpu_has(X86_FEATURE_CPPC)) {
-+	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
- 		value &= ~GENMASK_ULL(31, 24);
- 		value |= (u64)epp << 24;
- 	}
-@@ -1564,6 +1638,12 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
- 
- 	amd_pstate_epp_update_limit(policy);
- 
-+	/*
-+	 * policy->cur is never updated with the amd_pstate_epp driver, but it
-+	 * is used as a stale frequency value. So, keep it within limits.
-+	 */
-+	policy->cur = policy->min;
-+
- 	return 0;
- }
- 
-@@ -1580,7 +1660,7 @@ static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
- 	value = READ_ONCE(cpudata->cppc_req_cached);
- 	max_perf = READ_ONCE(cpudata->highest_perf);
- 
--	if (boot_cpu_has(X86_FEATURE_CPPC)) {
-+	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
- 		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
- 	} else {
- 		perf_ctrls.max_perf = max_perf;
-@@ -1614,7 +1694,7 @@ static void amd_pstate_epp_offline(struct cpufreq_policy *policy)
- 	value = READ_ONCE(cpudata->cppc_req_cached);
- 
- 	mutex_lock(&amd_pstate_limits_lock);
--	if (boot_cpu_has(X86_FEATURE_CPPC)) {
-+	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
- 		cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN;
- 
- 		/* Set max perf same as min perf */
-@@ -1718,6 +1798,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
- 	.suspend	= amd_pstate_epp_suspend,
- 	.resume		= amd_pstate_epp_resume,
- 	.update_limits	= amd_pstate_update_limits,
-+	.set_boost	= amd_pstate_set_boost,
- 	.name		= "amd-pstate-epp",
- 	.attr		= amd_pstate_epp_attr,
- };
-@@ -1741,6 +1822,46 @@ static int __init amd_pstate_set_driver(int mode_idx)
- 	return -EINVAL;
- }
- 
-+/**
-+ * CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F.
-+ * show the debug message that helps to check if the CPU has CPPC support for loading issue.
-+ */
-+static bool amd_cppc_supported(void)
-+{
-+	struct cpuinfo_x86 *c = &cpu_data(0);
-+	bool warn = false;
-+
-+	if ((boot_cpu_data.x86 == 0x17) && (boot_cpu_data.x86_model < 0x30)) {
-+		pr_debug_once("CPPC feature is not supported by the processor\n");
-+		return false;
-+	}
-+
-+	/*
-+	 * If the CPPC feature is disabled in the BIOS for processors that support MSR-based CPPC,
-+	 * the AMD Pstate driver may not function correctly.
-+	 * Check the CPPC flag and display a warning message if the platform supports CPPC.
-+	 * Note: below checking code will not abort the driver registeration process because of
-+	 * the code is added for debugging purposes.
-+	 */
-+	if (!cpu_feature_enabled(X86_FEATURE_CPPC)) {
-+		if (cpu_feature_enabled(X86_FEATURE_ZEN1) || cpu_feature_enabled(X86_FEATURE_ZEN2)) {
-+			if (c->x86_model > 0x60 && c->x86_model < 0xaf)
-+				warn = true;
-+		} else if (cpu_feature_enabled(X86_FEATURE_ZEN3) || cpu_feature_enabled(X86_FEATURE_ZEN4)) {
-+			if ((c->x86_model > 0x10 && c->x86_model < 0x1F) ||
-+					(c->x86_model > 0x40 && c->x86_model < 0xaf))
-+				warn = true;
-+		} else if (cpu_feature_enabled(X86_FEATURE_ZEN5)) {
-+			warn = true;
-+		}
-+	}
-+
-+	if (warn)
-+		pr_warn_once("The CPPC feature is supported but currently disabled by the BIOS.\n"
-+					"Please enable it if your BIOS has the CPPC option.\n");
-+	return true;
-+}
-+
- static int __init amd_pstate_init(void)
- {
- 	struct device *dev_root;
-@@ -1749,6 +1870,11 @@ static int __init amd_pstate_init(void)
- 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
- 		return -ENODEV;
- 
-+	/* show debug message only if CPPC is not supported */
-+	if (!amd_cppc_supported())
-+		return -EOPNOTSUPP;
-+
-+	/* show warning message when BIOS broken or ACPI disabled */
- 	if (!acpi_cpc_valid()) {
- 		pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n");
- 		return -ENODEV;
-@@ -1763,35 +1889,43 @@ static int __init amd_pstate_init(void)
- 	/* check if this machine need CPPC quirks */
- 	dmi_check_system(amd_pstate_quirks_table);
- 
--	switch (cppc_state) {
--	case AMD_PSTATE_UNDEFINED:
-+	/*
-+	* determine the driver mode from the command line or kernel config.
-+	* If no command line input is provided, cppc_state will be AMD_PSTATE_UNDEFINED.
-+	* command line options will override the kernel config settings.
-+	*/
-+
-+	if (cppc_state == AMD_PSTATE_UNDEFINED) {
- 		/* Disable on the following configs by default:
- 		 * 1. Undefined platforms
- 		 * 2. Server platforms
--		 * 3. Shared memory designs
- 		 */
- 		if (amd_pstate_acpi_pm_profile_undefined() ||
--		    amd_pstate_acpi_pm_profile_server() ||
--		    !boot_cpu_has(X86_FEATURE_CPPC)) {
-+		    amd_pstate_acpi_pm_profile_server()) {
- 			pr_info("driver load is disabled, boot with specific mode to enable this\n");
- 			return -ENODEV;
- 		}
--		ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE);
--		if (ret)
--			return ret;
--		break;
-+		/* get driver mode from kernel config option [1:4] */
-+		cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE;
-+	}
-+
-+	switch (cppc_state) {
- 	case AMD_PSTATE_DISABLE:
-+		pr_info("driver load is disabled, boot with specific mode to enable this\n");
- 		return -ENODEV;
- 	case AMD_PSTATE_PASSIVE:
- 	case AMD_PSTATE_ACTIVE:
- 	case AMD_PSTATE_GUIDED:
-+		ret = amd_pstate_set_driver(cppc_state);
-+		if (ret)
-+			return ret;
- 		break;
- 	default:
- 		return -EINVAL;
- 	}
- 
- 	/* capability check */
--	if (boot_cpu_has(X86_FEATURE_CPPC)) {
-+	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
- 		pr_debug("AMD CPPC MSR based functionality is supported\n");
- 		if (cppc_state != AMD_PSTATE_ACTIVE)
- 			current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
-@@ -1805,13 +1939,15 @@ static int __init amd_pstate_init(void)
- 	/* enable amd pstate feature */
- 	ret = amd_pstate_enable(true);
- 	if (ret) {
--		pr_err("failed to enable with return %d\n", ret);
-+		pr_err("failed to enable driver mode(%d)\n", cppc_state);
- 		return ret;
- 	}
- 
- 	ret = cpufreq_register_driver(current_pstate_driver);
--	if (ret)
-+	if (ret) {
- 		pr_err("failed to register with return %d\n", ret);
-+		goto disable_driver;
-+	}
- 
- 	dev_root = bus_get_dev_root(&cpu_subsys);
- 	if (dev_root) {
-@@ -1827,6 +1963,8 @@ static int __init amd_pstate_init(void)
- 
- global_attr_free:
- 	cpufreq_unregister_driver(current_pstate_driver);
-+disable_driver:
-+	amd_pstate_enable(false);
- 	return ret;
- }
- device_initcall(amd_pstate_init);
-diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
-index e6a28e7f4dbf..cc8bb2bc325a 100644
---- a/drivers/cpufreq/amd-pstate.h
-+++ b/drivers/cpufreq/amd-pstate.h
-@@ -99,6 +99,8 @@ struct amd_cpudata {
- 	u32	policy;
- 	u64	cppc_cap1_cached;
- 	bool	suspended;
-+	s16	epp_default;
-+	bool	boost_state;
- };
- 
- #endif /* _LINUX_AMD_PSTATE_H */
-diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
-index 9e5060b27864..270ea04fb616 100644
---- a/drivers/cpufreq/cpufreq.c
-+++ b/drivers/cpufreq/cpufreq.c
-@@ -614,10 +614,9 @@ static ssize_t show_boost(struct kobject *kobj,
- static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr,
- 			   const char *buf, size_t count)
- {
--	int ret, enable;
-+	bool enable;
- 
--	ret = sscanf(buf, "%d", &enable);
--	if (ret != 1 || enable < 0 || enable > 1)
-+	if (kstrtobool(buf, &enable))
- 		return -EINVAL;
- 
- 	if (cpufreq_boost_trigger_state(enable)) {
-@@ -641,10 +640,10 @@ static ssize_t show_local_boost(struct cpufreq_policy *policy, char *buf)
- static ssize_t store_local_boost(struct cpufreq_policy *policy,
- 				 const char *buf, size_t count)
- {
--	int ret, enable;
-+	int ret;
-+	bool enable;
- 
--	ret = kstrtoint(buf, 10, &enable);
--	if (ret || enable < 0 || enable > 1)
-+	if (kstrtobool(buf, &enable))
- 		return -EINVAL;
- 
- 	if (!cpufreq_driver->boost_enabled)
--- 
-2.46.0.rc1
-
-From fdecce0ee8a06092cd381604a8f4f26ef0c9561a Mon Sep 17 00:00:00 2001
-From: Peter Jung <admin@ptr1337.dev>
-Date: Mon, 15 Jul 2024 13:23:19 +0200
-Subject: [PATCH 02/11] bbr3
-
-Signed-off-by: Peter Jung <admin@ptr1337.dev>
----
- include/linux/tcp.h                |    4 +-
- include/net/inet_connection_sock.h |    4 +-
- include/net/tcp.h                  |   72 +-
- include/uapi/linux/inet_diag.h     |   23 +
- include/uapi/linux/rtnetlink.h     |    4 +-
- include/uapi/linux/tcp.h           |    1 +
- net/ipv4/Kconfig                   |   21 +-
- net/ipv4/bpf_tcp_ca.c              |    9 +-
- net/ipv4/tcp.c                     |    3 +
- net/ipv4/tcp_bbr.c                 | 2230 +++++++++++++++++++++-------
- net/ipv4/tcp_cong.c                |    1 +
- net/ipv4/tcp_input.c               |   40 +-
- net/ipv4/tcp_minisocks.c           |    2 +
- net/ipv4/tcp_output.c              |   48 +-
- net/ipv4/tcp_rate.c                |   30 +-
- net/ipv4/tcp_timer.c               |    1 +
- 16 files changed, 1940 insertions(+), 553 deletions(-)
-
-diff --git a/include/linux/tcp.h b/include/linux/tcp.h
-index 6a5e08b937b3..27aab715490e 100644
---- a/include/linux/tcp.h
-+++ b/include/linux/tcp.h
-@@ -369,7 +369,9 @@ struct tcp_sock {
- 	u8	compressed_ack;
- 	u8	dup_ack_counter:2,
- 		tlp_retrans:1,	/* TLP is a retransmission */
--		unused:5;
-+		fast_ack_mode:2, /* which fast ack mode ? */
-+		tlp_orig_data_app_limited:1, /* app-limited before TLP rtx? */
-+		unused:2;
- 	u8	thin_lto    : 1,/* Use linear timeouts for thin streams */
- 		fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
- 		fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
-diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
-index c0deaafebfdc..d53f042d936e 100644
---- a/include/net/inet_connection_sock.h
-+++ b/include/net/inet_connection_sock.h
-@@ -137,8 +137,8 @@ struct inet_connection_sock {
- 	u32			  icsk_probes_tstamp;
- 	u32			  icsk_user_timeout;
- 
--	u64			  icsk_ca_priv[104 / sizeof(u64)];
--#define ICSK_CA_PRIV_SIZE	  sizeof_field(struct inet_connection_sock, icsk_ca_priv)
-+#define ICSK_CA_PRIV_SIZE      (144)
-+	u64			  icsk_ca_priv[ICSK_CA_PRIV_SIZE / sizeof(u64)];
- };
- 
- #define ICSK_TIME_RETRANS	1	/* Retransmit timer */
-diff --git a/include/net/tcp.h b/include/net/tcp.h
-index 060e95b331a2..953244eefe7d 100644
---- a/include/net/tcp.h
-+++ b/include/net/tcp.h
-@@ -375,6 +375,8 @@ static inline void tcp_dec_quickack_mode(struct sock *sk)
- #define	TCP_ECN_QUEUE_CWR	2
- #define	TCP_ECN_DEMAND_CWR	4
- #define	TCP_ECN_SEEN		8
-+#define	TCP_ECN_LOW		16
-+#define	TCP_ECN_ECT_PERMANENT	32
- 
- enum tcp_tw_status {
- 	TCP_TW_SUCCESS = 0,
-@@ -778,6 +780,15 @@ static inline void tcp_fast_path_check(struct sock *sk)
- 
- u32 tcp_delack_max(const struct sock *sk);
- 
-+static inline void tcp_set_ecn_low_from_dst(struct sock *sk,
-+					    const struct dst_entry *dst)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+
-+	if (dst_feature(dst, RTAX_FEATURE_ECN_LOW))
-+		tp->ecn_flags |= TCP_ECN_LOW;
-+}
-+
- /* Compute the actual rto_min value */
- static inline u32 tcp_rto_min(const struct sock *sk)
- {
-@@ -883,6 +894,11 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
- 	return max_t(s64, t1 - t0, 0);
- }
- 
-+static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0)
-+{
-+	return max_t(s32, t1 - t0, 0);
-+}
-+
- /* provide the departure time in us unit */
- static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
- {
-@@ -972,9 +988,14 @@ struct tcp_skb_cb {
- 			/* pkts S/ACKed so far upon tx of skb, incl retrans: */
- 			__u32 delivered;
- 			/* start of send pipeline phase */
--			u64 first_tx_mstamp;
-+			u32 first_tx_mstamp;
- 			/* when we reached the "delivered" count */
--			u64 delivered_mstamp;
-+			u32 delivered_mstamp;
-+#define TCPCB_IN_FLIGHT_BITS 20
-+#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
-+			u32 in_flight:20,   /* packets in flight at transmit */
-+			    unused2:12;
-+			u32 lost;	/* packets lost so far upon tx of skb */
- 		} tx;   /* only used for outgoing skbs */
- 		union {
- 			struct inet_skb_parm	h4;
-@@ -1078,6 +1099,7 @@ enum tcp_ca_event {
- 	CA_EVENT_LOSS,		/* loss timeout */
- 	CA_EVENT_ECN_NO_CE,	/* ECT set, but not CE marked */
- 	CA_EVENT_ECN_IS_CE,	/* received CE marked IP packet */
-+	CA_EVENT_TLP_RECOVERY,	/* a lost segment was repaired by TLP probe */
- };
- 
- /* Information about inbound ACK, passed to cong_ops->in_ack_event() */
-@@ -1100,7 +1122,11 @@ enum tcp_ca_ack_event_flags {
- #define TCP_CONG_NON_RESTRICTED 0x1
- /* Requires ECN/ECT set on all packets */
- #define TCP_CONG_NEEDS_ECN	0x2
--#define TCP_CONG_MASK	(TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
-+/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). */
-+#define TCP_CONG_WANTS_CE_EVENTS	0x4
-+#define TCP_CONG_MASK	(TCP_CONG_NON_RESTRICTED | \
-+			 TCP_CONG_NEEDS_ECN | \
-+			 TCP_CONG_WANTS_CE_EVENTS)
- 
- union tcp_cc_info;
- 
-@@ -1120,10 +1146,13 @@ struct ack_sample {
-  */
- struct rate_sample {
- 	u64  prior_mstamp; /* starting timestamp for interval */
-+	u32  prior_lost;	/* tp->lost at "prior_mstamp" */
- 	u32  prior_delivered;	/* tp->delivered at "prior_mstamp" */
- 	u32  prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
-+	u32 tx_in_flight;	/* packets in flight at starting timestamp */
-+	s32  lost;		/* number of packets lost over interval */
- 	s32  delivered;		/* number of packets delivered over interval */
--	s32  delivered_ce;	/* number of packets delivered w/ CE marks*/
-+	s32  delivered_ce;	/* packets delivered w/ CE mark over interval */
- 	long interval_us;	/* time for tp->delivered to incr "delivered" */
- 	u32 snd_interval_us;	/* snd interval for delivered packets */
- 	u32 rcv_interval_us;	/* rcv interval for delivered packets */
-@@ -1134,7 +1163,9 @@ struct rate_sample {
- 	u32  last_end_seq;	/* end_seq of most recently ACKed packet */
- 	bool is_app_limited;	/* is sample from packet with bubble in pipe? */
- 	bool is_retrans;	/* is sample from retransmission? */
-+	bool is_acking_tlp_retrans_seq;  /* ACKed a TLP retransmit sequence? */
- 	bool is_ack_delayed;	/* is this (likely) a delayed ACK? */
-+	bool is_ece;		/* did this ACK have ECN marked? */
- };
- 
- struct tcp_congestion_ops {
-@@ -1158,8 +1189,11 @@ struct tcp_congestion_ops {
- 	/* hook for packet ack accounting (optional) */
- 	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
- 
--	/* override sysctl_tcp_min_tso_segs */
--	u32 (*min_tso_segs)(struct sock *sk);
-+	/* pick target number of segments per TSO/GSO skb (optional): */
-+	u32 (*tso_segs)(struct sock *sk, unsigned int mss_now);
-+
-+	/* react to a specific lost skb (optional) */
-+	void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
- 
- 	/* call when packets are delivered to update cwnd and pacing rate,
- 	 * after all the ca_state processing. (optional)
-@@ -1225,6 +1259,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer)
- }
- #endif
- 
-+static inline bool tcp_ca_wants_ce_events(const struct sock *sk)
-+{
-+	const struct inet_connection_sock *icsk = inet_csk(sk);
-+
-+	return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN |
-+					   TCP_CONG_WANTS_CE_EVENTS);
-+}
-+
- static inline bool tcp_ca_needs_ecn(const struct sock *sk)
- {
- 	const struct inet_connection_sock *icsk = inet_csk(sk);
-@@ -1244,6 +1286,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
- void tcp_set_ca_state(struct sock *sk, const u8 ca_state);
- 
- /* From tcp_rate.c */
-+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb);
- void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
- void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
- 			    struct rate_sample *rs);
-@@ -1256,6 +1299,21 @@ static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
- 	return t1 > t2 || (t1 == t2 && after(seq1, seq2));
- }
- 
-+/* If a retransmit failed due to local qdisc congestion or other local issues,
-+ * then we may have called tcp_set_skb_tso_segs() to increase the number of
-+ * segments in the skb without increasing the tx.in_flight. In all other cases,
-+ * the tx.in_flight should be at least as big as the pcount of the sk_buff.  We
-+ * do not have the state to know whether a retransmit failed due to local qdisc
-+ * congestion or other local issues, so to avoid spurious warnings we consider
-+ * that any skb marked lost may have suffered that fate.
-+ */
-+static inline bool tcp_skb_tx_in_flight_is_suspicious(u32 skb_pcount,
-+						      u32 skb_sacked_flags,
-+						      u32 tx_in_flight)
-+{
-+	return (skb_pcount > tx_in_flight) && !(skb_sacked_flags & TCPCB_LOST);
-+}
-+
- /* These functions determine how the current flow behaves in respect of SACK
-  * handling. SACK is negotiated with the peer, and therefore it can vary
-  * between different flows.
-@@ -2418,7 +2476,7 @@ struct tcp_plb_state {
- 	u8	consec_cong_rounds:5, /* consecutive congested rounds */
- 		unused:3;
- 	u32	pause_until; /* jiffies32 when PLB can resume rerouting */
--};
-+} __attribute__ ((__packed__));
- 
- static inline void tcp_plb_init(const struct sock *sk,
- 				struct tcp_plb_state *plb)
-diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
-index 50655de04c9b..82f8bd8f0d16 100644
---- a/include/uapi/linux/inet_diag.h
-+++ b/include/uapi/linux/inet_diag.h
-@@ -229,6 +229,29 @@ struct tcp_bbr_info {
- 	__u32	bbr_min_rtt;		/* min-filtered RTT in uSec */
- 	__u32	bbr_pacing_gain;	/* pacing gain shifted left 8 bits */
- 	__u32	bbr_cwnd_gain;		/* cwnd gain shifted left 8 bits */
-+	__u32	bbr_bw_hi_lsb;		/* lower 32 bits of bw_hi */
-+	__u32	bbr_bw_hi_msb;		/* upper 32 bits of bw_hi */
-+	__u32	bbr_bw_lo_lsb;		/* lower 32 bits of bw_lo */
-+	__u32	bbr_bw_lo_msb;		/* upper 32 bits of bw_lo */
-+	__u8	bbr_mode;		/* current bbr_mode in state machine */
-+	__u8	bbr_phase;		/* current state machine phase */
-+	__u8	unused1;		/* alignment padding; not used yet */
-+	__u8	bbr_version;		/* BBR algorithm version */
-+	__u32	bbr_inflight_lo;	/* lower short-term data volume bound */
-+	__u32	bbr_inflight_hi;	/* higher long-term data volume bound */
-+	__u32	bbr_extra_acked;	/* max excess packets ACKed in epoch */
-+};
-+
-+/* TCP BBR congestion control bbr_phase as reported in netlink/ss stats. */
-+enum tcp_bbr_phase {
-+	BBR_PHASE_INVALID		= 0,
-+	BBR_PHASE_STARTUP		= 1,
-+	BBR_PHASE_DRAIN			= 2,
-+	BBR_PHASE_PROBE_RTT		= 3,
-+	BBR_PHASE_PROBE_BW_UP		= 4,
-+	BBR_PHASE_PROBE_BW_DOWN		= 5,
-+	BBR_PHASE_PROBE_BW_CRUISE	= 6,
-+	BBR_PHASE_PROBE_BW_REFILL	= 7,
- };
- 
- union tcp_cc_info {
-diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
-index 3b687d20c9ed..a7c30c243b54 100644
---- a/include/uapi/linux/rtnetlink.h
-+++ b/include/uapi/linux/rtnetlink.h
-@@ -507,12 +507,14 @@ enum {
- #define RTAX_FEATURE_TIMESTAMP		(1 << 2) /* unused */
- #define RTAX_FEATURE_ALLFRAG		(1 << 3) /* unused */
- #define RTAX_FEATURE_TCP_USEC_TS	(1 << 4)
-+#define RTAX_FEATURE_ECN_LOW		(1 << 5)
- 
- #define RTAX_FEATURE_MASK	(RTAX_FEATURE_ECN |		\
- 				 RTAX_FEATURE_SACK |		\
- 				 RTAX_FEATURE_TIMESTAMP |	\
- 				 RTAX_FEATURE_ALLFRAG |		\
--				 RTAX_FEATURE_TCP_USEC_TS)
-+				 RTAX_FEATURE_TCP_USEC_TS |	\
-+				 RTAX_FEATURE_ECN_LOW)
- 
- struct rta_session {
- 	__u8	proto;
-diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
-index dbf896f3146c..4702cd2f1ffc 100644
---- a/include/uapi/linux/tcp.h
-+++ b/include/uapi/linux/tcp.h
-@@ -178,6 +178,7 @@ enum tcp_fastopen_client_fail {
- #define TCPI_OPT_ECN_SEEN	16 /* we received at least one packet with ECT */
- #define TCPI_OPT_SYN_DATA	32 /* SYN-ACK acked data in SYN sent or rcvd */
- #define TCPI_OPT_USEC_TS	64 /* usec timestamps */
-+#define TCPI_OPT_ECN_LOW	128 /* Low-latency ECN configured at init */
- 
- /*
-  * Sender's congestion state indicating normal or abnormal situations
-diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
-index 8e94ed7c56a0..50dc9970cad2 100644
---- a/net/ipv4/Kconfig
-+++ b/net/ipv4/Kconfig
-@@ -668,15 +668,18 @@ config TCP_CONG_BBR
- 	default n
- 	help
- 
--	  BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to
--	  maximize network utilization and minimize queues. It builds an explicit
--	  model of the bottleneck delivery rate and path round-trip propagation
--	  delay. It tolerates packet loss and delay unrelated to congestion. It
--	  can operate over LAN, WAN, cellular, wifi, or cable modem links. It can
--	  coexist with flows that use loss-based congestion control, and can
--	  operate with shallow buffers, deep buffers, bufferbloat, policers, or
--	  AQM schemes that do not provide a delay signal. It requires the fq
--	  ("Fair Queue") pacing packet scheduler.
-+	  BBR (Bottleneck Bandwidth and RTT) TCP congestion control is a
-+	  model-based congestion control algorithm that aims to maximize
-+	  network utilization, keep queues and retransmit rates low, and to be
-+	  able to coexist with Reno/CUBIC in common scenarios. It builds an
-+	  explicit model of the network path.  It tolerates a targeted degree
-+	  of random packet loss and delay. It can operate over LAN, WAN,
-+	  cellular, wifi, or cable modem links, and can use shallow-threshold
-+	  ECN signals. It can coexist to some degree with flows that use
-+	  loss-based congestion control, and can operate with shallow buffers,
-+	  deep buffers, bufferbloat, policers, or AQM schemes that do not
-+	  provide a delay signal. It requires pacing, using either TCP internal
-+	  pacing or the fq ("Fair Queue") pacing packet scheduler.
- 
- choice
- 	prompt "Default TCP congestion control"
-diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
-index 18227757ec0c..f180befc28bd 100644
---- a/net/ipv4/bpf_tcp_ca.c
-+++ b/net/ipv4/bpf_tcp_ca.c
-@@ -305,11 +305,15 @@ static void bpf_tcp_ca_pkts_acked(struct sock *sk, const struct ack_sample *samp
- {
- }
- 
--static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk)
-+static u32 bpf_tcp_ca_tso_segs(struct sock *sk, unsigned int mss_now)
- {
- 	return 0;
- }
- 
-+static void bpf_tcp_ca_skb_marked_lost(struct sock *sk, const struct sk_buff *skb)
-+{
-+}
-+
- static void bpf_tcp_ca_cong_control(struct sock *sk, u32 ack, int flag,
- 				    const struct rate_sample *rs)
- {
-@@ -340,7 +344,8 @@ static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = {
- 	.cwnd_event = bpf_tcp_ca_cwnd_event,
- 	.in_ack_event = bpf_tcp_ca_in_ack_event,
- 	.pkts_acked = bpf_tcp_ca_pkts_acked,
--	.min_tso_segs = bpf_tcp_ca_min_tso_segs,
-+	.tso_segs = bpf_tcp_ca_tso_segs,
-+	.skb_marked_lost = bpf_tcp_ca_skb_marked_lost,
- 	.cong_control = bpf_tcp_ca_cong_control,
- 	.undo_cwnd = bpf_tcp_ca_undo_cwnd,
- 	.sndbuf_expand = bpf_tcp_ca_sndbuf_expand,
-diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
-index e6790ea74877..b63e27eba536 100644
---- a/net/ipv4/tcp.c
-+++ b/net/ipv4/tcp.c
-@@ -3120,6 +3120,7 @@ int tcp_disconnect(struct sock *sk, int flags)
- 	tp->rx_opt.dsack = 0;
- 	tp->rx_opt.num_sacks = 0;
- 	tp->rcv_ooopack = 0;
-+	tp->fast_ack_mode = 0;
- 
- 
- 	/* Clean up fastopen related fields */
-@@ -3846,6 +3847,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
- 		info->tcpi_options |= TCPI_OPT_ECN;
- 	if (tp->ecn_flags & TCP_ECN_SEEN)
- 		info->tcpi_options |= TCPI_OPT_ECN_SEEN;
-+	if (tp->ecn_flags & TCP_ECN_LOW)
-+		info->tcpi_options |= TCPI_OPT_ECN_LOW;
- 	if (tp->syn_data_acked)
- 		info->tcpi_options |= TCPI_OPT_SYN_DATA;
- 	if (tp->tcp_usec_ts)
-diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
-index 760941e55153..a180fa648d5e 100644
---- a/net/ipv4/tcp_bbr.c
-+++ b/net/ipv4/tcp_bbr.c
-@@ -1,18 +1,19 @@
--/* Bottleneck Bandwidth and RTT (BBR) congestion control
-+/* BBR (Bottleneck Bandwidth and RTT) congestion control
-  *
-- * BBR congestion control computes the sending rate based on the delivery
-- * rate (throughput) estimated from ACKs. In a nutshell:
-+ * BBR is a model-based congestion control algorithm that aims for low queues,
-+ * low loss, and (bounded) Reno/CUBIC coexistence. To maintain a model of the
-+ * network path, it uses measurements of bandwidth and RTT, as well as (if they
-+ * occur) packet loss and/or shallow-threshold ECN signals. Note that although
-+ * it can use ECN or loss signals explicitly, it does not require either; it
-+ * can bound its in-flight data based on its estimate of the BDP.
-  *
-- *   On each ACK, update our model of the network path:
-- *      bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips)
-- *      min_rtt = windowed_min(rtt, 10 seconds)
-- *   pacing_rate = pacing_gain * bottleneck_bandwidth
-- *   cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4)
-- *
-- * The core algorithm does not react directly to packet losses or delays,
-- * although BBR may adjust the size of next send per ACK when loss is
-- * observed, or adjust the sending rate if it estimates there is a
-- * traffic policer, in order to keep the drop rate reasonable.
-+ * The model has both higher and lower bounds for the operating range:
-+ *   lo: bw_lo, inflight_lo: conservative short-term lower bound
-+ *   hi: bw_hi, inflight_hi: robust long-term upper bound
-+ * The bandwidth-probing time scale is (a) extended dynamically based on
-+ * estimated BDP to improve coexistence with Reno/CUBIC; (b) bounded by
-+ * an interactive wall-clock time-scale to be more scalable and responsive
-+ * than Reno and CUBIC.
-  *
-  * Here is a state transition diagram for BBR:
-  *
-@@ -65,6 +66,13 @@
- #include <linux/random.h>
- #include <linux/win_minmax.h>
- 
-+#include <trace/events/tcp.h>
-+#include "tcp_dctcp.h"
-+
-+#define BBR_VERSION		3
-+
-+#define bbr_param(sk,name)	(bbr_ ## name)
-+
- /* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth
-  * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps.
-  * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32.
-@@ -85,36 +93,41 @@ enum bbr_mode {
- 	BBR_PROBE_RTT,	/* cut inflight to min to probe min_rtt */
- };
- 
-+/* How does the incoming ACK stream relate to our bandwidth probing? */
-+enum bbr_ack_phase {
-+	BBR_ACKS_INIT,		  /* not probing; not getting probe feedback */
-+	BBR_ACKS_REFILLING,	  /* sending at est. bw to fill pipe */
-+	BBR_ACKS_PROBE_STARTING,  /* inflight rising to probe bw */
-+	BBR_ACKS_PROBE_FEEDBACK,  /* getting feedback from bw probing */
-+	BBR_ACKS_PROBE_STOPPING,  /* stopped probing; still getting feedback */
-+};
-+
- /* BBR congestion control block */
- struct bbr {
- 	u32	min_rtt_us;	        /* min RTT in min_rtt_win_sec window */
- 	u32	min_rtt_stamp;	        /* timestamp of min_rtt_us */
- 	u32	probe_rtt_done_stamp;   /* end time for BBR_PROBE_RTT mode */
--	struct minmax bw;	/* Max recent delivery rate in pkts/uS << 24 */
--	u32	rtt_cnt;	    /* count of packet-timed rounds elapsed */
-+	u32	probe_rtt_min_us;	/* min RTT in probe_rtt_win_ms win */
-+	u32	probe_rtt_min_stamp;	/* timestamp of probe_rtt_min_us*/
- 	u32     next_rtt_delivered; /* scb->tx.delivered at end of round */
- 	u64	cycle_mstamp;	     /* time of this cycle phase start */
--	u32     mode:3,		     /* current bbr_mode in state machine */
-+	u32     mode:2,		     /* current bbr_mode in state machine */
- 		prev_ca_state:3,     /* CA state on previous ACK */
--		packet_conservation:1,  /* use packet conservation? */
- 		round_start:1,	     /* start of packet-timed tx->ack round? */
-+		ce_state:1,          /* If most recent data has CE bit set */
-+		bw_probe_up_rounds:5,   /* cwnd-limited rounds in PROBE_UP */
-+		try_fast_path:1,	/* can we take fast path? */
- 		idle_restart:1,	     /* restarting after idle? */
- 		probe_rtt_round_done:1,  /* a BBR_PROBE_RTT round at 4 pkts? */
--		unused:13,
--		lt_is_sampling:1,    /* taking long-term ("LT") samples now? */
--		lt_rtt_cnt:7,	     /* round trips in long-term interval */
--		lt_use_bw:1;	     /* use lt_bw as our bw estimate? */
--	u32	lt_bw;		     /* LT est delivery rate in pkts/uS << 24 */
--	u32	lt_last_delivered;   /* LT intvl start: tp->delivered */
--	u32	lt_last_stamp;	     /* LT intvl start: tp->delivered_mstamp */
--	u32	lt_last_lost;	     /* LT intvl start: tp->lost */
-+		init_cwnd:7,         /* initial cwnd */
-+		unused_1:10;
- 	u32	pacing_gain:10,	/* current gain for setting pacing rate */
- 		cwnd_gain:10,	/* current gain for setting cwnd */
- 		full_bw_reached:1,   /* reached full bw in Startup? */
- 		full_bw_cnt:2,	/* number of rounds without large bw gains */
--		cycle_idx:3,	/* current index in pacing_gain cycle array */
-+		cycle_idx:2,	/* current index in pacing_gain cycle array */
- 		has_seen_rtt:1, /* have we seen an RTT sample yet? */
--		unused_b:5;
-+		unused_2:6;
- 	u32	prior_cwnd;	/* prior cwnd upon entering loss recovery */
- 	u32	full_bw;	/* recent bw, to estimate if pipe is full */
- 
-@@ -124,19 +137,67 @@ struct bbr {
- 	u32	ack_epoch_acked:20,	/* packets (S)ACKed in sampling epoch */
- 		extra_acked_win_rtts:5,	/* age of extra_acked, in round trips */
- 		extra_acked_win_idx:1,	/* current index in extra_acked array */
--		unused_c:6;
-+	/* BBR v3 state: */
-+		full_bw_now:1,		/* recently reached full bw plateau? */
-+		startup_ecn_rounds:2,	/* consecutive hi ECN STARTUP rounds */
-+		loss_in_cycle:1,	/* packet loss in this cycle? */
-+		ecn_in_cycle:1,		/* ECN in this cycle? */
-+		unused_3:1;
-+	u32	loss_round_delivered; /* scb->tx.delivered ending loss round */
-+	u32	undo_bw_lo;	     /* bw_lo before latest losses */
-+	u32	undo_inflight_lo;    /* inflight_lo before latest losses */
-+	u32	undo_inflight_hi;    /* inflight_hi before latest losses */
-+	u32	bw_latest;	 /* max delivered bw in last round trip */
-+	u32	bw_lo;		 /* lower bound on sending bandwidth */
-+	u32	bw_hi[2];	 /* max recent measured bw sample */
-+	u32	inflight_latest; /* max delivered data in last round trip */
-+	u32	inflight_lo;	 /* lower bound of inflight data range */
-+	u32	inflight_hi;	 /* upper bound of inflight data range */
-+	u32	bw_probe_up_cnt; /* packets delivered per inflight_hi incr */
-+	u32	bw_probe_up_acks;  /* packets (S)ACKed since inflight_hi incr */
-+	u32	probe_wait_us;	 /* PROBE_DOWN until next clock-driven probe */
-+	u32	prior_rcv_nxt;	/* tp->rcv_nxt when CE state last changed */
-+	u32	ecn_eligible:1,	/* sender can use ECN (RTT, handshake)? */
-+		ecn_alpha:9,	/* EWMA delivered_ce/delivered; 0..256 */
-+		bw_probe_samples:1,    /* rate samples reflect bw probing? */
-+		prev_probe_too_high:1, /* did last PROBE_UP go too high? */
-+		stopped_risky_probe:1, /* last PROBE_UP stopped due to risk? */
-+		rounds_since_probe:8,  /* packet-timed rounds since probed bw */
-+		loss_round_start:1,    /* loss_round_delivered round trip? */
-+		loss_in_round:1,       /* loss marked in this round trip? */
-+		ecn_in_round:1,	       /* ECN marked in this round trip? */
-+		ack_phase:3,	       /* bbr_ack_phase: meaning of ACKs */
-+		loss_events_in_round:4,/* losses in STARTUP round */
-+		initialized:1;	       /* has bbr_init() been called? */
-+	u32	alpha_last_delivered;	 /* tp->delivered    at alpha update */
-+	u32	alpha_last_delivered_ce; /* tp->delivered_ce at alpha update */
-+
-+	u8	unused_4;		/* to preserve alignment */
-+	struct tcp_plb_state plb;
- };
- 
--#define CYCLE_LEN	8	/* number of phases in a pacing gain cycle */
-+struct bbr_context {
-+	u32 sample_bw;
-+};
- 
--/* Window length of bw filter (in rounds): */
--static const int bbr_bw_rtts = CYCLE_LEN + 2;
- /* Window length of min_rtt filter (in sec): */
- static const u32 bbr_min_rtt_win_sec = 10;
- /* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */
- static const u32 bbr_probe_rtt_mode_ms = 200;
--/* Skip TSO below the following bandwidth (bits/sec): */
--static const int bbr_min_tso_rate = 1200000;
-+/* Window length of probe_rtt_min_us filter (in ms), and consequently the
-+ * typical interval between PROBE_RTT mode entries. The default is 5000ms.
-+ * Note that bbr_probe_rtt_win_ms must be <= bbr_min_rtt_win_sec * MSEC_PER_SEC
-+ */
-+static const u32 bbr_probe_rtt_win_ms = 5000;
-+/* Proportion of cwnd to estimated BDP in PROBE_RTT, in units of BBR_UNIT: */
-+static const u32 bbr_probe_rtt_cwnd_gain = BBR_UNIT * 1 / 2;
-+
-+/* Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting
-+ * in bigger TSO bursts. We cut the RTT-based allowance in half
-+ * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance
-+ * is below 1500 bytes after 6 * ~500 usec = 3ms.
-+ */
-+static const u32 bbr_tso_rtt_shift = 9;
- 
- /* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck.
-  * In order to help drive the network toward lower queues and low latency while
-@@ -146,13 +207,15 @@ static const int bbr_min_tso_rate = 1200000;
-  */
- static const int bbr_pacing_margin_percent = 1;
- 
--/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
-+/* We use a startup_pacing_gain of 4*ln(2) because it's the smallest value
-  * that will allow a smoothly increasing pacing rate that will double each RTT
-  * and send the same number of packets per RTT that an un-paced, slow-starting
-  * Reno or CUBIC flow would:
-  */
--static const int bbr_high_gain  = BBR_UNIT * 2885 / 1000 + 1;
--/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain
-+static const int bbr_startup_pacing_gain = BBR_UNIT * 277 / 100 + 1;
-+/* The gain for deriving startup cwnd: */
-+static const int bbr_startup_cwnd_gain = BBR_UNIT * 2;
-+/* The pacing gain in BBR_DRAIN is calculated to typically drain
-  * the queue created in BBR_STARTUP in a single round:
-  */
- static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885;
-@@ -160,13 +223,17 @@ static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885;
- static const int bbr_cwnd_gain  = BBR_UNIT * 2;
- /* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */
- static const int bbr_pacing_gain[] = {
--	BBR_UNIT * 5 / 4,	/* probe for more available bw */
--	BBR_UNIT * 3 / 4,	/* drain queue and/or yield bw to other flows */
--	BBR_UNIT, BBR_UNIT, BBR_UNIT,	/* cruise at 1.0*bw to utilize pipe, */
--	BBR_UNIT, BBR_UNIT, BBR_UNIT	/* without creating excess queue... */
-+	BBR_UNIT * 5 / 4,	/* UP: probe for more available bw */
-+	BBR_UNIT * 91 / 100,	/* DOWN: drain queue and/or yield bw */
-+	BBR_UNIT,		/* CRUISE: try to use pipe w/ some headroom */
-+	BBR_UNIT,		/* REFILL: refill pipe to estimated 100% */
-+};
-+enum bbr_pacing_gain_phase {
-+	BBR_BW_PROBE_UP		= 0,  /* push up inflight to probe for bw/vol */
-+	BBR_BW_PROBE_DOWN	= 1,  /* drain excess inflight from the queue */
-+	BBR_BW_PROBE_CRUISE	= 2,  /* use pipe, w/ headroom in queue/pipe */
-+	BBR_BW_PROBE_REFILL	= 3,  /* v2: refill the pipe again to 100% */
- };
--/* Randomize the starting gain cycling phase over N phases: */
--static const u32 bbr_cycle_rand = 7;
- 
- /* Try to keep at least this many packets in flight, if things go smoothly. For
-  * smooth functioning, a sliding window protocol ACKing every other packet
-@@ -174,24 +241,12 @@ static const u32 bbr_cycle_rand = 7;
-  */
- static const u32 bbr_cwnd_min_target = 4;
- 
--/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */
-+/* To estimate if BBR_STARTUP or BBR_BW_PROBE_UP has filled pipe... */
- /* If bw has increased significantly (1.25x), there may be more bw available: */
- static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4;
- /* But after 3 rounds w/o significant bw growth, estimate pipe is full: */
- static const u32 bbr_full_bw_cnt = 3;
- 
--/* "long-term" ("LT") bandwidth estimator parameters... */
--/* The minimum number of rounds in an LT bw sampling interval: */
--static const u32 bbr_lt_intvl_min_rtts = 4;
--/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */
--static const u32 bbr_lt_loss_thresh = 50;
--/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */
--static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8;
--/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */
--static const u32 bbr_lt_bw_diff = 4000 / 8;
--/* If we estimate we're policed, use lt_bw for this many round trips: */
--static const u32 bbr_lt_bw_max_rtts = 48;
--
- /* Gain factor for adding extra_acked to target cwnd: */
- static const int bbr_extra_acked_gain = BBR_UNIT;
- /* Window length of extra_acked window. */
-@@ -201,8 +256,121 @@ static const u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20;
- /* Time period for clamping cwnd increment due to ack aggregation */
- static const u32 bbr_extra_acked_max_us = 100 * 1000;
- 
-+/* Flags to control BBR ECN-related behavior... */
-+
-+/* Ensure ACKs only ACK packets with consistent ECN CE status? */
-+static const bool bbr_precise_ece_ack = true;
-+
-+/* Max RTT (in usec) at which to use sender-side ECN logic.
-+ * Disabled when 0 (ECN allowed at any RTT).
-+ */
-+static const u32 bbr_ecn_max_rtt_us = 5000;
-+
-+/* On losses, scale down inflight and pacing rate by beta scaled by BBR_SCALE.
-+ * No loss response when 0.
-+ */
-+static const u32 bbr_beta = BBR_UNIT * 30 / 100;
-+
-+/* Gain factor for ECN mark ratio samples, scaled by BBR_SCALE (1/16 = 6.25%) */
-+static const u32 bbr_ecn_alpha_gain = BBR_UNIT * 1 / 16;
-+
-+/* The initial value for ecn_alpha; 1.0 allows a flow to respond quickly
-+ * to congestion if the bottleneck is congested when the flow starts up.
-+ */
-+static const u32 bbr_ecn_alpha_init = BBR_UNIT;
-+
-+/* On ECN, cut inflight_lo to (1 - ecn_factor * ecn_alpha) scaled by BBR_SCALE.
-+ * No ECN based bounding when 0.
-+ */
-+static const u32 bbr_ecn_factor = BBR_UNIT * 1 / 3;	 /* 1/3 = 33% */
-+
-+/* Estimate bw probing has gone too far if CE ratio exceeds this threshold.
-+ * Scaled by BBR_SCALE. Disabled when 0.
-+ */
-+static const u32 bbr_ecn_thresh = BBR_UNIT * 1 / 2;  /* 1/2 = 50% */
-+
-+/* If non-zero, if in a cycle with no losses but some ECN marks, after ECN
-+ * clears then make the first round's increment to inflight_hi the following
-+ * fraction of inflight_hi.
-+ */
-+static const u32 bbr_ecn_reprobe_gain = BBR_UNIT * 1 / 2;
-+
-+/* Estimate bw probing has gone too far if loss rate exceeds this level. */
-+static const u32 bbr_loss_thresh = BBR_UNIT * 2 / 100;  /* 2% loss */
-+
-+/* Slow down for a packet loss recovered by TLP? */
-+static const bool bbr_loss_probe_recovery = true;
-+
-+/* Exit STARTUP if number of loss marking events in a Recovery round is >= N,
-+ * and loss rate is higher than bbr_loss_thresh.
-+ * Disabled if 0.
-+ */
-+static const u32 bbr_full_loss_cnt = 6;
-+
-+/* Exit STARTUP if number of round trips with ECN mark rate above ecn_thresh
-+ * meets this count.
-+ */
-+static const u32 bbr_full_ecn_cnt = 2;
-+
-+/* Fraction of unutilized headroom to try to leave in path upon high loss. */
-+static const u32 bbr_inflight_headroom = BBR_UNIT * 15 / 100;
-+
-+/* How much do we increase cwnd_gain when probing for bandwidth in
-+ * BBR_BW_PROBE_UP? This specifies the increment in units of
-+ * BBR_UNIT/4. The default is 1, meaning 0.25.
-+ * The min value is 0 (meaning 0.0); max is 3 (meaning 0.75).
-+ */
-+static const u32 bbr_bw_probe_cwnd_gain = 1;
-+
-+/* Max number of packet-timed rounds to wait before probing for bandwidth.  If
-+ * we want to tolerate 1% random loss per round, and not have this cut our
-+ * inflight too much, we must probe for bw periodically on roughly this scale.
-+ * If low, limits Reno/CUBIC coexistence; if high, limits loss tolerance.
-+ * We aim to be fair with Reno/CUBIC up to a BDP of at least:
-+ *  BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets
-+ */
-+static const u32 bbr_bw_probe_max_rounds = 63;
-+
-+/* Max amount of randomness to inject in round counting for Reno-coexistence.
-+ */
-+static const u32 bbr_bw_probe_rand_rounds = 2;
-+
-+/* Use BBR-native probe time scale starting at this many usec.
-+ * We aim to be fair with Reno/CUBIC up to an inter-loss time epoch of at least:
-+ *  BDP*RTT = 25Mbps * .030sec /(1514bytes) * 0.030sec = 1.9 secs
-+ */
-+static const u32 bbr_bw_probe_base_us = 2 * USEC_PER_SEC;  /* 2 secs */
-+
-+/* Use BBR-native probes spread over this many usec: */
-+static const u32 bbr_bw_probe_rand_us = 1 * USEC_PER_SEC;  /* 1 secs */
-+
-+/* Use fast path if app-limited, no loss/ECN, and target cwnd was reached? */
-+static const bool bbr_fast_path = true;
-+
-+/* Use fast ack mode? */
-+static const bool bbr_fast_ack_mode = true;
-+
-+static u32 bbr_max_bw(const struct sock *sk);
-+static u32 bbr_bw(const struct sock *sk);
-+static void bbr_exit_probe_rtt(struct sock *sk);
-+static void bbr_reset_congestion_signals(struct sock *sk);
-+static void bbr_run_loss_probe_recovery(struct sock *sk);
-+
- static void bbr_check_probe_rtt_done(struct sock *sk);
- 
-+/* This connection can use ECN if both endpoints have signaled ECN support in
-+ * the handshake and the per-route settings indicated this is a
-+ * shallow-threshold ECN environment, meaning both:
-+ *  (a) ECN CE marks indicate low-latency/shallow-threshold congestion, and
-+ *  (b) TCP endpoints provide precise ACKs that only ACK data segments
-+ *      with consistent ECN CE status
-+ */
-+static bool bbr_can_use_ecn(const struct sock *sk)
-+{
-+	return (tcp_sk(sk)->ecn_flags & TCP_ECN_OK) &&
-+	       (tcp_sk(sk)->ecn_flags & TCP_ECN_LOW);
-+}
-+
- /* Do we estimate that STARTUP filled the pipe? */
- static bool bbr_full_bw_reached(const struct sock *sk)
- {
-@@ -214,17 +382,17 @@ static bool bbr_full_bw_reached(const struct sock *sk)
- /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
- static u32 bbr_max_bw(const struct sock *sk)
- {
--	struct bbr *bbr = inet_csk_ca(sk);
-+	const struct bbr *bbr = inet_csk_ca(sk);
- 
--	return minmax_get(&bbr->bw);
-+	return max(bbr->bw_hi[0], bbr->bw_hi[1]);
- }
- 
- /* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */
- static u32 bbr_bw(const struct sock *sk)
- {
--	struct bbr *bbr = inet_csk_ca(sk);
-+	const struct bbr *bbr = inet_csk_ca(sk);
- 
--	return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk);
-+	return min(bbr_max_bw(sk), bbr->bw_lo);
- }
- 
- /* Return maximum extra acked in past k-2k round trips,
-@@ -241,15 +409,23 @@ static u16 bbr_extra_acked(const struct sock *sk)
-  * The order here is chosen carefully to avoid overflow of u64. This should
-  * work for input rates of up to 2.9Tbit/sec and gain of 2.89x.
-  */
--static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
-+static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain,
-+				  int margin)
- {
- 	unsigned int mss = tcp_sk(sk)->mss_cache;
- 
- 	rate *= mss;
- 	rate *= gain;
- 	rate >>= BBR_SCALE;
--	rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_margin_percent);
--	return rate >> BW_SCALE;
-+	rate *= USEC_PER_SEC / 100 * (100 - margin);
-+	rate >>= BW_SCALE;
-+	rate = max(rate, 1ULL);
-+	return rate;
-+}
-+
-+static u64 bbr_bw_bytes_per_sec(struct sock *sk, u64 rate)
-+{
-+	return bbr_rate_bytes_per_sec(sk, rate, BBR_UNIT, 0);
- }
- 
- /* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
-@@ -257,12 +433,13 @@ static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
- {
- 	u64 rate = bw;
- 
--	rate = bbr_rate_bytes_per_sec(sk, rate, gain);
-+	rate = bbr_rate_bytes_per_sec(sk, rate, gain,
-+				      bbr_pacing_margin_percent);
- 	rate = min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate));
- 	return rate;
- }
- 
--/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
-+/* Initialize pacing rate to: startup_pacing_gain * init_cwnd / RTT. */
- static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
- {
- 	struct tcp_sock *tp = tcp_sk(sk);
-@@ -279,7 +456,7 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
- 	bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT;
- 	do_div(bw, rtt_us);
- 	WRITE_ONCE(sk->sk_pacing_rate,
--		   bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain));
-+		   bbr_bw_to_pacing_rate(sk, bw, bbr_param(sk, startup_pacing_gain)));
- }
- 
- /* Pace using current bw estimate and a gain factor. */
-@@ -295,26 +472,48 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
- 		WRITE_ONCE(sk->sk_pacing_rate, rate);
- }
- 
--/* override sysctl_tcp_min_tso_segs */
--__bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk)
-+/* Return the number of segments BBR would like in a TSO/GSO skb, given a
-+ * particular max gso size as a constraint. TODO: make this simpler and more
-+ * consistent by switching bbr to just call tcp_tso_autosize().
-+ */
-+static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now,
-+				u32 gso_max_size)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 segs, r;
-+	u64 bytes;
-+
-+	/* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */
-+	bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift);
-+
-+	/* Budget a TSO/GSO burst size allowance based on min_rtt. For every
-+	 * K = 2^tso_rtt_shift microseconds of min_rtt, halve the burst.
-+	 * The min_rtt-based burst allowance is: 64 KBytes / 2^(min_rtt/K)
-+	 */
-+	if (bbr_param(sk, tso_rtt_shift)) {
-+		r = bbr->min_rtt_us >> bbr_param(sk, tso_rtt_shift);
-+		if (r < BITS_PER_TYPE(u32))   /* prevent undefined behavior */
-+			bytes += GSO_LEGACY_MAX_SIZE >> r;
-+	}
-+
-+	bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER);
-+	segs = max_t(u32, bytes / mss_now,
-+		     sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
-+	return segs;
-+}
-+
-+/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */
-+__bpf_kfunc static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now)
- {
--	return READ_ONCE(sk->sk_pacing_rate) < (bbr_min_tso_rate >> 3) ? 1 : 2;
-+	return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size);
- }
- 
-+/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */
- static u32 bbr_tso_segs_goal(struct sock *sk)
- {
- 	struct tcp_sock *tp = tcp_sk(sk);
--	u32 segs, bytes;
--
--	/* Sort of tcp_tso_autosize() but ignoring
--	 * driver provided sk_gso_max_size.
--	 */
--	bytes = min_t(unsigned long,
--		      READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift),
--		      GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER);
--	segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
- 
--	return min(segs, 0x7FU);
-+	return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_LEGACY_MAX_SIZE);
- }
- 
- /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
-@@ -334,7 +533,9 @@ __bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
- 	struct tcp_sock *tp = tcp_sk(sk);
- 	struct bbr *bbr = inet_csk_ca(sk);
- 
--	if (event == CA_EVENT_TX_START && tp->app_limited) {
-+	if (event == CA_EVENT_TX_START) {
-+		if (!tp->app_limited)
-+			return;
- 		bbr->idle_restart = 1;
- 		bbr->ack_epoch_mstamp = tp->tcp_mstamp;
- 		bbr->ack_epoch_acked = 0;
-@@ -345,6 +546,16 @@ __bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
- 			bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT);
- 		else if (bbr->mode == BBR_PROBE_RTT)
- 			bbr_check_probe_rtt_done(sk);
-+	} else if ((event == CA_EVENT_ECN_IS_CE ||
-+		    event == CA_EVENT_ECN_NO_CE) &&
-+		   bbr_can_use_ecn(sk) &&
-+		   bbr_param(sk, precise_ece_ack)) {
-+		u32 state = bbr->ce_state;
-+		dctcp_ece_ack_update(sk, event, &bbr->prior_rcv_nxt, &state);
-+		bbr->ce_state = state;
-+	} else if (event == CA_EVENT_TLP_RECOVERY &&
-+		   bbr_param(sk, loss_probe_recovery)) {
-+		bbr_run_loss_probe_recovery(sk);
- 	}
- }
- 
-@@ -367,10 +578,10 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain)
- 	 * default. This should only happen when the connection is not using TCP
- 	 * timestamps and has retransmitted all of the SYN/SYNACK/data packets
- 	 * ACKed so far. In this case, an RTO can cut cwnd to 1, in which
--	 * case we need to slow-start up toward something safe: TCP_INIT_CWND.
-+	 * case we need to slow-start up toward something safe: initial cwnd.
- 	 */
- 	if (unlikely(bbr->min_rtt_us == ~0U))	 /* no valid RTT samples yet? */
--		return TCP_INIT_CWND;  /* be safe: cap at default initial cwnd*/
-+		return bbr->init_cwnd;  /* be safe: cap at initial cwnd */
- 
- 	w = (u64)bw * bbr->min_rtt_us;
- 
-@@ -387,23 +598,23 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain)
-  *   - one skb in sending host Qdisc,
-  *   - one skb in sending host TSO/GSO engine
-  *   - one skb being received by receiver host LRO/GRO/delayed-ACK engine
-- * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because
-- * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets,
-+ * Don't worry, at low rates this won't bloat cwnd because
-+ * in such cases tso_segs_goal is small. The minimum cwnd is 4 packets,
-  * which allows 2 outstanding 2-packet sequences, to try to keep pipe
-  * full even with ACK-every-other-packet delayed ACKs.
-  */
- static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd)
- {
- 	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 tso_segs_goal;
- 
--	/* Allow enough full-sized skbs in flight to utilize end systems. */
--	cwnd += 3 * bbr_tso_segs_goal(sk);
--
--	/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
--	cwnd = (cwnd + 1) & ~1U;
-+	tso_segs_goal = 3 * bbr_tso_segs_goal(sk);
- 
-+	/* Allow enough full-sized skbs in flight to utilize end systems. */
-+	cwnd = max_t(u32, cwnd, tso_segs_goal);
-+	cwnd = max_t(u32, cwnd, bbr_param(sk, cwnd_min_target));
- 	/* Ensure gain cycling gets inflight above BDP even for small BDPs. */
--	if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == 0)
-+	if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP)
- 		cwnd += 2;
- 
- 	return cwnd;
-@@ -458,10 +669,10 @@ static u32 bbr_ack_aggregation_cwnd(struct sock *sk)
- {
- 	u32 max_aggr_cwnd, aggr_cwnd = 0;
- 
--	if (bbr_extra_acked_gain && bbr_full_bw_reached(sk)) {
-+	if (bbr_param(sk, extra_acked_gain)) {
- 		max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us)
- 				/ BW_UNIT;
--		aggr_cwnd = (bbr_extra_acked_gain * bbr_extra_acked(sk))
-+		aggr_cwnd = (bbr_param(sk, extra_acked_gain) * bbr_extra_acked(sk))
- 			     >> BBR_SCALE;
- 		aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd);
- 	}
-@@ -469,66 +680,27 @@ static u32 bbr_ack_aggregation_cwnd(struct sock *sk)
- 	return aggr_cwnd;
- }
- 
--/* An optimization in BBR to reduce losses: On the first round of recovery, we
-- * follow the packet conservation principle: send P packets per P packets acked.
-- * After that, we slow-start and send at most 2*P packets per P packets acked.
-- * After recovery finishes, or upon undo, we restore the cwnd we had when
-- * recovery started (capped by the target cwnd based on estimated BDP).
-- *
-- * TODO(ycheng/ncardwell): implement a rate-based approach.
-- */
--static bool bbr_set_cwnd_to_recover_or_restore(
--	struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd)
-+/* Returns the cwnd for PROBE_RTT mode. */
-+static u32 bbr_probe_rtt_cwnd(struct sock *sk)
- {
--	struct tcp_sock *tp = tcp_sk(sk);
--	struct bbr *bbr = inet_csk_ca(sk);
--	u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
--	u32 cwnd = tcp_snd_cwnd(tp);
--
--	/* An ACK for P pkts should release at most 2*P packets. We do this
--	 * in two steps. First, here we deduct the number of lost packets.
--	 * Then, in bbr_set_cwnd() we slow start up toward the target cwnd.
--	 */
--	if (rs->losses > 0)
--		cwnd = max_t(s32, cwnd - rs->losses, 1);
--
--	if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) {
--		/* Starting 1st round of Recovery, so do packet conservation. */
--		bbr->packet_conservation = 1;
--		bbr->next_rtt_delivered = tp->delivered;  /* start round now */
--		/* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */
--		cwnd = tcp_packets_in_flight(tp) + acked;
--	} else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) {
--		/* Exiting loss recovery; restore cwnd saved before recovery. */
--		cwnd = max(cwnd, bbr->prior_cwnd);
--		bbr->packet_conservation = 0;
--	}
--	bbr->prev_ca_state = state;
--
--	if (bbr->packet_conservation) {
--		*new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked);
--		return true;	/* yes, using packet conservation */
--	}
--	*new_cwnd = cwnd;
--	return false;
-+	return max_t(u32, bbr_param(sk, cwnd_min_target),
-+		     bbr_bdp(sk, bbr_bw(sk), bbr_param(sk, probe_rtt_cwnd_gain)));
- }
- 
- /* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss
-  * has drawn us down below target), or snap down to target if we're above it.
-  */
- static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
--			 u32 acked, u32 bw, int gain)
-+			 u32 acked, u32 bw, int gain, u32 cwnd,
-+			 struct bbr_context *ctx)
- {
- 	struct tcp_sock *tp = tcp_sk(sk);
- 	struct bbr *bbr = inet_csk_ca(sk);
--	u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0;
-+	u32 target_cwnd = 0;
- 
- 	if (!acked)
- 		goto done;  /* no packet fully ACKed; just apply caps */
- 
--	if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd))
--		goto done;
--
- 	target_cwnd = bbr_bdp(sk, bw, gain);
- 
- 	/* Increment the cwnd to account for excess ACKed data that seems
-@@ -537,74 +709,26 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
- 	target_cwnd += bbr_ack_aggregation_cwnd(sk);
- 	target_cwnd = bbr_quantization_budget(sk, target_cwnd);
- 
--	/* If we're below target cwnd, slow start cwnd toward target cwnd. */
--	if (bbr_full_bw_reached(sk))  /* only cut cwnd if we filled the pipe */
--		cwnd = min(cwnd + acked, target_cwnd);
--	else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND)
--		cwnd = cwnd + acked;
--	cwnd = max(cwnd, bbr_cwnd_min_target);
-+	/* Update cwnd and enable fast path if cwnd reaches target_cwnd. */
-+	bbr->try_fast_path = 0;
-+	if (bbr_full_bw_reached(sk)) { /* only cut cwnd if we filled the pipe */
-+		cwnd += acked;
-+		if (cwnd >= target_cwnd) {
-+			cwnd = target_cwnd;
-+			bbr->try_fast_path = 1;
-+		}
-+	} else if (cwnd < target_cwnd || cwnd  < 2 * bbr->init_cwnd) {
-+		cwnd += acked;
-+	} else {
-+		bbr->try_fast_path = 1;
-+	}
- 
-+	cwnd = max_t(u32, cwnd, bbr_param(sk, cwnd_min_target));
- done:
--	tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp));	/* apply global cap */
-+	tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp));  /* global cap */
- 	if (bbr->mode == BBR_PROBE_RTT)  /* drain queue, refresh min_rtt */
--		tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target));
--}
--
--/* End cycle phase if it's time and/or we hit the phase's in-flight target. */
--static bool bbr_is_next_cycle_phase(struct sock *sk,
--				    const struct rate_sample *rs)
--{
--	struct tcp_sock *tp = tcp_sk(sk);
--	struct bbr *bbr = inet_csk_ca(sk);
--	bool is_full_length =
--		tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) >
--		bbr->min_rtt_us;
--	u32 inflight, bw;
--
--	/* The pacing_gain of 1.0 paces at the estimated bw to try to fully
--	 * use the pipe without increasing the queue.
--	 */
--	if (bbr->pacing_gain == BBR_UNIT)
--		return is_full_length;		/* just use wall clock time */
--
--	inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight);
--	bw = bbr_max_bw(sk);
--
--	/* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at
--	 * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is
--	 * small (e.g. on a LAN). We do not persist if packets are lost, since
--	 * a path with small buffers may not hold that much.
--	 */
--	if (bbr->pacing_gain > BBR_UNIT)
--		return is_full_length &&
--			(rs->losses ||  /* perhaps pacing_gain*BDP won't fit */
--			 inflight >= bbr_inflight(sk, bw, bbr->pacing_gain));
--
--	/* A pacing_gain < 1.0 tries to drain extra queue we added if bw
--	 * probing didn't find more bw. If inflight falls to match BDP then we
--	 * estimate queue is drained; persisting would underutilize the pipe.
--	 */
--	return is_full_length ||
--		inflight <= bbr_inflight(sk, bw, BBR_UNIT);
--}
--
--static void bbr_advance_cycle_phase(struct sock *sk)
--{
--	struct tcp_sock *tp = tcp_sk(sk);
--	struct bbr *bbr = inet_csk_ca(sk);
--
--	bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
--	bbr->cycle_mstamp = tp->delivered_mstamp;
--}
--
--/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
--static void bbr_update_cycle_phase(struct sock *sk,
--				   const struct rate_sample *rs)
--{
--	struct bbr *bbr = inet_csk_ca(sk);
--
--	if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs))
--		bbr_advance_cycle_phase(sk);
-+		tcp_snd_cwnd_set(tp, min_t(u32, tcp_snd_cwnd(tp),
-+					   bbr_probe_rtt_cwnd(sk)));
- }
- 
- static void bbr_reset_startup_mode(struct sock *sk)
-@@ -614,191 +738,49 @@ static void bbr_reset_startup_mode(struct sock *sk)
- 	bbr->mode = BBR_STARTUP;
- }
- 
--static void bbr_reset_probe_bw_mode(struct sock *sk)
--{
--	struct bbr *bbr = inet_csk_ca(sk);
--
--	bbr->mode = BBR_PROBE_BW;
--	bbr->cycle_idx = CYCLE_LEN - 1 - get_random_u32_below(bbr_cycle_rand);
--	bbr_advance_cycle_phase(sk);	/* flip to next phase of gain cycle */
--}
--
--static void bbr_reset_mode(struct sock *sk)
--{
--	if (!bbr_full_bw_reached(sk))
--		bbr_reset_startup_mode(sk);
--	else
--		bbr_reset_probe_bw_mode(sk);
--}
--
--/* Start a new long-term sampling interval. */
--static void bbr_reset_lt_bw_sampling_interval(struct sock *sk)
--{
--	struct tcp_sock *tp = tcp_sk(sk);
--	struct bbr *bbr = inet_csk_ca(sk);
--
--	bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC);
--	bbr->lt_last_delivered = tp->delivered;
--	bbr->lt_last_lost = tp->lost;
--	bbr->lt_rtt_cnt = 0;
--}
--
--/* Completely reset long-term bandwidth sampling. */
--static void bbr_reset_lt_bw_sampling(struct sock *sk)
--{
--	struct bbr *bbr = inet_csk_ca(sk);
--
--	bbr->lt_bw = 0;
--	bbr->lt_use_bw = 0;
--	bbr->lt_is_sampling = false;
--	bbr_reset_lt_bw_sampling_interval(sk);
--}
--
--/* Long-term bw sampling interval is done. Estimate whether we're policed. */
--static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw)
--{
--	struct bbr *bbr = inet_csk_ca(sk);
--	u32 diff;
--
--	if (bbr->lt_bw) {  /* do we have bw from a previous interval? */
--		/* Is new bw close to the lt_bw from the previous interval? */
--		diff = abs(bw - bbr->lt_bw);
--		if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) ||
--		    (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <=
--		     bbr_lt_bw_diff)) {
--			/* All criteria are met; estimate we're policed. */
--			bbr->lt_bw = (bw + bbr->lt_bw) >> 1;  /* avg 2 intvls */
--			bbr->lt_use_bw = 1;
--			bbr->pacing_gain = BBR_UNIT;  /* try to avoid drops */
--			bbr->lt_rtt_cnt = 0;
--			return;
--		}
--	}
--	bbr->lt_bw = bw;
--	bbr_reset_lt_bw_sampling_interval(sk);
--}
--
--/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of
-- * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and
-- * explicitly models their policed rate, to reduce unnecessary losses. We
-- * estimate that we're policed if we see 2 consecutive sampling intervals with
-- * consistent throughput and high packet loss. If we think we're being policed,
-- * set lt_bw to the "long-term" average delivery rate from those 2 intervals.
-+/* See if we have reached next round trip. Upon start of the new round,
-+ * returns packets delivered since previous round start plus this ACK.
-  */
--static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
--{
--	struct tcp_sock *tp = tcp_sk(sk);
--	struct bbr *bbr = inet_csk_ca(sk);
--	u32 lost, delivered;
--	u64 bw;
--	u32 t;
--
--	if (bbr->lt_use_bw) {	/* already using long-term rate, lt_bw? */
--		if (bbr->mode == BBR_PROBE_BW && bbr->round_start &&
--		    ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) {
--			bbr_reset_lt_bw_sampling(sk);    /* stop using lt_bw */
--			bbr_reset_probe_bw_mode(sk);  /* restart gain cycling */
--		}
--		return;
--	}
--
--	/* Wait for the first loss before sampling, to let the policer exhaust
--	 * its tokens and estimate the steady-state rate allowed by the policer.
--	 * Starting samples earlier includes bursts that over-estimate the bw.
--	 */
--	if (!bbr->lt_is_sampling) {
--		if (!rs->losses)
--			return;
--		bbr_reset_lt_bw_sampling_interval(sk);
--		bbr->lt_is_sampling = true;
--	}
--
--	/* To avoid underestimates, reset sampling if we run out of data. */
--	if (rs->is_app_limited) {
--		bbr_reset_lt_bw_sampling(sk);
--		return;
--	}
--
--	if (bbr->round_start)
--		bbr->lt_rtt_cnt++;	/* count round trips in this interval */
--	if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts)
--		return;		/* sampling interval needs to be longer */
--	if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) {
--		bbr_reset_lt_bw_sampling(sk);  /* interval is too long */
--		return;
--	}
--
--	/* End sampling interval when a packet is lost, so we estimate the
--	 * policer tokens were exhausted. Stopping the sampling before the
--	 * tokens are exhausted under-estimates the policed rate.
--	 */
--	if (!rs->losses)
--		return;
--
--	/* Calculate packets lost and delivered in sampling interval. */
--	lost = tp->lost - bbr->lt_last_lost;
--	delivered = tp->delivered - bbr->lt_last_delivered;
--	/* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */
--	if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered)
--		return;
--
--	/* Find average delivery rate in this sampling interval. */
--	t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp;
--	if ((s32)t < 1)
--		return;		/* interval is less than one ms, so wait */
--	/* Check if can multiply without overflow */
--	if (t >= ~0U / USEC_PER_MSEC) {
--		bbr_reset_lt_bw_sampling(sk);  /* interval too long; reset */
--		return;
--	}
--	t *= USEC_PER_MSEC;
--	bw = (u64)delivered * BW_UNIT;
--	do_div(bw, t);
--	bbr_lt_bw_interval_done(sk, bw);
--}
--
--/* Estimate the bandwidth based on how fast packets are delivered */
--static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs)
-+static u32 bbr_update_round_start(struct sock *sk,
-+		const struct rate_sample *rs, struct bbr_context *ctx)
- {
- 	struct tcp_sock *tp = tcp_sk(sk);
- 	struct bbr *bbr = inet_csk_ca(sk);
--	u64 bw;
-+	u32 round_delivered = 0;
- 
- 	bbr->round_start = 0;
--	if (rs->delivered < 0 || rs->interval_us <= 0)
--		return; /* Not a valid observation */
- 
- 	/* See if we've reached the next RTT */
--	if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) {
-+	if (rs->interval_us > 0 &&
-+	    !before(rs->prior_delivered, bbr->next_rtt_delivered)) {
-+		round_delivered = tp->delivered - bbr->next_rtt_delivered;
- 		bbr->next_rtt_delivered = tp->delivered;
--		bbr->rtt_cnt++;
- 		bbr->round_start = 1;
--		bbr->packet_conservation = 0;
- 	}
-+	return round_delivered;
-+}
- 
--	bbr_lt_bw_sampling(sk, rs);
-+/* Calculate the bandwidth based on how fast packets are delivered */
-+static void bbr_calculate_bw_sample(struct sock *sk,
-+			const struct rate_sample *rs, struct bbr_context *ctx)
-+{
-+	u64 bw = 0;
- 
- 	/* Divide delivered by the interval to find a (lower bound) bottleneck
- 	 * bandwidth sample. Delivered is in packets and interval_us in uS and
- 	 * ratio will be <<1 for most connections. So delivered is first scaled.
-+	 * Round up to allow growth at low rates, even with integer division.
- 	 */
--	bw = div64_long((u64)rs->delivered * BW_UNIT, rs->interval_us);
--
--	/* If this sample is application-limited, it is likely to have a very
--	 * low delivered count that represents application behavior rather than
--	 * the available network rate. Such a sample could drag down estimated
--	 * bw, causing needless slow-down. Thus, to continue to send at the
--	 * last measured network rate, we filter out app-limited samples unless
--	 * they describe the path bw at least as well as our bw model.
--	 *
--	 * So the goal during app-limited phase is to proceed with the best
--	 * network rate no matter how long. We automatically leave this
--	 * phase when app writes faster than the network can deliver :)
--	 */
--	if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) {
--		/* Incorporate new sample into our max bw filter. */
--		minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw);
-+	if (rs->interval_us > 0) {
-+		if (WARN_ONCE(rs->delivered < 0,
-+			      "negative delivered: %d interval_us: %ld\n",
-+			      rs->delivered, rs->interval_us))
-+			return;
-+
-+		bw = DIV_ROUND_UP_ULL((u64)rs->delivered * BW_UNIT, rs->interval_us);
- 	}
-+
-+	ctx->sample_bw = bw;
- }
- 
- /* Estimates the windowed max degree of ack aggregation.
-@@ -812,7 +794,7 @@ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs)
-  *
-  * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms).
-  * Max filter is an approximate sliding window of 5-10 (packet timed) round
-- * trips.
-+ * trips for non-startup phase, and 1-2 round trips for startup.
-  */
- static void bbr_update_ack_aggregation(struct sock *sk,
- 				       const struct rate_sample *rs)
-@@ -820,15 +802,19 @@ static void bbr_update_ack_aggregation(struct sock *sk,
- 	u32 epoch_us, expected_acked, extra_acked;
- 	struct bbr *bbr = inet_csk_ca(sk);
- 	struct tcp_sock *tp = tcp_sk(sk);
-+	u32 extra_acked_win_rtts_thresh = bbr_param(sk, extra_acked_win_rtts);
- 
--	if (!bbr_extra_acked_gain || rs->acked_sacked <= 0 ||
-+	if (!bbr_param(sk, extra_acked_gain) || rs->acked_sacked <= 0 ||
- 	    rs->delivered < 0 || rs->interval_us <= 0)
- 		return;
- 
- 	if (bbr->round_start) {
- 		bbr->extra_acked_win_rtts = min(0x1F,
- 						bbr->extra_acked_win_rtts + 1);
--		if (bbr->extra_acked_win_rtts >= bbr_extra_acked_win_rtts) {
-+		if (!bbr_full_bw_reached(sk))
-+			extra_acked_win_rtts_thresh = 1;
-+		if (bbr->extra_acked_win_rtts >=
-+		    extra_acked_win_rtts_thresh) {
- 			bbr->extra_acked_win_rtts = 0;
- 			bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ?
- 						   0 : 1;
-@@ -862,49 +848,6 @@ static void bbr_update_ack_aggregation(struct sock *sk,
- 		bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked;
- }
- 
--/* Estimate when the pipe is full, using the change in delivery rate: BBR
-- * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by
-- * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited
-- * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the
-- * higher rwin, 3: we get higher delivery rate samples. Or transient
-- * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar
-- * design goal, but uses delay and inter-ACK spacing instead of bandwidth.
-- */
--static void bbr_check_full_bw_reached(struct sock *sk,
--				      const struct rate_sample *rs)
--{
--	struct bbr *bbr = inet_csk_ca(sk);
--	u32 bw_thresh;
--
--	if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited)
--		return;
--
--	bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE;
--	if (bbr_max_bw(sk) >= bw_thresh) {
--		bbr->full_bw = bbr_max_bw(sk);
--		bbr->full_bw_cnt = 0;
--		return;
--	}
--	++bbr->full_bw_cnt;
--	bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt;
--}
--
--/* If pipe is probably full, drain the queue and then enter steady-state. */
--static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
--{
--	struct bbr *bbr = inet_csk_ca(sk);
--
--	if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
--		bbr->mode = BBR_DRAIN;	/* drain queue we created */
--		tcp_sk(sk)->snd_ssthresh =
--				bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
--	}	/* fall through to check if in-flight is already small: */
--	if (bbr->mode == BBR_DRAIN &&
--	    bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <=
--	    bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT))
--		bbr_reset_probe_bw_mode(sk);  /* we estimate queue is drained */
--}
--
- static void bbr_check_probe_rtt_done(struct sock *sk)
- {
- 	struct tcp_sock *tp = tcp_sk(sk);
-@@ -914,9 +857,9 @@ static void bbr_check_probe_rtt_done(struct sock *sk)
- 	      after(tcp_jiffies32, bbr->probe_rtt_done_stamp)))
- 		return;
- 
--	bbr->min_rtt_stamp = tcp_jiffies32;  /* wait a while until PROBE_RTT */
-+	bbr->probe_rtt_min_stamp = tcp_jiffies32; /* schedule next PROBE_RTT */
- 	tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd));
--	bbr_reset_mode(sk);
-+	bbr_exit_probe_rtt(sk);
- }
- 
- /* The goal of PROBE_RTT mode is to have BBR flows cooperatively and
-@@ -942,23 +885,35 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
- {
- 	struct tcp_sock *tp = tcp_sk(sk);
- 	struct bbr *bbr = inet_csk_ca(sk);
--	bool filter_expired;
-+	bool probe_rtt_expired, min_rtt_expired;
-+	u32 expire;
- 
--	/* Track min RTT seen in the min_rtt_win_sec filter window: */
--	filter_expired = after(tcp_jiffies32,
--			       bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
-+	/* Track min RTT in probe_rtt_win_ms to time next PROBE_RTT state. */
-+	expire = bbr->probe_rtt_min_stamp +
-+		 msecs_to_jiffies(bbr_param(sk, probe_rtt_win_ms));
-+	probe_rtt_expired = after(tcp_jiffies32, expire);
- 	if (rs->rtt_us >= 0 &&
--	    (rs->rtt_us < bbr->min_rtt_us ||
--	     (filter_expired && !rs->is_ack_delayed))) {
--		bbr->min_rtt_us = rs->rtt_us;
--		bbr->min_rtt_stamp = tcp_jiffies32;
-+	    (rs->rtt_us < bbr->probe_rtt_min_us ||
-+	     (probe_rtt_expired && !rs->is_ack_delayed))) {
-+		bbr->probe_rtt_min_us = rs->rtt_us;
-+		bbr->probe_rtt_min_stamp = tcp_jiffies32;
-+	}
-+	/* Track min RTT seen in the min_rtt_win_sec filter window: */
-+	expire = bbr->min_rtt_stamp + bbr_param(sk, min_rtt_win_sec) * HZ;
-+	min_rtt_expired = after(tcp_jiffies32, expire);
-+	if (bbr->probe_rtt_min_us <= bbr->min_rtt_us ||
-+	    min_rtt_expired) {
-+		bbr->min_rtt_us = bbr->probe_rtt_min_us;
-+		bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp;
- 	}
- 
--	if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
-+	if (bbr_param(sk, probe_rtt_mode_ms) > 0 && probe_rtt_expired &&
- 	    !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
- 		bbr->mode = BBR_PROBE_RTT;  /* dip, drain queue */
- 		bbr_save_cwnd(sk);  /* note cwnd so we can restore it */
- 		bbr->probe_rtt_done_stamp = 0;
-+		bbr->ack_phase = BBR_ACKS_PROBE_STOPPING;
-+		bbr->next_rtt_delivered = tp->delivered;
- 	}
- 
- 	if (bbr->mode == BBR_PROBE_RTT) {
-@@ -967,9 +922,9 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
- 			(tp->delivered + tcp_packets_in_flight(tp)) ? : 1;
- 		/* Maintain min packets in flight for max(200 ms, 1 round). */
- 		if (!bbr->probe_rtt_done_stamp &&
--		    tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) {
-+		    tcp_packets_in_flight(tp) <= bbr_probe_rtt_cwnd(sk)) {
- 			bbr->probe_rtt_done_stamp = tcp_jiffies32 +
--				msecs_to_jiffies(bbr_probe_rtt_mode_ms);
-+				msecs_to_jiffies(bbr_param(sk, probe_rtt_mode_ms));
- 			bbr->probe_rtt_round_done = 0;
- 			bbr->next_rtt_delivered = tp->delivered;
- 		} else if (bbr->probe_rtt_done_stamp) {
-@@ -990,18 +945,20 @@ static void bbr_update_gains(struct sock *sk)
- 
- 	switch (bbr->mode) {
- 	case BBR_STARTUP:
--		bbr->pacing_gain = bbr_high_gain;
--		bbr->cwnd_gain	 = bbr_high_gain;
-+		bbr->pacing_gain = bbr_param(sk, startup_pacing_gain);
-+		bbr->cwnd_gain	 = bbr_param(sk, startup_cwnd_gain);
- 		break;
- 	case BBR_DRAIN:
--		bbr->pacing_gain = bbr_drain_gain;	/* slow, to drain */
--		bbr->cwnd_gain	 = bbr_high_gain;	/* keep cwnd */
-+		bbr->pacing_gain = bbr_param(sk, drain_gain);  /* slow, to drain */
-+		bbr->cwnd_gain	 = bbr_param(sk, startup_cwnd_gain);  /* keep cwnd */
- 		break;
- 	case BBR_PROBE_BW:
--		bbr->pacing_gain = (bbr->lt_use_bw ?
--				    BBR_UNIT :
--				    bbr_pacing_gain[bbr->cycle_idx]);
--		bbr->cwnd_gain	 = bbr_cwnd_gain;
-+		bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx];
-+		bbr->cwnd_gain	 = bbr_param(sk, cwnd_gain);
-+		if (bbr_param(sk, bw_probe_cwnd_gain) &&
-+		    bbr->cycle_idx == BBR_BW_PROBE_UP)
-+			bbr->cwnd_gain +=
-+				BBR_UNIT * bbr_param(sk, bw_probe_cwnd_gain) / 4;
- 		break;
- 	case BBR_PROBE_RTT:
- 		bbr->pacing_gain = BBR_UNIT;
-@@ -1013,144 +970,1387 @@ static void bbr_update_gains(struct sock *sk)
- 	}
- }
- 
--static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
-+__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk)
- {
--	bbr_update_bw(sk, rs);
--	bbr_update_ack_aggregation(sk, rs);
--	bbr_update_cycle_phase(sk, rs);
--	bbr_check_full_bw_reached(sk, rs);
--	bbr_check_drain(sk, rs);
--	bbr_update_min_rtt(sk, rs);
--	bbr_update_gains(sk);
-+	/* Provision 3 * cwnd since BBR may slow-start even during recovery. */
-+	return 3;
- }
- 
--__bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs)
-+/* Incorporate a new bw sample into the current window of our max filter. */
-+static void bbr_take_max_bw_sample(struct sock *sk, u32 bw)
- {
- 	struct bbr *bbr = inet_csk_ca(sk);
--	u32 bw;
--
--	bbr_update_model(sk, rs);
- 
--	bw = bbr_bw(sk);
--	bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
--	bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
-+	bbr->bw_hi[1] = max(bw, bbr->bw_hi[1]);
- }
- 
--__bpf_kfunc static void bbr_init(struct sock *sk)
-+/* Keep max of last 1-2 cycles. Each PROBE_BW cycle, flip filter window. */
-+static void bbr_advance_max_bw_filter(struct sock *sk)
- {
--	struct tcp_sock *tp = tcp_sk(sk);
- 	struct bbr *bbr = inet_csk_ca(sk);
- 
--	bbr->prior_cwnd = 0;
--	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
--	bbr->rtt_cnt = 0;
--	bbr->next_rtt_delivered = tp->delivered;
--	bbr->prev_ca_state = TCP_CA_Open;
--	bbr->packet_conservation = 0;
--
--	bbr->probe_rtt_done_stamp = 0;
--	bbr->probe_rtt_round_done = 0;
--	bbr->min_rtt_us = tcp_min_rtt(tp);
--	bbr->min_rtt_stamp = tcp_jiffies32;
--
--	minmax_reset(&bbr->bw, bbr->rtt_cnt, 0);  /* init max bw to 0 */
-+	if (!bbr->bw_hi[1])
-+		return;  /* no samples in this window; remember old window */
-+	bbr->bw_hi[0] = bbr->bw_hi[1];
-+	bbr->bw_hi[1] = 0;
-+}
- 
--	bbr->has_seen_rtt = 0;
--	bbr_init_pacing_rate_from_rtt(sk);
-+/* Reset the estimator for reaching full bandwidth based on bw plateau. */
-+static void bbr_reset_full_bw(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
- 
--	bbr->round_start = 0;
--	bbr->idle_restart = 0;
--	bbr->full_bw_reached = 0;
- 	bbr->full_bw = 0;
- 	bbr->full_bw_cnt = 0;
--	bbr->cycle_mstamp = 0;
--	bbr->cycle_idx = 0;
--	bbr_reset_lt_bw_sampling(sk);
--	bbr_reset_startup_mode(sk);
-+	bbr->full_bw_now = 0;
-+}
- 
--	bbr->ack_epoch_mstamp = tp->tcp_mstamp;
--	bbr->ack_epoch_acked = 0;
--	bbr->extra_acked_win_rtts = 0;
--	bbr->extra_acked_win_idx = 0;
--	bbr->extra_acked[0] = 0;
--	bbr->extra_acked[1] = 0;
-+/* How much do we want in flight? Our BDP, unless congestion cut cwnd. */
-+static u32 bbr_target_inflight(struct sock *sk)
-+{
-+	u32 bdp = bbr_inflight(sk, bbr_bw(sk), BBR_UNIT);
- 
--	cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
-+	return min(bdp, tcp_sk(sk)->snd_cwnd);
- }
- 
--__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk)
-+static bool bbr_is_probing_bandwidth(struct sock *sk)
- {
--	/* Provision 3 * cwnd since BBR may slow-start even during recovery. */
--	return 3;
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	return (bbr->mode == BBR_STARTUP) ||
-+		(bbr->mode == BBR_PROBE_BW &&
-+		 (bbr->cycle_idx == BBR_BW_PROBE_REFILL ||
-+		  bbr->cycle_idx == BBR_BW_PROBE_UP));
-+}
-+
-+/* Has the given amount of time elapsed since we marked the phase start? */
-+static bool bbr_has_elapsed_in_phase(const struct sock *sk, u32 interval_us)
-+{
-+	const struct tcp_sock *tp = tcp_sk(sk);
-+	const struct bbr *bbr = inet_csk_ca(sk);
-+
-+	return tcp_stamp_us_delta(tp->tcp_mstamp,
-+				  bbr->cycle_mstamp + interval_us) > 0;
-+}
-+
-+static void bbr_handle_queue_too_high_in_startup(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 bdp;  /* estimated BDP in packets, with quantization budget */
-+
-+	bbr->full_bw_reached = 1;
-+
-+	bdp = bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
-+	bbr->inflight_hi = max(bdp, bbr->inflight_latest);
-+}
-+
-+/* Exit STARTUP upon N consecutive rounds with ECN mark rate > ecn_thresh. */
-+static void bbr_check_ecn_too_high_in_startup(struct sock *sk, u32 ce_ratio)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	if (bbr_full_bw_reached(sk) || !bbr->ecn_eligible ||
-+	    !bbr_param(sk, full_ecn_cnt) || !bbr_param(sk, ecn_thresh))
-+		return;
-+
-+	if (ce_ratio >= bbr_param(sk, ecn_thresh))
-+		bbr->startup_ecn_rounds++;
-+	else
-+		bbr->startup_ecn_rounds = 0;
-+
-+	if (bbr->startup_ecn_rounds >= bbr_param(sk, full_ecn_cnt)) {
-+		bbr_handle_queue_too_high_in_startup(sk);
-+		return;
-+	}
-+}
-+
-+/* Updates ecn_alpha and returns ce_ratio. -1 if not available. */
-+static int bbr_update_ecn_alpha(struct sock *sk)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct net *net = sock_net(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	s32 delivered, delivered_ce;
-+	u64 alpha, ce_ratio;
-+	u32 gain;
-+	bool want_ecn_alpha;
-+
-+	/* See if we should use ECN sender logic for this connection. */
-+	if (!bbr->ecn_eligible && bbr_can_use_ecn(sk) &&
-+	    bbr_param(sk, ecn_factor) &&
-+	    (bbr->min_rtt_us <= bbr_ecn_max_rtt_us ||
-+	     !bbr_ecn_max_rtt_us))
-+		bbr->ecn_eligible = 1;
-+
-+	/* Skip updating alpha only if not ECN-eligible and PLB is disabled. */
-+	want_ecn_alpha = (bbr->ecn_eligible ||
-+			  (bbr_can_use_ecn(sk) &&
-+			   READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled)));
-+	if (!want_ecn_alpha)
-+		return -1;
-+
-+	delivered = tp->delivered - bbr->alpha_last_delivered;
-+	delivered_ce = tp->delivered_ce - bbr->alpha_last_delivered_ce;
-+
-+	if (delivered == 0 ||		/* avoid divide by zero */
-+	    WARN_ON_ONCE(delivered < 0 || delivered_ce < 0))  /* backwards? */
-+		return -1;
-+
-+	BUILD_BUG_ON(BBR_SCALE != TCP_PLB_SCALE);
-+	ce_ratio = (u64)delivered_ce << BBR_SCALE;
-+	do_div(ce_ratio, delivered);
-+
-+	gain = bbr_param(sk, ecn_alpha_gain);
-+	alpha = ((BBR_UNIT - gain) * bbr->ecn_alpha) >> BBR_SCALE;
-+	alpha += (gain * ce_ratio) >> BBR_SCALE;
-+	bbr->ecn_alpha = min_t(u32, alpha, BBR_UNIT);
-+
-+	bbr->alpha_last_delivered = tp->delivered;
-+	bbr->alpha_last_delivered_ce = tp->delivered_ce;
-+
-+	bbr_check_ecn_too_high_in_startup(sk, ce_ratio);
-+	return (int)ce_ratio;
- }
- 
--/* In theory BBR does not need to undo the cwnd since it does not
-- * always reduce cwnd on losses (see bbr_main()). Keep it for now.
-+/* Protective Load Balancing (PLB). PLB rehashes outgoing data (to a new IPv6
-+ * flow label) if it encounters sustained congestion in the form of ECN marks.
-  */
--__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk)
-+static void bbr_plb(struct sock *sk, const struct rate_sample *rs, int ce_ratio)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	if (bbr->round_start && ce_ratio >= 0)
-+		tcp_plb_update_state(sk, &bbr->plb, ce_ratio);
-+
-+	tcp_plb_check_rehash(sk, &bbr->plb);
-+}
-+
-+/* Each round trip of BBR_BW_PROBE_UP, double volume of probing data. */
-+static void bbr_raise_inflight_hi_slope(struct sock *sk)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 growth_this_round, cnt;
-+
-+	/* Calculate "slope": packets S/Acked per inflight_hi increment. */
-+	growth_this_round = 1 << bbr->bw_probe_up_rounds;
-+	bbr->bw_probe_up_rounds = min(bbr->bw_probe_up_rounds + 1, 30);
-+	cnt = tcp_snd_cwnd(tp) / growth_this_round;
-+	cnt = max(cnt, 1U);
-+	bbr->bw_probe_up_cnt = cnt;
-+}
-+
-+/* In BBR_BW_PROBE_UP, not seeing high loss/ECN/queue, so raise inflight_hi. */
-+static void bbr_probe_inflight_hi_upward(struct sock *sk,
-+					  const struct rate_sample *rs)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 delta;
-+
-+	if (!tp->is_cwnd_limited || tcp_snd_cwnd(tp) < bbr->inflight_hi)
-+		return;  /* not fully using inflight_hi, so don't grow it */
-+
-+	/* For each bw_probe_up_cnt packets ACKed, increase inflight_hi by 1. */
-+	bbr->bw_probe_up_acks += rs->acked_sacked;
-+	if (bbr->bw_probe_up_acks >=  bbr->bw_probe_up_cnt) {
-+		delta = bbr->bw_probe_up_acks / bbr->bw_probe_up_cnt;
-+		bbr->bw_probe_up_acks -= delta * bbr->bw_probe_up_cnt;
-+		bbr->inflight_hi += delta;
-+		bbr->try_fast_path = 0;  /* Need to update cwnd */
-+	}
-+
-+	if (bbr->round_start)
-+		bbr_raise_inflight_hi_slope(sk);
-+}
-+
-+/* Does loss/ECN rate for this sample say inflight is "too high"?
-+ * This is used by both the bbr_check_loss_too_high_in_startup() function,
-+ * which can be used in either v1 or v2, and the PROBE_UP phase of v2, which
-+ * uses it to notice when loss/ECN rates suggest inflight is too high.
-+ */
-+static bool bbr_is_inflight_too_high(const struct sock *sk,
-+				      const struct rate_sample *rs)
-+{
-+	const struct bbr *bbr = inet_csk_ca(sk);
-+	u32 loss_thresh, ecn_thresh;
-+
-+	if (rs->lost > 0 && rs->tx_in_flight) {
-+		loss_thresh = (u64)rs->tx_in_flight * bbr_param(sk, loss_thresh) >>
-+				BBR_SCALE;
-+		if (rs->lost > loss_thresh) {
-+			return true;
-+		}
-+	}
-+
-+	if (rs->delivered_ce > 0 && rs->delivered > 0 &&
-+	    bbr->ecn_eligible && bbr_param(sk, ecn_thresh)) {
-+		ecn_thresh = (u64)rs->delivered * bbr_param(sk, ecn_thresh) >>
-+				BBR_SCALE;
-+		if (rs->delivered_ce > ecn_thresh) {
-+			return true;
-+		}
-+	}
-+
-+	return false;
-+}
-+
-+/* Calculate the tx_in_flight level that corresponded to excessive loss.
-+ * We find "lost_prefix" segs of the skb where loss rate went too high,
-+ * by solving for "lost_prefix" in the following equation:
-+ *   lost                     /  inflight                     >= loss_thresh
-+ *  (lost_prev + lost_prefix) / (inflight_prev + lost_prefix) >= loss_thresh
-+ * Then we take that equation, convert it to fixed point, and
-+ * round up to the nearest packet.
-+ */
-+static u32 bbr_inflight_hi_from_lost_skb(const struct sock *sk,
-+					  const struct rate_sample *rs,
-+					  const struct sk_buff *skb)
-+{
-+	const struct tcp_sock *tp = tcp_sk(sk);
-+	u32 loss_thresh  = bbr_param(sk, loss_thresh);
-+	u32 pcount, divisor, inflight_hi;
-+	s32 inflight_prev, lost_prev;
-+	u64 loss_budget, lost_prefix;
-+
-+	pcount = tcp_skb_pcount(skb);
-+
-+	/* How much data was in flight before this skb? */
-+	inflight_prev = rs->tx_in_flight - pcount;
-+	if (inflight_prev < 0) {
-+		WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious(
-+				  pcount,
-+				  TCP_SKB_CB(skb)->sacked,
-+				  rs->tx_in_flight),
-+			  "tx_in_flight: %u pcount: %u reneg: %u",
-+			  rs->tx_in_flight, pcount, tcp_sk(sk)->is_sack_reneg);
-+		return ~0U;
-+	}
-+
-+	/* How much inflight data was marked lost before this skb? */
-+	lost_prev = rs->lost - pcount;
-+	if (WARN_ONCE(lost_prev < 0,
-+		      "cwnd: %u ca: %d out: %u lost: %u pif: %u "
-+		      "tx_in_flight: %u tx.lost: %u tp->lost: %u rs->lost: %d "
-+		      "lost_prev: %d pcount: %d seq: %u end_seq: %u reneg: %u",
-+		      tcp_snd_cwnd(tp), inet_csk(sk)->icsk_ca_state,
-+		      tp->packets_out, tp->lost_out, tcp_packets_in_flight(tp),
-+		      rs->tx_in_flight, TCP_SKB_CB(skb)->tx.lost, tp->lost,
-+		      rs->lost, lost_prev, pcount,
-+		      TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
-+		      tp->is_sack_reneg))
-+		return ~0U;
-+
-+	/* At what prefix of this lost skb did losss rate exceed loss_thresh? */
-+	loss_budget = (u64)inflight_prev * loss_thresh + BBR_UNIT - 1;
-+	loss_budget >>= BBR_SCALE;
-+	if (lost_prev >= loss_budget) {
-+		lost_prefix = 0;   /* previous losses crossed loss_thresh */
-+	} else {
-+		lost_prefix = loss_budget - lost_prev;
-+		lost_prefix <<= BBR_SCALE;
-+		divisor = BBR_UNIT - loss_thresh;
-+		if (WARN_ON_ONCE(!divisor))  /* loss_thresh is 8 bits */
-+			return ~0U;
-+		do_div(lost_prefix, divisor);
-+	}
-+
-+	inflight_hi = inflight_prev + lost_prefix;
-+	return inflight_hi;
-+}
-+
-+/* If loss/ECN rates during probing indicated we may have overfilled a
-+ * buffer, return an operating point that tries to leave unutilized headroom in
-+ * the path for other flows, for fairness convergence and lower RTTs and loss.
-+ */
-+static u32 bbr_inflight_with_headroom(const struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 headroom, headroom_fraction;
-+
-+	if (bbr->inflight_hi == ~0U)
-+		return ~0U;
-+
-+	headroom_fraction = bbr_param(sk, inflight_headroom);
-+	headroom = ((u64)bbr->inflight_hi * headroom_fraction) >> BBR_SCALE;
-+	headroom = max(headroom, 1U);
-+	return max_t(s32, bbr->inflight_hi - headroom,
-+		     bbr_param(sk, cwnd_min_target));
-+}
-+
-+/* Bound cwnd to a sensible level, based on our current probing state
-+ * machine phase and model of a good inflight level (inflight_lo, inflight_hi).
-+ */
-+static void bbr_bound_cwnd_for_inflight_model(struct sock *sk)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 cap;
-+
-+	/* tcp_rcv_synsent_state_process() currently calls tcp_ack()
-+	 * and thus cong_control() without first initializing us(!).
-+	 */
-+	if (!bbr->initialized)
-+		return;
-+
-+	cap = ~0U;
-+	if (bbr->mode == BBR_PROBE_BW &&
-+	    bbr->cycle_idx != BBR_BW_PROBE_CRUISE) {
-+		/* Probe to see if more packets fit in the path. */
-+		cap = bbr->inflight_hi;
-+	} else {
-+		if (bbr->mode == BBR_PROBE_RTT ||
-+		    (bbr->mode == BBR_PROBE_BW &&
-+		     bbr->cycle_idx == BBR_BW_PROBE_CRUISE))
-+			cap = bbr_inflight_with_headroom(sk);
-+	}
-+	/* Adapt to any loss/ECN since our last bw probe. */
-+	cap = min(cap, bbr->inflight_lo);
-+
-+	cap = max_t(u32, cap, bbr_param(sk, cwnd_min_target));
-+	tcp_snd_cwnd_set(tp, min(cap, tcp_snd_cwnd(tp)));
-+}
-+
-+/* How should we multiplicatively cut bw or inflight limits based on ECN? */
-+static u32 bbr_ecn_cut(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	return BBR_UNIT -
-+		((bbr->ecn_alpha * bbr_param(sk, ecn_factor)) >> BBR_SCALE);
-+}
-+
-+/* Init lower bounds if have not inited yet. */
-+static void bbr_init_lower_bounds(struct sock *sk, bool init_bw)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	if (init_bw && bbr->bw_lo == ~0U)
-+		bbr->bw_lo = bbr_max_bw(sk);
-+	if (bbr->inflight_lo == ~0U)
-+		bbr->inflight_lo = tcp_snd_cwnd(tp);
-+}
-+
-+/* Reduce bw and inflight to (1 - beta). */
-+static void bbr_loss_lower_bounds(struct sock *sk, u32 *bw, u32 *inflight)
-+{
-+	struct bbr* bbr = inet_csk_ca(sk);
-+	u32 loss_cut = BBR_UNIT - bbr_param(sk, beta);
-+
-+	*bw = max_t(u32, bbr->bw_latest,
-+		    (u64)bbr->bw_lo * loss_cut >> BBR_SCALE);
-+	*inflight = max_t(u32, bbr->inflight_latest,
-+			  (u64)bbr->inflight_lo * loss_cut >> BBR_SCALE);
-+}
-+
-+/* Reduce inflight to (1 - alpha*ecn_factor). */
-+static void bbr_ecn_lower_bounds(struct sock *sk, u32 *inflight)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 ecn_cut = bbr_ecn_cut(sk);
-+
-+	*inflight = (u64)bbr->inflight_lo * ecn_cut >> BBR_SCALE;
-+}
-+
-+/* Estimate a short-term lower bound on the capacity available now, based
-+ * on measurements of the current delivery process and recent history. When we
-+ * are seeing loss/ECN at times when we are not probing bw, then conservatively
-+ * move toward flow balance by multiplicatively cutting our short-term
-+ * estimated safe rate and volume of data (bw_lo and inflight_lo). We use a
-+ * multiplicative decrease in order to converge to a lower capacity in time
-+ * logarithmic in the magnitude of the decrease.
-+ *
-+ * However, we do not cut our short-term estimates lower than the current rate
-+ * and volume of delivered data from this round trip, since from the current
-+ * delivery process we can estimate the measured capacity available now.
-+ *
-+ * Anything faster than that approach would knowingly risk high loss, which can
-+ * cause low bw for Reno/CUBIC and high loss recovery latency for
-+ * request/response flows using any congestion control.
-+ */
-+static void bbr_adapt_lower_bounds(struct sock *sk,
-+				    const struct rate_sample *rs)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 ecn_inflight_lo = ~0U;
-+
-+	/* We only use lower-bound estimates when not probing bw.
-+	 * When probing we need to push inflight higher to probe bw.
-+	 */
-+	if (bbr_is_probing_bandwidth(sk))
-+		return;
-+
-+	/* ECN response. */
-+	if (bbr->ecn_in_round && bbr_param(sk, ecn_factor)) {
-+		bbr_init_lower_bounds(sk, false);
-+		bbr_ecn_lower_bounds(sk, &ecn_inflight_lo);
-+	}
-+
-+	/* Loss response. */
-+	if (bbr->loss_in_round) {
-+		bbr_init_lower_bounds(sk, true);
-+		bbr_loss_lower_bounds(sk, &bbr->bw_lo, &bbr->inflight_lo);
-+	}
-+
-+	/* Adjust to the lower of the levels implied by loss/ECN. */
-+	bbr->inflight_lo = min(bbr->inflight_lo, ecn_inflight_lo);
-+	bbr->bw_lo = max(1U, bbr->bw_lo);
-+}
-+
-+/* Reset any short-term lower-bound adaptation to congestion, so that we can
-+ * push our inflight up.
-+ */
-+static void bbr_reset_lower_bounds(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	bbr->bw_lo = ~0U;
-+	bbr->inflight_lo = ~0U;
-+}
-+
-+/* After bw probing (STARTUP/PROBE_UP), reset signals before entering a state
-+ * machine phase where we adapt our lower bound based on congestion signals.
-+ */
-+static void bbr_reset_congestion_signals(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	bbr->loss_in_round = 0;
-+	bbr->ecn_in_round = 0;
-+	bbr->loss_in_cycle = 0;
-+	bbr->ecn_in_cycle = 0;
-+	bbr->bw_latest = 0;
-+	bbr->inflight_latest = 0;
-+}
-+
-+static void bbr_exit_loss_recovery(struct sock *sk)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd));
-+	bbr->try_fast_path = 0; /* bound cwnd using latest model */
-+}
-+
-+/* Update rate and volume of delivered data from latest round trip. */
-+static void bbr_update_latest_delivery_signals(
-+	struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	bbr->loss_round_start = 0;
-+	if (rs->interval_us <= 0 || !rs->acked_sacked)
-+		return; /* Not a valid observation */
-+
-+	bbr->bw_latest       = max_t(u32, bbr->bw_latest,       ctx->sample_bw);
-+	bbr->inflight_latest = max_t(u32, bbr->inflight_latest, rs->delivered);
-+
-+	if (!before(rs->prior_delivered, bbr->loss_round_delivered)) {
-+		bbr->loss_round_delivered = tp->delivered;
-+		bbr->loss_round_start = 1;  /* mark start of new round trip */
-+	}
-+}
-+
-+/* Once per round, reset filter for latest rate and volume of delivered data. */
-+static void bbr_advance_latest_delivery_signals(
-+	struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	/* If ACK matches a TLP retransmit, persist the filter. If we detect
-+	 * that a TLP retransmit plugged a tail loss, we'll want to remember
-+	 * how much data the path delivered before the tail loss.
-+	 */
-+	if (bbr->loss_round_start && !rs->is_acking_tlp_retrans_seq) {
-+		bbr->bw_latest = ctx->sample_bw;
-+		bbr->inflight_latest = rs->delivered;
-+	}
-+}
-+
-+/* Update (most of) our congestion signals: track the recent rate and volume of
-+ * delivered data, presence of loss, and EWMA degree of ECN marking.
-+ */
-+static void bbr_update_congestion_signals(
-+	struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx)
- {
- 	struct bbr *bbr = inet_csk_ca(sk);
-+	u64 bw;
-+
-+	if (rs->interval_us <= 0 || !rs->acked_sacked)
-+		return; /* Not a valid observation */
-+	bw = ctx->sample_bw;
- 
--	bbr->full_bw = 0;   /* spurious slow-down; reset full pipe detection */
-+	if (!rs->is_app_limited || bw >= bbr_max_bw(sk))
-+		bbr_take_max_bw_sample(sk, bw);
-+
-+	bbr->loss_in_round |= (rs->losses > 0);
-+
-+	if (!bbr->loss_round_start)
-+		return;		/* skip the per-round-trip updates */
-+	/* Now do per-round-trip updates. */
-+	bbr_adapt_lower_bounds(sk, rs);
-+
-+	bbr->loss_in_round = 0;
-+	bbr->ecn_in_round  = 0;
-+}
-+
-+/* Bandwidth probing can cause loss. To help coexistence with loss-based
-+ * congestion control we spread out our probing in a Reno-conscious way. Due to
-+ * the shape of the Reno sawtooth, the time required between loss epochs for an
-+ * idealized Reno flow is a number of round trips that is the BDP of that
-+ * flow. We count packet-timed round trips directly, since measured RTT can
-+ * vary widely, and Reno is driven by packet-timed round trips.
-+ */
-+static bool bbr_is_reno_coexistence_probe_time(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 rounds;
-+
-+	/* Random loss can shave some small percentage off of our inflight
-+	 * in each round. To survive this, flows need robust periodic probes.
-+	 */
-+	rounds = min_t(u32, bbr_param(sk, bw_probe_max_rounds), bbr_target_inflight(sk));
-+	return bbr->rounds_since_probe >= rounds;
-+}
-+
-+/* How long do we want to wait before probing for bandwidth (and risking
-+ * loss)? We randomize the wait, for better mixing and fairness convergence.
-+ *
-+ * We bound the Reno-coexistence inter-bw-probe time to be 62-63 round trips.
-+ * This is calculated to allow fairness with a 25Mbps, 30ms Reno flow,
-+ * (eg 4K video to a broadband user):
-+ *   BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets
-+ *
-+ * We bound the BBR-native inter-bw-probe wall clock time to be:
-+ *  (a) higher than 2 sec: to try to avoid causing loss for a long enough time
-+ *      to allow Reno at 30ms to get 4K video bw, the inter-bw-probe time must
-+ *      be at least: 25Mbps * .030sec / (1514bytes) * 0.030sec = 1.9secs
-+ *  (b) lower than 3 sec: to ensure flows can start probing in a reasonable
-+ *      amount of time to discover unutilized bw on human-scale interactive
-+ *      time-scales (e.g. perhaps traffic from a web page download that we
-+ *      were competing with is now complete).
-+ */
-+static void bbr_pick_probe_wait(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	/* Decide the random round-trip bound for wait until probe: */
-+	bbr->rounds_since_probe =
-+		get_random_u32_below(bbr_param(sk, bw_probe_rand_rounds));
-+	/* Decide the random wall clock bound for wait until probe: */
-+	bbr->probe_wait_us = bbr_param(sk, bw_probe_base_us) +
-+			     get_random_u32_below(bbr_param(sk, bw_probe_rand_us));
-+}
-+
-+static void bbr_set_cycle_idx(struct sock *sk, int cycle_idx)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	bbr->cycle_idx = cycle_idx;
-+	/* New phase, so need to update cwnd and pacing rate. */
-+	bbr->try_fast_path = 0;
-+}
-+
-+/* Send at estimated bw to fill the pipe, but not queue. We need this phase
-+ * before PROBE_UP, because as soon as we send faster than the available bw
-+ * we will start building a queue, and if the buffer is shallow we can cause
-+ * loss. If we do not fill the pipe before we cause this loss, our bw_hi and
-+ * inflight_hi estimates will underestimate.
-+ */
-+static void bbr_start_bw_probe_refill(struct sock *sk, u32 bw_probe_up_rounds)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	bbr_reset_lower_bounds(sk);
-+	bbr->bw_probe_up_rounds = bw_probe_up_rounds;
-+	bbr->bw_probe_up_acks = 0;
-+	bbr->stopped_risky_probe = 0;
-+	bbr->ack_phase = BBR_ACKS_REFILLING;
-+	bbr->next_rtt_delivered = tp->delivered;
-+	bbr_set_cycle_idx(sk, BBR_BW_PROBE_REFILL);
-+}
-+
-+/* Now probe max deliverable data rate and volume. */
-+static void bbr_start_bw_probe_up(struct sock *sk, struct bbr_context *ctx)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	bbr->ack_phase = BBR_ACKS_PROBE_STARTING;
-+	bbr->next_rtt_delivered = tp->delivered;
-+	bbr->cycle_mstamp = tp->tcp_mstamp;
-+	bbr_reset_full_bw(sk);
-+	bbr->full_bw = ctx->sample_bw;
-+	bbr_set_cycle_idx(sk, BBR_BW_PROBE_UP);
-+	bbr_raise_inflight_hi_slope(sk);
-+}
-+
-+/* Start a new PROBE_BW probing cycle of some wall clock length. Pick a wall
-+ * clock time at which to probe beyond an inflight that we think to be
-+ * safe. This will knowingly risk packet loss, so we want to do this rarely, to
-+ * keep packet loss rates low. Also start a round-trip counter, to probe faster
-+ * if we estimate a Reno flow at our BDP would probe faster.
-+ */
-+static void bbr_start_bw_probe_down(struct sock *sk)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	bbr_reset_congestion_signals(sk);
-+	bbr->bw_probe_up_cnt = ~0U;     /* not growing inflight_hi any more */
-+	bbr_pick_probe_wait(sk);
-+	bbr->cycle_mstamp = tp->tcp_mstamp;		/* start wall clock */
-+	bbr->ack_phase = BBR_ACKS_PROBE_STOPPING;
-+	bbr->next_rtt_delivered = tp->delivered;
-+	bbr_set_cycle_idx(sk, BBR_BW_PROBE_DOWN);
-+}
-+
-+/* Cruise: maintain what we estimate to be a neutral, conservative
-+ * operating point, without attempting to probe up for bandwidth or down for
-+ * RTT, and only reducing inflight in response to loss/ECN signals.
-+ */
-+static void bbr_start_bw_probe_cruise(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	if (bbr->inflight_lo != ~0U)
-+		bbr->inflight_lo = min(bbr->inflight_lo, bbr->inflight_hi);
-+
-+	bbr_set_cycle_idx(sk, BBR_BW_PROBE_CRUISE);
-+}
-+
-+/* Loss and/or ECN rate is too high while probing.
-+ * Adapt (once per bw probe) by cutting inflight_hi and then restarting cycle.
-+ */
-+static void bbr_handle_inflight_too_high(struct sock *sk,
-+					  const struct rate_sample *rs)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	const u32 beta = bbr_param(sk, beta);
-+
-+	bbr->prev_probe_too_high = 1;
-+	bbr->bw_probe_samples = 0;  /* only react once per probe */
-+	/* If we are app-limited then we are not robustly
-+	 * probing the max volume of inflight data we think
-+	 * might be safe (analogous to how app-limited bw
-+	 * samples are not known to be robustly probing bw).
-+	 */
-+	if (!rs->is_app_limited) {
-+		bbr->inflight_hi = max_t(u32, rs->tx_in_flight,
-+					 (u64)bbr_target_inflight(sk) *
-+					 (BBR_UNIT - beta) >> BBR_SCALE);
-+	}
-+	if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP)
-+		bbr_start_bw_probe_down(sk);
-+}
-+
-+/* If we're seeing bw and loss samples reflecting our bw probing, adapt
-+ * using the signals we see. If loss or ECN mark rate gets too high, then adapt
-+ * inflight_hi downward. If we're able to push inflight higher without such
-+ * signals, push higher: adapt inflight_hi upward.
-+ */
-+static bool bbr_adapt_upper_bounds(struct sock *sk,
-+				    const struct rate_sample *rs,
-+				    struct bbr_context *ctx)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	/* Track when we'll see bw/loss samples resulting from our bw probes. */
-+	if (bbr->ack_phase == BBR_ACKS_PROBE_STARTING && bbr->round_start)
-+		bbr->ack_phase = BBR_ACKS_PROBE_FEEDBACK;
-+	if (bbr->ack_phase == BBR_ACKS_PROBE_STOPPING && bbr->round_start) {
-+		/* End of samples from bw probing phase. */
-+		bbr->bw_probe_samples = 0;
-+		bbr->ack_phase = BBR_ACKS_INIT;
-+		/* At this point in the cycle, our current bw sample is also
-+		 * our best recent chance at finding the highest available bw
-+		 * for this flow. So now is the best time to forget the bw
-+		 * samples from the previous cycle, by advancing the window.
-+		 */
-+		if (bbr->mode == BBR_PROBE_BW && !rs->is_app_limited)
-+			bbr_advance_max_bw_filter(sk);
-+		/* If we had an inflight_hi, then probed and pushed inflight all
-+		 * the way up to hit that inflight_hi without seeing any
-+		 * high loss/ECN in all the resulting ACKs from that probing,
-+		 * then probe up again, this time letting inflight persist at
-+		 * inflight_hi for a round trip, then accelerating beyond.
-+		 */
-+		if (bbr->mode == BBR_PROBE_BW &&
-+		    bbr->stopped_risky_probe && !bbr->prev_probe_too_high) {
-+			bbr_start_bw_probe_refill(sk, 0);
-+			return true;  /* yes, decided state transition */
-+		}
-+	}
-+	if (bbr_is_inflight_too_high(sk, rs)) {
-+		if (bbr->bw_probe_samples)  /*  sample is from bw probing? */
-+			bbr_handle_inflight_too_high(sk, rs);
-+	} else {
-+		/* Loss/ECN rate is declared safe. Adjust upper bound upward. */
-+
-+		if (bbr->inflight_hi == ~0U)
-+			return false;   /* no excess queue signals yet */
-+
-+		/* To be resilient to random loss, we must raise bw/inflight_hi
-+		 * if we observe in any phase that a higher level is safe.
-+		 */
-+		if (rs->tx_in_flight > bbr->inflight_hi) {
-+			bbr->inflight_hi = rs->tx_in_flight;
-+		}
-+
-+		if (bbr->mode == BBR_PROBE_BW &&
-+		    bbr->cycle_idx == BBR_BW_PROBE_UP)
-+			bbr_probe_inflight_hi_upward(sk, rs);
-+	}
-+
-+	return false;
-+}
-+
-+/* Check if it's time to probe for bandwidth now, and if so, kick it off. */
-+static bool bbr_check_time_to_probe_bw(struct sock *sk,
-+					const struct rate_sample *rs)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 n;
-+
-+	/* If we seem to be at an operating point where we are not seeing loss
-+	 * but we are seeing ECN marks, then when the ECN marks cease we reprobe
-+	 * quickly (in case cross-traffic has ceased and freed up bw).
-+	 */
-+	if (bbr_param(sk, ecn_reprobe_gain) && bbr->ecn_eligible &&
-+	    bbr->ecn_in_cycle && !bbr->loss_in_cycle &&
-+	    inet_csk(sk)->icsk_ca_state == TCP_CA_Open) {
-+		/* Calculate n so that when bbr_raise_inflight_hi_slope()
-+		 * computes growth_this_round as 2^n it will be roughly the
-+		 * desired volume of data (inflight_hi*ecn_reprobe_gain).
-+		 */
-+		n = ilog2((((u64)bbr->inflight_hi *
-+			    bbr_param(sk, ecn_reprobe_gain)) >> BBR_SCALE));
-+		bbr_start_bw_probe_refill(sk, n);
-+		return true;
-+	}
-+
-+	if (bbr_has_elapsed_in_phase(sk, bbr->probe_wait_us) ||
-+	    bbr_is_reno_coexistence_probe_time(sk)) {
-+		bbr_start_bw_probe_refill(sk, 0);
-+		return true;
-+	}
-+	return false;
-+}
-+
-+/* Is it time to transition from PROBE_DOWN to PROBE_CRUISE? */
-+static bool bbr_check_time_to_cruise(struct sock *sk, u32 inflight, u32 bw)
-+{
-+	/* Always need to pull inflight down to leave headroom in queue. */
-+	if (inflight > bbr_inflight_with_headroom(sk))
-+		return false;
-+
-+	return inflight <= bbr_inflight(sk, bw, BBR_UNIT);
-+}
-+
-+/* PROBE_BW state machine: cruise, refill, probe for bw, or drain? */
-+static void bbr_update_cycle_phase(struct sock *sk,
-+				    const struct rate_sample *rs,
-+				    struct bbr_context *ctx)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	bool is_bw_probe_done = false;
-+	u32 inflight, bw;
-+
-+	if (!bbr_full_bw_reached(sk))
-+		return;
-+
-+	/* In DRAIN, PROBE_BW, or PROBE_RTT, adjust upper bounds. */
-+	if (bbr_adapt_upper_bounds(sk, rs, ctx))
-+		return;		/* already decided state transition */
-+
-+	if (bbr->mode != BBR_PROBE_BW)
-+		return;
-+
-+	inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight);
-+	bw = bbr_max_bw(sk);
-+
-+	switch (bbr->cycle_idx) {
-+	/* First we spend most of our time cruising with a pacing_gain of 1.0,
-+	 * which paces at the estimated bw, to try to fully use the pipe
-+	 * without building queue. If we encounter loss/ECN marks, we adapt
-+	 * by slowing down.
-+	 */
-+	case BBR_BW_PROBE_CRUISE:
-+		if (bbr_check_time_to_probe_bw(sk, rs))
-+			return;		/* already decided state transition */
-+		break;
-+
-+	/* After cruising, when it's time to probe, we first "refill": we send
-+	 * at the estimated bw to fill the pipe, before probing higher and
-+	 * knowingly risking overflowing the bottleneck buffer (causing loss).
-+	 */
-+	case BBR_BW_PROBE_REFILL:
-+		if (bbr->round_start) {
-+			/* After one full round trip of sending in REFILL, we
-+			 * start to see bw samples reflecting our REFILL, which
-+			 * may be putting too much data in flight.
-+			 */
-+			bbr->bw_probe_samples = 1;
-+			bbr_start_bw_probe_up(sk, ctx);
-+		}
-+		break;
-+
-+	/* After we refill the pipe, we probe by using a pacing_gain > 1.0, to
-+	 * probe for bw. If we have not seen loss/ECN, we try to raise inflight
-+	 * to at least pacing_gain*BDP; note that this may take more than
-+	 * min_rtt if min_rtt is small (e.g. on a LAN).
-+	 *
-+	 * We terminate PROBE_UP bandwidth probing upon any of the following:
-+	 *
-+	 * (1) We've pushed inflight up to hit the inflight_hi target set in the
-+	 *     most recent previous bw probe phase. Thus we want to start
-+	 *     draining the queue immediately because it's very likely the most
-+	 *     recently sent packets will fill the queue and cause drops.
-+	 * (2) If inflight_hi has not limited bandwidth growth recently, and
-+	 *     yet delivered bandwidth has not increased much recently
-+	 *     (bbr->full_bw_now).
-+	 * (3) Loss filter says loss rate is "too high".
-+	 * (4) ECN filter says ECN mark rate is "too high".
-+	 *
-+	 * (1) (2) checked here, (3) (4) checked in bbr_is_inflight_too_high()
-+	 */
-+	case BBR_BW_PROBE_UP:
-+		if (bbr->prev_probe_too_high &&
-+		    inflight >= bbr->inflight_hi) {
-+			bbr->stopped_risky_probe = 1;
-+			is_bw_probe_done = true;
-+		} else {
-+			if (tp->is_cwnd_limited &&
-+			    tcp_snd_cwnd(tp) >= bbr->inflight_hi) {
-+				/* inflight_hi is limiting bw growth */
-+				bbr_reset_full_bw(sk);
-+				bbr->full_bw = ctx->sample_bw;
-+			} else if (bbr->full_bw_now) {
-+				/* Plateau in estimated bw. Pipe looks full. */
-+				is_bw_probe_done = true;
-+			}
-+		}
-+		if (is_bw_probe_done) {
-+			bbr->prev_probe_too_high = 0;  /* no loss/ECN (yet) */
-+			bbr_start_bw_probe_down(sk);  /* restart w/ down */
-+		}
-+		break;
-+
-+	/* After probing in PROBE_UP, we have usually accumulated some data in
-+	 * the bottleneck buffer (if bw probing didn't find more bw). We next
-+	 * enter PROBE_DOWN to try to drain any excess data from the queue. To
-+	 * do this, we use a pacing_gain < 1.0. We hold this pacing gain until
-+	 * our inflight is less then that target cruising point, which is the
-+	 * minimum of (a) the amount needed to leave headroom, and (b) the
-+	 * estimated BDP. Once inflight falls to match the target, we estimate
-+	 * the queue is drained; persisting would underutilize the pipe.
-+	 */
-+	case BBR_BW_PROBE_DOWN:
-+		if (bbr_check_time_to_probe_bw(sk, rs))
-+			return;		/* already decided state transition */
-+		if (bbr_check_time_to_cruise(sk, inflight, bw))
-+			bbr_start_bw_probe_cruise(sk);
-+		break;
-+
-+	default:
-+		WARN_ONCE(1, "BBR invalid cycle index %u\n", bbr->cycle_idx);
-+	}
-+}
-+
-+/* Exiting PROBE_RTT, so return to bandwidth probing in STARTUP or PROBE_BW. */
-+static void bbr_exit_probe_rtt(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	bbr_reset_lower_bounds(sk);
-+	if (bbr_full_bw_reached(sk)) {
-+		bbr->mode = BBR_PROBE_BW;
-+		/* Raising inflight after PROBE_RTT may cause loss, so reset
-+		 * the PROBE_BW clock and schedule the next bandwidth probe for
-+		 * a friendly and randomized future point in time.
-+		 */
-+		bbr_start_bw_probe_down(sk);
-+		/* Since we are exiting PROBE_RTT, we know inflight is
-+		 * below our estimated BDP, so it is reasonable to cruise.
-+		 */
-+		bbr_start_bw_probe_cruise(sk);
-+	} else {
-+		bbr->mode = BBR_STARTUP;
-+	}
-+}
-+
-+/* Exit STARTUP based on loss rate > 1% and loss gaps in round >= N. Wait until
-+ * the end of the round in recovery to get a good estimate of how many packets
-+ * have been lost, and how many we need to drain with a low pacing rate.
-+ */
-+static void bbr_check_loss_too_high_in_startup(struct sock *sk,
-+						const struct rate_sample *rs)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	if (bbr_full_bw_reached(sk))
-+		return;
-+
-+	/* For STARTUP exit, check the loss rate at the end of each round trip
-+	 * of Recovery episodes in STARTUP. We check the loss rate at the end
-+	 * of the round trip to filter out noisy/low loss and have a better
-+	 * sense of inflight (extent of loss), so we can drain more accurately.
-+	 */
-+	if (rs->losses && bbr->loss_events_in_round < 0xf)
-+		bbr->loss_events_in_round++;  /* update saturating counter */
-+	if (bbr_param(sk, full_loss_cnt) && bbr->loss_round_start &&
-+	    inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery &&
-+	    bbr->loss_events_in_round >= bbr_param(sk, full_loss_cnt) &&
-+	    bbr_is_inflight_too_high(sk, rs)) {
-+		bbr_handle_queue_too_high_in_startup(sk);
-+		return;
-+	}
-+	if (bbr->loss_round_start)
-+		bbr->loss_events_in_round = 0;
-+}
-+
-+/* Estimate when the pipe is full, using the change in delivery rate: BBR
-+ * estimates bw probing filled the pipe if the estimated bw hasn't changed by
-+ * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited
-+ * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the
-+ * higher rwin, 3: we get higher delivery rate samples. Or transient
-+ * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar
-+ * design goal, but uses delay and inter-ACK spacing instead of bandwidth.
-+ */
-+static void bbr_check_full_bw_reached(struct sock *sk,
-+				       const struct rate_sample *rs,
-+				       struct bbr_context *ctx)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 bw_thresh, full_cnt, thresh;
-+
-+	if (bbr->full_bw_now || rs->is_app_limited)
-+		return;
-+
-+	thresh = bbr_param(sk, full_bw_thresh);
-+	full_cnt = bbr_param(sk, full_bw_cnt);
-+	bw_thresh = (u64)bbr->full_bw * thresh >> BBR_SCALE;
-+	if (ctx->sample_bw >= bw_thresh) {
-+		bbr_reset_full_bw(sk);
-+		bbr->full_bw = ctx->sample_bw;
-+		return;
-+	}
-+	if (!bbr->round_start)
-+		return;
-+	++bbr->full_bw_cnt;
-+	bbr->full_bw_now = bbr->full_bw_cnt >= full_cnt;
-+	bbr->full_bw_reached |= bbr->full_bw_now;
-+}
-+
-+/* If pipe is probably full, drain the queue and then enter steady-state. */
-+static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs,
-+			    struct bbr_context *ctx)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
-+		bbr->mode = BBR_DRAIN;	/* drain queue we created */
-+		/* Set ssthresh to export purely for monitoring, to signal
-+		 * completion of initial STARTUP by setting to a non-
-+		 * TCP_INFINITE_SSTHRESH value (ssthresh is not used by BBR).
-+		 */
-+		tcp_sk(sk)->snd_ssthresh =
-+				bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
-+		bbr_reset_congestion_signals(sk);
-+	}	/* fall through to check if in-flight is already small: */
-+	if (bbr->mode == BBR_DRAIN &&
-+	    bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <=
-+	    bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) {
-+		bbr->mode = BBR_PROBE_BW;
-+		bbr_start_bw_probe_down(sk);
-+	}
-+}
-+
-+static void bbr_update_model(struct sock *sk, const struct rate_sample *rs,
-+			      struct bbr_context *ctx)
-+{
-+	bbr_update_congestion_signals(sk, rs, ctx);
-+	bbr_update_ack_aggregation(sk, rs);
-+	bbr_check_loss_too_high_in_startup(sk, rs);
-+	bbr_check_full_bw_reached(sk, rs, ctx);
-+	bbr_check_drain(sk, rs, ctx);
-+	bbr_update_cycle_phase(sk, rs, ctx);
-+	bbr_update_min_rtt(sk, rs);
-+}
-+
-+/* Fast path for app-limited case.
-+ *
-+ * On each ack, we execute bbr state machine, which primarily consists of:
-+ * 1) update model based on new rate sample, and
-+ * 2) update control based on updated model or state change.
-+ *
-+ * There are certain workload/scenarios, e.g. app-limited case, where
-+ * either we can skip updating model or we can skip update of both model
-+ * as well as control. This provides signifcant softirq cpu savings for
-+ * processing incoming acks.
-+ *
-+ * In case of app-limited, if there is no congestion (loss/ecn) and
-+ * if observed bw sample is less than current estimated bw, then we can
-+ * skip some of the computation in bbr state processing:
-+ *
-+ * - if there is no rtt/mode/phase change: In this case, since all the
-+ *   parameters of the network model are constant, we can skip model
-+ *   as well control update.
-+ *
-+ * - else we can skip rest of the model update. But we still need to
-+ *   update the control to account for the new rtt/mode/phase.
-+ *
-+ * Returns whether we can take fast path or not.
-+ */
-+static bool bbr_run_fast_path(struct sock *sk, bool *update_model,
-+		const struct rate_sample *rs, struct bbr_context *ctx)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 prev_min_rtt_us, prev_mode;
-+
-+	if (bbr_param(sk, fast_path) && bbr->try_fast_path &&
-+	    rs->is_app_limited && ctx->sample_bw < bbr_max_bw(sk) &&
-+	    !bbr->loss_in_round && !bbr->ecn_in_round ) {
-+		prev_mode = bbr->mode;
-+		prev_min_rtt_us = bbr->min_rtt_us;
-+		bbr_check_drain(sk, rs, ctx);
-+		bbr_update_cycle_phase(sk, rs, ctx);
-+		bbr_update_min_rtt(sk, rs);
-+
-+		if (bbr->mode == prev_mode &&
-+		    bbr->min_rtt_us == prev_min_rtt_us &&
-+		    bbr->try_fast_path) {
-+			return true;
-+		}
-+
-+		/* Skip model update, but control still needs to be updated */
-+		*update_model = false;
-+	}
-+	return false;
-+}
-+
-+__bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	struct bbr_context ctx = { 0 };
-+	bool update_model = true;
-+	u32 bw, round_delivered;
-+	int ce_ratio = -1;
-+
-+	round_delivered = bbr_update_round_start(sk, rs, &ctx);
-+	if (bbr->round_start) {
-+		bbr->rounds_since_probe =
-+			min_t(s32, bbr->rounds_since_probe + 1, 0xFF);
-+		ce_ratio = bbr_update_ecn_alpha(sk);
-+	}
-+	bbr_plb(sk, rs, ce_ratio);
-+
-+	bbr->ecn_in_round  |= (bbr->ecn_eligible && rs->is_ece);
-+	bbr_calculate_bw_sample(sk, rs, &ctx);
-+	bbr_update_latest_delivery_signals(sk, rs, &ctx);
-+
-+	if (bbr_run_fast_path(sk, &update_model, rs, &ctx))
-+		goto out;
-+
-+	if (update_model)
-+		bbr_update_model(sk, rs, &ctx);
-+
-+	bbr_update_gains(sk);
-+	bw = bbr_bw(sk);
-+	bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
-+	bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain,
-+		     tcp_snd_cwnd(tp), &ctx);
-+	bbr_bound_cwnd_for_inflight_model(sk);
-+
-+out:
-+	bbr_advance_latest_delivery_signals(sk, rs, &ctx);
-+	bbr->prev_ca_state = inet_csk(sk)->icsk_ca_state;
-+	bbr->loss_in_cycle |= rs->lost > 0;
-+	bbr->ecn_in_cycle  |= rs->delivered_ce > 0;
-+}
-+
-+__bpf_kfunc static void bbr_init(struct sock *sk)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	bbr->initialized = 1;
-+
-+	bbr->init_cwnd = min(0x7FU, tcp_snd_cwnd(tp));
-+	bbr->prior_cwnd = tp->prior_cwnd;
-+	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
-+	bbr->next_rtt_delivered = tp->delivered;
-+	bbr->prev_ca_state = TCP_CA_Open;
-+
-+	bbr->probe_rtt_done_stamp = 0;
-+	bbr->probe_rtt_round_done = 0;
-+	bbr->probe_rtt_min_us = tcp_min_rtt(tp);
-+	bbr->probe_rtt_min_stamp = tcp_jiffies32;
-+	bbr->min_rtt_us = tcp_min_rtt(tp);
-+	bbr->min_rtt_stamp = tcp_jiffies32;
-+
-+	bbr->has_seen_rtt = 0;
-+	bbr_init_pacing_rate_from_rtt(sk);
-+
-+	bbr->round_start = 0;
-+	bbr->idle_restart = 0;
-+	bbr->full_bw_reached = 0;
-+	bbr->full_bw = 0;
- 	bbr->full_bw_cnt = 0;
--	bbr_reset_lt_bw_sampling(sk);
--	return tcp_snd_cwnd(tcp_sk(sk));
-+	bbr->cycle_mstamp = 0;
-+	bbr->cycle_idx = 0;
-+
-+	bbr_reset_startup_mode(sk);
-+
-+	bbr->ack_epoch_mstamp = tp->tcp_mstamp;
-+	bbr->ack_epoch_acked = 0;
-+	bbr->extra_acked_win_rtts = 0;
-+	bbr->extra_acked_win_idx = 0;
-+	bbr->extra_acked[0] = 0;
-+	bbr->extra_acked[1] = 0;
-+
-+	bbr->ce_state = 0;
-+	bbr->prior_rcv_nxt = tp->rcv_nxt;
-+	bbr->try_fast_path = 0;
-+
-+	cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
-+
-+	/* Start sampling ECN mark rate after first full flight is ACKed: */
-+	bbr->loss_round_delivered = tp->delivered + 1;
-+	bbr->loss_round_start = 0;
-+	bbr->undo_bw_lo = 0;
-+	bbr->undo_inflight_lo = 0;
-+	bbr->undo_inflight_hi = 0;
-+	bbr->loss_events_in_round = 0;
-+	bbr->startup_ecn_rounds = 0;
-+	bbr_reset_congestion_signals(sk);
-+	bbr->bw_lo = ~0U;
-+	bbr->bw_hi[0] = 0;
-+	bbr->bw_hi[1] = 0;
-+	bbr->inflight_lo = ~0U;
-+	bbr->inflight_hi = ~0U;
-+	bbr_reset_full_bw(sk);
-+	bbr->bw_probe_up_cnt = ~0U;
-+	bbr->bw_probe_up_acks = 0;
-+	bbr->bw_probe_up_rounds = 0;
-+	bbr->probe_wait_us = 0;
-+	bbr->stopped_risky_probe = 0;
-+	bbr->ack_phase = BBR_ACKS_INIT;
-+	bbr->rounds_since_probe = 0;
-+	bbr->bw_probe_samples = 0;
-+	bbr->prev_probe_too_high = 0;
-+	bbr->ecn_eligible = 0;
-+	bbr->ecn_alpha = bbr_param(sk, ecn_alpha_init);
-+	bbr->alpha_last_delivered = 0;
-+	bbr->alpha_last_delivered_ce = 0;
-+	bbr->plb.pause_until = 0;
-+
-+	tp->fast_ack_mode = bbr_fast_ack_mode ? 1 : 0;
-+
-+	if (bbr_can_use_ecn(sk))
-+		tp->ecn_flags |= TCP_ECN_ECT_PERMANENT;
-+}
-+
-+/* BBR marks the current round trip as a loss round. */
-+static void bbr_note_loss(struct sock *sk)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	/* Capture "current" data over the full round trip of loss, to
-+	 * have a better chance of observing the full capacity of the path.
-+	 */
-+	if (!bbr->loss_in_round)  /* first loss in this round trip? */
-+		bbr->loss_round_delivered = tp->delivered;  /* set round trip */
-+	bbr->loss_in_round = 1;
-+	bbr->loss_in_cycle = 1;
- }
- 
--/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
-+/* Core TCP stack informs us that the given skb was just marked lost. */
-+__bpf_kfunc static void bbr_skb_marked_lost(struct sock *sk,
-+					    const struct sk_buff *skb)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
-+	struct rate_sample rs = {};
-+
-+	bbr_note_loss(sk);
-+
-+	if (!bbr->bw_probe_samples)
-+		return;  /* not an skb sent while probing for bandwidth */
-+	if (unlikely(!scb->tx.delivered_mstamp))
-+		return;  /* skb was SACKed, reneged, marked lost; ignore it */
-+	/* We are probing for bandwidth. Construct a rate sample that
-+	 * estimates what happened in the flight leading up to this lost skb,
-+	 * then see if the loss rate went too high, and if so at which packet.
-+	 */
-+	rs.tx_in_flight = scb->tx.in_flight;
-+	rs.lost = tp->lost - scb->tx.lost;
-+	rs.is_app_limited = scb->tx.is_app_limited;
-+	if (bbr_is_inflight_too_high(sk, &rs)) {
-+		rs.tx_in_flight = bbr_inflight_hi_from_lost_skb(sk, &rs, skb);
-+		bbr_handle_inflight_too_high(sk, &rs);
-+	}
-+}
-+
-+static void bbr_run_loss_probe_recovery(struct sock *sk)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	struct rate_sample rs = {0};
-+
-+	bbr_note_loss(sk);
-+
-+	if (!bbr->bw_probe_samples)
-+		return;  /* not sent while probing for bandwidth */
-+	/* We are probing for bandwidth. Construct a rate sample that
-+	 * estimates what happened in the flight leading up to this
-+	 * loss, then see if the loss rate went too high.
-+	 */
-+	rs.lost = 1;	/* TLP probe repaired loss of a single segment */
-+	rs.tx_in_flight = bbr->inflight_latest + rs.lost;
-+	rs.is_app_limited = tp->tlp_orig_data_app_limited;
-+	if (bbr_is_inflight_too_high(sk, &rs))
-+		bbr_handle_inflight_too_high(sk, &rs);
-+}
-+
-+/* Revert short-term model if current loss recovery event was spurious. */
-+__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	bbr_reset_full_bw(sk); /* spurious slow-down; reset full bw detector */
-+	bbr->loss_in_round = 0;
-+
-+	/* Revert to cwnd and other state saved before loss episode. */
-+	bbr->bw_lo = max(bbr->bw_lo, bbr->undo_bw_lo);
-+	bbr->inflight_lo = max(bbr->inflight_lo, bbr->undo_inflight_lo);
-+	bbr->inflight_hi = max(bbr->inflight_hi, bbr->undo_inflight_hi);
-+	bbr->try_fast_path = 0;  /* take slow path to set proper cwnd, pacing */
-+	return bbr->prior_cwnd;
-+}
-+
-+/* Entering loss recovery, so save state for when we undo recovery. */
- __bpf_kfunc static u32 bbr_ssthresh(struct sock *sk)
- {
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
- 	bbr_save_cwnd(sk);
-+	/* For undo, save state that adapts based on loss signal. */
-+	bbr->undo_bw_lo		= bbr->bw_lo;
-+	bbr->undo_inflight_lo	= bbr->inflight_lo;
-+	bbr->undo_inflight_hi	= bbr->inflight_hi;
- 	return tcp_sk(sk)->snd_ssthresh;
- }
- 
-+static enum tcp_bbr_phase bbr_get_phase(struct bbr *bbr)
-+{
-+	switch (bbr->mode) {
-+	case BBR_STARTUP:
-+		return BBR_PHASE_STARTUP;
-+	case BBR_DRAIN:
-+		return BBR_PHASE_DRAIN;
-+	case BBR_PROBE_BW:
-+		break;
-+	case BBR_PROBE_RTT:
-+		return BBR_PHASE_PROBE_RTT;
-+	default:
-+		return BBR_PHASE_INVALID;
-+	}
-+	switch (bbr->cycle_idx) {
-+	case BBR_BW_PROBE_UP:
-+		return BBR_PHASE_PROBE_BW_UP;
-+	case BBR_BW_PROBE_DOWN:
-+		return BBR_PHASE_PROBE_BW_DOWN;
-+	case BBR_BW_PROBE_CRUISE:
-+		return BBR_PHASE_PROBE_BW_CRUISE;
-+	case BBR_BW_PROBE_REFILL:
-+		return BBR_PHASE_PROBE_BW_REFILL;
-+	default:
-+		return BBR_PHASE_INVALID;
-+	}
-+}
-+
- static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
--			   union tcp_cc_info *info)
-+			    union tcp_cc_info *info)
- {
- 	if (ext & (1 << (INET_DIAG_BBRINFO - 1)) ||
- 	    ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
--		struct tcp_sock *tp = tcp_sk(sk);
- 		struct bbr *bbr = inet_csk_ca(sk);
--		u64 bw = bbr_bw(sk);
--
--		bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE;
--		memset(&info->bbr, 0, sizeof(info->bbr));
--		info->bbr.bbr_bw_lo		= (u32)bw;
--		info->bbr.bbr_bw_hi		= (u32)(bw >> 32);
--		info->bbr.bbr_min_rtt		= bbr->min_rtt_us;
--		info->bbr.bbr_pacing_gain	= bbr->pacing_gain;
--		info->bbr.bbr_cwnd_gain		= bbr->cwnd_gain;
-+		u64 bw = bbr_bw_bytes_per_sec(sk, bbr_bw(sk));
-+		u64 bw_hi = bbr_bw_bytes_per_sec(sk, bbr_max_bw(sk));
-+		u64 bw_lo = bbr->bw_lo == ~0U ?
-+			~0ULL : bbr_bw_bytes_per_sec(sk, bbr->bw_lo);
-+		struct tcp_bbr_info *bbr_info = &info->bbr;
-+
-+		memset(bbr_info, 0, sizeof(*bbr_info));
-+		bbr_info->bbr_bw_lo		= (u32)bw;
-+		bbr_info->bbr_bw_hi		= (u32)(bw >> 32);
-+		bbr_info->bbr_min_rtt		= bbr->min_rtt_us;
-+		bbr_info->bbr_pacing_gain	= bbr->pacing_gain;
-+		bbr_info->bbr_cwnd_gain		= bbr->cwnd_gain;
-+		bbr_info->bbr_bw_hi_lsb		= (u32)bw_hi;
-+		bbr_info->bbr_bw_hi_msb		= (u32)(bw_hi >> 32);
-+		bbr_info->bbr_bw_lo_lsb		= (u32)bw_lo;
-+		bbr_info->bbr_bw_lo_msb		= (u32)(bw_lo >> 32);
-+		bbr_info->bbr_mode		= bbr->mode;
-+		bbr_info->bbr_phase		= (__u8)bbr_get_phase(bbr);
-+		bbr_info->bbr_version		= (__u8)BBR_VERSION;
-+		bbr_info->bbr_inflight_lo	= bbr->inflight_lo;
-+		bbr_info->bbr_inflight_hi	= bbr->inflight_hi;
-+		bbr_info->bbr_extra_acked	= bbr_extra_acked(sk);
- 		*attr = INET_DIAG_BBRINFO;
--		return sizeof(info->bbr);
-+		return sizeof(*bbr_info);
- 	}
- 	return 0;
- }
- 
- __bpf_kfunc static void bbr_set_state(struct sock *sk, u8 new_state)
- {
-+	struct tcp_sock *tp = tcp_sk(sk);
- 	struct bbr *bbr = inet_csk_ca(sk);
- 
- 	if (new_state == TCP_CA_Loss) {
--		struct rate_sample rs = { .losses = 1 };
- 
- 		bbr->prev_ca_state = TCP_CA_Loss;
--		bbr->full_bw = 0;
--		bbr->round_start = 1;	/* treat RTO like end of a round */
--		bbr_lt_bw_sampling(sk, &rs);
-+		tcp_plb_update_state_upon_rto(sk, &bbr->plb);
-+		/* The tcp_write_timeout() call to sk_rethink_txhash() likely
-+		 * repathed this flow, so re-learn the min network RTT on the
-+		 * new path:
-+		 */
-+		bbr_reset_full_bw(sk);
-+		if (!bbr_is_probing_bandwidth(sk) && bbr->inflight_lo == ~0U) {
-+			/* bbr_adapt_lower_bounds() needs cwnd before
-+			 * we suffered an RTO, to update inflight_lo:
-+			 */
-+			bbr->inflight_lo =
-+				max(tcp_snd_cwnd(tp), bbr->prior_cwnd);
-+		}
-+	} else if (bbr->prev_ca_state == TCP_CA_Loss &&
-+		   new_state != TCP_CA_Loss) {
-+		bbr_exit_loss_recovery(sk);
- 	}
- }
- 
-+
- static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
--	.flags		= TCP_CONG_NON_RESTRICTED,
-+	.flags		= TCP_CONG_NON_RESTRICTED | TCP_CONG_WANTS_CE_EVENTS,
- 	.name		= "bbr",
- 	.owner		= THIS_MODULE,
- 	.init		= bbr_init,
- 	.cong_control	= bbr_main,
- 	.sndbuf_expand	= bbr_sndbuf_expand,
-+	.skb_marked_lost = bbr_skb_marked_lost,
- 	.undo_cwnd	= bbr_undo_cwnd,
- 	.cwnd_event	= bbr_cwnd_event,
- 	.ssthresh	= bbr_ssthresh,
--	.min_tso_segs	= bbr_min_tso_segs,
-+	.tso_segs	= bbr_tso_segs,
- 	.get_info	= bbr_get_info,
- 	.set_state	= bbr_set_state,
- };
-@@ -1159,10 +2359,11 @@ BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids)
- BTF_ID_FLAGS(func, bbr_init)
- BTF_ID_FLAGS(func, bbr_main)
- BTF_ID_FLAGS(func, bbr_sndbuf_expand)
-+BTF_ID_FLAGS(func, bbr_skb_marked_lost)
- BTF_ID_FLAGS(func, bbr_undo_cwnd)
- BTF_ID_FLAGS(func, bbr_cwnd_event)
- BTF_ID_FLAGS(func, bbr_ssthresh)
--BTF_ID_FLAGS(func, bbr_min_tso_segs)
-+BTF_ID_FLAGS(func, bbr_tso_segs)
- BTF_ID_FLAGS(func, bbr_set_state)
- BTF_KFUNCS_END(tcp_bbr_check_kfunc_ids)
- 
-@@ -1195,5 +2396,12 @@ MODULE_AUTHOR("Van Jacobson <vanj@google.com>");
- MODULE_AUTHOR("Neal Cardwell <ncardwell@google.com>");
- MODULE_AUTHOR("Yuchung Cheng <ycheng@google.com>");
- MODULE_AUTHOR("Soheil Hassas Yeganeh <soheil@google.com>");
-+MODULE_AUTHOR("Priyaranjan Jha <priyarjha@google.com>");
-+MODULE_AUTHOR("Yousuk Seung <ysseung@google.com>");
-+MODULE_AUTHOR("Kevin Yang <yyd@google.com>");
-+MODULE_AUTHOR("Arjun Roy <arjunroy@google.com>");
-+MODULE_AUTHOR("David Morley <morleyd@google.com>");
-+
- MODULE_LICENSE("Dual BSD/GPL");
- MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)");
-+MODULE_VERSION(__stringify(BBR_VERSION));
-diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
-index 28ffcfbeef14..7b13915ba288 100644
---- a/net/ipv4/tcp_cong.c
-+++ b/net/ipv4/tcp_cong.c
-@@ -237,6 +237,7 @@ void tcp_init_congestion_control(struct sock *sk)
- 	struct inet_connection_sock *icsk = inet_csk(sk);
- 
- 	tcp_sk(sk)->prior_ssthresh = 0;
-+	tcp_sk(sk)->fast_ack_mode = 0;
- 	if (icsk->icsk_ca_ops->init)
- 		icsk->icsk_ca_ops->init(sk);
- 	if (tcp_ca_needs_ecn(sk))
-diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
-index 38da23f991d6..37d2b393088a 100644
---- a/net/ipv4/tcp_input.c
-+++ b/net/ipv4/tcp_input.c
-@@ -365,7 +365,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
- 			tcp_enter_quickack_mode(sk, 2);
- 		break;
- 	case INET_ECN_CE:
--		if (tcp_ca_needs_ecn(sk))
-+		if (tcp_ca_wants_ce_events(sk))
- 			tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
- 
- 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
-@@ -376,7 +376,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
- 		tp->ecn_flags |= TCP_ECN_SEEN;
- 		break;
- 	default:
--		if (tcp_ca_needs_ecn(sk))
-+		if (tcp_ca_wants_ce_events(sk))
- 			tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
- 		tp->ecn_flags |= TCP_ECN_SEEN;
- 		break;
-@@ -1115,7 +1115,12 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
-  */
- static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
- {
-+	struct sock *sk = (struct sock *)tp;
-+	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-+
- 	tp->lost += tcp_skb_pcount(skb);
-+	if (ca_ops->skb_marked_lost)
-+		ca_ops->skb_marked_lost(sk, skb);
- }
- 
- void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
-@@ -1496,6 +1501,17 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
- 	WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
- 	tcp_skb_pcount_add(skb, -pcount);
- 
-+	/* Adjust tx.in_flight as pcount is shifted from skb to prev. */
-+	if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount,
-+		      "prev in_flight: %u skb in_flight: %u pcount: %u",
-+		      TCP_SKB_CB(prev)->tx.in_flight,
-+		      TCP_SKB_CB(skb)->tx.in_flight,
-+		      pcount))
-+		TCP_SKB_CB(skb)->tx.in_flight = 0;
-+	else
-+		TCP_SKB_CB(skb)->tx.in_flight -= pcount;
-+	TCP_SKB_CB(prev)->tx.in_flight += pcount;
-+
- 	/* When we're adding to gso_segs == 1, gso_size will be zero,
- 	 * in theory this shouldn't be necessary but as long as DSACK
- 	 * code can come after this skb later on it's better to keep
-@@ -3790,7 +3806,8 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
- /* This routine deals with acks during a TLP episode and ends an episode by
-  * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
-  */
--static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
-+static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag,
-+				struct rate_sample *rs)
- {
- 	struct tcp_sock *tp = tcp_sk(sk);
- 
-@@ -3807,6 +3824,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
- 		/* ACK advances: there was a loss, so reduce cwnd. Reset
- 		 * tlp_high_seq in tcp_init_cwnd_reduction()
- 		 */
-+		tcp_ca_event(sk, CA_EVENT_TLP_RECOVERY);
- 		tcp_init_cwnd_reduction(sk);
- 		tcp_set_ca_state(sk, TCP_CA_CWR);
- 		tcp_end_cwnd_reduction(sk);
-@@ -3817,6 +3835,11 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
- 			     FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
- 		/* Pure dupack: original and TLP probe arrived; no loss */
- 		tp->tlp_high_seq = 0;
-+	} else {
-+		/* This ACK matches a TLP retransmit. We cannot yet tell if
-+		 * this ACK is for the original or the TLP retransmit.
-+		 */
-+		rs->is_acking_tlp_retrans_seq = 1;
- 	}
- }
- 
-@@ -3925,6 +3948,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
- 
- 	prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
- 	rs.prior_in_flight = tcp_packets_in_flight(tp);
-+	tcp_rate_check_app_limited(sk);
- 
- 	/* ts_recent update must be made after we are sure that the packet
- 	 * is in window.
-@@ -3999,7 +4023,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
- 	tcp_rack_update_reo_wnd(sk, &rs);
- 
- 	if (tp->tlp_high_seq)
--		tcp_process_tlp_ack(sk, ack, flag);
-+		tcp_process_tlp_ack(sk, ack, flag, &rs);
- 
- 	if (tcp_ack_is_dubious(sk, flag)) {
- 		if (!(flag & (FLAG_SND_UNA_ADVANCED |
-@@ -4023,6 +4047,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
- 	delivered = tcp_newly_delivered(sk, delivered, flag);
- 	lost = tp->lost - lost;			/* freshly marked lost */
- 	rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
-+	rs.is_ece = !!(flag & FLAG_ECE);
- 	tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
- 	tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
- 	tcp_xmit_recovery(sk, rexmit);
-@@ -4042,7 +4067,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
- 	tcp_ack_probe(sk);
- 
- 	if (tp->tlp_high_seq)
--		tcp_process_tlp_ack(sk, ack, flag);
-+		tcp_process_tlp_ack(sk, ack, flag, &rs);
- 	return 1;
- 
- old_ack:
-@@ -5704,13 +5729,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
- 
- 	    /* More than one full frame received... */
- 	if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
-+	     (tp->fast_ack_mode == 1 ||
- 	     /* ... and right edge of window advances far enough.
- 	      * (tcp_recvmsg() will send ACK otherwise).
- 	      * If application uses SO_RCVLOWAT, we want send ack now if
- 	      * we have not received enough bytes to satisfy the condition.
- 	      */
--	    (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
--	     __tcp_select_window(sk) >= tp->rcv_wnd)) ||
-+	      (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
-+	       __tcp_select_window(sk) >= tp->rcv_wnd))) ||
- 	    /* We ACK each frame or... */
- 	    tcp_in_quickack_mode(sk) ||
- 	    /* Protocol state mandates a one-time immediate ACK */
-diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
-index 538c06f95918..e4c861c071ae 100644
---- a/net/ipv4/tcp_minisocks.c
-+++ b/net/ipv4/tcp_minisocks.c
-@@ -460,6 +460,8 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
- 	u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
- 	bool ca_got_dst = false;
- 
-+	tcp_set_ecn_low_from_dst(sk, dst);
-+
- 	if (ca_key != TCP_CA_UNSPEC) {
- 		const struct tcp_congestion_ops *ca;
- 
-diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
-index 95618d0e78e4..3f4bdd2b6476 100644
---- a/net/ipv4/tcp_output.c
-+++ b/net/ipv4/tcp_output.c
-@@ -336,10 +336,9 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
- 	bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
- 	bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
- 		tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
-+	const struct dst_entry *dst = __sk_dst_get(sk);
- 
- 	if (!use_ecn) {
--		const struct dst_entry *dst = __sk_dst_get(sk);
--
- 		if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
- 			use_ecn = true;
- 	}
-@@ -351,6 +350,9 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
- 		tp->ecn_flags = TCP_ECN_OK;
- 		if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
- 			INET_ECN_xmit(sk);
-+
-+		if (dst)
-+			tcp_set_ecn_low_from_dst(sk, dst);
- 	}
- }
- 
-@@ -388,7 +390,8 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
- 				th->cwr = 1;
- 				skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
- 			}
--		} else if (!tcp_ca_needs_ecn(sk)) {
-+		} else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) &&
-+			!tcp_ca_needs_ecn(sk)) {
- 			/* ACK or retransmitted segment: clear ECT|CE */
- 			INET_ECN_dontxmit(sk);
- 		}
-@@ -1601,7 +1604,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
- {
- 	struct tcp_sock *tp = tcp_sk(sk);
- 	struct sk_buff *buff;
--	int old_factor;
-+	int old_factor, inflight_prev;
- 	long limit;
- 	int nlen;
- 	u8 flags;
-@@ -1676,6 +1679,30 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
- 
- 		if (diff)
- 			tcp_adjust_pcount(sk, skb, diff);
-+
-+		inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor;
-+		if (inflight_prev < 0) {
-+			WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious(
-+					  old_factor,
-+					  TCP_SKB_CB(skb)->sacked,
-+					  TCP_SKB_CB(skb)->tx.in_flight),
-+				  "inconsistent: tx.in_flight: %u "
-+				  "old_factor: %d mss: %u sacked: %u "
-+				  "1st pcount: %d 2nd pcount: %d "
-+				  "1st len: %u 2nd len: %u ",
-+				  TCP_SKB_CB(skb)->tx.in_flight, old_factor,
-+				  mss_now, TCP_SKB_CB(skb)->sacked,
-+				  tcp_skb_pcount(skb), tcp_skb_pcount(buff),
-+				  skb->len, buff->len);
-+			inflight_prev = 0;
-+		}
-+		/* Set 1st tx.in_flight as if 1st were sent by itself: */
-+		TCP_SKB_CB(skb)->tx.in_flight = inflight_prev +
-+						 tcp_skb_pcount(skb);
-+		/* Set 2nd tx.in_flight with new 1st and 2nd pcounts: */
-+		TCP_SKB_CB(buff)->tx.in_flight = inflight_prev +
-+						 tcp_skb_pcount(skb) +
-+						 tcp_skb_pcount(buff);
- 	}
- 
- 	/* Link BUFF into the send queue. */
-@@ -2033,13 +2060,12 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
- static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
- {
- 	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
--	u32 min_tso, tso_segs;
--
--	min_tso = ca_ops->min_tso_segs ?
--			ca_ops->min_tso_segs(sk) :
--			READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
-+	u32 tso_segs;
- 
--	tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
-+	tso_segs = ca_ops->tso_segs ?
-+		ca_ops->tso_segs(sk, mss_now) :
-+		tcp_tso_autosize(sk, mss_now,
-+				 sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
- 	return min_t(u32, tso_segs, sk->sk_gso_max_segs);
- }
- 
-@@ -2767,6 +2793,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
- 			skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
- 			list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
- 			tcp_init_tso_segs(skb, mss_now);
-+			tcp_set_tx_in_flight(sk, skb);
- 			goto repair; /* Skip network transmission */
- 		}
- 
-@@ -2981,6 +3008,7 @@ void tcp_send_loss_probe(struct sock *sk)
- 	if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
- 		goto rearm_timer;
- 
-+	tp->tlp_orig_data_app_limited = TCP_SKB_CB(skb)->tx.is_app_limited;
- 	if (__tcp_retransmit_skb(sk, skb, 1))
- 		goto rearm_timer;
- 
-diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
-index a8f6d9d06f2e..8737f2134648 100644
---- a/net/ipv4/tcp_rate.c
-+++ b/net/ipv4/tcp_rate.c
-@@ -34,6 +34,24 @@
-  * ready to send in the write queue.
-  */
- 
-+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	u32 in_flight;
-+
-+	/* Check, sanitize, and record packets in flight after skb was sent. */
-+	in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb);
-+	if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX,
-+		      "insane in_flight %u cc %s mss %u "
-+		      "cwnd %u pif %u %u %u %u\n",
-+		      in_flight, inet_csk(sk)->icsk_ca_ops->name,
-+		      tp->mss_cache, tp->snd_cwnd,
-+		      tp->packets_out, tp->retrans_out,
-+		      tp->sacked_out, tp->lost_out))
-+		in_flight = TCPCB_IN_FLIGHT_MAX;
-+	TCP_SKB_CB(skb)->tx.in_flight = in_flight;
-+}
-+
- /* Snapshot the current delivery information in the skb, to generate
-  * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered().
-  */
-@@ -66,7 +84,9 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
- 	TCP_SKB_CB(skb)->tx.delivered_mstamp	= tp->delivered_mstamp;
- 	TCP_SKB_CB(skb)->tx.delivered		= tp->delivered;
- 	TCP_SKB_CB(skb)->tx.delivered_ce	= tp->delivered_ce;
-+	TCP_SKB_CB(skb)->tx.lost		= tp->lost;
- 	TCP_SKB_CB(skb)->tx.is_app_limited	= tp->app_limited ? 1 : 0;
-+	tcp_set_tx_in_flight(sk, skb);
- }
- 
- /* When an skb is sacked or acked, we fill in the rate sample with the (prior)
-@@ -91,18 +111,21 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
- 	if (!rs->prior_delivered ||
- 	    tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
- 			       scb->end_seq, rs->last_end_seq)) {
-+		rs->prior_lost	     = scb->tx.lost;
- 		rs->prior_delivered_ce  = scb->tx.delivered_ce;
- 		rs->prior_delivered  = scb->tx.delivered;
- 		rs->prior_mstamp     = scb->tx.delivered_mstamp;
- 		rs->is_app_limited   = scb->tx.is_app_limited;
- 		rs->is_retrans	     = scb->sacked & TCPCB_RETRANS;
-+		rs->tx_in_flight     = scb->tx.in_flight;
- 		rs->last_end_seq     = scb->end_seq;
- 
- 		/* Record send time of most recently ACKed packet: */
- 		tp->first_tx_mstamp  = tx_tstamp;
- 		/* Find the duration of the "send phase" of this window: */
--		rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
--						     scb->tx.first_tx_mstamp);
-+		rs->interval_us      = tcp_stamp32_us_delta(
-+						tp->first_tx_mstamp,
-+						scb->tx.first_tx_mstamp);
- 
- 	}
- 	/* Mark off the skb delivered once it's sacked to avoid being
-@@ -144,6 +167,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
- 		return;
- 	}
- 	rs->delivered   = tp->delivered - rs->prior_delivered;
-+	rs->lost        = tp->lost - rs->prior_lost;
- 
- 	rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
- 	/* delivered_ce occupies less than 32 bits in the skb control block */
-@@ -155,7 +179,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
- 	 * longer phase.
- 	 */
- 	snd_us = rs->interval_us;				/* send phase */
--	ack_us = tcp_stamp_us_delta(tp->tcp_mstamp,
-+	ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp,
- 				    rs->prior_mstamp); /* ack phase */
- 	rs->interval_us = max(snd_us, ack_us);
- 
-diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
-index 892c86657fbc..33c2c9252364 100644
---- a/net/ipv4/tcp_timer.c
-+++ b/net/ipv4/tcp_timer.c
-@@ -693,6 +693,7 @@ void tcp_write_timer_handler(struct sock *sk)
- 		return;
- 	}
- 
-+	tcp_rate_check_app_limited(sk);
- 	tcp_mstamp_refresh(tcp_sk(sk));
- 	event = icsk->icsk_pending;
- 
--- 
-2.46.0.rc1
-
-From 3bf203491864f9a7c6c234128a2d82fb8f448683 Mon Sep 17 00:00:00 2001
-From: Peter Jung <admin@ptr1337.dev>
-Date: Mon, 15 Jul 2024 13:23:33 +0200
-Subject: [PATCH 03/11] block
-
-Signed-off-by: Peter Jung <admin@ptr1337.dev>
----
- block/bfq-iosched.c | 120 ++++++++++++++++++++++++++++++++++++--------
- block/bfq-iosched.h |  16 +++++-
- block/mq-deadline.c | 110 +++++++++++++++++++++++++++++++++-------
- 3 files changed, 203 insertions(+), 43 deletions(-)
-
-diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
-index 4b88a54a9b76..88df08a246fa 100644
---- a/block/bfq-iosched.c
-+++ b/block/bfq-iosched.c
-@@ -467,6 +467,21 @@ static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q)
- 	return icq;
- }
- 
-+static struct bfq_io_cq *bfq_bic_try_lookup(struct request_queue *q)
-+{
-+	if (!current->io_context)
-+		return NULL;
-+	if (spin_trylock_irq(&q->queue_lock)) {
-+		struct bfq_io_cq *icq;
-+
-+		icq = icq_to_bic(ioc_lookup_icq(q));
-+		spin_unlock_irq(&q->queue_lock);
-+		return icq;
-+	}
-+
-+	return NULL;
-+}
-+
- /*
-  * Scheduler run of queue, if there are requests pending and no one in the
-  * driver that will restart queueing.
-@@ -2454,10 +2469,21 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
- 	 * returned by bfq_bic_lookup does not go away before
- 	 * bfqd->lock is taken.
- 	 */
--	struct bfq_io_cq *bic = bfq_bic_lookup(q);
-+	struct bfq_io_cq *bic = bfq_bic_try_lookup(q);
- 	bool ret;
- 
--	spin_lock_irq(&bfqd->lock);
-+	/*
-+	 * bio merging is called for every bio queued, and it's very easy
-+	 * to run into contention because of that. If we fail getting
-+	 * the dd lock, just skip this merge attempt. For related IO, the
-+	 * plug will be the successful merging point. If we get here, we
-+	 * already failed doing the obvious merge. Chances of actually
-+	 * getting a merge off this path is a lot slimmer, so skipping an
-+	 * occassional lookup that will most likely not succeed anyway should
-+	 * not be a problem.
-+	 */
-+	if (!spin_trylock_irq(&bfqd->lock))
-+		return false;
- 
- 	if (bic) {
- 		/*
-@@ -5148,6 +5174,10 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
- {
- 	struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
- 
-+	if (!list_empty_careful(&bfqd->at_head) ||
-+	    !list_empty_careful(&bfqd->at_tail))
-+		return true;
-+
- 	/*
- 	 * Avoiding lock: a race on bfqd->queued should cause at
- 	 * most a call to dispatch for nothing
-@@ -5297,15 +5327,61 @@ static inline void bfq_update_dispatch_stats(struct request_queue *q,
- 					     bool idle_timer_disabled) {}
- #endif /* CONFIG_BFQ_CGROUP_DEBUG */
- 
-+static void bfq_insert_request(struct request_queue *q, struct request *rq,
-+			       blk_insert_t flags, struct list_head *free);
-+
-+static void __bfq_do_insert(struct request_queue *q, blk_insert_t flags,
-+			    struct list_head *list, struct list_head *free)
-+{
-+	while (!list_empty(list)) {
-+		struct request *rq;
-+
-+		rq = list_first_entry(list, struct request, queuelist);
-+		list_del_init(&rq->queuelist);
-+		bfq_insert_request(q, rq, flags, free);
-+	}
-+}
-+
-+static void bfq_do_insert(struct request_queue *q, struct list_head *free)
-+{
-+	struct bfq_data *bfqd = q->elevator->elevator_data;
-+	LIST_HEAD(at_head);
-+	LIST_HEAD(at_tail);
-+
-+	spin_lock(&bfqd->insert_lock);
-+	list_splice_init(&bfqd->at_head, &at_head);
-+	list_splice_init(&bfqd->at_tail, &at_tail);
-+	spin_unlock(&bfqd->insert_lock);
-+
-+	__bfq_do_insert(q, BLK_MQ_INSERT_AT_HEAD, &at_head, free);
-+	__bfq_do_insert(q, 0, &at_tail, free);
-+}
-+
- static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
- {
--	struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
-+	struct request_queue *q = hctx->queue;
-+	struct bfq_data *bfqd = q->elevator->elevator_data;
- 	struct request *rq;
- 	struct bfq_queue *in_serv_queue;
- 	bool waiting_rq, idle_timer_disabled = false;
-+	LIST_HEAD(free);
-+
-+	/*
-+	 * If someone else is already dispatching, skip this one. This will
-+	 * defer the next dispatch event to when something completes, and could
-+	 * potentially lower the queue depth for contended cases.
-+	 *
-+	 * See the logic in blk_mq_do_dispatch_sched(), which loops and
-+	 * retries if nothing is dispatched.
-+	 */
-+	if (test_bit(BFQ_DISPATCHING, &bfqd->run_state) ||
-+	    test_and_set_bit_lock(BFQ_DISPATCHING, &bfqd->run_state))
-+		return NULL;
- 
- 	spin_lock_irq(&bfqd->lock);
- 
-+	bfq_do_insert(hctx->queue, &free);
-+
- 	in_serv_queue = bfqd->in_service_queue;
- 	waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue);
- 
-@@ -5315,7 +5391,9 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
- 			waiting_rq && !bfq_bfqq_wait_request(in_serv_queue);
- 	}
- 
-+	clear_bit_unlock(BFQ_DISPATCHING, &bfqd->run_state);
- 	spin_unlock_irq(&bfqd->lock);
-+	blk_mq_free_requests(&free);
- 	bfq_update_dispatch_stats(hctx->queue, rq,
- 			idle_timer_disabled ? in_serv_queue : NULL,
- 				idle_timer_disabled);
-@@ -6236,27 +6314,21 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
- 
- static struct bfq_queue *bfq_init_rq(struct request *rq);
- 
--static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
--			       blk_insert_t flags)
-+static void bfq_insert_request(struct request_queue *q, struct request *rq,
-+			       blk_insert_t flags, struct list_head *free)
- {
--	struct request_queue *q = hctx->queue;
- 	struct bfq_data *bfqd = q->elevator->elevator_data;
- 	struct bfq_queue *bfqq;
- 	bool idle_timer_disabled = false;
- 	blk_opf_t cmd_flags;
--	LIST_HEAD(free);
- 
- #ifdef CONFIG_BFQ_GROUP_IOSCHED
- 	if (!cgroup_subsys_on_dfl(io_cgrp_subsys) && rq->bio)
- 		bfqg_stats_update_legacy_io(q, rq);
- #endif
--	spin_lock_irq(&bfqd->lock);
- 	bfqq = bfq_init_rq(rq);
--	if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
--		spin_unlock_irq(&bfqd->lock);
--		blk_mq_free_requests(&free);
-+	if (blk_mq_sched_try_insert_merge(q, rq, free))
- 		return;
--	}
- 
- 	trace_block_rq_insert(rq);
- 
-@@ -6286,8 +6358,6 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
- 	 * merge).
- 	 */
- 	cmd_flags = rq->cmd_flags;
--	spin_unlock_irq(&bfqd->lock);
--
- 	bfq_update_insert_stats(q, bfqq, idle_timer_disabled,
- 				cmd_flags);
- }
-@@ -6296,13 +6366,15 @@ static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
- 				struct list_head *list,
- 				blk_insert_t flags)
- {
--	while (!list_empty(list)) {
--		struct request *rq;
-+	struct request_queue *q = hctx->queue;
-+	struct bfq_data *bfqd = q->elevator->elevator_data;
- 
--		rq = list_first_entry(list, struct request, queuelist);
--		list_del_init(&rq->queuelist);
--		bfq_insert_request(hctx, rq, flags);
--	}
-+	spin_lock_irq(&bfqd->insert_lock);
-+	if (flags & BLK_MQ_INSERT_AT_HEAD)
-+		list_splice_init(list, &bfqd->at_head);
-+	else
-+		list_splice_init(list, &bfqd->at_tail);
-+	spin_unlock_irq(&bfqd->insert_lock);
- }
- 
- static void bfq_update_hw_tag(struct bfq_data *bfqd)
-@@ -7211,6 +7283,12 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
- 	q->elevator = eq;
- 	spin_unlock_irq(&q->queue_lock);
- 
-+	spin_lock_init(&bfqd->lock);
-+	spin_lock_init(&bfqd->insert_lock);
-+
-+	INIT_LIST_HEAD(&bfqd->at_head);
-+	INIT_LIST_HEAD(&bfqd->at_tail);
-+
- 	/*
- 	 * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
- 	 * Grab a permanent reference to it, so that the normal code flow
-@@ -7329,8 +7407,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
- 	/* see comments on the definition of next field inside bfq_data */
- 	bfqd->actuator_load_threshold = 4;
- 
--	spin_lock_init(&bfqd->lock);
--
- 	/*
- 	 * The invocation of the next bfq_create_group_hierarchy
- 	 * function is the head of a chain of function calls
-diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
-index 467e8cfc41a2..f44f5d4ec2f4 100644
---- a/block/bfq-iosched.h
-+++ b/block/bfq-iosched.h
-@@ -504,12 +504,26 @@ struct bfq_io_cq {
- 	unsigned int requests;	/* Number of requests this process has in flight */
- };
- 
-+enum {
-+	BFQ_DISPATCHING	= 0,
-+};
-+
- /**
-  * struct bfq_data - per-device data structure.
-  *
-  * All the fields are protected by @lock.
-  */
- struct bfq_data {
-+	struct {
-+		spinlock_t lock;
-+		spinlock_t insert_lock;
-+	} ____cacheline_aligned_in_smp;
-+
-+	unsigned long run_state;
-+
-+	struct list_head at_head;
-+	struct list_head at_tail;
-+
- 	/* device request queue */
- 	struct request_queue *queue;
- 	/* dispatch queue */
-@@ -795,8 +809,6 @@ struct bfq_data {
- 	/* fallback dummy bfqq for extreme OOM conditions */
- 	struct bfq_queue oom_bfqq;
- 
--	spinlock_t lock;
--
- 	/*
- 	 * bic associated with the task issuing current bio for
- 	 * merging. This and the next field are used as a support to
-diff --git a/block/mq-deadline.c b/block/mq-deadline.c
-index 94eede4fb9eb..567fd69a146c 100644
---- a/block/mq-deadline.c
-+++ b/block/mq-deadline.c
-@@ -79,10 +79,23 @@ struct dd_per_prio {
- 	struct io_stats_per_prio stats;
- };
- 
-+enum {
-+	DD_DISPATCHING	= 0,
-+};
-+
- struct deadline_data {
- 	/*
- 	 * run time data
- 	 */
-+	struct {
-+		spinlock_t lock;
-+		spinlock_t insert_lock;
-+	} ____cacheline_aligned_in_smp;
-+
-+	unsigned long run_state;
-+
-+	struct list_head at_head;
-+	struct list_head at_tail;
- 
- 	struct dd_per_prio per_prio[DD_PRIO_COUNT];
- 
-@@ -100,8 +113,6 @@ struct deadline_data {
- 	int front_merges;
- 	u32 async_depth;
- 	int prio_aging_expire;
--
--	spinlock_t lock;
- };
- 
- /* Maps an I/O priority class to a deadline scheduler priority. */
-@@ -112,6 +123,9 @@ static const enum dd_prio ioprio_class_to_prio[] = {
- 	[IOPRIO_CLASS_IDLE]	= DD_IDLE_PRIO,
- };
- 
-+static void dd_insert_request(struct request_queue *q, struct request *rq,
-+			      blk_insert_t flags, struct list_head *free);
-+
- static inline struct rb_root *
- deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq)
- {
-@@ -451,6 +465,33 @@ static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd,
- 	return NULL;
- }
- 
-+static void __dd_do_insert(struct request_queue *q, blk_insert_t flags,
-+			   struct list_head *list, struct list_head *free)
-+{
-+	while (!list_empty(list)) {
-+		struct request *rq;
-+
-+		rq = list_first_entry(list, struct request, queuelist);
-+		list_del_init(&rq->queuelist);
-+		dd_insert_request(q, rq, flags, free);
-+	}
-+}
-+
-+static void dd_do_insert(struct request_queue *q, struct list_head *free)
-+{
-+	struct deadline_data *dd = q->elevator->elevator_data;
-+	LIST_HEAD(at_head);
-+	LIST_HEAD(at_tail);
-+
-+	spin_lock(&dd->insert_lock);
-+	list_splice_init(&dd->at_head, &at_head);
-+	list_splice_init(&dd->at_tail, &at_tail);
-+	spin_unlock(&dd->insert_lock);
-+
-+	__dd_do_insert(q, BLK_MQ_INSERT_AT_HEAD, &at_head, free);
-+	__dd_do_insert(q, 0, &at_tail, free);
-+}
-+
- /*
-  * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests().
-  *
-@@ -461,12 +502,27 @@ static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd,
-  */
- static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
- {
--	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
-+	struct request_queue *q = hctx->queue;
-+	struct deadline_data *dd = q->elevator->elevator_data;
- 	const unsigned long now = jiffies;
- 	struct request *rq;
- 	enum dd_prio prio;
-+	LIST_HEAD(free);
-+
-+	/*
-+	 * If someone else is already dispatching, skip this one. This will
-+	 * defer the next dispatch event to when something completes, and could
-+	 * potentially lower the queue depth for contended cases.
-+	 *
-+	 * See the logic in blk_mq_do_dispatch_sched(), which loops and
-+	 * retries if nothing is dispatched.
-+	 */
-+	if (test_bit(DD_DISPATCHING, &dd->run_state) ||
-+	    test_and_set_bit_lock(DD_DISPATCHING, &dd->run_state))
-+		return NULL;
- 
- 	spin_lock(&dd->lock);
-+	dd_do_insert(q, &free);
- 	rq = dd_dispatch_prio_aged_requests(dd, now);
- 	if (rq)
- 		goto unlock;
-@@ -482,8 +538,10 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
- 	}
- 
- unlock:
-+	clear_bit_unlock(DD_DISPATCHING, &dd->run_state);
- 	spin_unlock(&dd->lock);
- 
-+	blk_mq_free_requests(&free);
- 	return rq;
- }
- 
-@@ -571,6 +629,12 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
- 
- 	eq->elevator_data = dd;
- 
-+	spin_lock_init(&dd->lock);
-+	spin_lock_init(&dd->insert_lock);
-+
-+	INIT_LIST_HEAD(&dd->at_head);
-+	INIT_LIST_HEAD(&dd->at_tail);
-+
- 	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
- 		struct dd_per_prio *per_prio = &dd->per_prio[prio];
- 
-@@ -587,7 +651,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
- 	dd->last_dir = DD_WRITE;
- 	dd->fifo_batch = fifo_batch;
- 	dd->prio_aging_expire = prio_aging_expire;
--	spin_lock_init(&dd->lock);
- 
- 	/* We dispatch from request queue wide instead of hw queue */
- 	blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
-@@ -643,7 +706,19 @@ static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
- 	struct request *free = NULL;
- 	bool ret;
- 
--	spin_lock(&dd->lock);
-+	/*
-+	 * bio merging is called for every bio queued, and it's very easy
-+	 * to run into contention because of that. If we fail getting
-+	 * the dd lock, just skip this merge attempt. For related IO, the
-+	 * plug will be the successful merging point. If we get here, we
-+	 * already failed doing the obvious merge. Chances of actually
-+	 * getting a merge off this path is a lot slimmer, so skipping an
-+	 * occassional lookup that will most likely not succeed anyway should
-+	 * not be a problem.
-+	 */
-+	if (!spin_trylock(&dd->lock))
-+		return false;
-+
- 	ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
- 	spin_unlock(&dd->lock);
- 
-@@ -656,10 +731,9 @@ static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
- /*
-  * add rq to rbtree and fifo
-  */
--static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
-+static void dd_insert_request(struct request_queue *q, struct request *rq,
- 			      blk_insert_t flags, struct list_head *free)
- {
--	struct request_queue *q = hctx->queue;
- 	struct deadline_data *dd = q->elevator->elevator_data;
- 	const enum dd_data_dir data_dir = rq_data_dir(rq);
- 	u16 ioprio = req_get_ioprio(rq);
-@@ -713,19 +787,13 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
- {
- 	struct request_queue *q = hctx->queue;
- 	struct deadline_data *dd = q->elevator->elevator_data;
--	LIST_HEAD(free);
--
--	spin_lock(&dd->lock);
--	while (!list_empty(list)) {
--		struct request *rq;
--
--		rq = list_first_entry(list, struct request, queuelist);
--		list_del_init(&rq->queuelist);
--		dd_insert_request(hctx, rq, flags, &free);
--	}
--	spin_unlock(&dd->lock);
- 
--	blk_mq_free_requests(&free);
-+	spin_lock(&dd->insert_lock);
-+	if (flags & BLK_MQ_INSERT_AT_HEAD)
-+		list_splice_init(list, &dd->at_head);
-+	else
-+		list_splice_init(list, &dd->at_tail);
-+	spin_unlock(&dd->insert_lock);
- }
- 
- /* Callback from inside blk_mq_rq_ctx_init(). */
-@@ -766,6 +834,10 @@ static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
- 	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
- 	enum dd_prio prio;
- 
-+	if (!list_empty_careful(&dd->at_head) ||
-+	    !list_empty_careful(&dd->at_tail))
-+		return true;
-+
- 	for (prio = 0; prio <= DD_PRIO_MAX; prio++)
- 		if (dd_has_work_for_prio(&dd->per_prio[prio]))
- 			return true;
--- 
-2.46.0.rc1
-
-From 3eb49a6c890c1da829c0ac8fe76caec909cb2103 Mon Sep 17 00:00:00 2001
-From: Peter Jung <admin@ptr1337.dev>
-Date: Fri, 19 Jul 2024 08:04:09 +0200
-Subject: [PATCH 04/11] cachy
-
-Signed-off-by: Peter Jung <admin@ptr1337.dev>
----
- .../admin-guide/kernel-parameters.txt         |   12 +
- Makefile                                      |    7 +-
- arch/x86/Kconfig.cpu                          |  432 ++-
- arch/x86/Makefile                             |   45 +-
- arch/x86/include/asm/pci.h                    |    6 +
- arch/x86/include/asm/vermagic.h               |   76 +
- arch/x86/pci/common.c                         |    7 +-
- block/bfq-iosched.c                           |    6 +
- block/elevator.c                              |   10 +
- drivers/Makefile                              |   13 +-
- drivers/ata/ahci.c                            |   23 +-
- drivers/cpufreq/Kconfig.x86                   |    2 -
- drivers/cpufreq/intel_pstate.c                |    2 +
- drivers/gpu/drm/amd/amdgpu/amdgpu.h           |    1 +
- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |   10 +
- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c       |   53 +-
- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h       |    1 +
- drivers/gpu/drm/amd/display/Kconfig           |    6 +
- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |    2 +-
- .../amd/display/amdgpu_dm/amdgpu_dm_color.c   |    2 +-
- .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c    |    6 +-
- .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   |    6 +-
- .../amd/display/dc/optc/dcn10/dcn10_optc.c    |   15 +-
- .../amd/display/dc/optc/dcn20/dcn20_optc.c    |   10 +
- drivers/gpu/drm/amd/pm/amdgpu_pm.c            |    3 +
- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     |   14 +-
- drivers/gpu/drm/drm_atomic_uapi.c             |   11 +-
- drivers/i2c/busses/Kconfig                    |    9 +
- drivers/i2c/busses/Makefile                   |    1 +
- drivers/i2c/busses/i2c-nct6775.c              |  648 ++++
- drivers/i2c/busses/i2c-piix4.c                |    4 +-
- drivers/input/evdev.c                         |   19 +-
- drivers/md/dm-crypt.c                         |    5 +
- drivers/media/v4l2-core/Kconfig               |    5 +
- drivers/media/v4l2-core/Makefile              |    2 +
- drivers/media/v4l2-core/v4l2loopback.c        | 3184 +++++++++++++++++
- drivers/media/v4l2-core/v4l2loopback.h        |   98 +
- .../media/v4l2-core/v4l2loopback_formats.h    |  445 +++
- drivers/pci/controller/Makefile               |    6 +
- drivers/pci/controller/intel-nvme-remap.c     |  462 +++
- drivers/pci/quirks.c                          |  101 +
- include/linux/pagemap.h                       |    2 +-
- include/linux/user_namespace.h                |    4 +
- init/Kconfig                                  |   26 +
- kernel/Kconfig.hz                             |   24 +
- kernel/fork.c                                 |   14 +
- kernel/sched/fair.c                           |   13 +
- kernel/sched/sched.h                          |    2 +-
- kernel/sysctl.c                               |   12 +
- kernel/user_namespace.c                       |    7 +
- mm/Kconfig                                    |    2 +-
- mm/compaction.c                               |    4 +
- mm/huge_memory.c                              |    4 +
- mm/page-writeback.c                           |    8 +
- mm/page_alloc.c                               |    4 +
- mm/swap.c                                     |    5 +
- mm/vmpressure.c                               |    4 +
- mm/vmscan.c                                   |    8 +
- 58 files changed, 5800 insertions(+), 113 deletions(-)
- create mode 100644 drivers/i2c/busses/i2c-nct6775.c
- create mode 100644 drivers/media/v4l2-core/v4l2loopback.c
- create mode 100644 drivers/media/v4l2-core/v4l2loopback.h
- create mode 100644 drivers/media/v4l2-core/v4l2loopback_formats.h
- create mode 100644 drivers/pci/controller/intel-nvme-remap.c
-
-diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
-index 27ec49af1bf2..07ac4c81a7dd 100644
---- a/Documentation/admin-guide/kernel-parameters.txt
-+++ b/Documentation/admin-guide/kernel-parameters.txt
-@@ -2229,6 +2229,9 @@
- 			disable
- 			  Do not enable intel_pstate as the default
- 			  scaling driver for the supported processors
-+			enable
-+			  Enable intel_pstate in-case "disable" was passed
-+			  previously in the kernel boot parameters
-                         active
-                           Use intel_pstate driver to bypass the scaling
-                           governors layer of cpufreq and provides it own
-@@ -4447,6 +4450,15 @@
- 		nomsi		[MSI] If the PCI_MSI kernel config parameter is
- 				enabled, this kernel boot option can be used to
- 				disable the use of MSI interrupts system-wide.
-+		pcie_acs_override =
-+					[PCIE] Override missing PCIe ACS support for:
-+				downstream
-+					All downstream ports - full ACS capabilities
-+				multfunction
-+					All multifunction devices - multifunction ACS subset
-+				id:nnnn:nnnn
-+					Specfic device - full ACS capabilities
-+					Specified as vid:did (vendor/device ID) in hex
- 		noioapicquirk	[APIC] Disable all boot interrupt quirks.
- 				Safety option to keep boot IRQs enabled. This
- 				should never be necessary.
-diff --git a/Makefile b/Makefile
-index 3d10e3aadeda..b9435cef21b0 100644
---- a/Makefile
-+++ b/Makefile
-@@ -817,6 +817,9 @@ KBUILD_CFLAGS	+= -fno-delete-null-pointer-checks
- ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
- KBUILD_CFLAGS += -O2
- KBUILD_RUSTFLAGS += -Copt-level=2
-+else ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3
-+KBUILD_CFLAGS += -O3
-+KBUILD_RUSTFLAGS += -Copt-level=3
- else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
- KBUILD_CFLAGS += -Os
- KBUILD_RUSTFLAGS += -Copt-level=s
-@@ -1005,9 +1008,9 @@ KBUILD_CFLAGS	+= -fno-strict-overflow
- # Make sure -fstack-check isn't enabled (like gentoo apparently did)
- KBUILD_CFLAGS  += -fno-stack-check
- 
--# conserve stack if available
-+# conserve stack, ivopts and modulo-sched if available
- ifdef CONFIG_CC_IS_GCC
--KBUILD_CFLAGS   += -fconserve-stack
-+KBUILD_CFLAGS   += -fconserve-stack -fivopts -fmodulo-sched -fno-tree-vectorize
- endif
- 
- # change __FILE__ to the relative path from the srctree
-diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
-index 2a7279d80460..3b077b9f9291 100644
---- a/arch/x86/Kconfig.cpu
-+++ b/arch/x86/Kconfig.cpu
-@@ -157,7 +157,7 @@ config MPENTIUM4
- 
- 
- config MK6
--	bool "K6/K6-II/K6-III"
-+	bool "AMD K6/K6-II/K6-III"
- 	depends on X86_32
- 	help
- 	  Select this for an AMD K6-family processor.  Enables use of
-@@ -165,7 +165,7 @@ config MK6
- 	  flags to GCC.
- 
- config MK7
--	bool "Athlon/Duron/K7"
-+	bool "AMD Athlon/Duron/K7"
- 	depends on X86_32
- 	help
- 	  Select this for an AMD Athlon K7-family processor.  Enables use of
-@@ -173,12 +173,114 @@ config MK7
- 	  flags to GCC.
- 
- config MK8
--	bool "Opteron/Athlon64/Hammer/K8"
-+	bool "AMD Opteron/Athlon64/Hammer/K8"
- 	help
- 	  Select this for an AMD Opteron or Athlon64 Hammer-family processor.
- 	  Enables use of some extended instructions, and passes appropriate
- 	  optimization flags to GCC.
- 
-+config MK8SSE3
-+	bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3"
-+	help
-+	  Select this for improved AMD Opteron or Athlon64 Hammer-family processors.
-+	  Enables use of some extended instructions, and passes appropriate
-+	  optimization flags to GCC.
-+
-+config MK10
-+	bool "AMD 61xx/7x50/PhenomX3/X4/II/K10"
-+	help
-+	  Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50,
-+	  Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor.
-+	  Enables use of some extended instructions, and passes appropriate
-+	  optimization flags to GCC.
-+
-+config MBARCELONA
-+	bool "AMD Barcelona"
-+	help
-+	  Select this for AMD Family 10h Barcelona processors.
-+
-+	  Enables -march=barcelona
-+
-+config MBOBCAT
-+	bool "AMD Bobcat"
-+	help
-+	  Select this for AMD Family 14h Bobcat processors.
-+
-+	  Enables -march=btver1
-+
-+config MJAGUAR
-+	bool "AMD Jaguar"
-+	help
-+	  Select this for AMD Family 16h Jaguar processors.
-+
-+	  Enables -march=btver2
-+
-+config MBULLDOZER
-+	bool "AMD Bulldozer"
-+	help
-+	  Select this for AMD Family 15h Bulldozer processors.
-+
-+	  Enables -march=bdver1
-+
-+config MPILEDRIVER
-+	bool "AMD Piledriver"
-+	help
-+	  Select this for AMD Family 15h Piledriver processors.
-+
-+	  Enables -march=bdver2
-+
-+config MSTEAMROLLER
-+	bool "AMD Steamroller"
-+	help
-+	  Select this for AMD Family 15h Steamroller processors.
-+
-+	  Enables -march=bdver3
-+
-+config MEXCAVATOR
-+	bool "AMD Excavator"
-+	help
-+	  Select this for AMD Family 15h Excavator processors.
-+
-+	  Enables -march=bdver4
-+
-+config MZEN
-+	bool "AMD Zen"
-+	help
-+	  Select this for AMD Family 17h Zen processors.
-+
-+	  Enables -march=znver1
-+
-+config MZEN2
-+	bool "AMD Zen 2"
-+	help
-+	  Select this for AMD Family 17h Zen 2 processors.
-+
-+	  Enables -march=znver2
-+
-+config MZEN3
-+	bool "AMD Zen 3"
-+	depends on (CC_IS_GCC && GCC_VERSION >= 100300) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
-+	help
-+	  Select this for AMD Family 19h Zen 3 processors.
-+
-+	  Enables -march=znver3
-+
-+config MZEN4
-+	bool "AMD Zen 4"
-+	depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 160000)
-+	help
-+	  Select this for AMD Family 19h Zen 4 processors.
-+
-+	  Enables -march=znver4
-+
-+config MZEN5
-+	bool "AMD Zen 5"
-+	depends on (CC_IS_GCC && GCC_VERSION >= 140000) || (CC_IS_CLANG && CLANG_VERSION >= 180000)
-+	help
-+	  Select this for AMD Family 1Ah Zen 5 processors.
-+
-+	  Enables -march=znver5
-+
- config MCRUSOE
- 	bool "Crusoe"
- 	depends on X86_32
-@@ -270,7 +372,7 @@ config MPSC
- 	  in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
- 
- config MCORE2
--	bool "Core 2/newer Xeon"
-+	bool "Intel Core 2"
- 	help
- 
- 	  Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
-@@ -278,6 +380,8 @@ config MCORE2
- 	  family in /proc/cpuinfo. Newer ones have 6 and older ones 15
- 	  (not a typo)
- 
-+	  Enables -march=core2
-+
- config MATOM
- 	bool "Intel Atom"
- 	help
-@@ -287,6 +391,212 @@ config MATOM
- 	  accordingly optimized code. Use a recent GCC with specific Atom
- 	  support in order to fully benefit from selecting this option.
- 
-+config MNEHALEM
-+	bool "Intel Nehalem"
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for 1st Gen Core processors in the Nehalem family.
-+
-+	  Enables -march=nehalem
-+
-+config MWESTMERE
-+	bool "Intel Westmere"
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for the Intel Westmere formerly Nehalem-C family.
-+
-+	  Enables -march=westmere
-+
-+config MSILVERMONT
-+	bool "Intel Silvermont"
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for the Intel Silvermont platform.
-+
-+	  Enables -march=silvermont
-+
-+config MGOLDMONT
-+	bool "Intel Goldmont"
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for the Intel Goldmont platform including Apollo Lake and Denverton.
-+
-+	  Enables -march=goldmont
-+
-+config MGOLDMONTPLUS
-+	bool "Intel Goldmont Plus"
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for the Intel Goldmont Plus platform including Gemini Lake.
-+
-+	  Enables -march=goldmont-plus
-+
-+config MSANDYBRIDGE
-+	bool "Intel Sandy Bridge"
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for 2nd Gen Core processors in the Sandy Bridge family.
-+
-+	  Enables -march=sandybridge
-+
-+config MIVYBRIDGE
-+	bool "Intel Ivy Bridge"
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for 3rd Gen Core processors in the Ivy Bridge family.
-+
-+	  Enables -march=ivybridge
-+
-+config MHASWELL
-+	bool "Intel Haswell"
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for 4th Gen Core processors in the Haswell family.
-+
-+	  Enables -march=haswell
-+
-+config MBROADWELL
-+	bool "Intel Broadwell"
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for 5th Gen Core processors in the Broadwell family.
-+
-+	  Enables -march=broadwell
-+
-+config MSKYLAKE
-+	bool "Intel Skylake"
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for 6th Gen Core processors in the Skylake family.
-+
-+	  Enables -march=skylake
-+
-+config MSKYLAKEX
-+	bool "Intel Skylake X"
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for 6th Gen Core processors in the Skylake X family.
-+
-+	  Enables -march=skylake-avx512
-+
-+config MCANNONLAKE
-+	bool "Intel Cannon Lake"
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for 8th Gen Core processors
-+
-+	  Enables -march=cannonlake
-+
-+config MICELAKE
-+	bool "Intel Ice Lake"
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for 10th Gen Core processors in the Ice Lake family.
-+
-+	  Enables -march=icelake-client
-+
-+config MCASCADELAKE
-+	bool "Intel Cascade Lake"
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for Xeon processors in the Cascade Lake family.
-+
-+	  Enables -march=cascadelake
-+
-+config MCOOPERLAKE
-+	bool "Intel Cooper Lake"
-+	depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000)
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for Xeon processors in the Cooper Lake family.
-+
-+	  Enables -march=cooperlake
-+
-+config MTIGERLAKE
-+	bool "Intel Tiger Lake"
-+	depends on  (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000)
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for third-generation 10 nm process processors in the Tiger Lake family.
-+
-+	  Enables -march=tigerlake
-+
-+config MSAPPHIRERAPIDS
-+	bool "Intel Sapphire Rapids"
-+	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for fourth-generation 10 nm process processors in the Sapphire Rapids family.
-+
-+	  Enables -march=sapphirerapids
-+
-+config MROCKETLAKE
-+	bool "Intel Rocket Lake"
-+	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for eleventh-generation processors in the Rocket Lake family.
-+
-+	  Enables -march=rocketlake
-+
-+config MALDERLAKE
-+	bool "Intel Alder Lake"
-+	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for twelfth-generation processors in the Alder Lake family.
-+
-+	  Enables -march=alderlake
-+
-+config MRAPTORLAKE
-+	bool "Intel Raptor Lake"
-+	depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500)
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for thirteenth-generation processors in the Raptor Lake family.
-+
-+	  Enables -march=raptorlake
-+
-+config MMETEORLAKE
-+	bool "Intel Meteor Lake"
-+	depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500)
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for fourteenth-generation processors in the Meteor Lake family.
-+
-+	  Enables -march=meteorlake
-+
-+config MEMERALDRAPIDS
-+	bool "Intel Emerald Rapids"
-+	depends on (CC_IS_GCC && GCC_VERSION > 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500)
-+	select X86_P6_NOP
-+	help
-+
-+	  Select this for fifth-generation 10 nm process processors in the Emerald Rapids family.
-+
-+	  Enables -march=emeraldrapids
-+
- config GENERIC_CPU
- 	bool "Generic-x86-64"
- 	depends on X86_64
-@@ -294,6 +604,50 @@ config GENERIC_CPU
- 	  Generic x86-64 CPU.
- 	  Run equally well on all x86-64 CPUs.
- 
-+config GENERIC_CPU2
-+	bool "Generic-x86-64-v2"
-+	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
-+	depends on X86_64
-+	help
-+	  Generic x86-64 CPU.
-+	  Run equally well on all x86-64 CPUs with min support of x86-64-v2.
-+
-+config GENERIC_CPU3
-+	bool "Generic-x86-64-v3"
-+	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
-+	depends on X86_64
-+	help
-+	  Generic x86-64-v3 CPU with v3 instructions.
-+	  Run equally well on all x86-64 CPUs with min support of x86-64-v3.
-+
-+config GENERIC_CPU4
-+	bool "Generic-x86-64-v4"
-+	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
-+	depends on X86_64
-+	help
-+	  Generic x86-64 CPU with v4 instructions.
-+	  Run equally well on all x86-64 CPUs with min support of x86-64-v4.
-+
-+config MNATIVE_INTEL
-+	bool "Intel-Native optimizations autodetected by the compiler"
-+	help
-+
-+	  Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects
-+	  the optimum settings to use based on your processor. Do NOT use this
-+	  for AMD CPUs.  Intel Only!
-+
-+	  Enables -march=native
-+
-+config MNATIVE_AMD
-+	bool "AMD-Native optimizations autodetected by the compiler"
-+	help
-+
-+	  Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects
-+	  the optimum settings to use based on your processor. Do NOT use this
-+	  for Intel CPUs.  AMD Only!
-+
-+	  Enables -march=native
-+
- endchoice
- 
- config X86_GENERIC
-@@ -318,9 +672,17 @@ config X86_INTERNODE_CACHE_SHIFT
- config X86_L1_CACHE_SHIFT
- 	int
- 	default "7" if MPENTIUM4 || MPSC
--	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
-+	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || MK8SSE3 || MK10 \
-+	|| MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER \
-+	|| MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT \
-+	|| MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL \
-+	|| MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE \
-+	|| MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE \
-+	|| MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD || X86_GENERIC || GENERIC_CPU || GENERIC_CPU2 \
-+	|| GENERIC_CPU3 || GENERIC_CPU4
- 	default "4" if MELAN || M486SX || M486 || MGEODEGX1
--	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
-+	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII \
-+	|| MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
- 
- config X86_F00F_BUG
- 	def_bool y
-@@ -332,15 +694,27 @@ config X86_INVD_BUG
- 
- config X86_ALIGNMENT_16
- 	def_bool y
--	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486SX || M486 || MVIAC3_2 || MGEODEGX1
-+	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC \
-+	|| M586 || M486SX || M486 || MVIAC3_2 || MGEODEGX1
- 
- config X86_INTEL_USERCOPY
- 	def_bool y
--	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
-+	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC \
-+	|| MK8 || MK7 || MEFFICEON || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT \
-+	|| MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX \
-+	|| MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS \
-+	|| MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL
- 
- config X86_USE_PPRO_CHECKSUM
- 	def_bool y
--	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
-+	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM \
-+	|| MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX \
-+	|| MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER \
-+	|| MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM \
-+	|| MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE \
-+	|| MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE \
-+	|| MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE \
-+	|| MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD
- 
- #
- # P6_NOPs are a relatively minor optimization that require a family >=
-@@ -356,11 +730,22 @@ config X86_USE_PPRO_CHECKSUM
- config X86_P6_NOP
- 	def_bool y
- 	depends on X86_64
--	depends on (MCORE2 || MPENTIUM4 || MPSC)
-+	depends on (MCORE2 || MPENTIUM4 || MPSC || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT \
-+	|| MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE \
-+	|| MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE \
-+	|| MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS \
-+	|| MNATIVE_INTEL)
- 
- config X86_TSC
- 	def_bool y
--	depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
-+	depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM \
-+	|| MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 \
-+	|| MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER \
-+	|| MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM \
-+	|| MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL \
-+	|| MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE \
-+	|| MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS \
-+	|| MNATIVE_INTEL || MNATIVE_AMD) || X86_64
- 
- config X86_HAVE_PAE
- 	def_bool y
-@@ -368,18 +753,37 @@ config X86_HAVE_PAE
- 
- config X86_CMPXCHG64
- 	def_bool y
--	depends on X86_HAVE_PAE || M586TSC || M586MMX || MK6 || MK7
-+	depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 \
-+	|| M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8 || MK8SSE3 || MK10 \
-+	|| MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN \
-+	|| MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS \
-+	|| MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE \
-+	|| MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE \
-+	|| MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD
- 
- # this should be set for all -march=.. options where the compiler
- # generates cmov.
- config X86_CMOV
- 	def_bool y
--	depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
-+	depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 \
-+	|| MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX || MK8SSE3 || MK10 \
-+	|| MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR \
-+	|| MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT \
-+	|| MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX \
-+	|| MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS \
-+	|| MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD)
- 
- config X86_MINIMUM_CPU_FAMILY
- 	int
- 	default "64" if X86_64
--	default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8)
-+	default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 \
-+	|| MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8 ||  MK8SSE3 \
-+	|| MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER \
-+	|| MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT \
-+	|| MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL \
-+	|| MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE \
-+	|| MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MRAPTORLAKE \
-+	|| MNATIVE_INTEL || MNATIVE_AMD)
- 	default "5" if X86_32 && X86_CMPXCHG64
- 	default "4"
- 
-diff --git a/arch/x86/Makefile b/arch/x86/Makefile
-index 801fd85c3ef6..93cc88b59cbb 100644
---- a/arch/x86/Makefile
-+++ b/arch/x86/Makefile
-@@ -176,8 +176,49 @@ else
-         # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
-         cflags-$(CONFIG_MK8)		+= -march=k8
-         cflags-$(CONFIG_MPSC)		+= -march=nocona
--        cflags-$(CONFIG_MCORE2)		+= -march=core2
--        cflags-$(CONFIG_MATOM)		+= -march=atom
-+        cflags-$(CONFIG_MK8SSE3)	+= -march=k8-sse3
-+        cflags-$(CONFIG_MK10) 		+= -march=amdfam10
-+        cflags-$(CONFIG_MBARCELONA) 	+= -march=barcelona
-+        cflags-$(CONFIG_MBOBCAT) 	+= -march=btver1
-+        cflags-$(CONFIG_MJAGUAR) 	+= -march=btver2
-+        cflags-$(CONFIG_MBULLDOZER) 	+= -march=bdver1
-+        cflags-$(CONFIG_MPILEDRIVER)	+= -march=bdver2 -mno-tbm
-+        cflags-$(CONFIG_MSTEAMROLLER) 	+= -march=bdver3 -mno-tbm
-+        cflags-$(CONFIG_MEXCAVATOR) 	+= -march=bdver4 -mno-tbm
-+        cflags-$(CONFIG_MZEN) 		+= -march=znver1
-+        cflags-$(CONFIG_MZEN2) 	+= -march=znver2
-+        cflags-$(CONFIG_MZEN3) 	+= -march=znver3
-+        cflags-$(CONFIG_MZEN4) 	+= -march=znver4
-+        cflags-$(CONFIG_MZEN5) 	+= -march=znver5
-+        cflags-$(CONFIG_MNATIVE_INTEL) += -march=native
-+        cflags-$(CONFIG_MNATIVE_AMD) 	+= -march=native
-+        cflags-$(CONFIG_MATOM) 	+= -march=bonnell
-+        cflags-$(CONFIG_MCORE2) 	+= -march=core2
-+        cflags-$(CONFIG_MNEHALEM) 	+= -march=nehalem
-+        cflags-$(CONFIG_MWESTMERE) 	+= -march=westmere
-+        cflags-$(CONFIG_MSILVERMONT) 	+= -march=silvermont
-+        cflags-$(CONFIG_MGOLDMONT) 	+= -march=goldmont
-+        cflags-$(CONFIG_MGOLDMONTPLUS) += -march=goldmont-plus
-+        cflags-$(CONFIG_MSANDYBRIDGE) 	+= -march=sandybridge
-+        cflags-$(CONFIG_MIVYBRIDGE) 	+= -march=ivybridge
-+        cflags-$(CONFIG_MHASWELL) 	+= -march=haswell
-+        cflags-$(CONFIG_MBROADWELL) 	+= -march=broadwell
-+        cflags-$(CONFIG_MSKYLAKE) 	+= -march=skylake
-+        cflags-$(CONFIG_MSKYLAKEX) 	+= -march=skylake-avx512
-+        cflags-$(CONFIG_MCANNONLAKE) 	+= -march=cannonlake
-+        cflags-$(CONFIG_MICELAKE) 	+= -march=icelake-client
-+        cflags-$(CONFIG_MCASCADELAKE) 	+= -march=cascadelake
-+        cflags-$(CONFIG_MCOOPERLAKE) 	+= -march=cooperlake
-+        cflags-$(CONFIG_MTIGERLAKE) 	+= -march=tigerlake
-+        cflags-$(CONFIG_MSAPPHIRERAPIDS) += -march=sapphirerapids
-+        cflags-$(CONFIG_MROCKETLAKE) 	+= -march=rocketlake
-+        cflags-$(CONFIG_MALDERLAKE) 	+= -march=alderlake
-+        cflags-$(CONFIG_MRAPTORLAKE) 	+= -march=raptorlake
-+        cflags-$(CONFIG_MMETEORLAKE) 	+= -march=meteorlake
-+        cflags-$(CONFIG_MEMERALDRAPIDS)	+= -march=emeraldrapids
-+        cflags-$(CONFIG_GENERIC_CPU2) 	+= -march=x86-64-v2
-+        cflags-$(CONFIG_GENERIC_CPU3) 	+= -march=x86-64-v3
-+        cflags-$(CONFIG_GENERIC_CPU4) 	+= -march=x86-64-v4
-         cflags-$(CONFIG_GENERIC_CPU)	+= -mtune=generic
-         KBUILD_CFLAGS += $(cflags-y)
- 
-diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
-index b3ab80a03365..5e883b397ff3 100644
---- a/arch/x86/include/asm/pci.h
-+++ b/arch/x86/include/asm/pci.h
-@@ -26,6 +26,7 @@ struct pci_sysdata {
- #if IS_ENABLED(CONFIG_VMD)
- 	struct pci_dev	*vmd_dev;	/* VMD Device if in Intel VMD domain */
- #endif
-+	struct pci_dev	*nvme_remap_dev;	/* AHCI Device if NVME remapped bus */
- };
- 
- extern int pci_routeirq;
-@@ -69,6 +70,11 @@ static inline bool is_vmd(struct pci_bus *bus)
- #define is_vmd(bus)		false
- #endif /* CONFIG_VMD */
- 
-+static inline bool is_nvme_remap(struct pci_bus *bus)
-+{
-+	return to_pci_sysdata(bus)->nvme_remap_dev != NULL;
-+}
-+
- /* Can be used to override the logic in pci_scan_bus for skipping
-    already-configured bus numbers - to be used for buggy BIOSes
-    or architectures with incomplete PCI setup by the loader */
-diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h
-index 75884d2cdec3..7acca9b5a9d5 100644
---- a/arch/x86/include/asm/vermagic.h
-+++ b/arch/x86/include/asm/vermagic.h
-@@ -17,6 +17,54 @@
- #define MODULE_PROC_FAMILY "586MMX "
- #elif defined CONFIG_MCORE2
- #define MODULE_PROC_FAMILY "CORE2 "
-+#elif defined CONFIG_MNATIVE_INTEL
-+#define MODULE_PROC_FAMILY "NATIVE_INTEL "
-+#elif defined CONFIG_MNATIVE_AMD
-+#define MODULE_PROC_FAMILY "NATIVE_AMD "
-+#elif defined CONFIG_MNEHALEM
-+#define MODULE_PROC_FAMILY "NEHALEM "
-+#elif defined CONFIG_MWESTMERE
-+#define MODULE_PROC_FAMILY "WESTMERE "
-+#elif defined CONFIG_MSILVERMONT
-+#define MODULE_PROC_FAMILY "SILVERMONT "
-+#elif defined CONFIG_MGOLDMONT
-+#define MODULE_PROC_FAMILY "GOLDMONT "
-+#elif defined CONFIG_MGOLDMONTPLUS
-+#define MODULE_PROC_FAMILY "GOLDMONTPLUS "
-+#elif defined CONFIG_MSANDYBRIDGE
-+#define MODULE_PROC_FAMILY "SANDYBRIDGE "
-+#elif defined CONFIG_MIVYBRIDGE
-+#define MODULE_PROC_FAMILY "IVYBRIDGE "
-+#elif defined CONFIG_MHASWELL
-+#define MODULE_PROC_FAMILY "HASWELL "
-+#elif defined CONFIG_MBROADWELL
-+#define MODULE_PROC_FAMILY "BROADWELL "
-+#elif defined CONFIG_MSKYLAKE
-+#define MODULE_PROC_FAMILY "SKYLAKE "
-+#elif defined CONFIG_MSKYLAKEX
-+#define MODULE_PROC_FAMILY "SKYLAKEX "
-+#elif defined CONFIG_MCANNONLAKE
-+#define MODULE_PROC_FAMILY "CANNONLAKE "
-+#elif defined CONFIG_MICELAKE
-+#define MODULE_PROC_FAMILY "ICELAKE "
-+#elif defined CONFIG_MCASCADELAKE
-+#define MODULE_PROC_FAMILY "CASCADELAKE "
-+#elif defined CONFIG_MCOOPERLAKE
-+#define MODULE_PROC_FAMILY "COOPERLAKE "
-+#elif defined CONFIG_MTIGERLAKE
-+#define MODULE_PROC_FAMILY "TIGERLAKE "
-+#elif defined CONFIG_MSAPPHIRERAPIDS
-+#define MODULE_PROC_FAMILY "SAPPHIRERAPIDS "
-+#elif defined CONFIG_ROCKETLAKE
-+#define MODULE_PROC_FAMILY "ROCKETLAKE "
-+#elif defined CONFIG_MALDERLAKE
-+#define MODULE_PROC_FAMILY "ALDERLAKE "
-+#elif defined CONFIG_MRAPTORLAKE
-+#define MODULE_PROC_FAMILY "RAPTORLAKE "
-+#elif defined CONFIG_MMETEORLAKE
-+#define MODULE_PROC_FAMILY "METEORLAKE "
-+#elif defined CONFIG_MEMERALDRAPIDS
-+#define MODULE_PROC_FAMILY "EMERALDRAPIDS "
- #elif defined CONFIG_MATOM
- #define MODULE_PROC_FAMILY "ATOM "
- #elif defined CONFIG_M686
-@@ -35,6 +83,34 @@
- #define MODULE_PROC_FAMILY "K7 "
- #elif defined CONFIG_MK8
- #define MODULE_PROC_FAMILY "K8 "
-+#elif defined CONFIG_MK8SSE3
-+#define MODULE_PROC_FAMILY "K8SSE3 "
-+#elif defined CONFIG_MK10
-+#define MODULE_PROC_FAMILY "K10 "
-+#elif defined CONFIG_MBARCELONA
-+#define MODULE_PROC_FAMILY "BARCELONA "
-+#elif defined CONFIG_MBOBCAT
-+#define MODULE_PROC_FAMILY "BOBCAT "
-+#elif defined CONFIG_MBULLDOZER
-+#define MODULE_PROC_FAMILY "BULLDOZER "
-+#elif defined CONFIG_MPILEDRIVER
-+#define MODULE_PROC_FAMILY "PILEDRIVER "
-+#elif defined CONFIG_MSTEAMROLLER
-+#define MODULE_PROC_FAMILY "STEAMROLLER "
-+#elif defined CONFIG_MJAGUAR
-+#define MODULE_PROC_FAMILY "JAGUAR "
-+#elif defined CONFIG_MEXCAVATOR
-+#define MODULE_PROC_FAMILY "EXCAVATOR "
-+#elif defined CONFIG_MZEN
-+#define MODULE_PROC_FAMILY "ZEN "
-+#elif defined CONFIG_MZEN2
-+#define MODULE_PROC_FAMILY "ZEN2 "
-+#elif defined CONFIG_MZEN3
-+#define MODULE_PROC_FAMILY "ZEN3 "
-+#elif defined CONFIG_MZEN4
-+#define MODULE_PROC_FAMILY "ZEN4 "
-+#elif defined CONFIG_MZEN5
-+#define MODULE_PROC_FAMILY "ZEN5 "
- #elif defined CONFIG_MELAN
- #define MODULE_PROC_FAMILY "ELAN "
- #elif defined CONFIG_MCRUSOE
-diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
-index ddb798603201..7c20387d8202 100644
---- a/arch/x86/pci/common.c
-+++ b/arch/x86/pci/common.c
-@@ -723,12 +723,15 @@ int pci_ext_cfg_avail(void)
- 		return 0;
- }
- 
--#if IS_ENABLED(CONFIG_VMD)
- struct pci_dev *pci_real_dma_dev(struct pci_dev *dev)
- {
-+#if IS_ENABLED(CONFIG_VMD)
- 	if (is_vmd(dev->bus))
- 		return to_pci_sysdata(dev->bus)->vmd_dev;
-+#endif
-+
-+	if (is_nvme_remap(dev->bus))
-+		return to_pci_sysdata(dev->bus)->nvme_remap_dev;
- 
- 	return dev;
- }
--#endif
-diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
-index 88df08a246fa..deecce63d0fc 100644
---- a/block/bfq-iosched.c
-+++ b/block/bfq-iosched.c
-@@ -7703,6 +7703,7 @@ MODULE_ALIAS("bfq-iosched");
- static int __init bfq_init(void)
- {
- 	int ret;
-+	char msg[60] = "BFQ I/O-scheduler: BFQ-CachyOS v6.10";
- 
- #ifdef CONFIG_BFQ_GROUP_IOSCHED
- 	ret = blkcg_policy_register(&blkcg_policy_bfq);
-@@ -7734,6 +7735,11 @@ static int __init bfq_init(void)
- 	if (ret)
- 		goto slab_kill;
- 
-+#ifdef CONFIG_BFQ_GROUP_IOSCHED
-+	strcat(msg, " (with cgroups support)");
-+#endif
-+	pr_info("%s", msg);
-+
- 	return 0;
- 
- slab_kill:
-diff --git a/block/elevator.c b/block/elevator.c
-index f64ebd726e58..4f1ccf8cf250 100644
---- a/block/elevator.c
-+++ b/block/elevator.c
-@@ -567,9 +567,19 @@ static struct elevator_type *elevator_get_default(struct request_queue *q)
- 
- 	if (q->nr_hw_queues != 1 &&
- 	    !blk_mq_is_shared_tags(q->tag_set->flags))
-+#if defined(CONFIG_CACHY) && defined(CONFIG_MQ_IOSCHED_KYBER)
-+		return elevator_find_get(q, "kyber");
-+#elif defined(CONFIG_CACHY)
-+		return elevator_find_get(q, "mq-deadline");
-+#else
- 		return NULL;
-+#endif
- 
-+#if defined(CONFIG_CACHY) && defined(CONFIG_IOSCHED_BFQ)
-+	return elevator_find_get(q, "bfq");
-+#else
- 	return elevator_find_get(q, "mq-deadline");
-+#endif
- }
- 
- /*
-diff --git a/drivers/Makefile b/drivers/Makefile
-index fe9ceb0d2288..b58955caf19b 100644
---- a/drivers/Makefile
-+++ b/drivers/Makefile
-@@ -61,14 +61,8 @@ obj-y				+= char/
- # iommu/ comes before gpu as gpu are using iommu controllers
- obj-y				+= iommu/
- 
--# gpu/ comes after char for AGP vs DRM startup and after iommu
--obj-y				+= gpu/
--
- obj-$(CONFIG_CONNECTOR)		+= connector/
- 
--# i810fb depends on char/agp/
--obj-$(CONFIG_FB_I810)           += video/fbdev/i810/
--
- obj-$(CONFIG_PARPORT)		+= parport/
- obj-y				+= base/ block/ misc/ mfd/ nfc/
- obj-$(CONFIG_LIBNVDIMM)		+= nvdimm/
-@@ -80,6 +74,13 @@ obj-y				+= macintosh/
- obj-y				+= scsi/
- obj-y				+= nvme/
- obj-$(CONFIG_ATA)		+= ata/
-+
-+# gpu/ comes after char for AGP vs DRM startup and after iommu
-+obj-y				+= gpu/
-+
-+# i810fb depends on char/agp/
-+obj-$(CONFIG_FB_I810)           += video/fbdev/i810/
-+
- obj-$(CONFIG_TARGET_CORE)	+= target/
- obj-$(CONFIG_MTD)		+= mtd/
- obj-$(CONFIG_SPI)		+= spi/
-diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
-index fc6fd583faf8..f79e205a51dd 100644
---- a/drivers/ata/ahci.c
-+++ b/drivers/ata/ahci.c
-@@ -1618,7 +1618,7 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance)
- }
- #endif
- 
--static void ahci_remap_check(struct pci_dev *pdev, int bar,
-+static int ahci_remap_check(struct pci_dev *pdev, int bar,
- 		struct ahci_host_priv *hpriv)
- {
- 	int i;
-@@ -1631,7 +1631,7 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar,
- 	    pci_resource_len(pdev, bar) < SZ_512K ||
- 	    bar != AHCI_PCI_BAR_STANDARD ||
- 	    !(readl(hpriv->mmio + AHCI_VSCAP) & 1))
--		return;
-+		return 0;
- 
- 	cap = readq(hpriv->mmio + AHCI_REMAP_CAP);
- 	for (i = 0; i < AHCI_MAX_REMAP; i++) {
-@@ -1646,18 +1646,11 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar,
- 	}
- 
- 	if (!hpriv->remapped_nvme)
--		return;
--
--	dev_warn(&pdev->dev, "Found %u remapped NVMe devices.\n",
--		 hpriv->remapped_nvme);
--	dev_warn(&pdev->dev,
--		 "Switch your BIOS from RAID to AHCI mode to use them.\n");
-+		return 0;
- 
--	/*
--	 * Don't rely on the msi-x capability in the remap case,
--	 * share the legacy interrupt across ahci and remapped devices.
--	 */
--	hpriv->flags |= AHCI_HFLAG_NO_MSI;
-+	/* Abort probe, allowing intel-nvme-remap to step in when available */
-+	dev_info(&pdev->dev, "Device will be handled by intel-nvme-remap.\n");
-+	return -ENODEV;
- }
- 
- static int ahci_get_irq_vector(struct ata_host *host, int port)
-@@ -1894,7 +1887,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
- 	hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar];
- 
- 	/* detect remapped nvme devices */
--	ahci_remap_check(pdev, ahci_pci_bar, hpriv);
-+	rc = ahci_remap_check(pdev, ahci_pci_bar, hpriv);
-+	if (rc)
-+		return rc;
- 
- 	sysfs_add_file_to_group(&pdev->dev.kobj,
- 				&dev_attr_remapped_nvme.attr,
-diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
-index 97c2d4f15d76..5a3af44d785a 100644
---- a/drivers/cpufreq/Kconfig.x86
-+++ b/drivers/cpufreq/Kconfig.x86
-@@ -9,7 +9,6 @@ config X86_INTEL_PSTATE
- 	select ACPI_PROCESSOR if ACPI
- 	select ACPI_CPPC_LIB if X86_64 && ACPI && SCHED_MC_PRIO
- 	select CPU_FREQ_GOV_PERFORMANCE
--	select CPU_FREQ_GOV_SCHEDUTIL if SMP
- 	help
- 	  This driver provides a P state for Intel core processors.
- 	  The driver implements an internal governor and will become
-@@ -39,7 +38,6 @@ config X86_AMD_PSTATE
- 	depends on X86 && ACPI
- 	select ACPI_PROCESSOR
- 	select ACPI_CPPC_LIB if X86_64
--	select CPU_FREQ_GOV_SCHEDUTIL if SMP
- 	help
- 	  This driver adds a CPUFreq driver which utilizes a fine grain
- 	  processor performance frequency control range instead of legacy
-diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
-index c31914a9876f..1035c074f36a 100644
---- a/drivers/cpufreq/intel_pstate.c
-+++ b/drivers/cpufreq/intel_pstate.c
-@@ -3550,6 +3550,8 @@ static int __init intel_pstate_setup(char *str)
- 
- 	if (!strcmp(str, "disable"))
- 		no_load = 1;
-+	else if (!strcmp(str, "enable"))
-+		no_load = 0;
- 	else if (!strcmp(str, "active"))
- 		default_driver = &intel_pstate;
- 	else if (!strcmp(str, "passive"))
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
-index f87d53e183c3..c489d3b2576b 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
-@@ -159,6 +159,7 @@ struct amdgpu_watchdog_timer {
-  */
- extern int amdgpu_modeset;
- extern unsigned int amdgpu_vram_limit;
-+extern int amdgpu_ignore_min_pcap;
- extern int amdgpu_vis_vram_limit;
- extern int amdgpu_gart_size;
- extern int amdgpu_gtt_size;
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
-index ea14f1c8f430..bb0b636d0d75 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
-@@ -132,6 +132,7 @@ enum AMDGPU_DEBUG_MASK {
- };
- 
- unsigned int amdgpu_vram_limit = UINT_MAX;
-+int amdgpu_ignore_min_pcap = 0; /* do not ignore by default */
- int amdgpu_vis_vram_limit;
- int amdgpu_gart_size = -1; /* auto */
- int amdgpu_gtt_size = -1; /* auto */
-@@ -243,6 +244,15 @@ struct amdgpu_watchdog_timer amdgpu_watchdog_timer = {
- 	.period = 0x0, /* default to 0x0 (timeout disable) */
- };
- 
-+/**
-+ * DOC: ignore_min_pcap (int)
-+ * Ignore the minimum power cap.
-+ * Useful on graphics cards where the minimum power cap is very high.
-+ * The default is 0 (Do not ignore).
-+ */
-+MODULE_PARM_DESC(ignore_min_pcap, "Ignore the minimum power cap");
-+module_param_named(ignore_min_pcap, amdgpu_ignore_min_pcap, int, 0600);
-+
- /**
-  * DOC: vramlimit (int)
-  * Restrict the total amount of VRAM in MiB for testing.  The default is 0 (Use full VRAM).
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
-index 677eb141554e..ceb3f1e4ed1d 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
-@@ -151,6 +151,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
- 		}
- 	}
- 
-+	/* from vcn4 and above, only unified queue is used */
-+	adev->vcn.using_unified_queue =
-+		amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0);
-+
- 	hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data;
- 	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
- 
-@@ -279,18 +283,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
- 	return 0;
- }
- 
--/* from vcn4 and above, only unified queue is used */
--static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring)
--{
--	struct amdgpu_device *adev = ring->adev;
--	bool ret = false;
--
--	if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0))
--		ret = true;
--
--	return ret;
--}
--
- bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
- {
- 	bool ret = false;
-@@ -401,7 +393,9 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
- 		for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- 			fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
- 
--		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)	{
-+		/* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
-+		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
-+		    !adev->vcn.using_unified_queue) {
- 			struct dpg_pause_state new_state;
- 
- 			if (fence[j] ||
-@@ -447,7 +441,9 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
- 	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- 	       AMD_PG_STATE_UNGATE);
- 
--	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)	{
-+	/* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
-+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
-+	    !adev->vcn.using_unified_queue) {
- 		struct dpg_pause_state new_state;
- 
- 		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
-@@ -473,8 +469,12 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
- 
- void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
- {
-+	struct amdgpu_device *adev = ring->adev;
-+
-+	/* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
- 	if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
--		ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
-+	    ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC &&
-+	    !adev->vcn.using_unified_queue)
- 		atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
- 
- 	atomic_dec(&ring->adev->vcn.total_submission_cnt);
-@@ -728,12 +728,11 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
- 	struct amdgpu_job *job;
- 	struct amdgpu_ib *ib;
- 	uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
--	bool sq = amdgpu_vcn_using_unified_queue(ring);
- 	uint32_t *ib_checksum;
- 	uint32_t ib_pack_in_dw;
- 	int i, r;
- 
--	if (sq)
-+	if (adev->vcn.using_unified_queue)
- 		ib_size_dw += 8;
- 
- 	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
-@@ -746,7 +745,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
- 	ib->length_dw = 0;
- 
- 	/* single queue headers */
--	if (sq) {
-+	if (adev->vcn.using_unified_queue) {
- 		ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t)
- 						+ 4 + 2; /* engine info + decoding ib in dw */
- 		ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false);
-@@ -765,7 +764,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
- 	for (i = ib->length_dw; i < ib_size_dw; ++i)
- 		ib->ptr[i] = 0x0;
- 
--	if (sq)
-+	if (adev->vcn.using_unified_queue)
- 		amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw);
- 
- 	r = amdgpu_job_submit_direct(job, ring, &f);
-@@ -855,15 +854,15 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
- 					 struct dma_fence **fence)
- {
- 	unsigned int ib_size_dw = 16;
-+	struct amdgpu_device *adev = ring->adev;
- 	struct amdgpu_job *job;
- 	struct amdgpu_ib *ib;
- 	struct dma_fence *f = NULL;
- 	uint32_t *ib_checksum = NULL;
- 	uint64_t addr;
--	bool sq = amdgpu_vcn_using_unified_queue(ring);
- 	int i, r;
- 
--	if (sq)
-+	if (adev->vcn.using_unified_queue)
- 		ib_size_dw += 8;
- 
- 	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
-@@ -877,7 +876,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
- 
- 	ib->length_dw = 0;
- 
--	if (sq)
-+	if (adev->vcn.using_unified_queue)
- 		ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
- 
- 	ib->ptr[ib->length_dw++] = 0x00000018;
-@@ -899,7 +898,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
- 	for (i = ib->length_dw; i < ib_size_dw; ++i)
- 		ib->ptr[i] = 0x0;
- 
--	if (sq)
-+	if (adev->vcn.using_unified_queue)
- 		amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
- 
- 	r = amdgpu_job_submit_direct(job, ring, &f);
-@@ -922,15 +921,15 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
- 					  struct dma_fence **fence)
- {
- 	unsigned int ib_size_dw = 16;
-+	struct amdgpu_device *adev = ring->adev;
- 	struct amdgpu_job *job;
- 	struct amdgpu_ib *ib;
- 	struct dma_fence *f = NULL;
- 	uint32_t *ib_checksum = NULL;
- 	uint64_t addr;
--	bool sq = amdgpu_vcn_using_unified_queue(ring);
- 	int i, r;
- 
--	if (sq)
-+	if (adev->vcn.using_unified_queue)
- 		ib_size_dw += 8;
- 
- 	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
-@@ -944,7 +943,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
- 
- 	ib->length_dw = 0;
- 
--	if (sq)
-+	if (adev->vcn.using_unified_queue)
- 		ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
- 
- 	ib->ptr[ib->length_dw++] = 0x00000018;
-@@ -966,7 +965,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
- 	for (i = ib->length_dw; i < ib_size_dw; ++i)
- 		ib->ptr[i] = 0x0;
- 
--	if (sq)
-+	if (adev->vcn.using_unified_queue)
- 		amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
- 
- 	r = amdgpu_job_submit_direct(job, ring, &f);
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
-index 9f06def236fd..1a5439abd1a0 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
-@@ -329,6 +329,7 @@ struct amdgpu_vcn {
- 
- 	uint16_t inst_mask;
- 	uint8_t	num_inst_per_aid;
-+	bool using_unified_queue;
- };
- 
- struct amdgpu_fw_shared_rb_ptrs_struct {
-diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
-index 47b8b49da8a7..943959d1f401 100644
---- a/drivers/gpu/drm/amd/display/Kconfig
-+++ b/drivers/gpu/drm/amd/display/Kconfig
-@@ -51,4 +51,10 @@ config DRM_AMD_SECURE_DISPLAY
- 	  This option enables the calculation of crc of specific region via
- 	  debugfs. Cooperate with specific DMCU FW.
- 
-+config AMD_PRIVATE_COLOR
-+	bool "Enable KMS color management by AMD for AMD"
-+	default n
-+	help
-+	  This option extends the KMS color management API with AMD driver-specific properties to enhance the color management support on AMD Steam Deck.
-+
- endmenu
-diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
-index 3cdcadd41be1..8c0b165ec7fb 100644
---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
-+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
-@@ -4118,7 +4118,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
- 		return r;
- 	}
- 
--#ifdef AMD_PRIVATE_COLOR
-+#ifdef CONFIG_AMD_PRIVATE_COLOR
- 	if (amdgpu_dm_create_color_properties(adev))
- 		return -ENOMEM;
- #endif
-diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
-index ebabfe3a512f..4d3ebcaacca1 100644
---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
-+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
-@@ -97,7 +97,7 @@ static inline struct fixed31_32 amdgpu_dm_fixpt_from_s3132(__u64 x)
- 	return val;
- }
- 
--#ifdef AMD_PRIVATE_COLOR
-+#ifdef CONFIG_AMD_PRIVATE_COLOR
- /* Pre-defined Transfer Functions (TF)
-  *
-  * AMD driver supports pre-defined mathematical functions for transferring
-diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
-index e23a0a276e33..dd83cf50a89b 100644
---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
-+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
-@@ -338,7 +338,7 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc)
- }
- #endif
- 
--#ifdef AMD_PRIVATE_COLOR
-+#ifdef CONFIG_AMD_PRIVATE_COLOR
- /**
-  * dm_crtc_additional_color_mgmt - enable additional color properties
-  * @crtc: DRM CRTC
-@@ -420,7 +420,7 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
- #if defined(CONFIG_DEBUG_FS)
- 	.late_register = amdgpu_dm_crtc_late_register,
- #endif
--#ifdef AMD_PRIVATE_COLOR
-+#ifdef CONFIG_AMD_PRIVATE_COLOR
- 	.atomic_set_property = amdgpu_dm_atomic_crtc_set_property,
- 	.atomic_get_property = amdgpu_dm_atomic_crtc_get_property,
- #endif
-@@ -599,7 +599,7 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
- 
- 	drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES);
- 
--#ifdef AMD_PRIVATE_COLOR
-+#ifdef CONFIG_AMD_PRIVATE_COLOR
- 	dm_crtc_additional_color_mgmt(&acrtc->base);
- #endif
- 	return 0;
-diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
-index 8a4c40b4c27e..779880c64575 100644
---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
-+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
-@@ -1468,7 +1468,7 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane,
- 	drm_atomic_helper_plane_destroy_state(plane, state);
- }
- 
--#ifdef AMD_PRIVATE_COLOR
-+#ifdef CONFIG_AMD_PRIVATE_COLOR
- static void
- dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
- 					     struct drm_plane *plane)
-@@ -1659,7 +1659,7 @@ static const struct drm_plane_funcs dm_plane_funcs = {
- 	.atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state,
- 	.atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state,
- 	.format_mod_supported = amdgpu_dm_plane_format_mod_supported,
--#ifdef AMD_PRIVATE_COLOR
-+#ifdef CONFIG_AMD_PRIVATE_COLOR
- 	.atomic_set_property = dm_atomic_plane_set_property,
- 	.atomic_get_property = dm_atomic_plane_get_property,
- #endif
-@@ -1742,7 +1742,7 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
- 
- 	drm_plane_helper_add(plane, &dm_plane_helper_funcs);
- 
--#ifdef AMD_PRIVATE_COLOR
-+#ifdef CONFIG_AMD_PRIVATE_COLOR
- 	dm_atomic_plane_attach_color_mgmt_properties(dm, plane);
- #endif
- 	/* Create (reset) the plane state */
-diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c
-index 5574bc628053..f109a101d84f 100644
---- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c
-+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c
-@@ -945,19 +945,10 @@ void optc1_set_drr(
- 				OTG_FORCE_LOCK_ON_EVENT, 0,
- 				OTG_SET_V_TOTAL_MIN_MASK_EN, 0,
- 				OTG_SET_V_TOTAL_MIN_MASK, 0);
--
--		// Setup manual flow control for EOF via TRIG_A
--		optc->funcs->setup_manual_trigger(optc);
--
--	} else {
--		REG_UPDATE_4(OTG_V_TOTAL_CONTROL,
--				OTG_SET_V_TOTAL_MIN_MASK, 0,
--				OTG_V_TOTAL_MIN_SEL, 0,
--				OTG_V_TOTAL_MAX_SEL, 0,
--				OTG_FORCE_LOCK_ON_EVENT, 0);
--
--		optc->funcs->set_vtotal_min_max(optc, 0, 0);
- 	}
-+
-+	// Setup manual flow control for EOF via TRIG_A
-+	optc->funcs->setup_manual_trigger(optc);
- }
- 
- void optc1_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max)
-diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c
-index d6f095b4555d..58bdbd859bf9 100644
---- a/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c
-+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c
-@@ -462,6 +462,16 @@ void optc2_setup_manual_trigger(struct timing_generator *optc)
- {
- 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
- 
-+	/* Set the min/max selectors unconditionally so that
-+	 * DMCUB fw may change OTG timings when necessary
-+	 * TODO: Remove the w/a after fixing the issue in DMCUB firmware
-+	 */
-+	REG_UPDATE_4(OTG_V_TOTAL_CONTROL,
-+				 OTG_V_TOTAL_MIN_SEL, 1,
-+				 OTG_V_TOTAL_MAX_SEL, 1,
-+				 OTG_FORCE_LOCK_ON_EVENT, 0,
-+				 OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */
-+
- 	REG_SET_8(OTG_TRIGA_CNTL, 0,
- 			OTG_TRIGA_SOURCE_SELECT, 21,
- 			OTG_TRIGA_SOURCE_PIPE_SELECT, optc->inst,
-diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
-index c11952a4389b..52f54a228b39 100644
---- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
-+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
-@@ -3155,6 +3155,9 @@ static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev,
- 					 struct device_attribute *attr,
- 					 char *buf)
- {
-+	if (amdgpu_ignore_min_pcap)
-+		return sysfs_emit(buf, "%i\n", 0);
-+
- 	return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_MIN);
- }
- 
-diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
-index e1796ecf9c05..5e46bd293205 100644
---- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
-+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
-@@ -2749,7 +2749,10 @@ int smu_get_power_limit(void *handle,
- 			*limit = smu->max_power_limit;
- 			break;
- 		case SMU_PPT_LIMIT_MIN:
--			*limit = smu->min_power_limit;
-+			if (amdgpu_ignore_min_pcap)
-+				*limit = 0;
-+			else
-+				*limit = smu->min_power_limit;
- 			break;
- 		default:
- 			return -EINVAL;
-@@ -2773,7 +2776,14 @@ static int smu_set_power_limit(void *handle, uint32_t limit)
- 		if (smu->ppt_funcs->set_power_limit)
- 			return smu->ppt_funcs->set_power_limit(smu, limit_type, limit);
- 
--	if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) {
-+	if (amdgpu_ignore_min_pcap) {
-+		if ((limit > smu->max_power_limit)) {
-+			dev_err(smu->adev->dev,
-+				"New power limit (%d) is over the max allowed %d\n",
-+				limit, smu->max_power_limit);
-+			return -EINVAL;
-+		}
-+	} else if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) {
- 		dev_err(smu->adev->dev,
- 			"New power limit (%d) is out of range [%d,%d]\n",
- 			limit, smu->min_power_limit, smu->max_power_limit);
-diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
-index fc16fddee5c5..05b21fe9b395 100644
---- a/drivers/gpu/drm/drm_atomic_uapi.c
-+++ b/drivers/gpu/drm/drm_atomic_uapi.c
-@@ -1066,21 +1066,14 @@ int drm_atomic_set_property(struct drm_atomic_state *state,
- 			break;
- 		}
- 
--		if (async_flip && prop != config->prop_fb_id) {
-+		if (async_flip && (prop != config->prop_fb_id ||
-+				   plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY)) {
- 			ret = drm_atomic_plane_get_property(plane, plane_state,
- 							    prop, &old_val);
- 			ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop);
- 			break;
- 		}
- 
--		if (async_flip && plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY) {
--			drm_dbg_atomic(prop->dev,
--				       "[OBJECT:%d] Only primary planes can be changed during async flip\n",
--				       obj->id);
--			ret = -EINVAL;
--			break;
--		}
--
- 		ret = drm_atomic_plane_set_property(plane,
- 				plane_state, file_priv,
- 				prop, prop_value);
-diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
-index fe6e8a1bb607..1488a904e3bf 100644
---- a/drivers/i2c/busses/Kconfig
-+++ b/drivers/i2c/busses/Kconfig
-@@ -238,6 +238,15 @@ config I2C_CHT_WC
- 	  combined with a FUSB302 Type-C port-controller as such it is advised
- 	  to also select CONFIG_TYPEC_FUSB302=m.
- 
-+config I2C_NCT6775
-+	tristate "Nuvoton NCT6775 and compatible SMBus controller"
-+	help
-+		If you say yes to this option, support will be included for the
-+		Nuvoton NCT6775 and compatible SMBus controllers.
-+
-+		This driver can also be built as a module.  If so, the module
-+		will be called i2c-nct6775.
-+
- config I2C_NFORCE2
- 	tristate "Nvidia nForce2, nForce3 and nForce4"
- 	depends on PCI && HAS_IOPORT
-diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
-index 78d0561339e5..9ea3a294f9f0 100644
---- a/drivers/i2c/busses/Makefile
-+++ b/drivers/i2c/busses/Makefile
-@@ -20,6 +20,7 @@ obj-$(CONFIG_I2C_CHT_WC)	+= i2c-cht-wc.o
- obj-$(CONFIG_I2C_I801)		+= i2c-i801.o
- obj-$(CONFIG_I2C_ISCH)		+= i2c-isch.o
- obj-$(CONFIG_I2C_ISMT)		+= i2c-ismt.o
-+obj-$(CONFIG_I2C_NCT6775)   += i2c-nct6775.o
- obj-$(CONFIG_I2C_NFORCE2)	+= i2c-nforce2.o
- obj-$(CONFIG_I2C_NFORCE2_S4985)	+= i2c-nforce2-s4985.o
- obj-$(CONFIG_I2C_NVIDIA_GPU)	+= i2c-nvidia-gpu.o
-diff --git a/drivers/i2c/busses/i2c-nct6775.c b/drivers/i2c/busses/i2c-nct6775.c
-new file mode 100644
-index 000000000000..fdbd9a1c8d7a
---- /dev/null
-+++ b/drivers/i2c/busses/i2c-nct6775.c
-@@ -0,0 +1,648 @@
-+/*
-+ * i2c-nct6775 - Driver for the SMBus master functionality of
-+ *	       Nuvoton NCT677x Super-I/O chips
-+ *
-+ * Copyright (C) 2019  Adam Honse <calcprogrammer1@gmail.com>
-+ *
-+ * Derived from nct6775 hwmon driver
-+ * Copyright (C) 2012  Guenter Roeck <linux@roeck-us.net>
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-+ *
-+ */
-+
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/slab.h>
-+#include <linux/jiffies.h>
-+#include <linux/platform_device.h>
-+#include <linux/hwmon.h>
-+#include <linux/hwmon-sysfs.h>
-+#include <linux/hwmon-vid.h>
-+#include <linux/err.h>
-+#include <linux/mutex.h>
-+#include <linux/delay.h>
-+#include <linux/ioport.h>
-+#include <linux/i2c.h>
-+#include <linux/acpi.h>
-+#include <linux/bitops.h>
-+#include <linux/dmi.h>
-+#include <linux/io.h>
-+#include <linux/nospec.h>
-+
-+#define DRVNAME "i2c-nct6775"
-+
-+/* Nuvoton SMBus address offsets */
-+#define SMBHSTDAT       (0 + nuvoton_nct6793d_smba)
-+#define SMBBLKSZ        (1 + nuvoton_nct6793d_smba)
-+#define SMBHSTCMD       (2 + nuvoton_nct6793d_smba)
-+#define SMBHSTIDX       (3 + nuvoton_nct6793d_smba)  //Index field is the Command field on other controllers
-+#define SMBHSTCTL       (4 + nuvoton_nct6793d_smba)
-+#define SMBHSTADD       (5 + nuvoton_nct6793d_smba)
-+#define SMBHSTERR       (9 + nuvoton_nct6793d_smba)
-+#define SMBHSTSTS       (0xE + nuvoton_nct6793d_smba)
-+
-+/* Command register */
-+#define NCT6793D_READ_BYTE      0
-+#define NCT6793D_READ_WORD      1
-+#define NCT6793D_READ_BLOCK     2
-+#define NCT6793D_BLOCK_WRITE_READ_PROC_CALL 3
-+#define NCT6793D_PROC_CALL      4
-+#define NCT6793D_WRITE_BYTE     8
-+#define NCT6793D_WRITE_WORD     9
-+#define NCT6793D_WRITE_BLOCK    10
-+
-+/* Control register */
-+#define NCT6793D_MANUAL_START   128
-+#define NCT6793D_SOFT_RESET     64
-+
-+/* Error register */
-+#define NCT6793D_NO_ACK         32
-+
-+/* Status register */
-+#define NCT6793D_FIFO_EMPTY     1
-+#define NCT6793D_FIFO_FULL      2
-+#define NCT6793D_MANUAL_ACTIVE  4
-+
-+#define NCT6775_LD_SMBUS		0x0B
-+
-+/* Other settings */
-+#define MAX_RETRIES		400
-+
-+enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793,
-+	     nct6795, nct6796, nct6798 };
-+
-+struct nct6775_sio_data {
-+	int sioreg;
-+	enum kinds kind;
-+};
-+
-+/* used to set data->name = nct6775_device_names[data->sio_kind] */
-+static const char * const nct6775_device_names[] = {
-+	"nct6106",
-+	"nct6775",
-+	"nct6776",
-+	"nct6779",
-+	"nct6791",
-+	"nct6792",
-+	"nct6793",
-+	"nct6795",
-+	"nct6796",
-+	"nct6798",
-+};
-+
-+static const char * const nct6775_sio_names[] __initconst = {
-+	"NCT6106D",
-+	"NCT6775F",
-+	"NCT6776D/F",
-+	"NCT6779D",
-+	"NCT6791D",
-+	"NCT6792D",
-+	"NCT6793D",
-+	"NCT6795D",
-+	"NCT6796D",
-+	"NCT6798D",
-+};
-+
-+#define SIO_REG_LDSEL		0x07	/* Logical device select */
-+#define SIO_REG_DEVID		0x20	/* Device ID (2 bytes) */
-+#define SIO_REG_SMBA		0x62	/* SMBus base address register */
-+
-+#define SIO_NCT6106_ID		0xc450
-+#define SIO_NCT6775_ID		0xb470
-+#define SIO_NCT6776_ID		0xc330
-+#define SIO_NCT6779_ID		0xc560
-+#define SIO_NCT6791_ID		0xc800
-+#define SIO_NCT6792_ID		0xc910
-+#define SIO_NCT6793_ID		0xd120
-+#define SIO_NCT6795_ID		0xd350
-+#define SIO_NCT6796_ID		0xd420
-+#define SIO_NCT6798_ID		0xd428
-+#define SIO_ID_MASK			0xFFF0
-+
-+static inline void
-+superio_outb(int ioreg, int reg, int val)
-+{
-+	outb(reg, ioreg);
-+	outb(val, ioreg + 1);
-+}
-+
-+static inline int
-+superio_inb(int ioreg, int reg)
-+{
-+	outb(reg, ioreg);
-+	return inb(ioreg + 1);
-+}
-+
-+static inline void
-+superio_select(int ioreg, int ld)
-+{
-+	outb(SIO_REG_LDSEL, ioreg);
-+	outb(ld, ioreg + 1);
-+}
-+
-+static inline int
-+superio_enter(int ioreg)
-+{
-+	/*
-+	 * Try to reserve <ioreg> and <ioreg + 1> for exclusive access.
-+	 */
-+	if (!request_muxed_region(ioreg, 2, DRVNAME))
-+		return -EBUSY;
-+
-+	outb(0x87, ioreg);
-+	outb(0x87, ioreg);
-+
-+	return 0;
-+}
-+
-+static inline void
-+superio_exit(int ioreg)
-+{
-+	outb(0xaa, ioreg);
-+	outb(0x02, ioreg);
-+	outb(0x02, ioreg + 1);
-+	release_region(ioreg, 2);
-+}
-+
-+/*
-+ * ISA constants
-+ */
-+
-+#define IOREGION_ALIGNMENT	(~7)
-+#define IOREGION_LENGTH		2
-+#define ADDR_REG_OFFSET		0
-+#define DATA_REG_OFFSET		1
-+
-+#define NCT6775_REG_BANK	0x4E
-+#define NCT6775_REG_CONFIG	0x40
-+
-+static struct i2c_adapter *nct6775_adapter;
-+
-+struct i2c_nct6775_adapdata {
-+	unsigned short smba;
-+};
-+
-+/* Return negative errno on error. */
-+static s32 nct6775_access(struct i2c_adapter * adap, u16 addr,
-+		 unsigned short flags, char read_write,
-+		 u8 command, int size, union i2c_smbus_data * data)
-+{
-+	struct i2c_nct6775_adapdata *adapdata = i2c_get_adapdata(adap);
-+	unsigned short nuvoton_nct6793d_smba = adapdata->smba;
-+	int i, len, cnt;
-+	union i2c_smbus_data tmp_data;
-+	int timeout = 0;
-+
-+	tmp_data.word = 0;
-+	cnt = 0;
-+	len = 0;
-+
-+	outb_p(NCT6793D_SOFT_RESET, SMBHSTCTL);
-+
-+	switch (size) {
-+		case I2C_SMBUS_QUICK:
-+			outb_p((addr << 1) | read_write,
-+			       SMBHSTADD);
-+			break;
-+		case I2C_SMBUS_BYTE_DATA:
-+			tmp_data.byte = data->byte;
-+			fallthrough;
-+		case I2C_SMBUS_BYTE:
-+			outb_p((addr << 1) | read_write,
-+			       SMBHSTADD);
-+			outb_p(command, SMBHSTIDX);
-+			if (read_write == I2C_SMBUS_WRITE) {
-+				outb_p(tmp_data.byte, SMBHSTDAT);
-+				outb_p(NCT6793D_WRITE_BYTE, SMBHSTCMD);
-+			}
-+			else {
-+				outb_p(NCT6793D_READ_BYTE, SMBHSTCMD);
-+			}
-+			break;
-+		case I2C_SMBUS_WORD_DATA:
-+			outb_p((addr << 1) | read_write,
-+			       SMBHSTADD);
-+			outb_p(command, SMBHSTIDX);
-+			if (read_write == I2C_SMBUS_WRITE) {
-+				outb_p(data->word & 0xff, SMBHSTDAT);
-+				outb_p((data->word & 0xff00) >> 8, SMBHSTDAT);
-+				outb_p(NCT6793D_WRITE_WORD, SMBHSTCMD);
-+			}
-+			else {
-+				outb_p(NCT6793D_READ_WORD, SMBHSTCMD);
-+			}
-+			break;
-+		case I2C_SMBUS_BLOCK_DATA:
-+			outb_p((addr << 1) | read_write,
-+			       SMBHSTADD);
-+			outb_p(command, SMBHSTIDX);
-+			if (read_write == I2C_SMBUS_WRITE) {
-+				len = data->block[0];
-+				if (len == 0 || len > I2C_SMBUS_BLOCK_MAX)
-+					return -EINVAL;
-+				outb_p(len, SMBBLKSZ);
-+
-+				cnt = 1;
-+				if (len >= 4) {
-+					for (i = cnt; i <= 4; i++) {
-+						outb_p(data->block[i], SMBHSTDAT);
-+					}
-+
-+					len -= 4;
-+					cnt += 4;
-+				}
-+				else {
-+					for (i = cnt; i <= len; i++ ) {
-+						outb_p(data->block[i], SMBHSTDAT);
-+					}
-+
-+					len = 0;
-+				}
-+
-+				outb_p(NCT6793D_WRITE_BLOCK, SMBHSTCMD);
-+			}
-+			else {
-+				return -ENOTSUPP;
-+			}
-+			break;
-+		default:
-+			dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
-+			return -EOPNOTSUPP;
-+	}
-+
-+	outb_p(NCT6793D_MANUAL_START, SMBHSTCTL);
-+
-+	while ((size == I2C_SMBUS_BLOCK_DATA) && (len > 0)) {
-+		if (read_write == I2C_SMBUS_WRITE) {
-+			timeout = 0;
-+			while ((inb_p(SMBHSTSTS) & NCT6793D_FIFO_EMPTY) == 0)
-+			{
-+				if(timeout > MAX_RETRIES)
-+				{
-+					return -ETIMEDOUT;
-+				}
-+				usleep_range(250, 500);
-+				timeout++;
-+			}
-+
-+			//Load more bytes into FIFO
-+			if (len >= 4) {
-+				for (i = cnt; i <= (cnt + 4); i++) {
-+					outb_p(data->block[i], SMBHSTDAT);
-+				}
-+
-+				len -= 4;
-+				cnt += 4;
-+			}
-+			else {
-+				for (i = cnt; i <= (cnt + len); i++) {
-+					outb_p(data->block[i], SMBHSTDAT);
-+				}
-+
-+				len = 0;
-+			}
-+		}
-+		else {
-+			return -ENOTSUPP;
-+		}
-+		
-+	}
-+
-+	//wait for manual mode to complete
-+	timeout = 0;
-+	while ((inb_p(SMBHSTSTS) & NCT6793D_MANUAL_ACTIVE) != 0)
-+	{
-+		if(timeout > MAX_RETRIES)
-+		{
-+			return -ETIMEDOUT;
-+		}
-+		usleep_range(250, 500);
-+		timeout++;
-+	}
-+
-+	if ((inb_p(SMBHSTERR) & NCT6793D_NO_ACK) != 0) {    	
-+		return -ENXIO;
-+	}
-+	else if ((read_write == I2C_SMBUS_WRITE) || (size == I2C_SMBUS_QUICK)) {
-+		return 0;
-+	}
-+
-+	switch (size) {
-+		case I2C_SMBUS_QUICK:
-+		case I2C_SMBUS_BYTE_DATA:
-+			data->byte = inb_p(SMBHSTDAT);
-+			break;
-+		case I2C_SMBUS_WORD_DATA:
-+			data->word = inb_p(SMBHSTDAT) + (inb_p(SMBHSTDAT) << 8);
-+			break;
-+	}
-+	return 0;
-+}
-+
-+static u32 nct6775_func(struct i2c_adapter *adapter)
-+{
-+	return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
-+	    I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
-+	    I2C_FUNC_SMBUS_BLOCK_DATA;
-+}
-+
-+static const struct i2c_algorithm smbus_algorithm = {
-+	.smbus_xfer	= nct6775_access,
-+	.functionality	= nct6775_func,
-+};
-+
-+static int nct6775_add_adapter(unsigned short smba, const char *name, struct i2c_adapter **padap)
-+{
-+	struct i2c_adapter *adap;
-+	struct i2c_nct6775_adapdata *adapdata;
-+	int retval;
-+
-+	adap = kzalloc(sizeof(*adap), GFP_KERNEL);
-+	if (adap == NULL) {
-+		return -ENOMEM;
-+	}
-+
-+	adap->owner = THIS_MODULE;
-+	adap->class = I2C_CLASS_HWMON;
-+	adap->algo = &smbus_algorithm;
-+
-+	adapdata = kzalloc(sizeof(*adapdata), GFP_KERNEL);
-+	if (adapdata == NULL) {
-+		kfree(adap);
-+		return -ENOMEM;
-+	}
-+
-+	adapdata->smba = smba;
-+
-+	snprintf(adap->name, sizeof(adap->name),
-+		"SMBus NCT67xx adapter%s at %04x", name, smba);
-+
-+	i2c_set_adapdata(adap, adapdata);
-+
-+	retval = i2c_add_adapter(adap);
-+	if (retval) {
-+		kfree(adapdata);
-+		kfree(adap);
-+		return retval;
-+	}
-+
-+	*padap = adap;
-+	return 0;
-+}
-+
-+static void nct6775_remove_adapter(struct i2c_adapter *adap)
-+{
-+	struct i2c_nct6775_adapdata *adapdata = i2c_get_adapdata(adap);
-+
-+	if (adapdata->smba) {
-+		i2c_del_adapter(adap);
-+		kfree(adapdata);
-+		kfree(adap);
-+	}
-+}
-+
-+//static SIMPLE_DEV_PM_OPS(nct6775_dev_pm_ops, nct6775_suspend, nct6775_resume);
-+
-+/*
-+ * when Super-I/O functions move to a separate file, the Super-I/O
-+ * bus will manage the lifetime of the device and this module will only keep
-+ * track of the nct6775 driver. But since we use platform_device_alloc(), we
-+ * must keep track of the device
-+ */
-+static struct platform_device *pdev[2];
-+
-+static int nct6775_probe(struct platform_device *pdev)
-+{
-+	struct device *dev = &pdev->dev;
-+	struct nct6775_sio_data *sio_data = dev_get_platdata(dev);
-+	struct resource *res;
-+
-+	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
-+	if (!devm_request_region(&pdev->dev, res->start, IOREGION_LENGTH,
-+				 DRVNAME))
-+		return -EBUSY;
-+
-+	switch (sio_data->kind) {
-+	case nct6791:
-+	case nct6792:
-+	case nct6793:
-+	case nct6795:
-+	case nct6796:
-+	case nct6798:
-+		nct6775_add_adapter(res->start, "", &nct6775_adapter);
-+		break;
-+	default:
-+		return -ENODEV;
-+	}
-+
-+	return 0;
-+}
-+/*
-+static void nct6791_enable_io_mapping(int sioaddr)
-+{
-+	int val;
-+
-+	val = superio_inb(sioaddr, NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE);
-+	if (val & 0x10) {
-+		pr_info("Enabling hardware monitor logical device mappings.\n");
-+		superio_outb(sioaddr, NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE,
-+			     val & ~0x10);
-+	}
-+}*/
-+
-+static struct platform_driver i2c_nct6775_driver = {
-+	.driver = {
-+		.name	= DRVNAME,
-+//		.pm	= &nct6775_dev_pm_ops,
-+	},
-+	.probe		= nct6775_probe,
-+};
-+
-+static void __exit i2c_nct6775_exit(void)
-+{
-+	int i;
-+
-+	if(nct6775_adapter)
-+		nct6775_remove_adapter(nct6775_adapter);
-+
-+	for (i = 0; i < ARRAY_SIZE(pdev); i++) {
-+		if (pdev[i])
-+			platform_device_unregister(pdev[i]);
-+	}
-+	platform_driver_unregister(&i2c_nct6775_driver);
-+}
-+
-+/* nct6775_find() looks for a '627 in the Super-I/O config space */
-+static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data)
-+{
-+	u16 val;
-+	int err;
-+	int addr;
-+
-+	err = superio_enter(sioaddr);
-+	if (err)
-+		return err;
-+
-+	val = (superio_inb(sioaddr, SIO_REG_DEVID) << 8) |
-+		superio_inb(sioaddr, SIO_REG_DEVID + 1);
-+
-+	switch (val & SIO_ID_MASK) {
-+	case SIO_NCT6106_ID:
-+		sio_data->kind = nct6106;
-+		break;
-+	case SIO_NCT6775_ID:
-+		sio_data->kind = nct6775;
-+		break;
-+	case SIO_NCT6776_ID:
-+		sio_data->kind = nct6776;
-+		break;
-+	case SIO_NCT6779_ID:
-+		sio_data->kind = nct6779;
-+		break;
-+	case SIO_NCT6791_ID:
-+		sio_data->kind = nct6791;
-+		break;
-+	case SIO_NCT6792_ID:
-+		sio_data->kind = nct6792;
-+		break;
-+	case SIO_NCT6793_ID:
-+		sio_data->kind = nct6793;
-+		break;
-+	case SIO_NCT6795_ID:
-+		sio_data->kind = nct6795;
-+		break;
-+	case SIO_NCT6796_ID:
-+		sio_data->kind = nct6796;
-+		break;
-+	case SIO_NCT6798_ID:
-+		sio_data->kind = nct6798;
-+		break;
-+	default:
-+		if (val != 0xffff)
-+			pr_debug("unsupported chip ID: 0x%04x\n", val);
-+		superio_exit(sioaddr);
-+		return -ENODEV;
-+	}
-+
-+	/* We have a known chip, find the SMBus I/O address */
-+	superio_select(sioaddr, NCT6775_LD_SMBUS);
-+	val = (superio_inb(sioaddr, SIO_REG_SMBA) << 8)
-+	    | superio_inb(sioaddr, SIO_REG_SMBA + 1);
-+	addr = val & IOREGION_ALIGNMENT;
-+	if (addr == 0) {
-+		pr_err("Refusing to enable a Super-I/O device with a base I/O port 0\n");
-+		superio_exit(sioaddr);
-+		return -ENODEV;
-+	}
-+
-+	//if (sio_data->kind == nct6791 || sio_data->kind == nct6792 ||
-+	//    sio_data->kind == nct6793 || sio_data->kind == nct6795 ||
-+	//    sio_data->kind == nct6796)
-+	//	nct6791_enable_io_mapping(sioaddr);
-+
-+	superio_exit(sioaddr);
-+	pr_info("Found %s or compatible chip at %#x:%#x\n",
-+		nct6775_sio_names[sio_data->kind], sioaddr, addr);
-+	sio_data->sioreg = sioaddr;
-+
-+	return addr;
-+}
-+
-+static int __init i2c_nct6775_init(void)
-+{
-+	int i, err;
-+	bool found = false;
-+	int address;
-+	struct resource res;
-+	struct nct6775_sio_data sio_data;
-+	int sioaddr[2] = { 0x2e, 0x4e };
-+
-+	err = platform_driver_register(&i2c_nct6775_driver);
-+	if (err)
-+		return err;
-+
-+	/*
-+	 * initialize sio_data->kind and sio_data->sioreg.
-+	 *
-+	 * when Super-I/O functions move to a separate file, the Super-I/O
-+	 * driver will probe 0x2e and 0x4e and auto-detect the presence of a
-+	 * nct6775 hardware monitor, and call probe()
-+	 */
-+	for (i = 0; i < ARRAY_SIZE(pdev); i++) {
-+		address = nct6775_find(sioaddr[i], &sio_data);
-+		if (address <= 0)
-+			continue;
-+
-+		found = true;
-+
-+		pdev[i] = platform_device_alloc(DRVNAME, address);
-+		if (!pdev[i]) {
-+			err = -ENOMEM;
-+			goto exit_device_unregister;
-+		}
-+
-+		err = platform_device_add_data(pdev[i], &sio_data,
-+					       sizeof(struct nct6775_sio_data));
-+		if (err)
-+			goto exit_device_put;
-+
-+		memset(&res, 0, sizeof(res));
-+		res.name = DRVNAME;
-+		res.start = address;
-+		res.end = address + IOREGION_LENGTH - 1;
-+		res.flags = IORESOURCE_IO;
-+
-+		err = acpi_check_resource_conflict(&res);
-+		if (err) {
-+			platform_device_put(pdev[i]);
-+			pdev[i] = NULL;
-+			continue;
-+		}
-+
-+		err = platform_device_add_resources(pdev[i], &res, 1);
-+		if (err)
-+			goto exit_device_put;
-+
-+		/* platform_device_add calls probe() */
-+		err = platform_device_add(pdev[i]);
-+		if (err)
-+			goto exit_device_put;
-+	}
-+	if (!found) {
-+		err = -ENODEV;
-+		goto exit_unregister;
-+	}
-+
-+	return 0;
-+
-+exit_device_put:
-+	platform_device_put(pdev[i]);
-+exit_device_unregister:
-+	while (--i >= 0) {
-+		if (pdev[i])
-+			platform_device_unregister(pdev[i]);
-+	}
-+exit_unregister:
-+	platform_driver_unregister(&i2c_nct6775_driver);
-+	return err;
-+}
-+
-+MODULE_AUTHOR("Adam Honse <calcprogrammer1@gmail.com>");
-+MODULE_DESCRIPTION("SMBus driver for NCT6775F and compatible chips");
-+MODULE_LICENSE("GPL");
-+
-+module_init(i2c_nct6775_init);
-+module_exit(i2c_nct6775_exit);
-diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
-index 6a0392172b2f..e7dd007bf6b1 100644
---- a/drivers/i2c/busses/i2c-piix4.c
-+++ b/drivers/i2c/busses/i2c-piix4.c
-@@ -568,11 +568,11 @@ static int piix4_transaction(struct i2c_adapter *piix4_adapter)
- 	if (srvrworks_csb5_delay) /* Extra delay for SERVERWORKS_CSB5 */
- 		usleep_range(2000, 2100);
- 	else
--		usleep_range(250, 500);
-+		usleep_range(25, 50);
- 
- 	while ((++timeout < MAX_TIMEOUT) &&
- 	       ((temp = inb_p(SMBHSTSTS)) & 0x01))
--		usleep_range(250, 500);
-+		usleep_range(25, 50);
- 
- 	/* If the SMBus is still busy, we give up */
- 	if (timeout == MAX_TIMEOUT) {
-diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
-index 51e0c4954600..35c3ad741870 100644
---- a/drivers/input/evdev.c
-+++ b/drivers/input/evdev.c
-@@ -46,6 +46,7 @@ struct evdev_client {
- 	struct fasync_struct *fasync;
- 	struct evdev *evdev;
- 	struct list_head node;
-+	struct rcu_head rcu;
- 	enum input_clock_type clk_type;
- 	bool revoked;
- 	unsigned long *evmasks[EV_CNT];
-@@ -377,13 +378,22 @@ static void evdev_attach_client(struct evdev *evdev,
- 	spin_unlock(&evdev->client_lock);
- }
- 
-+static void evdev_reclaim_client(struct rcu_head *rp)
-+{
-+	struct evdev_client *client = container_of(rp, struct evdev_client, rcu);
-+	unsigned int i;
-+	for (i = 0; i < EV_CNT; ++i)
-+		bitmap_free(client->evmasks[i]);
-+	kvfree(client);
-+}
-+
- static void evdev_detach_client(struct evdev *evdev,
- 				struct evdev_client *client)
- {
- 	spin_lock(&evdev->client_lock);
- 	list_del_rcu(&client->node);
- 	spin_unlock(&evdev->client_lock);
--	synchronize_rcu();
-+	call_rcu(&client->rcu, evdev_reclaim_client);
- }
- 
- static int evdev_open_device(struct evdev *evdev)
-@@ -436,7 +446,6 @@ static int evdev_release(struct inode *inode, struct file *file)
- {
- 	struct evdev_client *client = file->private_data;
- 	struct evdev *evdev = client->evdev;
--	unsigned int i;
- 
- 	mutex_lock(&evdev->mutex);
- 
-@@ -448,11 +457,6 @@ static int evdev_release(struct inode *inode, struct file *file)
- 
- 	evdev_detach_client(evdev, client);
- 
--	for (i = 0; i < EV_CNT; ++i)
--		bitmap_free(client->evmasks[i]);
--
--	kvfree(client);
--
- 	evdev_close_device(evdev);
- 
- 	return 0;
-@@ -495,7 +499,6 @@ static int evdev_open(struct inode *inode, struct file *file)
- 
-  err_free_client:
- 	evdev_detach_client(evdev, client);
--	kvfree(client);
- 	return error;
- }
- 
-diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
-index 1b7a97cc3779..37e9e43908ab 100644
---- a/drivers/md/dm-crypt.c
-+++ b/drivers/md/dm-crypt.c
-@@ -3284,6 +3284,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
- 			goto bad;
- 	}
- 
-+#ifdef CONFIG_CACHY
-+	set_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
-+	set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
-+#endif
-+
- 	ret = crypt_ctr_cipher(ti, argv[0], argv[1]);
- 	if (ret < 0)
- 		goto bad;
-diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig
-index 331b8e535e5b..80dabeebf580 100644
---- a/drivers/media/v4l2-core/Kconfig
-+++ b/drivers/media/v4l2-core/Kconfig
-@@ -40,6 +40,11 @@ config VIDEO_TUNER
- config V4L2_JPEG_HELPER
- 	tristate
- 
-+config V4L2_LOOPBACK
-+	tristate "V4L2 loopback device"
-+	help
-+	  V4L2 loopback device
-+
- # Used by drivers that need v4l2-h264.ko
- config V4L2_H264
- 	tristate
-diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile
-index 2177b9d63a8f..c179507cedc4 100644
---- a/drivers/media/v4l2-core/Makefile
-+++ b/drivers/media/v4l2-core/Makefile
-@@ -33,5 +33,7 @@ obj-$(CONFIG_V4L2_JPEG_HELPER) += v4l2-jpeg.o
- obj-$(CONFIG_V4L2_MEM2MEM_DEV) += v4l2-mem2mem.o
- obj-$(CONFIG_V4L2_VP9) += v4l2-vp9.o
- 
-+obj-$(CONFIG_V4L2_LOOPBACK) += v4l2loopback.o
-+
- obj-$(CONFIG_VIDEO_TUNER) += tuner.o
- obj-$(CONFIG_VIDEO_DEV) += v4l2-dv-timings.o videodev.o
-diff --git a/drivers/media/v4l2-core/v4l2loopback.c b/drivers/media/v4l2-core/v4l2loopback.c
-new file mode 100644
-index 000000000000..25cb1beb26e5
---- /dev/null
-+++ b/drivers/media/v4l2-core/v4l2loopback.c
-@@ -0,0 +1,3184 @@
-+/* -*- c-file-style: "linux" -*- */
-+/*
-+ * v4l2loopback.c  --  video4linux2 loopback driver
-+ *
-+ * Copyright (C) 2005-2009 Vasily Levin (vasaka@gmail.com)
-+ * Copyright (C) 2010-2023 IOhannes m zmoelnig (zmoelnig@iem.at)
-+ * Copyright (C) 2011 Stefan Diewald (stefan.diewald@mytum.de)
-+ * Copyright (C) 2012 Anton Novikov (random.plant@gmail.com)
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
-+ *
-+ */
-+#include <linux/version.h>
-+#include <linux/vmalloc.h>
-+#include <linux/mm.h>
-+#include <linux/time.h>
-+#include <linux/module.h>
-+#include <linux/videodev2.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/fs.h>
-+#include <linux/capability.h>
-+#include <linux/eventpoll.h>
-+#include <media/v4l2-ioctl.h>
-+#include <media/v4l2-common.h>
-+#include <media/v4l2-device.h>
-+#include <media/v4l2-ctrls.h>
-+#include <media/v4l2-event.h>
-+
-+#include <linux/miscdevice.h>
-+#include "v4l2loopback.h"
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
-+#error This module is not supported on kernels before 4.0.0.
-+#endif
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
-+#define strscpy strlcpy
-+#endif
-+
-+#if defined(timer_setup) && defined(from_timer)
-+#define HAVE_TIMER_SETUP
-+#endif
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0)
-+#define VFL_TYPE_VIDEO VFL_TYPE_GRABBER
-+#endif
-+
-+#define V4L2LOOPBACK_VERSION_CODE                                              \
-+	KERNEL_VERSION(V4L2LOOPBACK_VERSION_MAJOR, V4L2LOOPBACK_VERSION_MINOR, \
-+		       V4L2LOOPBACK_VERSION_BUGFIX)
-+
-+MODULE_DESCRIPTION("V4L2 loopback video device");
-+MODULE_AUTHOR("Vasily Levin, "
-+	      "IOhannes m zmoelnig <zmoelnig@iem.at>,"
-+	      "Stefan Diewald,"
-+	      "Anton Novikov"
-+	      "et al.");
-+#ifdef SNAPSHOT_VERSION
-+MODULE_VERSION(__stringify(SNAPSHOT_VERSION));
-+#else
-+MODULE_VERSION("" __stringify(V4L2LOOPBACK_VERSION_MAJOR) "." __stringify(
-+	V4L2LOOPBACK_VERSION_MINOR) "." __stringify(V4L2LOOPBACK_VERSION_BUGFIX));
-+#endif
-+MODULE_LICENSE("GPL");
-+
-+/*
-+ * helpers
-+ */
-+#define dprintk(fmt, args...)                                          \
-+	do {                                                           \
-+		if (debug > 0) {                                       \
-+			printk(KERN_INFO "v4l2-loopback[" __stringify( \
-+				       __LINE__) "], pid(%d):  " fmt,  \
-+			       task_pid_nr(current), ##args);          \
-+		}                                                      \
-+	} while (0)
-+
-+#define MARK()                                                             \
-+	do {                                                               \
-+		if (debug > 1) {                                           \
-+			printk(KERN_INFO "%s:%d[%s], pid(%d)\n", __FILE__, \
-+			       __LINE__, __func__, task_pid_nr(current));  \
-+		}                                                          \
-+	} while (0)
-+
-+#define dprintkrw(fmt, args...)                                        \
-+	do {                                                           \
-+		if (debug > 2) {                                       \
-+			printk(KERN_INFO "v4l2-loopback[" __stringify( \
-+				       __LINE__) "], pid(%d): " fmt,   \
-+			       task_pid_nr(current), ##args);          \
-+		}                                                      \
-+	} while (0)
-+
-+static inline void v4l2l_get_timestamp(struct v4l2_buffer *b)
-+{
-+	struct timespec64 ts;
-+	ktime_get_ts64(&ts);
-+
-+	b->timestamp.tv_sec = ts.tv_sec;
-+	b->timestamp.tv_usec = (ts.tv_nsec / NSEC_PER_USEC);
-+	b->flags |= V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
-+}
-+
-+#if BITS_PER_LONG == 32
-+#include <asm/div64.h> /* do_div() for 64bit division */
-+static inline int v4l2l_mod64(const s64 A, const u32 B)
-+{
-+	u64 a = (u64)A;
-+	u32 b = B;
-+
-+	if (A > 0)
-+		return do_div(a, b);
-+	a = -A;
-+	return -do_div(a, b);
-+}
-+#else
-+static inline int v4l2l_mod64(const s64 A, const u32 B)
-+{
-+	return A % B;
-+}
-+#endif
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0)
-+typedef unsigned __poll_t;
-+#endif
-+
-+/* module constants
-+ *  can be overridden during he build process using something like
-+ *	make KCPPFLAGS="-DMAX_DEVICES=100"
-+ */
-+
-+/* maximum number of v4l2loopback devices that can be created */
-+#ifndef MAX_DEVICES
-+#define MAX_DEVICES 8
-+#endif
-+
-+/* whether the default is to announce capabilities exclusively or not */
-+#ifndef V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS
-+#define V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS 0
-+#endif
-+
-+/* when a producer is considered to have gone stale */
-+#ifndef MAX_TIMEOUT
-+#define MAX_TIMEOUT (100 * 1000) /* in msecs */
-+#endif
-+
-+/* max buffers that can be mapped, actually they
-+ * are all mapped to max_buffers buffers */
-+#ifndef MAX_BUFFERS
-+#define MAX_BUFFERS 32
-+#endif
-+
-+/* module parameters */
-+static int debug = 0;
-+module_param(debug, int, S_IRUGO | S_IWUSR);
-+MODULE_PARM_DESC(debug, "debugging level (higher values == more verbose)");
-+
-+#define V4L2LOOPBACK_DEFAULT_MAX_BUFFERS 2
-+static int max_buffers = V4L2LOOPBACK_DEFAULT_MAX_BUFFERS;
-+module_param(max_buffers, int, S_IRUGO);
-+MODULE_PARM_DESC(max_buffers,
-+		 "how many buffers should be allocated [DEFAULT: " __stringify(
-+			 V4L2LOOPBACK_DEFAULT_MAX_BUFFERS) "]");
-+
-+/* how many times a device can be opened
-+ * the per-module default value can be overridden on a per-device basis using
-+ * the /sys/devices interface
-+ *
-+ * note that max_openers should be at least 2 in order to get a working system:
-+ *   one opener for the producer and one opener for the consumer
-+ *   however, we leave that to the user
-+ */
-+#define V4L2LOOPBACK_DEFAULT_MAX_OPENERS 10
-+static int max_openers = V4L2LOOPBACK_DEFAULT_MAX_OPENERS;
-+module_param(max_openers, int, S_IRUGO | S_IWUSR);
-+MODULE_PARM_DESC(
-+	max_openers,
-+	"how many users can open the loopback device [DEFAULT: " __stringify(
-+		V4L2LOOPBACK_DEFAULT_MAX_OPENERS) "]");
-+
-+static int devices = -1;
-+module_param(devices, int, 0);
-+MODULE_PARM_DESC(devices, "how many devices should be created");
-+
-+static int video_nr[MAX_DEVICES] = { [0 ...(MAX_DEVICES - 1)] = -1 };
-+module_param_array(video_nr, int, NULL, 0444);
-+MODULE_PARM_DESC(video_nr,
-+		 "video device numbers (-1=auto, 0=/dev/video0, etc.)");
-+
-+static char *card_label[MAX_DEVICES];
-+module_param_array(card_label, charp, NULL, 0000);
-+MODULE_PARM_DESC(card_label, "card labels for each device");
-+
-+static bool exclusive_caps[MAX_DEVICES] = {
-+	[0 ...(MAX_DEVICES - 1)] = V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS
-+};
-+module_param_array(exclusive_caps, bool, NULL, 0444);
-+/* FIXXME: wording */
-+MODULE_PARM_DESC(
-+	exclusive_caps,
-+	"whether to announce OUTPUT/CAPTURE capabilities exclusively or not  [DEFAULT: " __stringify(
-+		V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS) "]");
-+
-+/* format specifications */
-+#define V4L2LOOPBACK_SIZE_MIN_WIDTH 2
-+#define V4L2LOOPBACK_SIZE_MIN_HEIGHT 1
-+#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH 8192
-+#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT 8192
-+
-+#define V4L2LOOPBACK_SIZE_DEFAULT_WIDTH 640
-+#define V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT 480
-+
-+static int max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH;
-+module_param(max_width, int, S_IRUGO);
-+MODULE_PARM_DESC(max_width,
-+		 "maximum allowed frame width [DEFAULT: " __stringify(
-+			 V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH) "]");
-+static int max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT;
-+module_param(max_height, int, S_IRUGO);
-+MODULE_PARM_DESC(max_height,
-+		 "maximum allowed frame height [DEFAULT: " __stringify(
-+			 V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT) "]");
-+
-+static DEFINE_IDR(v4l2loopback_index_idr);
-+static DEFINE_MUTEX(v4l2loopback_ctl_mutex);
-+
-+/* frame intervals */
-+#define V4L2LOOPBACK_FPS_MIN 0
-+#define V4L2LOOPBACK_FPS_MAX 1000
-+
-+/* control IDs */
-+#define V4L2LOOPBACK_CID_BASE (V4L2_CID_USER_BASE | 0xf000)
-+#define CID_KEEP_FORMAT (V4L2LOOPBACK_CID_BASE + 0)
-+#define CID_SUSTAIN_FRAMERATE (V4L2LOOPBACK_CID_BASE + 1)
-+#define CID_TIMEOUT (V4L2LOOPBACK_CID_BASE + 2)
-+#define CID_TIMEOUT_IMAGE_IO (V4L2LOOPBACK_CID_BASE + 3)
-+
-+static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl);
-+static const struct v4l2_ctrl_ops v4l2loopback_ctrl_ops = {
-+	.s_ctrl = v4l2loopback_s_ctrl,
-+};
-+static const struct v4l2_ctrl_config v4l2loopback_ctrl_keepformat = {
-+	// clang-format off
-+	.ops	= &v4l2loopback_ctrl_ops,
-+	.id	= CID_KEEP_FORMAT,
-+	.name	= "keep_format",
-+	.type	= V4L2_CTRL_TYPE_BOOLEAN,
-+	.min	= 0,
-+	.max	= 1,
-+	.step	= 1,
-+	.def	= 0,
-+	// clang-format on
-+};
-+static const struct v4l2_ctrl_config v4l2loopback_ctrl_sustainframerate = {
-+	// clang-format off
-+	.ops	= &v4l2loopback_ctrl_ops,
-+	.id	= CID_SUSTAIN_FRAMERATE,
-+	.name	= "sustain_framerate",
-+	.type	= V4L2_CTRL_TYPE_BOOLEAN,
-+	.min	= 0,
-+	.max	= 1,
-+	.step	= 1,
-+	.def	= 0,
-+	// clang-format on
-+};
-+static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeout = {
-+	// clang-format off
-+	.ops	= &v4l2loopback_ctrl_ops,
-+	.id	= CID_TIMEOUT,
-+	.name	= "timeout",
-+	.type	= V4L2_CTRL_TYPE_INTEGER,
-+	.min	= 0,
-+	.max	= MAX_TIMEOUT,
-+	.step	= 1,
-+	.def	= 0,
-+	// clang-format on
-+};
-+static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeoutimageio = {
-+	// clang-format off
-+	.ops	= &v4l2loopback_ctrl_ops,
-+	.id	= CID_TIMEOUT_IMAGE_IO,
-+	.name	= "timeout_image_io",
-+	.type	= V4L2_CTRL_TYPE_BUTTON,
-+	.min	= 0,
-+	.max	= 1,
-+	.step	= 1,
-+	.def	= 0,
-+	// clang-format on
-+};
-+
-+/* module structures */
-+struct v4l2loopback_private {
-+	int device_nr;
-+};
-+
-+/* TODO(vasaka) use typenames which are common to kernel, but first find out if
-+ * it is needed */
-+/* struct keeping state and settings of loopback device */
-+
-+struct v4l2l_buffer {
-+	struct v4l2_buffer buffer;
-+	struct list_head list_head;
-+	int use_count;
-+};
-+
-+struct v4l2_loopback_device {
-+	struct v4l2_device v4l2_dev;
-+	struct v4l2_ctrl_handler ctrl_handler;
-+	struct video_device *vdev;
-+	/* pixel and stream format */
-+	struct v4l2_pix_format pix_format;
-+	bool pix_format_has_valid_sizeimage;
-+	struct v4l2_captureparm capture_param;
-+	unsigned long frame_jiffies;
-+
-+	/* ctrls */
-+	int keep_format; /* CID_KEEP_FORMAT; stay ready_for_capture even when all
-+			    openers close() the device */
-+	int sustain_framerate; /* CID_SUSTAIN_FRAMERATE; duplicate frames to maintain
-+				  (close to) nominal framerate */
-+
-+	/* buffers stuff */
-+	u8 *image; /* pointer to actual buffers data */
-+	unsigned long int imagesize; /* size of buffers data */
-+	int buffers_number; /* should not be big, 4 is a good choice */
-+	struct v4l2l_buffer buffers[MAX_BUFFERS]; /* inner driver buffers */
-+	int used_buffers; /* number of the actually used buffers */
-+	int max_openers; /* how many times can this device be opened */
-+
-+	s64 write_position; /* number of last written frame + 1 */
-+	struct list_head outbufs_list; /* buffers in output DQBUF order */
-+	int bufpos2index
-+		[MAX_BUFFERS]; /* mapping of (read/write_position % used_buffers)
-+                        * to inner buffer index */
-+	long buffer_size;
-+
-+	/* sustain_framerate stuff */
-+	struct timer_list sustain_timer;
-+	unsigned int reread_count;
-+
-+	/* timeout stuff */
-+	unsigned long timeout_jiffies; /* CID_TIMEOUT; 0 means disabled */
-+	int timeout_image_io; /* CID_TIMEOUT_IMAGE_IO; next opener will
-+			       * read/write to timeout_image */
-+	u8 *timeout_image; /* copy of it will be captured when timeout passes */
-+	struct v4l2l_buffer timeout_image_buffer;
-+	struct timer_list timeout_timer;
-+	int timeout_happened;
-+
-+	/* sync stuff */
-+	atomic_t open_count;
-+
-+	int ready_for_capture; /* set to the number of writers that opened the
-+                                * device and negotiated format. */
-+	int ready_for_output; /* set to true when no writer is currently attached
-+			       * this differs slightly from !ready_for_capture,
-+			       * e.g. when using fallback images */
-+	int active_readers; /* increase if any reader starts streaming */
-+	int announce_all_caps; /* set to false, if device caps (OUTPUT/CAPTURE)
-+                                * should only be announced if the resp. "ready"
-+                                * flag is set; default=TRUE */
-+
-+	int min_width, max_width;
-+	int min_height, max_height;
-+
-+	char card_label[32];
-+
-+	wait_queue_head_t read_event;
-+	spinlock_t lock, list_lock;
-+};
-+
-+/* types of opener shows what opener wants to do with loopback */
-+enum opener_type {
-+	// clang-format off
-+	UNNEGOTIATED	= 0,
-+	READER		= 1,
-+	WRITER		= 2,
-+	// clang-format on
-+};
-+
-+/* struct keeping state and type of opener */
-+struct v4l2_loopback_opener {
-+	enum opener_type type;
-+	s64 read_position; /* number of last processed frame + 1 or
-+			    * write_position - 1 if reader went out of sync */
-+	unsigned int reread_count;
-+	struct v4l2_buffer *buffers;
-+	int buffers_number; /* should not be big, 4 is a good choice */
-+	int timeout_image_io;
-+
-+	struct v4l2_fh fh;
-+};
-+
-+#define fh_to_opener(ptr) container_of((ptr), struct v4l2_loopback_opener, fh)
-+
-+/* this is heavily inspired by the bttv driver found in the linux kernel */
-+struct v4l2l_format {
-+	char *name;
-+	int fourcc; /* video4linux 2 */
-+	int depth; /* bit/pixel */
-+	int flags;
-+};
-+/* set the v4l2l_format.flags to PLANAR for non-packed formats */
-+#define FORMAT_FLAGS_PLANAR 0x01
-+#define FORMAT_FLAGS_COMPRESSED 0x02
-+
-+#include "v4l2loopback_formats.h"
-+
-+#ifndef V4L2_TYPE_IS_CAPTURE
-+#define V4L2_TYPE_IS_CAPTURE(type)                \
-+	((type) == V4L2_BUF_TYPE_VIDEO_CAPTURE || \
-+	 (type) == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
-+#endif /* V4L2_TYPE_IS_CAPTURE */
-+#ifndef V4L2_TYPE_IS_OUTPUT
-+#define V4L2_TYPE_IS_OUTPUT(type)                \
-+	((type) == V4L2_BUF_TYPE_VIDEO_OUTPUT || \
-+	 (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
-+#endif /* V4L2_TYPE_IS_OUTPUT */
-+
-+/* whether the format can be changed */
-+/* the format is fixated if we
-+   - have writers (ready_for_capture>0)
-+   - and/or have readers (active_readers>0)
-+*/
-+#define V4L2LOOPBACK_IS_FIXED_FMT(device)                               \
-+	(device->ready_for_capture > 0 || device->active_readers > 0 || \
-+	 device->keep_format)
-+
-+static const unsigned int FORMATS = ARRAY_SIZE(formats);
-+
-+static char *fourcc2str(unsigned int fourcc, char buf[4])
-+{
-+	buf[0] = (fourcc >> 0) & 0xFF;
-+	buf[1] = (fourcc >> 8) & 0xFF;
-+	buf[2] = (fourcc >> 16) & 0xFF;
-+	buf[3] = (fourcc >> 24) & 0xFF;
-+
-+	return buf;
-+}
-+
-+static const struct v4l2l_format *format_by_fourcc(int fourcc)
-+{
-+	unsigned int i;
-+
-+	for (i = 0; i < FORMATS; i++) {
-+		if (formats[i].fourcc == fourcc)
-+			return formats + i;
-+	}
-+
-+	dprintk("unsupported format '%c%c%c%c'\n", (fourcc >> 0) & 0xFF,
-+		(fourcc >> 8) & 0xFF, (fourcc >> 16) & 0xFF,
-+		(fourcc >> 24) & 0xFF);
-+	return NULL;
-+}
-+
-+static void pix_format_set_size(struct v4l2_pix_format *f,
-+				const struct v4l2l_format *fmt,
-+				unsigned int width, unsigned int height)
-+{
-+	f->width = width;
-+	f->height = height;
-+
-+	if (fmt->flags & FORMAT_FLAGS_PLANAR) {
-+		f->bytesperline = width; /* Y plane */
-+		f->sizeimage = (width * height * fmt->depth) >> 3;
-+	} else if (fmt->flags & FORMAT_FLAGS_COMPRESSED) {
-+		/* doesn't make sense for compressed formats */
-+		f->bytesperline = 0;
-+		f->sizeimage = (width * height * fmt->depth) >> 3;
-+	} else {
-+		f->bytesperline = (width * fmt->depth) >> 3;
-+		f->sizeimage = height * f->bytesperline;
-+	}
-+}
-+
-+static int v4l2l_fill_format(struct v4l2_format *fmt, int capture,
-+			     const u32 minwidth, const u32 maxwidth,
-+			     const u32 minheight, const u32 maxheight)
-+{
-+	u32 width = fmt->fmt.pix.width, height = fmt->fmt.pix.height;
-+	u32 pixelformat = fmt->fmt.pix.pixelformat;
-+	struct v4l2_format fmt0 = *fmt;
-+	u32 bytesperline = 0, sizeimage = 0;
-+	if (!width)
-+		width = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH;
-+	if (!height)
-+		height = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT;
-+	if (width < minwidth)
-+		width = minwidth;
-+	if (width > maxwidth)
-+		width = maxwidth;
-+	if (height < minheight)
-+		height = minheight;
-+	if (height > maxheight)
-+		height = maxheight;
-+
-+	/* sets: width,height,pixelformat,bytesperline,sizeimage */
-+	if (!(V4L2_TYPE_IS_MULTIPLANAR(fmt0.type))) {
-+		fmt0.fmt.pix.bytesperline = 0;
-+		fmt0.fmt.pix.sizeimage = 0;
-+	}
-+
-+	if (0) {
-+		;
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
-+	} else if (!v4l2_fill_pixfmt(&fmt0.fmt.pix, pixelformat, width,
-+				     height)) {
-+		;
-+	} else if (!v4l2_fill_pixfmt_mp(&fmt0.fmt.pix_mp, pixelformat, width,
-+					height)) {
-+		;
-+#endif
-+	} else {
-+		const struct v4l2l_format *format =
-+			format_by_fourcc(pixelformat);
-+		if (!format)
-+			return -EINVAL;
-+		pix_format_set_size(&fmt0.fmt.pix, format, width, height);
-+		fmt0.fmt.pix.pixelformat = format->fourcc;
-+	}
-+
-+	if (V4L2_TYPE_IS_MULTIPLANAR(fmt0.type)) {
-+		*fmt = fmt0;
-+
-+		if ((fmt->fmt.pix_mp.colorspace == V4L2_COLORSPACE_DEFAULT) ||
-+		    (fmt->fmt.pix_mp.colorspace > V4L2_COLORSPACE_DCI_P3))
-+			fmt->fmt.pix_mp.colorspace = V4L2_COLORSPACE_SRGB;
-+		if (V4L2_FIELD_ANY == fmt->fmt.pix_mp.field)
-+			fmt->fmt.pix_mp.field = V4L2_FIELD_NONE;
-+		if (capture)
-+			fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
-+		else
-+			fmt->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
-+	} else {
-+		bytesperline = fmt->fmt.pix.bytesperline;
-+		sizeimage = fmt->fmt.pix.sizeimage;
-+
-+		*fmt = fmt0;
-+
-+		if (!fmt->fmt.pix.bytesperline)
-+			fmt->fmt.pix.bytesperline = bytesperline;
-+		if (!fmt->fmt.pix.sizeimage)
-+			fmt->fmt.pix.sizeimage = sizeimage;
-+
-+		if ((fmt->fmt.pix.colorspace == V4L2_COLORSPACE_DEFAULT) ||
-+		    (fmt->fmt.pix.colorspace > V4L2_COLORSPACE_DCI_P3))
-+			fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB;
-+		if (V4L2_FIELD_ANY == fmt->fmt.pix.field)
-+			fmt->fmt.pix.field = V4L2_FIELD_NONE;
-+		if (capture)
-+			fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-+		else
-+			fmt->type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
-+	}
-+
-+	return 0;
-+}
-+
-+/* Checks if v4l2l_fill_format() has set a valid, fixed sizeimage val. */
-+static bool v4l2l_pix_format_has_valid_sizeimage(struct v4l2_format *fmt)
-+{
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
-+	const struct v4l2_format_info *info;
-+
-+	info = v4l2_format_info(fmt->fmt.pix.pixelformat);
-+	if (info && info->mem_planes == 1)
-+		return true;
-+#endif
-+
-+	return false;
-+}
-+
-+static int pix_format_eq(const struct v4l2_pix_format *ref,
-+			 const struct v4l2_pix_format *tgt, int strict)
-+{
-+	/* check if the two formats are equivalent.
-+	 * ANY fields are handled gracefully
-+	 */
-+#define _pix_format_eq0(x)    \
-+	if (ref->x != tgt->x) \
-+	result = 0
-+#define _pix_format_eq1(x, def)                              \
-+	do {                                                 \
-+		if ((def != tgt->x) && (ref->x != tgt->x)) { \
-+			printk(KERN_INFO #x " failed");      \
-+			result = 0;                          \
-+		}                                            \
-+	} while (0)
-+	int result = 1;
-+	_pix_format_eq0(width);
-+	_pix_format_eq0(height);
-+	_pix_format_eq0(pixelformat);
-+	if (!strict)
-+		return result;
-+	_pix_format_eq1(field, V4L2_FIELD_ANY);
-+	_pix_format_eq0(bytesperline);
-+	_pix_format_eq0(sizeimage);
-+	_pix_format_eq1(colorspace, V4L2_COLORSPACE_DEFAULT);
-+	return result;
-+}
-+
-+static struct v4l2_loopback_device *v4l2loopback_getdevice(struct file *f);
-+static int inner_try_setfmt(struct file *file, struct v4l2_format *fmt)
-+{
-+	int capture = V4L2_TYPE_IS_CAPTURE(fmt->type);
-+	struct v4l2_loopback_device *dev;
-+	int needschange = 0;
-+	char buf[5];
-+	buf[4] = 0;
-+
-+	dev = v4l2loopback_getdevice(file);
-+
-+	needschange = !(pix_format_eq(&dev->pix_format, &fmt->fmt.pix, 0));
-+	if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) {
-+		fmt->fmt.pix = dev->pix_format;
-+		if (needschange) {
-+			if (dev->active_readers > 0 && capture) {
-+				/* cannot call fmt_cap while there are readers */
-+				return -EBUSY;
-+			}
-+			if (dev->ready_for_capture > 0 && !capture) {
-+				/* cannot call fmt_out while there are writers */
-+				return -EBUSY;
-+			}
-+		}
-+	}
-+	if (v4l2l_fill_format(fmt, capture, dev->min_width, dev->max_width,
-+			      dev->min_height, dev->max_height) != 0) {
-+		return -EINVAL;
-+	}
-+
-+	if (1) {
-+		char buf[5];
-+		buf[4] = 0;
-+		dprintk("capFOURCC=%s\n",
-+			fourcc2str(dev->pix_format.pixelformat, buf));
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Store a new frame interval on the device and derive the per-frame
-+ * jiffies count from it (never less than one jiffy).
-+ * Returns -EINVAL if either part of the fraction is below 1, else 0.
-+ */
-+static int set_timeperframe(struct v4l2_loopback_device *dev,
-+			    struct v4l2_fract *tpf)
-+{
-+	unsigned long interval;
-+
-+	if (tpf->numerator < 1 || tpf->denominator < 1)
-+		return -EINVAL;
-+
-+	dev->capture_param.timeperframe = *tpf;
-+
-+	interval = msecs_to_jiffies(1000) * tpf->numerator / tpf->denominator;
-+	dev->frame_jiffies = max(1UL, interval);
-+	return 0;
-+}
-+
-+static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd);
-+
-+/* device attributes */
-+/* available via sysfs: /sys/devices/virtual/video4linux/video* */
-+
-+/*
-+ * sysfs 'format' read handler.
-+ * Emits the current format as "FOURCC:WxH@f/s", e.g. "YUYV:320x240@1000/30";
-+ * the "/s" part is omitted when the frame interval numerator is 1.
-+ * Emits nothing (returns 0) when there is no device or no fixed format.
-+ */
-+static ssize_t attr_show_format(struct device *cd,
-+				struct device_attribute *attr, char *buf)
-+{
-+	struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd);
-+	const struct v4l2_fract *interval;
-+	char fourcc[5], rate[32];
-+
-+	if (!dev || !V4L2LOOPBACK_IS_FIXED_FMT(dev))
-+		return 0;
-+
-+	interval = &dev->capture_param.timeperframe;
-+
-+	fourcc2str(dev->pix_format.pixelformat, fourcc);
-+	fourcc[4] = 0;
-+
-+	if (interval->numerator != 1)
-+		snprintf(rate, sizeof(rate), "%d/%d", interval->denominator,
-+			 interval->numerator);
-+	else
-+		snprintf(rate, sizeof(rate), "%d", interval->denominator);
-+
-+	return sprintf(buf, "%4s:%dx%d@%s\n", fourcc, dev->pix_format.width,
-+		       dev->pix_format.height, rate);
-+}
-+
-+/*
-+ * sysfs 'format' write handler.
-+ * Only changing the frame rate is supported; the accepted syntax is
-+ * "@<fps_num>" or "@<fps_num>/<fps_den>" (fps_den defaults to 1).
-+ *
-+ * Both values are parsed as signed integers and must be >= 1. They are
-+ * rejected here, before being stored into the unsigned v4l2_fract fields,
-+ * because a negative value would otherwise wrap to a huge positive one
-+ * and slip past the range check in set_timeperframe().
-+ *
-+ * Returns len on success, -ENODEV without a device, -EINVAL on malformed
-+ * or out-of-range input.
-+ */
-+static ssize_t attr_store_format(struct device *cd,
-+				 struct device_attribute *attr, const char *buf,
-+				 size_t len)
-+{
-+	struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd);
-+	int fps_num = 0, fps_den = 1;
-+	struct v4l2_fract f;
-+	int err;
-+
-+	if (!dev)
-+		return -ENODEV;
-+
-+	/* only fps changing is supported */
-+	if (sscanf(buf, "@%d/%d", &fps_num, &fps_den) < 1)
-+		return -EINVAL;
-+	if (fps_num < 1 || fps_den < 1)
-+		return -EINVAL;
-+
-+	/* time-per-frame is the reciprocal of frames-per-second */
-+	f.numerator = fps_den;
-+	f.denominator = fps_num;
-+
-+	err = set_timeperframe(dev, &f);
-+	if (err < 0)
-+		return err;
-+	return len;
-+}
-+
-+static DEVICE_ATTR(format, S_IRUGO | S_IWUSR, attr_show_format,
-+		   attr_store_format);
-+
-+/* sysfs 'buffers' read handler: prints dev->used_buffers, or -ENODEV. */
-+static ssize_t attr_show_buffers(struct device *cd,
-+				 struct device_attribute *attr, char *buf)
-+{
-+	struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd);
-+
-+	if (dev)
-+		return sprintf(buf, "%d\n", dev->used_buffers);
-+
-+	return -ENODEV;
-+}
-+
-+static DEVICE_ATTR(buffers, S_IRUGO, attr_show_buffers, NULL);
-+
-+/* sysfs 'max_openers' read handler: prints dev->max_openers, or -ENODEV. */
-+static ssize_t attr_show_maxopeners(struct device *cd,
-+				    struct device_attribute *attr, char *buf)
-+{
-+	struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd);
-+
-+	if (dev)
-+		return sprintf(buf, "%d\n", dev->max_openers);
-+
-+	return -ENODEV;
-+}
-+
-+/*
-+ * sysfs 'max_openers' write handler.
-+ * Parses an unsigned number (any base accepted by kstrtoul) and stores it
-+ * as the new opener limit. The value must fit into an int and must not be
-+ * lower than the number of openers currently attached.
-+ *
-+ * Returns len on success (also when the value is unchanged), -EINVAL on
-+ * unparsable or out-of-range input, -ENODEV without a device.
-+ */
-+static ssize_t attr_store_maxopeners(struct device *cd,
-+				     struct device_attribute *attr,
-+				     const char *buf, size_t len)
-+{
-+	struct v4l2_loopback_device *dev = NULL;
-+	unsigned long curr = 0;
-+
-+	if (kstrtoul(buf, 0, &curr))
-+		return -EINVAL;
-+
-+	dev = v4l2loopback_cd2dev(cd);
-+	if (!dev)
-+		return -ENODEV;
-+
-+	if (dev->max_openers == curr)
-+		return len;
-+
-+	/*
-+	 * Read the open count through the atomic accessor rather than
-+	 * poking at the counter field directly.
-+	 * NOTE(review): assumes open_count is an atomic_t - confirm.
-+	 */
-+	if (curr > __INT_MAX__ || atomic_read(&dev->open_count) > curr) {
-+		/* request to limit to less openers as are currently attached to us */
-+		return -EINVAL;
-+	}
-+
-+	dev->max_openers = (int)curr;
-+
-+	return len;
-+}
-+
-+static DEVICE_ATTR(max_openers, S_IRUGO | S_IWUSR, attr_show_maxopeners,
-+		   attr_store_maxopeners);
-+
-+/*
-+ * sysfs 'state' read handler.
-+ * Prints "capture" when the device is ready for capture, otherwise
-+ * "output" when it is ready for output; returns -EAGAIN when it is
-+ * neither and -ENODEV when there is no device.
-+ */
-+static ssize_t attr_show_state(struct device *cd, struct device_attribute *attr,
-+			       char *buf)
-+{
-+	struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd);
-+	ssize_t written = -EAGAIN;
-+
-+	if (!dev)
-+		return -ENODEV;
-+
-+	if (dev->ready_for_capture)
-+		written = sprintf(buf, "capture\n");
-+	else if (dev->ready_for_output)
-+		written = sprintf(buf, "output\n");
-+
-+	return written;
-+}
-+
-+static DEVICE_ATTR(state, S_IRUGO, attr_show_state, NULL);
-+
-+/* Remove the sysfs attribute files of a video device; no-op for NULL. */
-+static void v4l2loopback_remove_sysfs(struct video_device *vdev)
-+{
-+#define V4L2_SYSFS_DESTROY(x) device_remove_file(&vdev->dev, &dev_attr_##x)
-+
-+	if (!vdev)
-+		return;
-+
-+	V4L2_SYSFS_DESTROY(format);
-+	V4L2_SYSFS_DESTROY(buffers);
-+	V4L2_SYSFS_DESTROY(max_openers);
-+	V4L2_SYSFS_DESTROY(state);
-+	/* ... */
-+}
-+
-+/*
-+ * Create the sysfs attribute files (format, buffers, max_openers, state)
-+ * for a video device. Creation stops at the first failure, which is
-+ * reported via dev_err(); nothing is returned to the caller.
-+ * Does nothing when vdev is NULL.
-+ */
-+static void v4l2loopback_create_sysfs(struct video_device *vdev)
-+{
-+	int res = 0;
-+
-+/*
-+ * NOTE: this macro expands to two statements and ends in a bare 'break';
-+ * it is only valid directly inside the do { } while (0) loop below, where
-+ * the 'break' abandons the remaining attribute creations.
-+ */
-+#define V4L2_SYSFS_CREATE(x)                                 \
-+	res = device_create_file(&vdev->dev, &dev_attr_##x); \
-+	if (res < 0)                                         \
-+	break
-+	if (!vdev)
-+		return;
-+	do {
-+		V4L2_SYSFS_CREATE(format);
-+		V4L2_SYSFS_CREATE(buffers);
-+		V4L2_SYSFS_CREATE(max_openers);
-+		V4L2_SYSFS_CREATE(state);
-+		/* ... */
-+	} while (0);
-+
-+	/* res holds the result of the last attempted creation */
-+	if (res >= 0)
-+		return;
-+	dev_err(&vdev->dev, "%s error: %d\n", __func__, res);
-+}
-+
-+/* Event APIs */
-+
-+/*
-+ * Private event id: V4L2_EVENT_PRIVATE_START plus a driver-specific
-+ * offset, plus 1 for the client-usage event.
-+ */
-+#define V4L2LOOPBACK_EVENT_BASE (V4L2_EVENT_PRIVATE_START)
-+#define V4L2LOOPBACK_EVENT_OFFSET 0x08E00000
-+#define V4L2_EVENT_PRI_CLIENT_USAGE \
-+	(V4L2LOOPBACK_EVENT_BASE + V4L2LOOPBACK_EVENT_OFFSET + 1)
-+
-+/* Payload of the client-usage event: a single counter. */
-+struct v4l2_event_client_usage {
-+	__u32 count;
-+};
-+
-+/* global module data */
-+/* find a device based on its device-number (e.g. '3' for /dev/video3) */
-+struct v4l2loopback_lookup_cb_data {
-+	int device_nr; /* in: video device number; out: matching idr id */
-+	struct v4l2_loopback_device *device; /* out: matching device */
-+};
-+
-+/*
-+ * idr_for_each() callback: returns 1 (stopping the iteration) when the
-+ * entry's video device number equals the requested one, after storing the
-+ * device and its idr id in the callback data; returns 0 otherwise.
-+ */
-+static int v4l2loopback_lookup_cb(int id, void *ptr, void *data)
-+{
-+	struct v4l2_loopback_device *candidate = ptr;
-+	struct v4l2loopback_lookup_cb_data *query = data;
-+
-+	if (!query || !candidate || !candidate->vdev)
-+		return 0;
-+	if (candidate->vdev->num != query->device_nr)
-+		return 0;
-+
-+	query->device = candidate;
-+	query->device_nr = id;
-+	return 1;
-+}
-+/*
-+ * Look up a loopback device by its video device number.
-+ * On a match, optionally stores the device in *device and returns the
-+ * device's idr id; returns -ENODEV when nothing matches.
-+ */
-+static int v4l2loopback_lookup(int device_nr,
-+			       struct v4l2_loopback_device **device)
-+{
-+	struct v4l2loopback_lookup_cb_data query = {
-+		.device_nr = device_nr,
-+		.device = NULL,
-+	};
-+
-+	if (idr_for_each(&v4l2loopback_index_idr, &v4l2loopback_lookup_cb,
-+			 &query) != 1)
-+		return -ENODEV;
-+
-+	if (device)
-+		*device = query.device;
-+	return query.device_nr;
-+}
-+/*
-+ * Map a sysfs class device to its loopback device.
-+ * Returns NULL when the video device carries no driver data or when no
-+ * device is registered under the stored number; callers already treat a
-+ * NULL result as "no device".
-+ */
-+static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd)
-+{
-+	struct video_device *loopdev = to_video_device(cd);
-+	struct v4l2loopback_private *ptr =
-+		(struct v4l2loopback_private *)video_get_drvdata(loopdev);
-+
-+	/* don't dereference missing driver data */
-+	if (!ptr)
-+		return NULL;
-+
-+	return idr_find(&v4l2loopback_index_idr, ptr->device_nr);
-+}
-+
-+/*
-+ * Map an open file to its loopback device via the device number stored in
-+ * the video device's driver data.
-+ */
-+static struct v4l2_loopback_device *v4l2loopback_getdevice(struct file *f)
-+{
-+	struct v4l2loopback_private *priv = video_drvdata(f);
-+
-+	return idr_find(&v4l2loopback_index_idr, priv->device_nr);
-+}
-+
-+/* forward declarations */
-+static void client_usage_queue_event(struct video_device *vdev);
-+static void init_buffers(struct v4l2_loopback_device *dev);
-+static int allocate_buffers(struct v4l2_loopback_device *dev);
-+static void free_buffers(struct v4l2_loopback_device *dev);
-+static void try_free_buffers(struct v4l2_loopback_device *dev);
-+static int allocate_timeout_image(struct v4l2_loopback_device *dev);
-+static void check_timers(struct v4l2_loopback_device *dev);
-+static const struct v4l2_file_operations v4l2_loopback_fops;
-+static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops;
-+
-+/* Queue helpers */
-+/* the following functions update the QUEUED/DONE flags of a buffer */
-+
-+/* mark a buffer as DONE (and no longer QUEUED) */
-+static inline void set_done(struct v4l2l_buffer *buffer)
-+{
-+	buffer->buffer.flags = (buffer->buffer.flags & ~V4L2_BUF_FLAG_QUEUED) |
-+			       V4L2_BUF_FLAG_DONE;
-+}
-+
-+/* mark a buffer as QUEUED (and no longer DONE) */
-+static inline void set_queued(struct v4l2l_buffer *buffer)
-+{
-+	buffer->buffer.flags = (buffer->buffer.flags & ~V4L2_BUF_FLAG_DONE) |
-+			       V4L2_BUF_FLAG_QUEUED;
-+}
-+
-+/* clear both the QUEUED and the DONE flag of a buffer */
-+static inline void unset_flags(struct v4l2l_buffer *buffer)
-+{
-+	buffer->buffer.flags &=
-+		~(V4L2_BUF_FLAG_QUEUED | V4L2_BUF_FLAG_DONE);
-+}
-+
-+/* V4L2 ioctl caps and params calls */
-+/* reports driver name, card label, bus info and capabilities;
-+ * CAPTURE/OUTPUT are announced either unconditionally (announce_all_caps)
-+ * or according to the current ready_for_capture/ready_for_output state
-+ * called on VIDIOC_QUERYCAP
-+ */
-+static int vidioc_querycap(struct file *file, void *priv,
-+			   struct v4l2_capability *cap)
-+{
-+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
-+	struct v4l2loopback_private *drvdata = video_get_drvdata(dev->vdev);
-+	__u32 caps = V4L2_CAP_STREAMING | V4L2_CAP_READWRITE;
-+
-+	strscpy(cap->driver, "v4l2 loopback", sizeof(cap->driver));
-+	snprintf(cap->card, sizeof(cap->card), "%s", dev->card_label);
-+	snprintf(cap->bus_info, sizeof(cap->bus_info),
-+		 "platform:v4l2loopback-%03d", drvdata->device_nr);
-+
-+	if (dev->announce_all_caps || dev->ready_for_capture)
-+		caps |= V4L2_CAP_VIDEO_CAPTURE;
-+	if (dev->announce_all_caps || dev->ready_for_output)
-+		caps |= V4L2_CAP_VIDEO_OUTPUT;
-+
-+	cap->capabilities = caps;
-+	cap->device_caps = caps;
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
-+	dev->vdev->device_caps = caps;
-+#endif /* >=linux-4.7.0 */
-+
-+	cap->capabilities |= V4L2_CAP_DEVICE_CAPS;
-+
-+	memset(cap->reserved, 0, sizeof(cap->reserved));
-+	return 0;
-+}
-+
-+/*
-+ * Enumerate frame sizes; only index 0 exists.
-+ * With a fixed format the single discrete size is the current one (and
-+ * the pixel format must match). Otherwise any known pixel format is
-+ * accepted and the device's min/max limits are reported, as a discrete
-+ * size when min == max and as a continuous range (step 1) otherwise.
-+ */
-+static int vidioc_enum_framesizes(struct file *file, void *fh,
-+				  struct v4l2_frmsizeenum *argp)
-+{
-+	struct v4l2_loopback_device *dev;
-+
-+	/* there can be only one... */
-+	if (argp->index)
-+		return -EINVAL;
-+
-+	dev = v4l2loopback_getdevice(file);
-+
-+	if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) {
-+		/* format has already been negotiated
-+		 * cannot change during runtime
-+		 */
-+		if (argp->pixel_format != dev->pix_format.pixelformat)
-+			return -EINVAL;
-+
-+		argp->type = V4L2_FRMSIZE_TYPE_DISCRETE;
-+		argp->discrete.width = dev->pix_format.width;
-+		argp->discrete.height = dev->pix_format.height;
-+		return 0;
-+	}
-+
-+	/* if the format has not been negotiated yet, we accept anything */
-+	if (NULL == format_by_fourcc(argp->pixel_format))
-+		return -EINVAL;
-+
-+	if (dev->min_width != dev->max_width ||
-+	    dev->min_height != dev->max_height) {
-+		argp->type = V4L2_FRMSIZE_TYPE_CONTINUOUS;
-+
-+		argp->stepwise.min_width = dev->min_width;
-+		argp->stepwise.min_height = dev->min_height;
-+
-+		argp->stepwise.max_width = dev->max_width;
-+		argp->stepwise.max_height = dev->max_height;
-+
-+		argp->stepwise.step_width = 1;
-+		argp->stepwise.step_height = 1;
-+		return 0;
-+	}
-+
-+	argp->type = V4L2_FRMSIZE_TYPE_DISCRETE;
-+	argp->discrete.width = dev->min_width;
-+	argp->discrete.height = dev->min_height;
-+	return 0;
-+}
-+
-+/* returns the frame interval (fps) for a given format and resolution;
-+ * only index 0 exists: the current interval when the format is fixed,
-+ * a continuous range between the driver's FPS limits otherwise
-+ * called on VIDIOC_ENUM_FRAMEINTERVALS
-+ */
-+static int vidioc_enum_frameintervals(struct file *file, void *fh,
-+				      struct v4l2_frmivalenum *argp)
-+{
-+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
-+
-+	/* there can be only one... */
-+	if (argp->index)
-+		return -EINVAL;
-+
-+	if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) {
-+		/* the query must describe exactly the negotiated format */
-+		if (argp->width != dev->pix_format.width ||
-+		    argp->height != dev->pix_format.height ||
-+		    argp->pixel_format != dev->pix_format.pixelformat)
-+			return -EINVAL;
-+
-+		argp->type = V4L2_FRMIVAL_TYPE_DISCRETE;
-+		argp->discrete = dev->capture_param.timeperframe;
-+		return 0;
-+	}
-+
-+	/* the query must lie within the device limits and use a known format */
-+	if (argp->width < dev->min_width || argp->width > dev->max_width)
-+		return -EINVAL;
-+	if (argp->height < dev->min_height || argp->height > dev->max_height)
-+		return -EINVAL;
-+	if (NULL == format_by_fourcc(argp->pixel_format))
-+		return -EINVAL;
-+
-+	argp->type = V4L2_FRMIVAL_TYPE_CONTINUOUS;
-+	argp->stepwise.min.numerator = 1;
-+	argp->stepwise.min.denominator = V4L2LOOPBACK_FPS_MAX;
-+	argp->stepwise.max.numerator = 1;
-+	argp->stepwise.max.denominator = V4L2LOOPBACK_FPS_MIN;
-+	argp->stepwise.step.numerator = 1;
-+	argp->stepwise.step.denominator = 1;
-+
-+	return 0;
-+}
-+
-+/* ------------------ CAPTURE ----------------------- */
-+
-+/* returns the single capture format, which only exists once the format
-+ * has been fixed; any other index, or an unfixed format, yields -EINVAL
-+ * called on VIDIOC_ENUM_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE
-+ */
-+static int vidioc_enum_fmt_cap(struct file *file, void *fh,
-+			       struct v4l2_fmtdesc *f)
-+{
-+	struct v4l2_loopback_device *dev;
-+	const struct v4l2l_format *known;
-+	__u32 fourcc;
-+	MARK();
-+
-+	dev = v4l2loopback_getdevice(file);
-+
-+	if (f->index || !V4L2LOOPBACK_IS_FIXED_FMT(dev))
-+		return -EINVAL;
-+
-+	/* format has been fixed, so only one single format is supported */
-+	fourcc = dev->pix_format.pixelformat;
-+
-+	known = format_by_fourcc(fourcc);
-+	if (known) {
-+		snprintf(f->description, sizeof(f->description), "%s",
-+			 known->name);
-+	} else {
-+		/* unknown format: describe it by its four characters */
-+		snprintf(f->description, sizeof(f->description),
-+			 "[%c%c%c%c]", (fourcc >> 0) & 0xFF,
-+			 (fourcc >> 8) & 0xFF, (fourcc >> 16) & 0xFF,
-+			 (fourcc >> 24) & 0xFF);
-+	}
-+
-+	f->pixelformat = dev->pix_format.pixelformat;
-+	f->flags = 0;
-+	MARK();
-+	return 0;
-+}
-+
-+/* returns the current video format; fails with -EINVAL while the device
-+ * is neither ready for capture nor ready for output
-+ * called on VIDIOC_G_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE
-+ */
-+static int vidioc_g_fmt_cap(struct file *file, void *priv,
-+			    struct v4l2_format *fmt)
-+{
-+	struct v4l2_loopback_device *dev;
-+	MARK();
-+
-+	dev = v4l2loopback_getdevice(file);
-+	if (!(dev->ready_for_capture || dev->ready_for_output))
-+		return -EINVAL;
-+
-+	fmt->fmt.pix = dev->pix_format;
-+	MARK();
-+	return 0;
-+}
-+
-+/* checks whether it is OK to change to format fmt;
-+ * the actual check is done by inner_try_setfmt; a "busy" result is
-+ * reported as success, since trying a format does not change anything
-+ * called on VIDIOC_TRY_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE
-+ */
-+static int vidioc_try_fmt_cap(struct file *file, void *priv,
-+			      struct v4l2_format *fmt)
-+{
-+	int status;
-+
-+	if (!V4L2_TYPE_IS_CAPTURE(fmt->type))
-+		return -EINVAL;
-+
-+	status = inner_try_setfmt(file, fmt);
-+	return (status == -EBUSY) ? 0 : status;
-+}
-+
-+/* sets a new format from the capture side, if inner_try_setfmt accepts
-+ * it; the (possibly adjusted) format then becomes the device format
-+ * called on VIDIOC_S_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE
-+ */
-+static int vidioc_s_fmt_cap(struct file *file, void *priv,
-+			    struct v4l2_format *fmt)
-+{
-+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
-+	int status;
-+
-+	if (!V4L2_TYPE_IS_CAPTURE(fmt->type))
-+		return -EINVAL;
-+
-+	status = inner_try_setfmt(file, fmt);
-+	if (status)
-+		return status;
-+
-+	dev->pix_format = fmt->fmt.pix;
-+	return 0;
-+}
-+
-+/* ------------------ OUTPUT ----------------------- */
-+
-+/* returns device formats;
-+ * with a fixed format only that single format (index 0) is reported,
-+ * otherwise the entries of the formats[] table are enumerated
-+ * LATER: allow all formats
-+ * called on VIDIOC_ENUM_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT
-+ */
-+static int vidioc_enum_fmt_out(struct file *file, void *fh,
-+			       struct v4l2_fmtdesc *f)
-+{
-+	struct v4l2_loopback_device *dev;
-+	const struct v4l2l_format *fmt;
-+
-+	dev = v4l2loopback_getdevice(file);
-+
-+	if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) {
-+		/* format has been fixed, so only one single format is supported */
-+		const __u32 format = dev->pix_format.pixelformat;
-+
-+		if (f->index)
-+			return -EINVAL;
-+
-+		if ((fmt = format_by_fourcc(format))) {
-+			snprintf(f->description, sizeof(f->description), "%s",
-+				 fmt->name);
-+		} else {
-+			/* unknown format: describe it by its four characters */
-+			snprintf(f->description, sizeof(f->description),
-+				 "[%c%c%c%c]", (format >> 0) & 0xFF,
-+				 (format >> 8) & 0xFF, (format >> 16) & 0xFF,
-+				 (format >> 24) & 0xFF);
-+		}
-+
-+		f->pixelformat = dev->pix_format.pixelformat;
-+	} else {
-+		/* fill in a dummy format */
-+		/* f->index is unsigned, so only the upper bound needs checking */
-+		if (f->index >= FORMATS)
-+			return -EINVAL;
-+
-+		fmt = &formats[f->index];
-+
-+		f->pixelformat = fmt->fourcc;
-+		snprintf(f->description, sizeof(f->description), "%s",
-+			 fmt->name);
-+	}
-+	f->flags = 0;
-+
-+	return 0;
-+}
-+
-+/* returns the current video format */
-+/* NOTE: this is called from the producer
-+ * so if the format has not been negotiated yet,
-+ * it should return ALL of the available formats,
-+ * called on VIDIOC_G_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT
-+ */
-+static int vidioc_g_fmt_out(struct file *file, void *priv,
-+			    struct v4l2_format *fmt)
-+{
-+	struct v4l2_loopback_device *dev;
-+	MARK();
-+
-+	/*
-+	 * LATER: this should return the currently valid format
-+	 * gstreamer doesn't like it, if this returns -EINVAL, as it
-+	 * then concludes that there is _no_ valid format
-+	 * CHECK whether this assumption is wrong,
-+	 * or whether we have to always provide a valid format
-+	 */
-+	dev = v4l2loopback_getdevice(file);
-+	fmt->fmt.pix = dev->pix_format;
-+
-+	return 0;
-+}
-+
-+/* checks whether it is OK to change to format fmt;
-+ * the actual check is done by inner_try_setfmt; a "busy" result is
-+ * reported as success
-+ * called on VIDIOC_TRY_FMT with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT
-+ */
-+static int vidioc_try_fmt_out(struct file *file, void *priv,
-+			      struct v4l2_format *fmt)
-+{
-+	int status;
-+
-+	if (!V4L2_TYPE_IS_OUTPUT(fmt->type))
-+		return -EINVAL;
-+
-+	status = inner_try_setfmt(file, fmt);
-+	return (status == -EBUSY) ? 0 : status;
-+}
-+
-+/* sets a new output format, if possible;
-+ * buffers are allocated here (unless the device is already ready for
-+ * capture) because we do not know if it will be streaming or read/write IO
-+ * called on VIDIOC_S_FMT with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT
-+ */
-+static int vidioc_s_fmt_out(struct file *file, void *priv,
-+			    struct v4l2_format *fmt)
-+{
-+	struct v4l2_loopback_device *dev;
-+	char fourcc[5];
-+	int ret;
-+
-+	fourcc[4] = 0;
-+	if (!V4L2_TYPE_IS_OUTPUT(fmt->type))
-+		return -EINVAL;
-+	dev = v4l2loopback_getdevice(file);
-+
-+	ret = inner_try_setfmt(file, fmt);
-+	if (ret)
-+		return ret;
-+
-+	/* the request was accepted: it becomes the device format */
-+	dev->pix_format = fmt->fmt.pix;
-+	dev->pix_format_has_valid_sizeimage =
-+		v4l2l_pix_format_has_valid_sizeimage(fmt);
-+	dprintk("s_fmt_out(%d) %d...%d\n", ret, dev->ready_for_capture,
-+		dev->pix_format.sizeimage);
-+	dprintk("outFOURCC=%s\n",
-+		fourcc2str(dev->pix_format.pixelformat, fourcc));
-+
-+	if (dev->ready_for_capture)
-+		return ret;
-+
-+	dev->buffer_size = PAGE_ALIGN(dev->pix_format.sizeimage);
-+	// JMZ: TODO get rid of the next line
-+	fmt->fmt.pix.sizeimage = dev->buffer_size;
-+	return allocate_buffers(dev);
-+}
-+
-+/* Uncomment the next line to compile the overlay dummies below. */
-+// #define V4L2L_OVERLAY
-+#ifdef V4L2L_OVERLAY
-+/* ------------------ OVERLAY ----------------------- */
-+/* currently unsupported */
-+/* GStreamer's v4l2sink is buggy, as it requires the overlay to work
-+ * while it should only require it if an overlay is requested;
-+ * once the gstreamer element is fixed, remove the overlay dummies
-+ */
-+#warning OVERLAY dummies
-+/* dummy: accepts the call and leaves fmt untouched */
-+static int vidioc_g_fmt_overlay(struct file *file, void *priv,
-+				struct v4l2_format *fmt)
-+{
-+	return 0;
-+}
-+
-+/* dummy: accepts the call and ignores fmt */
-+static int vidioc_s_fmt_overlay(struct file *file, void *priv,
-+				struct v4l2_format *fmt)
-+{
-+	return 0;
-+}
-+#endif /* V4L2L_OVERLAY */
-+
-+/* ------------------ PARAMs ----------------------- */
-+
-+/* gets the data flow parameters: the device's capture parameters are
-+ * returned regardless of the buffer type of the request
-+ * called on VIDIOC_G_PARM
-+ */
-+static int vidioc_g_parm(struct file *file, void *priv,
-+			 struct v4l2_streamparm *parm)
-+{
-+	struct v4l2_loopback_device *dev;
-+	MARK();
-+
-+	/* do not care about type of opener, hope these enums would always be
-+	 * compatible */
-+	dev = v4l2loopback_getdevice(file);
-+	parm->parm.capture = dev->capture_param;
-+
-+	return 0;
-+}
-+
-+/* sets the data flow parameters; only the frame interval has an effect
-+ * on this driver, and it is handled identically for capture and output
-+ * openers. On success the resulting capture parameters are copied back
-+ * into parm.
-+ * Returns -EINVAL for an unsupported buffer type or an invalid interval.
-+ * called on VIDIOC_S_PARM
-+ */
-+static int vidioc_s_parm(struct file *file, void *priv,
-+			 struct v4l2_streamparm *parm)
-+{
-+	struct v4l2_loopback_device *dev;
-+	int err = 0;
-+	MARK();
-+
-+	dev = v4l2loopback_getdevice(file);
-+	dprintk("vidioc_s_parm called frate=%d/%d\n",
-+		parm->parm.capture.timeperframe.numerator,
-+		parm->parm.capture.timeperframe.denominator);
-+
-+	switch (parm->type) {
-+	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
-+	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
-+		err = set_timeperframe(dev,
-+				       &parm->parm.capture.timeperframe);
-+		if (err < 0)
-+			return err;
-+		break;
-+	default:
-+		/* a bare -1 would reach user space as EPERM */
-+		return -EINVAL;
-+	}
-+
-+	parm->parm.capture = dev->capture_param;
-+	return 0;
-+}
-+
-+#ifdef V4L2LOOPBACK_WITH_STD
-+/* sets a tv standard; nothing is actually stored, the request is only
-+ * checked against V4L2_STD_ALL and *_std is overwritten with V4L2_STD_ALL
-+ * added to support effecttv
-+ * called on VIDIOC_S_STD
-+ */
-+static int vidioc_s_std(struct file *file, void *fh, v4l2_std_id *_std)
-+{
-+	const v4l2_std_id everything = V4L2_STD_ALL;
-+	v4l2_std_id requested = 0;
-+
-+	if (_std) {
-+		requested = *_std;
-+		*_std = everything;
-+	}
-+
-+	/* we support everything in V4L2_STD_ALL, but not more... */
-+	if ((everything & requested) == 0)
-+		return -EINVAL;
-+
-+	return 0;
-+}
-+
-+/* gets a fake video standard (always V4L2_STD_ALL)
-+ * called on VIDIOC_G_STD
-+ */
-+static int vidioc_g_std(struct file *file, void *fh, v4l2_std_id *norm)
-+{
-+	if (norm != NULL)
-+		*norm = V4L2_STD_ALL;
-+
-+	return 0;
-+}
-+/* gets a fake video standard (always V4L2_STD_ALL)
-+ * called on VIDIOC_QUERYSTD
-+ */
-+static int vidioc_querystd(struct file *file, void *fh, v4l2_std_id *norm)
-+{
-+	if (norm != NULL)
-+		*norm = V4L2_STD_ALL;
-+
-+	return 0;
-+}
-+#endif /* V4L2LOOPBACK_WITH_STD */
-+
-+/*
-+ * Apply a driver-specific control value to the device.
-+ * Returns 0 on success and -EINVAL for an unknown control id or a value
-+ * outside the range accepted for that control.
-+ */
-+static int v4l2loopback_set_ctrl(struct v4l2_loopback_device *dev, u32 id,
-+				 s64 val)
-+{
-+	switch (id) {
-+	case CID_KEEP_FORMAT:
-+		/* boolean control */
-+		if (val < 0 || val > 1)
-+			return -EINVAL;
-+		dev->keep_format = val;
-+		try_free_buffers(
-+			dev); /* will only free buffers if !keep_format */
-+		break;
-+	case CID_SUSTAIN_FRAMERATE:
-+		/* boolean control */
-+		if (val < 0 || val > 1)
-+			return -EINVAL;
-+		/* update the flag and re-check the timers under dev->lock */
-+		spin_lock_bh(&dev->lock);
-+		dev->sustain_framerate = val;
-+		check_timers(dev);
-+		spin_unlock_bh(&dev->lock);
-+		break;
-+	case CID_TIMEOUT:
-+		/* val is converted with msecs_to_jiffies(), i.e. given in ms */
-+		if (val < 0 || val > MAX_TIMEOUT)
-+			return -EINVAL;
-+		spin_lock_bh(&dev->lock);
-+		dev->timeout_jiffies = msecs_to_jiffies(val);
-+		check_timers(dev);
-+		spin_unlock_bh(&dev->lock);
-+		/* NOTE(review): the return value is ignored here - confirm
-+		 * that a failed allocation is acceptable */
-+		allocate_timeout_image(dev);
-+		break;
-+	case CID_TIMEOUT_IMAGE_IO:
-+		/* val is not inspected: any write sets the flag */
-+		dev->timeout_image_io = 1;
-+		break;
-+	default:
-+		return -EINVAL;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Control-framework hook: recovers the device that embeds the control
-+ * handler and forwards the control's id and value to
-+ * v4l2loopback_set_ctrl().
-+ */
-+static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl)
-+{
-+	struct v4l2_loopback_device *owner;
-+
-+	owner = container_of(ctrl->handler, struct v4l2_loopback_device,
-+			     ctrl_handler);
-+
-+	return v4l2loopback_set_ctrl(owner, ctrl->id, ctrl->val);
-+}
-+
-+/* returns the set of device outputs; in our case there is only one
-+ * (index 0); -ENOTTY is returned while the device does not act as an
-+ * output and does not announce all capabilities
-+ * called on VIDIOC_ENUMOUTPUT
-+ */
-+static int vidioc_enum_output(struct file *file, void *fh,
-+			      struct v4l2_output *outp)
-+{
-+	const __u32 requested = outp->index;
-+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
-+	MARK();
-+
-+	if (!(dev->announce_all_caps || dev->ready_for_output))
-+		return -ENOTTY;
-+
-+	if (requested != 0)
-+		return -EINVAL;
-+
-+	/* clear all data (including the reserved fields) */
-+	memset(outp, 0, sizeof(*outp));
-+
-+	outp->index = requested;
-+	strscpy(outp->name, "loopback in", sizeof(outp->name));
-+	outp->type = V4L2_OUTPUT_TYPE_ANALOG;
-+	outp->audioset = 0;
-+	outp->modulator = 0;
-+#ifdef V4L2LOOPBACK_WITH_STD
-+	outp->std = V4L2_STD_ALL;
-+#ifdef V4L2_OUT_CAP_STD
-+	outp->capabilities |= V4L2_OUT_CAP_STD;
-+#endif /*  V4L2_OUT_CAP_STD */
-+#endif /* V4L2LOOPBACK_WITH_STD */
-+
-+	return 0;
-+}
-+
-+/* which output is currently active (always 0),
-+ * called on VIDIOC_G_OUTPUT
-+ */
-+static int vidioc_g_output(struct file *file, void *fh, unsigned int *i)
-+{
-+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
-+
-+	if (!(dev->announce_all_caps || dev->ready_for_output))
-+		return -ENOTTY;
-+
-+	if (i != NULL)
-+		*i = 0;
-+	return 0;
-+}
-+
-+/* set output, can make sense if we have more than one video src;
-+ * only output 0 is accepted
-+ * called on VIDIOC_S_OUTPUT
-+ */
-+static int vidioc_s_output(struct file *file, void *fh, unsigned int i)
-+{
-+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
-+
-+	if (!(dev->announce_all_caps || dev->ready_for_output))
-+		return -ENOTTY;
-+
-+	return (i == 0) ? 0 : -EINVAL;
-+}
-+
-+/* returns the set of device inputs; in our case there is only one
-+ * (index 0), but later I may add more; the input reports "no signal"
-+ * while the device is not ready for capture
-+ * called on VIDIOC_ENUMINPUT
-+ */
-+static int vidioc_enum_input(struct file *file, void *fh,
-+			     struct v4l2_input *inp)
-+{
-+	struct v4l2_loopback_device *dev;
-+	const __u32 requested = inp->index;
-+	MARK();
-+
-+	if (requested != 0)
-+		return -EINVAL;
-+
-+	/* clear all data (including the reserved fields) */
-+	memset(inp, 0, sizeof(*inp));
-+
-+	inp->index = requested;
-+	strscpy(inp->name, "loopback", sizeof(inp->name));
-+	inp->type = V4L2_INPUT_TYPE_CAMERA;
-+	inp->audioset = 0;
-+	inp->tuner = 0;
-+	inp->status = 0;
-+
-+#ifdef V4L2LOOPBACK_WITH_STD
-+	inp->std = V4L2_STD_ALL;
-+#ifdef V4L2_IN_CAP_STD
-+	inp->capabilities |= V4L2_IN_CAP_STD;
-+#endif
-+#endif /* V4L2LOOPBACK_WITH_STD */
-+
-+	dev = v4l2loopback_getdevice(file);
-+	if (!dev->ready_for_capture)
-+		inp->status |= V4L2_IN_ST_NO_SIGNAL;
-+
-+	return 0;
-+}
-+
-+/* which input is currently active (always 0),
-+ * called on VIDIOC_G_INPUT
-+ */
-+static int vidioc_g_input(struct file *file, void *fh, unsigned int *i)
-+{
-+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
-+
-+	if (!(dev->announce_all_caps || dev->ready_for_capture))
-+		return -ENOTTY;
-+
-+	if (i != NULL)
-+		*i = 0;
-+	return 0;
-+}
-+
-+/* set input, can make sense if we have more than one video src;
-+ * only input 0 is accepted
-+ * called on VIDIOC_S_INPUT
-+ */
-+static int vidioc_s_input(struct file *file, void *fh, unsigned int i)
-+{
-+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
-+
-+	if (!(dev->announce_all_caps || dev->ready_for_capture))
-+		return -ENOTTY;
-+
-+	return (i != 0) ? -EINVAL : 0;
-+}
-+
-+/* --------------- V4L2 ioctl buffer related calls ----------------- */
-+
-+/* negotiate buffer type
-+ * only mmap streaming supported
-+ * The requested count is clamped to dev->buffers_number; when it is lower
-+ * than dev->used_buffers, the output list and the position-to-index map
-+ * are trimmed and dev->used_buffers is lowered accordingly.
-+ * Returns 0 on success, -EINVAL for a memory type other than MMAP and
-+ * -EBUSY for an output request while the device is not ready for output.
-+ * called on VIDIOC_REQBUFS
-+ */
-+static int vidioc_reqbufs(struct file *file, void *fh,
-+			  struct v4l2_requestbuffers *b)
-+{
-+	struct v4l2_loopback_device *dev;
-+	struct v4l2_loopback_opener *opener;
-+	int i;
-+	MARK();
-+
-+	dev = v4l2loopback_getdevice(file);
-+	opener = fh_to_opener(fh);
-+
-+	dprintk("reqbufs: %d\t%d=%d\n", b->memory, b->count,
-+		dev->buffers_number);
-+
-+	/* openers doing timeout-image I/O always get a fixed count of 2 */
-+	if (opener->timeout_image_io) {
-+		dev->timeout_image_io = 0;
-+		if (b->memory != V4L2_MEMORY_MMAP)
-+			return -EINVAL;
-+		b->count = 2;
-+		return 0;
-+	}
-+
-+	if (V4L2_TYPE_IS_OUTPUT(b->type) && (!dev->ready_for_output)) {
-+		return -EBUSY;
-+	}
-+
-+	init_buffers(dev);
-+	switch (b->memory) {
-+	case V4L2_MEMORY_MMAP:
-+		/* do nothing here, buffers are always allocated */
-+		if (b->count < 1 || dev->buffers_number < 1)
-+			return 0;
-+
-+		/* from here on b->count is at least 1 */
-+		if (b->count > dev->buffers_number)
-+			b->count = dev->buffers_number;
-+
-+		/* make sure that outbufs_list contains buffers from 0 to used_buffers-1
-+		 * actually, it will have been already populated via v4l2_loopback_init()
-+		 * at this point */
-+		if (list_empty(&dev->outbufs_list)) {
-+			for (i = 0; i < dev->used_buffers; ++i)
-+				list_add_tail(&dev->buffers[i].list_head,
-+					      &dev->outbufs_list);
-+		}
-+
-+		/* also, if dev->used_buffers is going to be decreased, we should remove
-+		 * out-of-range buffers from outbufs_list, and fix bufpos2index mapping */
-+		if (b->count < dev->used_buffers) {
-+			struct v4l2l_buffer *pos, *n;
-+
-+			/* the _safe variant is needed as entries are deleted while iterating */
-+			list_for_each_entry_safe(pos, n, &dev->outbufs_list,
-+						 list_head) {
-+				if (pos->buffer.index >= b->count)
-+					list_del(&pos->list_head);
-+			}
-+
-+			/* after we update dev->used_buffers, buffers in outbufs_list will
-+			 * correspond to dev->write_position + [0;b->count-1] range */
-+			i = v4l2l_mod64(dev->write_position, b->count);
-+			list_for_each_entry(pos, &dev->outbufs_list,
-+					    list_head) {
-+				dev->bufpos2index[i % b->count] =
-+					pos->buffer.index;
-+				++i;
-+			}
-+		}
-+
-+		/* used_buffers only ever shrinks here, never grows */
-+		opener->buffers_number = b->count;
-+		if (opener->buffers_number < dev->used_buffers)
-+			dev->used_buffers = opener->buffers_number;
-+		return 0;
-+	default:
-+		return -EINVAL;
-+	}
-+}
-+
-+/* returns the buffer asked for;
-+ * the application may ask for any index up to max_buffers; the index is
-+ * mapped onto our inner buffers by wrapping it at dev->used_buffers,
-+ * while the caller's own type and index are preserved in the reply
-+ * called on VIDIOC_QUERYBUF
-+ */
-+static int vidioc_querybuf(struct file *file, void *fh, struct v4l2_buffer *b)
-+{
-+	enum v4l2_buf_type wanted_type;
-+	int wanted_index;
-+	struct v4l2_loopback_device *dev;
-+	struct v4l2_loopback_opener *opener;
-+	const struct v4l2_buffer *src;
-+
-+	MARK();
-+
-+	wanted_type = b->type;
-+	wanted_index = b->index;
-+	dev = v4l2loopback_getdevice(file);
-+	opener = fh_to_opener(fh);
-+
-+	switch (b->type) {
-+	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
-+	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
-+		break;
-+	default:
-+		return -EINVAL;
-+	}
-+	if (b->index > max_buffers)
-+		return -EINVAL;
-+
-+	if (opener->timeout_image_io)
-+		src = &dev->timeout_image_buffer.buffer;
-+	else
-+		src = &dev->buffers[b->index % dev->used_buffers].buffer;
-+	*b = *src;
-+
-+	/* restore what the caller asked for */
-+	b->type = wanted_type;
-+	b->index = wanted_index;
-+	dprintkrw("buffer type: %d (of %d with size=%ld)\n", b->memory,
-+		  dev->buffers_number, dev->buffer_size);
-+
-+	/*  Hopefully fix 'DQBUF return bad index if queue bigger then 2 for capture'
-+	    https://github.com/umlaeute/v4l2loopback/issues/60 */
-+	b->flags = (b->flags & ~V4L2_BUF_FLAG_DONE) | V4L2_BUF_FLAG_QUEUED;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Bookkeeping after a frame has been written into buf: stops both
-+ * timers, moves buf to the tail of the output list, records its index at
-+ * the current write position, advances the write position, resets the
-+ * re-read counter and re-checks the timers.
-+ */
-+static void buffer_written(struct v4l2_loopback_device *dev,
-+			   struct v4l2l_buffer *buf)
-+{
-+	/* done before any lock is taken */
-+	del_timer_sync(&dev->sustain_timer);
-+	del_timer_sync(&dev->timeout_timer);
-+
-+	/* the output list has its own lock */
-+	spin_lock_bh(&dev->list_lock);
-+	list_move_tail(&buf->list_head, &dev->outbufs_list);
-+	spin_unlock_bh(&dev->list_lock);
-+
-+	spin_lock_bh(&dev->lock);
-+	/* map the (wrapped) write position to the index of this buffer */
-+	dev->bufpos2index[v4l2l_mod64(dev->write_position, dev->used_buffers)] =
-+		buf->buffer.index;
-+	++dev->write_position;
-+	dev->reread_count = 0;
-+
-+	check_timers(dev);
-+	spin_unlock_bh(&dev->lock);
-+}
-+
-+/* put buffer to queue
-+ * For a capture buffer this only marks the inner buffer as queued.
-+ * For an output buffer it takes over timestamp and bytesused, marks the
-+ * inner buffer as done, records it as written and wakes up all readers.
-+ * Returns -EINVAL for an index above max_buffers or any other buffer type.
-+ * called on VIDIOC_QBUF
-+ */
-+static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
-+{
-+	struct v4l2_loopback_device *dev;
-+	struct v4l2_loopback_opener *opener;
-+	struct v4l2l_buffer *b;
-+	int index;
-+
-+	dev = v4l2loopback_getdevice(file);
-+	opener = fh_to_opener(fh);
-+
-+	if (buf->index > max_buffers)
-+		return -EINVAL;
-+	/* nothing to queue for openers doing timeout-image I/O */
-+	if (opener->timeout_image_io)
-+		return 0;
-+
-+	/* wrap the caller's index onto the buffers actually in use */
-+	index = buf->index % dev->used_buffers;
-+	b = &dev->buffers[index];
-+
-+	switch (buf->type) {
-+	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
-+		dprintkrw(
-+			"qbuf(CAPTURE)#%d: buffer#%d @ %p type=%d bytesused=%d length=%d flags=%x field=%d timestamp=%lld.%06ld sequence=%d\n",
-+			index, buf->index, buf, buf->type, buf->bytesused,
-+			buf->length, buf->flags, buf->field,
-+			(long long)buf->timestamp.tv_sec,
-+			(long int)buf->timestamp.tv_usec, buf->sequence);
-+		set_queued(b);
-+		return 0;
-+	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
-+		dprintkrw(
-+			"qbuf(OUTPUT)#%d: buffer#%d @ %p type=%d bytesused=%d length=%d flags=%x field=%d timestamp=%lld.%06ld sequence=%d\n",
-+			index, buf->index, buf, buf->type, buf->bytesused,
-+			buf->length, buf->flags, buf->field,
-+			(long long)buf->timestamp.tv_sec,
-+			(long int)buf->timestamp.tv_usec, buf->sequence);
-+		/* generate a timestamp when the caller supplied none (all zero)
-+		 * and the buffer is not flagged TIMESTAMP_COPY; otherwise copy
-+		 * the caller's timestamp and set that flag */
-+		if ((!(b->buffer.flags & V4L2_BUF_FLAG_TIMESTAMP_COPY)) &&
-+		    (buf->timestamp.tv_sec == 0 && buf->timestamp.tv_usec == 0))
-+			v4l2l_get_timestamp(&b->buffer);
-+		else {
-+			b->buffer.timestamp = buf->timestamp;
-+			b->buffer.flags |= V4L2_BUF_FLAG_TIMESTAMP_COPY;
-+		}
-+		if (dev->pix_format_has_valid_sizeimage) {
-+			/* cap bytesused at sizeimage; warn when it is smaller */
-+			if (buf->bytesused >= dev->pix_format.sizeimage) {
-+				b->buffer.bytesused = dev->pix_format.sizeimage;
-+			} else {
-+/* NOTE: the #if below only selects the function name and its leading
-+ * argument; the remaining arguments are shared by both variants */
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
-+				dev_warn_ratelimited(
-+					&dev->vdev->dev,
-+#else
-+				dprintkrw(
-+#endif
-+					"warning queued output buffer bytesused too small %d < %d\n",
-+					buf->bytesused,
-+					dev->pix_format.sizeimage);
-+				b->buffer.bytesused = buf->bytesused;
-+			}
-+		} else {
-+			b->buffer.bytesused = buf->bytesused;
-+		}
-+
-+		set_done(b);
-+		buffer_written(dev, b);
-+
-+		/*  Hopefully fix 'DQBUF return bad index if queue bigger then 2 for capture'
-+                    https://github.com/umlaeute/v4l2loopback/issues/60 */
-+		buf->flags &= ~V4L2_BUF_FLAG_DONE;
-+		buf->flags |= V4L2_BUF_FLAG_QUEUED;
-+
-+		wake_up_all(&dev->read_event);
-+		return 0;
-+	default:
-+		return -EINVAL;
-+	}
-+}
-+
-+static int can_read(struct v4l2_loopback_device *dev,
-+		    struct v4l2_loopback_opener *opener)
-+{
-+	int ret;
-+
-+	spin_lock_bh(&dev->lock);
-+	check_timers(dev);
-+	ret = dev->write_position > opener->read_position ||
-+	      dev->reread_count > opener->reread_count || dev->timeout_happened;
-+	spin_unlock_bh(&dev->lock);
-+	return ret;
-+}
-+
-+static int get_capture_buffer(struct file *file)
-+{
-+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
-+	struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data);
-+	int pos, ret;
-+	int timeout_happened;
-+
-+	if ((file->f_flags & O_NONBLOCK) &&
-+	    (dev->write_position <= opener->read_position &&
-+	     dev->reread_count <= opener->reread_count &&
-+	     !dev->timeout_happened))
-+		return -EAGAIN;
-+	wait_event_interruptible(dev->read_event, can_read(dev, opener));
-+
-+	spin_lock_bh(&dev->lock);
-+	if (dev->write_position == opener->read_position) {
-+		if (dev->reread_count > opener->reread_count + 2)
-+			opener->reread_count = dev->reread_count - 1;
-+		++opener->reread_count;
-+		pos = v4l2l_mod64(opener->read_position + dev->used_buffers - 1,
-+				  dev->used_buffers);
-+	} else {
-+		opener->reread_count = 0;
-+		if (dev->write_position >
-+		    opener->read_position + dev->used_buffers)
-+			opener->read_position = dev->write_position - 1;
-+		pos = v4l2l_mod64(opener->read_position, dev->used_buffers);
-+		++opener->read_position;
-+	}
-+	timeout_happened = dev->timeout_happened;
-+	dev->timeout_happened = 0;
-+	spin_unlock_bh(&dev->lock);
-+
-+	ret = dev->bufpos2index[pos];
-+	if (timeout_happened) {
-+		if (ret < 0) {
-+			dprintk("trying to return not mapped buf[%d]\n", ret);
-+			return -EFAULT;
-+		}
-+		/* although allocated on-demand, timeout_image is freed only
-+		 * in free_buffers(), so we don't need to worry about it being
-+		 * deallocated suddenly */
-+		memcpy(dev->image + dev->buffers[ret].buffer.m.offset,
-+		       dev->timeout_image, dev->buffer_size);
-+	}
-+	return ret;
-+}
-+
-+/* put buffer to dequeue
-+ * called on VIDIOC_DQBUF
-+ */
-+static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
-+{
-+	struct v4l2_loopback_device *dev;
-+	struct v4l2_loopback_opener *opener;
-+	int index;
-+	struct v4l2l_buffer *b;
-+
-+	dev = v4l2loopback_getdevice(file);
-+	opener = fh_to_opener(fh);
-+	if (opener->timeout_image_io) {
-+		*buf = dev->timeout_image_buffer.buffer;
-+		return 0;
-+	}
-+
-+	switch (buf->type) {
-+	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
-+		index = get_capture_buffer(file);
-+		if (index < 0)
-+			return index;
-+		dprintkrw("capture DQBUF pos: %lld index: %d\n",
-+			  (long long)(opener->read_position - 1), index);
-+		if (!(dev->buffers[index].buffer.flags &
-+		      V4L2_BUF_FLAG_MAPPED)) {
-+			dprintk("trying to return not mapped buf[%d]\n", index);
-+			return -EINVAL;
-+		}
-+		unset_flags(&dev->buffers[index]);
-+		*buf = dev->buffers[index].buffer;
-+		dprintkrw(
-+			"dqbuf(CAPTURE)#%d: buffer#%d @ %p type=%d bytesused=%d length=%d flags=%x field=%d timestamp=%lld.%06ld sequence=%d\n",
-+			index, buf->index, buf, buf->type, buf->bytesused,
-+			buf->length, buf->flags, buf->field,
-+			(long long)buf->timestamp.tv_sec,
-+			(long int)buf->timestamp.tv_usec, buf->sequence);
-+		return 0;
-+	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
-+		spin_lock_bh(&dev->list_lock);
-+
-+		b = list_entry(dev->outbufs_list.prev, struct v4l2l_buffer,
-+			       list_head);
-+		list_move_tail(&b->list_head, &dev->outbufs_list);
-+
-+		spin_unlock_bh(&dev->list_lock);
-+		dprintkrw("output DQBUF index: %d\n", b->buffer.index);
-+		unset_flags(b);
-+		*buf = b->buffer;
-+		buf->type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
-+		dprintkrw(
-+			"dqbuf(OUTPUT)#%d: buffer#%d @ %p type=%d bytesused=%d length=%d flags=%x field=%d timestamp=%lld.%06ld sequence=%d\n",
-+			index, buf->index, buf, buf->type, buf->bytesused,
-+			buf->length, buf->flags, buf->field,
-+			(long long)buf->timestamp.tv_sec,
-+			(long int)buf->timestamp.tv_usec, buf->sequence);
-+		return 0;
-+	default:
-+		return -EINVAL;
-+	}
-+}
-+
-+/* ------------- STREAMING ------------------- */
-+
-+/* start streaming
-+ * called on VIDIOC_STREAMON
-+ */
-+static int vidioc_streamon(struct file *file, void *fh, enum v4l2_buf_type type)
-+{
-+	struct v4l2_loopback_device *dev;
-+	struct v4l2_loopback_opener *opener;
-+	MARK();
-+
-+	dev = v4l2loopback_getdevice(file);
-+	opener = fh_to_opener(fh);
-+
-+	switch (type) {
-+	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
-+		if (!dev->ready_for_capture) {
-+			int ret = allocate_buffers(dev);
-+			if (ret < 0)
-+				return ret;
-+		}
-+		opener->type = WRITER;
-+		dev->ready_for_output = 0;
-+		dev->ready_for_capture++;
-+		return 0;
-+	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
-+		if (!dev->ready_for_capture)
-+			return -EIO;
-+		if (dev->active_readers > 0)
-+			return -EBUSY;
-+		opener->type = READER;
-+		dev->active_readers++;
-+		client_usage_queue_event(dev->vdev);
-+		return 0;
-+	default:
-+		return -EINVAL;
-+	}
-+	return -EINVAL;
-+}
-+
-+/* stop streaming
-+ * called on VIDIOC_STREAMOFF
-+ */
-+static int vidioc_streamoff(struct file *file, void *fh,
-+			    enum v4l2_buf_type type)
-+{
-+	struct v4l2_loopback_device *dev;
-+	struct v4l2_loopback_opener *opener;
-+
-+	MARK();
-+	dprintk("%d\n", type);
-+
-+	dev = v4l2loopback_getdevice(file);
-+	opener = fh_to_opener(fh);
-+	switch (type) {
-+	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
-+		if (dev->ready_for_capture > 0)
-+			dev->ready_for_capture--;
-+		return 0;
-+	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
-+		if (opener->type == READER) {
-+			opener->type = 0;
-+			dev->active_readers--;
-+			client_usage_queue_event(dev->vdev);
-+		}
-+		return 0;
-+	default:
-+		return -EINVAL;
-+	}
-+	return -EINVAL;
-+}
-+
-+#ifdef CONFIG_VIDEO_V4L1_COMPAT
-+static int vidiocgmbuf(struct file *file, void *fh, struct video_mbuf *p)
-+{
-+	struct v4l2_loopback_device *dev;
-+	MARK();
-+
-+	dev = v4l2loopback_getdevice(file);
-+	p->frames = dev->buffers_number;
-+	p->offsets[0] = 0;
-+	p->offsets[1] = 0;
-+	p->size = dev->buffer_size;
-+	return 0;
-+}
-+#endif
-+
-+static void client_usage_queue_event(struct video_device *vdev)
-+{
-+	struct v4l2_event ev;
-+	struct v4l2_loopback_device *dev;
-+
-+	dev = container_of(vdev->v4l2_dev, struct v4l2_loopback_device,
-+			   v4l2_dev);
-+
-+	memset(&ev, 0, sizeof(ev));
-+	ev.type = V4L2_EVENT_PRI_CLIENT_USAGE;
-+	((struct v4l2_event_client_usage *)&ev.u)->count = dev->active_readers;
-+
-+	v4l2_event_queue(vdev, &ev);
-+}
-+
-+static int client_usage_ops_add(struct v4l2_subscribed_event *sev,
-+				unsigned elems)
-+{
-+	if (!(sev->flags & V4L2_EVENT_SUB_FL_SEND_INITIAL))
-+		return 0;
-+
-+	client_usage_queue_event(sev->fh->vdev);
-+	return 0;
-+}
-+
-+static void client_usage_ops_replace(struct v4l2_event *old,
-+				     const struct v4l2_event *new)
-+{
-+	*((struct v4l2_event_client_usage *)&old->u) =
-+		*((struct v4l2_event_client_usage *)&new->u);
-+}
-+
-+static void client_usage_ops_merge(const struct v4l2_event *old,
-+				   struct v4l2_event *new)
-+{
-+	*((struct v4l2_event_client_usage *)&new->u) =
-+		*((struct v4l2_event_client_usage *)&old->u);
-+}
-+
-+const struct v4l2_subscribed_event_ops client_usage_ops = {
-+	.add = client_usage_ops_add,
-+	.replace = client_usage_ops_replace,
-+	.merge = client_usage_ops_merge,
-+};
-+
-+static int vidioc_subscribe_event(struct v4l2_fh *fh,
-+				  const struct v4l2_event_subscription *sub)
-+{
-+	switch (sub->type) {
-+	case V4L2_EVENT_CTRL:
-+		return v4l2_ctrl_subscribe_event(fh, sub);
-+	case V4L2_EVENT_PRI_CLIENT_USAGE:
-+		return v4l2_event_subscribe(fh, sub, 0, &client_usage_ops);
-+	}
-+
-+	return -EINVAL;
-+}
-+
-+/* file operations */
-+static void vm_open(struct vm_area_struct *vma)
-+{
-+	struct v4l2l_buffer *buf;
-+	MARK();
-+
-+	buf = vma->vm_private_data;
-+	buf->use_count++;
-+
-+	buf->buffer.flags |= V4L2_BUF_FLAG_MAPPED;
-+}
-+
-+static void vm_close(struct vm_area_struct *vma)
-+{
-+	struct v4l2l_buffer *buf;
-+	MARK();
-+
-+	buf = vma->vm_private_data;
-+	buf->use_count--;
-+
-+	if (buf->use_count <= 0)
-+		buf->buffer.flags &= ~V4L2_BUF_FLAG_MAPPED;
-+}
-+
-+static struct vm_operations_struct vm_ops = {
-+	.open = vm_open,
-+	.close = vm_close,
-+};
-+
-+static int v4l2_loopback_mmap(struct file *file, struct vm_area_struct *vma)
-+{
-+	u8 *addr;
-+	unsigned long start;
-+	unsigned long size;
-+	struct v4l2_loopback_device *dev;
-+	struct v4l2_loopback_opener *opener;
-+	struct v4l2l_buffer *buffer = NULL;
-+	MARK();
-+
-+	start = (unsigned long)vma->vm_start;
-+	size = (unsigned long)(vma->vm_end - vma->vm_start);
-+
-+	dev = v4l2loopback_getdevice(file);
-+	opener = fh_to_opener(file->private_data);
-+
-+	if (size > dev->buffer_size) {
-+		dprintk("userspace tries to mmap too much, fail\n");
-+		return -EINVAL;
-+	}
-+	if (opener->timeout_image_io) {
-+		/* we are going to map the timeout_image_buffer */
-+		if ((vma->vm_pgoff << PAGE_SHIFT) !=
-+		    dev->buffer_size * MAX_BUFFERS) {
-+			dprintk("invalid mmap offset for timeout_image_io mode\n");
-+			return -EINVAL;
-+		}
-+	} else if ((vma->vm_pgoff << PAGE_SHIFT) >
-+		   dev->buffer_size * (dev->buffers_number - 1)) {
-+		dprintk("userspace tries to mmap too far, fail\n");
-+		return -EINVAL;
-+	}
-+
-+	/* FIXXXXXME: allocation should not happen here! */
-+	if (NULL == dev->image)
-+		if (allocate_buffers(dev) < 0)
-+			return -EINVAL;
-+
-+	if (opener->timeout_image_io) {
-+		buffer = &dev->timeout_image_buffer;
-+		addr = dev->timeout_image;
-+	} else {
-+		int i;
-+		for (i = 0; i < dev->buffers_number; ++i) {
-+			buffer = &dev->buffers[i];
-+			if ((buffer->buffer.m.offset >> PAGE_SHIFT) ==
-+			    vma->vm_pgoff)
-+				break;
-+		}
-+
-+		if (i >= dev->buffers_number)
-+			return -EINVAL;
-+
-+		addr = dev->image + (vma->vm_pgoff << PAGE_SHIFT);
-+	}
-+
-+	while (size > 0) {
-+		struct page *page;
-+
-+		page = vmalloc_to_page(addr);
-+
-+		if (vm_insert_page(vma, start, page) < 0)
-+			return -EAGAIN;
-+
-+		start += PAGE_SIZE;
-+		addr += PAGE_SIZE;
-+		size -= PAGE_SIZE;
-+	}
-+
-+	vma->vm_ops = &vm_ops;
-+	vma->vm_private_data = buffer;
-+
-+	vm_open(vma);
-+
-+	MARK();
-+	return 0;
-+}
-+
-+static unsigned int v4l2_loopback_poll(struct file *file,
-+				       struct poll_table_struct *pts)
-+{
-+	struct v4l2_loopback_opener *opener;
-+	struct v4l2_loopback_device *dev;
-+	__poll_t req_events = poll_requested_events(pts);
-+	int ret_mask = 0;
-+	MARK();
-+
-+	opener = fh_to_opener(file->private_data);
-+	dev = v4l2loopback_getdevice(file);
-+
-+	if (req_events & POLLPRI) {
-+		if (!v4l2_event_pending(&opener->fh))
-+			poll_wait(file, &opener->fh.wait, pts);
-+		if (v4l2_event_pending(&opener->fh)) {
-+			ret_mask |= POLLPRI;
-+			if (!(req_events & DEFAULT_POLLMASK))
-+				return ret_mask;
-+		}
-+	}
-+
-+	switch (opener->type) {
-+	case WRITER:
-+		ret_mask |= POLLOUT | POLLWRNORM;
-+		break;
-+	case READER:
-+		if (!can_read(dev, opener)) {
-+			if (ret_mask)
-+				return ret_mask;
-+			poll_wait(file, &dev->read_event, pts);
-+		}
-+		if (can_read(dev, opener))
-+			ret_mask |= POLLIN | POLLRDNORM;
-+		if (v4l2_event_pending(&opener->fh))
-+			ret_mask |= POLLPRI;
-+		break;
-+	default:
-+		break;
-+	}
-+
-+	MARK();
-+	return ret_mask;
-+}
-+
-+/* do not want to limit device opens, it can be as many readers as user want,
-+ * writers are limited by means of setting writer field */
-+static int v4l2_loopback_open(struct file *file)
-+{
-+	struct v4l2_loopback_device *dev;
-+	struct v4l2_loopback_opener *opener;
-+	MARK();
-+	dev = v4l2loopback_getdevice(file);
-+	if (dev->open_count.counter >= dev->max_openers)
-+		return -EBUSY;
-+	/* kfree on close */
-+	opener = kzalloc(sizeof(*opener), GFP_KERNEL);
-+	if (opener == NULL)
-+		return -ENOMEM;
-+
-+	atomic_inc(&dev->open_count);
-+
-+	opener->timeout_image_io = dev->timeout_image_io;
-+	if (opener->timeout_image_io) {
-+		int r = allocate_timeout_image(dev);
-+
-+		if (r < 0) {
-+			dprintk("timeout image allocation failed\n");
-+
-+			atomic_dec(&dev->open_count);
-+
-+			kfree(opener);
-+			return r;
-+		}
-+	}
-+
-+	v4l2_fh_init(&opener->fh, video_devdata(file));
-+	file->private_data = &opener->fh;
-+
-+	v4l2_fh_add(&opener->fh);
-+	dprintk("opened dev:%p with image:%p\n", dev, dev ? dev->image : NULL);
-+	MARK();
-+	return 0;
-+}
-+
-+static int v4l2_loopback_close(struct file *file)
-+{
-+	struct v4l2_loopback_opener *opener;
-+	struct v4l2_loopback_device *dev;
-+	int is_writer = 0, is_reader = 0;
-+	MARK();
-+
-+	opener = fh_to_opener(file->private_data);
-+	dev = v4l2loopback_getdevice(file);
-+
-+	if (WRITER == opener->type)
-+		is_writer = 1;
-+	if (READER == opener->type)
-+		is_reader = 1;
-+
-+	atomic_dec(&dev->open_count);
-+	if (dev->open_count.counter == 0) {
-+		del_timer_sync(&dev->sustain_timer);
-+		del_timer_sync(&dev->timeout_timer);
-+	}
-+	try_free_buffers(dev);
-+
-+	v4l2_fh_del(&opener->fh);
-+	v4l2_fh_exit(&opener->fh);
-+
-+	kfree(opener);
-+	if (is_writer)
-+		dev->ready_for_output = 1;
-+	if (is_reader) {
-+		dev->active_readers--;
-+		client_usage_queue_event(dev->vdev);
-+	}
-+	MARK();
-+	return 0;
-+}
-+
-+static ssize_t v4l2_loopback_read(struct file *file, char __user *buf,
-+				  size_t count, loff_t *ppos)
-+{
-+	int read_index;
-+	struct v4l2_loopback_device *dev;
-+	struct v4l2_buffer *b;
-+	MARK();
-+
-+	dev = v4l2loopback_getdevice(file);
-+
-+	read_index = get_capture_buffer(file);
-+	if (read_index < 0)
-+		return read_index;
-+	if (count > dev->buffer_size)
-+		count = dev->buffer_size;
-+	b = &dev->buffers[read_index].buffer;
-+	if (count > b->bytesused)
-+		count = b->bytesused;
-+	if (copy_to_user((void *)buf, (void *)(dev->image + b->m.offset),
-+			 count)) {
-+		printk(KERN_ERR
-+		       "v4l2-loopback: failed copy_to_user() in read buf\n");
-+		return -EFAULT;
-+	}
-+	dprintkrw("leave v4l2_loopback_read()\n");
-+	return count;
-+}
-+
-+static ssize_t v4l2_loopback_write(struct file *file, const char __user *buf,
-+				   size_t count, loff_t *ppos)
-+{
-+	struct v4l2_loopback_opener *opener;
-+	struct v4l2_loopback_device *dev;
-+	int write_index;
-+	struct v4l2_buffer *b;
-+	int err = 0;
-+
-+	MARK();
-+
-+	dev = v4l2loopback_getdevice(file);
-+	opener = fh_to_opener(file->private_data);
-+
-+	if (UNNEGOTIATED == opener->type) {
-+		spin_lock(&dev->lock);
-+
-+		if (dev->ready_for_output) {
-+			err = vidioc_streamon(file, file->private_data,
-+					      V4L2_BUF_TYPE_VIDEO_OUTPUT);
-+		}
-+
-+		spin_unlock(&dev->lock);
-+
-+		if (err < 0)
-+			return err;
-+	}
-+
-+	if (WRITER != opener->type)
-+		return -EINVAL;
-+
-+	if (!dev->ready_for_capture) {
-+		int ret = allocate_buffers(dev);
-+		if (ret < 0)
-+			return ret;
-+		dev->ready_for_capture = 1;
-+	}
-+	dprintkrw("v4l2_loopback_write() trying to write %zu bytes\n", count);
-+	if (count > dev->buffer_size)
-+		count = dev->buffer_size;
-+
-+	write_index = v4l2l_mod64(dev->write_position, dev->used_buffers);
-+	b = &dev->buffers[write_index].buffer;
-+
-+	if (copy_from_user((void *)(dev->image + b->m.offset), (void *)buf,
-+			   count)) {
-+		printk(KERN_ERR
-+		       "v4l2-loopback: failed copy_from_user() in write buf, could not write %zu\n",
-+		       count);
-+		return -EFAULT;
-+	}
-+	v4l2l_get_timestamp(b);
-+	b->bytesused = count;
-+	b->sequence = dev->write_position;
-+	buffer_written(dev, &dev->buffers[write_index]);
-+	wake_up_all(&dev->read_event);
-+	dprintkrw("leave v4l2_loopback_write()\n");
-+	return count;
-+}
-+
-+/* init functions */
-+/* frees buffers, if already allocated */
-+static void free_buffers(struct v4l2_loopback_device *dev)
-+{
-+	MARK();
-+	dprintk("freeing image@%p for dev:%p\n", dev ? dev->image : NULL, dev);
-+	if (!dev)
-+		return;
-+	if (dev->image) {
-+		vfree(dev->image);
-+		dev->image = NULL;
-+	}
-+	if (dev->timeout_image) {
-+		vfree(dev->timeout_image);
-+		dev->timeout_image = NULL;
-+	}
-+	dev->imagesize = 0;
-+}
-+/* frees buffers, if they are no longer needed */
-+static void try_free_buffers(struct v4l2_loopback_device *dev)
-+{
-+	MARK();
-+	if (0 == dev->open_count.counter && !dev->keep_format) {
-+		free_buffers(dev);
-+		dev->ready_for_capture = 0;
-+		dev->buffer_size = 0;
-+		dev->write_position = 0;
-+	}
-+}
-+/* allocates buffers, if buffer_size is set */
-+static int allocate_buffers(struct v4l2_loopback_device *dev)
-+{
-+	int err;
-+
-+	MARK();
-+	/* vfree on close file operation in case no open handles left */
-+
-+	if (dev->buffer_size < 1 || dev->buffers_number < 1)
-+		return -EINVAL;
-+
-+	if ((__LONG_MAX__ / dev->buffer_size) < dev->buffers_number)
-+		return -ENOSPC;
-+
-+	if (dev->image) {
-+		dprintk("allocating buffers again: %ld %ld\n",
-+			dev->buffer_size * dev->buffers_number, dev->imagesize);
-+		/* FIXME: prevent double allocation more intelligently! */
-+		if (dev->buffer_size * dev->buffers_number == dev->imagesize)
-+			return 0;
-+
-+		/* check whether the total number of readers/writers is <=1 */
-+		if ((dev->ready_for_capture + dev->active_readers) <= 1)
-+			free_buffers(dev);
-+		else
-+			return -EINVAL;
-+	}
-+
-+	dev->imagesize = (unsigned long)dev->buffer_size *
-+			 (unsigned long)dev->buffers_number;
-+
-+	dprintk("allocating %ld = %ldx%d\n", dev->imagesize, dev->buffer_size,
-+		dev->buffers_number);
-+	err = -ENOMEM;
-+
-+	if (dev->timeout_jiffies > 0) {
-+		err = allocate_timeout_image(dev);
-+		if (err < 0)
-+			goto error;
-+	}
-+
-+	dev->image = vmalloc(dev->imagesize);
-+	if (dev->image == NULL)
-+		goto error;
-+
-+	dprintk("vmallocated %ld bytes\n", dev->imagesize);
-+	MARK();
-+
-+	init_buffers(dev);
-+	return 0;
-+
-+error:
-+	free_buffers(dev);
-+	return err;
-+}
-+
-+/* init inner buffers, they are capture mode and flags are set as
-+ * for capture mod buffers */
-+static void init_buffers(struct v4l2_loopback_device *dev)
-+{
-+	int i;
-+	int buffer_size;
-+	int bytesused;
-+	MARK();
-+
-+	buffer_size = dev->buffer_size;
-+	bytesused = dev->pix_format.sizeimage;
-+	for (i = 0; i < dev->buffers_number; ++i) {
-+		struct v4l2_buffer *b = &dev->buffers[i].buffer;
-+		b->index = i;
-+		b->bytesused = bytesused;
-+		b->length = buffer_size;
-+		b->field = V4L2_FIELD_NONE;
-+		b->flags = 0;
-+		b->m.offset = i * buffer_size;
-+		b->memory = V4L2_MEMORY_MMAP;
-+		b->sequence = 0;
-+		b->timestamp.tv_sec = 0;
-+		b->timestamp.tv_usec = 0;
-+		b->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-+
-+		v4l2l_get_timestamp(b);
-+	}
-+	dev->timeout_image_buffer = dev->buffers[0];
-+	dev->timeout_image_buffer.buffer.m.offset = MAX_BUFFERS * buffer_size;
-+	MARK();
-+}
-+
-+static int allocate_timeout_image(struct v4l2_loopback_device *dev)
-+{
-+	MARK();
-+	if (dev->buffer_size <= 0) {
-+		dev->timeout_image_io = 0;
-+		return -EINVAL;
-+	}
-+
-+	if (dev->timeout_image == NULL) {
-+		dev->timeout_image = vzalloc(dev->buffer_size);
-+		if (dev->timeout_image == NULL) {
-+			dev->timeout_image_io = 0;
-+			return -ENOMEM;
-+		}
-+	}
-+	return 0;
-+}
-+
-+/* fills and register video device */
-+static void init_vdev(struct video_device *vdev, int nr)
-+{
-+	MARK();
-+
-+#ifdef V4L2LOOPBACK_WITH_STD
-+	vdev->tvnorms = V4L2_STD_ALL;
-+#endif /* V4L2LOOPBACK_WITH_STD */
-+
-+	vdev->vfl_type = VFL_TYPE_VIDEO;
-+	vdev->fops = &v4l2_loopback_fops;
-+	vdev->ioctl_ops = &v4l2_loopback_ioctl_ops;
-+	vdev->release = &video_device_release;
-+	vdev->minor = -1;
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
-+	vdev->device_caps = V4L2_CAP_DEVICE_CAPS | V4L2_CAP_VIDEO_CAPTURE |
-+			    V4L2_CAP_VIDEO_OUTPUT | V4L2_CAP_READWRITE |
-+			    V4L2_CAP_STREAMING;
-+#endif
-+
-+	if (debug > 1)
-+		vdev->dev_debug = V4L2_DEV_DEBUG_IOCTL |
-+				  V4L2_DEV_DEBUG_IOCTL_ARG;
-+
-+	vdev->vfl_dir = VFL_DIR_M2M;
-+
-+	MARK();
-+}
-+
-+/* init default capture parameters, only fps may be changed in future */
-+static void init_capture_param(struct v4l2_captureparm *capture_param)
-+{
-+	MARK();
-+	capture_param->capability = 0;
-+	capture_param->capturemode = 0;
-+	capture_param->extendedmode = 0;
-+	capture_param->readbuffers = max_buffers;
-+	capture_param->timeperframe.numerator = 1;
-+	capture_param->timeperframe.denominator = 30;
-+}
-+
-+static void check_timers(struct v4l2_loopback_device *dev)
-+{
-+	if (!dev->ready_for_capture)
-+		return;
-+
-+	if (dev->timeout_jiffies > 0 && !timer_pending(&dev->timeout_timer))
-+		mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies);
-+	if (dev->sustain_framerate && !timer_pending(&dev->sustain_timer))
-+		mod_timer(&dev->sustain_timer,
-+			  jiffies + dev->frame_jiffies * 3 / 2);
-+}
-+#ifdef HAVE_TIMER_SETUP
-+static void sustain_timer_clb(struct timer_list *t)
-+{
-+	struct v4l2_loopback_device *dev = from_timer(dev, t, sustain_timer);
-+#else
-+static void sustain_timer_clb(unsigned long nr)
-+{
-+	struct v4l2_loopback_device *dev =
-+		idr_find(&v4l2loopback_index_idr, nr);
-+#endif
-+	spin_lock(&dev->lock);
-+	if (dev->sustain_framerate) {
-+		dev->reread_count++;
-+		dprintkrw("reread: %lld %d\n", (long long)dev->write_position,
-+			  dev->reread_count);
-+		if (dev->reread_count == 1)
-+			mod_timer(&dev->sustain_timer,
-+				  jiffies + max(1UL, dev->frame_jiffies / 2));
-+		else
-+			mod_timer(&dev->sustain_timer,
-+				  jiffies + dev->frame_jiffies);
-+		wake_up_all(&dev->read_event);
-+	}
-+	spin_unlock(&dev->lock);
-+}
-+#ifdef HAVE_TIMER_SETUP
-+static void timeout_timer_clb(struct timer_list *t)
-+{
-+	struct v4l2_loopback_device *dev = from_timer(dev, t, timeout_timer);
-+#else
-+static void timeout_timer_clb(unsigned long nr)
-+{
-+	struct v4l2_loopback_device *dev =
-+		idr_find(&v4l2loopback_index_idr, nr);
-+#endif
-+	spin_lock(&dev->lock);
-+	if (dev->timeout_jiffies > 0) {
-+		dev->timeout_happened = 1;
-+		mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies);
-+		wake_up_all(&dev->read_event);
-+	}
-+	spin_unlock(&dev->lock);
-+}
-+
-+/* init loopback main structure */
-+#define DEFAULT_FROM_CONF(confmember, default_condition, default_value)        \
-+	((conf) ?                                                              \
-+		 ((conf->confmember default_condition) ? (default_value) :     \
-+							 (conf->confmember)) : \
-+		 default_value)
-+
-+static int v4l2_loopback_add(struct v4l2_loopback_config *conf, int *ret_nr)
-+{
-+	struct v4l2_loopback_device *dev;
-+	struct v4l2_ctrl_handler *hdl;
-+	struct v4l2loopback_private *vdev_priv = NULL;
-+
-+	int err = -ENOMEM;
-+
-+	u32 _width = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH;
-+	u32 _height = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT;
-+
-+	u32 _min_width = DEFAULT_FROM_CONF(min_width,
-+					   < V4L2LOOPBACK_SIZE_MIN_WIDTH,
-+					   V4L2LOOPBACK_SIZE_MIN_WIDTH);
-+	u32 _min_height = DEFAULT_FROM_CONF(min_height,
-+					    < V4L2LOOPBACK_SIZE_MIN_HEIGHT,
-+					    V4L2LOOPBACK_SIZE_MIN_HEIGHT);
-+	u32 _max_width = DEFAULT_FROM_CONF(max_width, < _min_width, max_width);
-+	u32 _max_height =
-+		DEFAULT_FROM_CONF(max_height, < _min_height, max_height);
-+	bool _announce_all_caps = (conf && conf->announce_all_caps >= 0) ?
-+					  (conf->announce_all_caps) :
-+					  V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS;
-+	int _max_buffers = DEFAULT_FROM_CONF(max_buffers, <= 0, max_buffers);
-+	int _max_openers = DEFAULT_FROM_CONF(max_openers, <= 0, max_openers);
-+
-+	int nr = -1;
-+
-+	_announce_all_caps = (!!_announce_all_caps);
-+
-+	if (conf) {
-+		const int output_nr = conf->output_nr;
-+#ifdef SPLIT_DEVICES
-+		const int capture_nr = conf->capture_nr;
-+#else
-+		const int capture_nr = output_nr;
-+#endif
-+		if (capture_nr >= 0 && output_nr == capture_nr) {
-+			nr = output_nr;
-+		} else if (capture_nr < 0 && output_nr < 0) {
-+			nr = -1;
-+		} else if (capture_nr < 0) {
-+			nr = output_nr;
-+		} else if (output_nr < 0) {
-+			nr = capture_nr;
-+		} else {
-+			printk(KERN_ERR
-+			       "split OUTPUT and CAPTURE devices not yet supported.");
-+			printk(KERN_INFO
-+			       "both devices must have the same number (%d != %d).",
-+			       output_nr, capture_nr);
-+			return -EINVAL;
-+		}
-+	}
-+
-+	if (idr_find(&v4l2loopback_index_idr, nr))
-+		return -EEXIST;
-+
-+	dprintk("creating v4l2loopback-device #%d\n", nr);
-+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-+	if (!dev)
-+		return -ENOMEM;
-+
-+	/* allocate id, if @id >= 0, we're requesting that specific id */
-+	if (nr >= 0) {
-+		err = idr_alloc(&v4l2loopback_index_idr, dev, nr, nr + 1,
-+				GFP_KERNEL);
-+		if (err == -ENOSPC)
-+			err = -EEXIST;
-+	} else {
-+		err = idr_alloc(&v4l2loopback_index_idr, dev, 0, 0, GFP_KERNEL);
-+	}
-+	if (err < 0)
-+		goto out_free_dev;
-+	nr = err;
-+	err = -ENOMEM;
-+
-+	if (conf && conf->card_label[0]) {
-+		snprintf(dev->card_label, sizeof(dev->card_label), "%s",
-+			 conf->card_label);
-+	} else {
-+		snprintf(dev->card_label, sizeof(dev->card_label),
-+			 "Dummy video device (0x%04X)", nr);
-+	}
-+	snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name),
-+		 "v4l2loopback-%03d", nr);
-+
-+	err = v4l2_device_register(NULL, &dev->v4l2_dev);
-+	if (err)
-+		goto out_free_idr;
-+	MARK();
-+
-+	dev->vdev = video_device_alloc();
-+	if (dev->vdev == NULL) {
-+		err = -ENOMEM;
-+		goto out_unregister;
-+	}
-+
-+	vdev_priv = kzalloc(sizeof(struct v4l2loopback_private), GFP_KERNEL);
-+	if (vdev_priv == NULL) {
-+		err = -ENOMEM;
-+		goto out_unregister;
-+	}
-+
-+	video_set_drvdata(dev->vdev, vdev_priv);
-+	if (video_get_drvdata(dev->vdev) == NULL) {
-+		err = -ENOMEM;
-+		goto out_unregister;
-+	}
-+
-+	MARK();
-+	snprintf(dev->vdev->name, sizeof(dev->vdev->name), "%s",
-+		 dev->card_label);
-+
-+	vdev_priv->device_nr = nr;
-+
-+	init_vdev(dev->vdev, nr);
-+	dev->vdev->v4l2_dev = &dev->v4l2_dev;
-+	init_capture_param(&dev->capture_param);
-+	err = set_timeperframe(dev, &dev->capture_param.timeperframe);
-+	if (err)
-+		goto out_unregister;
-+	dev->keep_format = 0;
-+	dev->sustain_framerate = 0;
-+
-+	dev->announce_all_caps = _announce_all_caps;
-+	dev->min_width = _min_width;
-+	dev->min_height = _min_height;
-+	dev->max_width = _max_width;
-+	dev->max_height = _max_height;
-+	dev->max_openers = _max_openers;
-+	dev->buffers_number = dev->used_buffers = _max_buffers;
-+
-+	dev->write_position = 0;
-+
-+	MARK();
-+	spin_lock_init(&dev->lock);
-+	spin_lock_init(&dev->list_lock);
-+	INIT_LIST_HEAD(&dev->outbufs_list);
-+	if (list_empty(&dev->outbufs_list)) {
-+		int i;
-+
-+		for (i = 0; i < dev->used_buffers; ++i)
-+			list_add_tail(&dev->buffers[i].list_head,
-+				      &dev->outbufs_list);
-+	}
-+	memset(dev->bufpos2index, 0, sizeof(dev->bufpos2index));
-+	atomic_set(&dev->open_count, 0);
-+	dev->ready_for_capture = 0;
-+	dev->ready_for_output = 1;
-+
-+	dev->buffer_size = 0;
-+	dev->image = NULL;
-+	dev->imagesize = 0;
-+#ifdef HAVE_TIMER_SETUP
-+	timer_setup(&dev->sustain_timer, sustain_timer_clb, 0);
-+	timer_setup(&dev->timeout_timer, timeout_timer_clb, 0);
-+#else
-+	setup_timer(&dev->sustain_timer, sustain_timer_clb, nr);
-+	setup_timer(&dev->timeout_timer, timeout_timer_clb, nr);
-+#endif
-+	dev->reread_count = 0;
-+	dev->timeout_jiffies = 0;
-+	dev->timeout_image = NULL;
-+	dev->timeout_happened = 0;
-+
-+	hdl = &dev->ctrl_handler;
-+	err = v4l2_ctrl_handler_init(hdl, 4);
-+	if (err)
-+		goto out_unregister;
-+	v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_keepformat, NULL);
-+	v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_sustainframerate, NULL);
-+	v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeout, NULL);
-+	v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeoutimageio, NULL);
-+	if (hdl->error) {
-+		err = hdl->error;
-+		goto out_free_handler;
-+	}
-+	dev->v4l2_dev.ctrl_handler = hdl;
-+
-+	err = v4l2_ctrl_handler_setup(hdl);
-+	if (err)
-+		goto out_free_handler;
-+
-+	/* FIXME set buffers to 0 */
-+
-+	/* Set initial format */
-+	if (_width < _min_width)
-+		_width = _min_width;
-+	if (_width > _max_width)
-+		_width = _max_width;
-+	if (_height < _min_height)
-+		_height = _min_height;
-+	if (_height > _max_height)
-+		_height = _max_height;
-+
-+	dev->pix_format.width = _width;
-+	dev->pix_format.height = _height;
-+	dev->pix_format.pixelformat = formats[0].fourcc;
-+	dev->pix_format.colorspace =
-+		V4L2_COLORSPACE_DEFAULT; /* do we need to set this ? */
-+	dev->pix_format.field = V4L2_FIELD_NONE;
-+
-+	dev->buffer_size = PAGE_ALIGN(dev->pix_format.sizeimage);
-+	dprintk("buffer_size = %ld (=%d)\n", dev->buffer_size,
-+		dev->pix_format.sizeimage);
-+
-+	if (dev->buffer_size && ((err = allocate_buffers(dev)) < 0))
-+		goto out_free_handler;
-+
-+	init_waitqueue_head(&dev->read_event);
-+
-+	/* register the device -> it creates /dev/video* */
-+	if (video_register_device(dev->vdev, VFL_TYPE_VIDEO, nr) < 0) {
-+		printk(KERN_ERR
-+		       "v4l2loopback: failed video_register_device()\n");
-+		err = -EFAULT;
-+		goto out_free_device;
-+	}
-+	v4l2loopback_create_sysfs(dev->vdev);
-+
-+	MARK();
-+	if (ret_nr)
-+		*ret_nr = dev->vdev->num;
-+	return 0;
-+
-+out_free_device:
-+	video_device_release(dev->vdev);
-+out_free_handler:
-+	v4l2_ctrl_handler_free(&dev->ctrl_handler);
-+out_unregister:
-+	video_set_drvdata(dev->vdev, NULL);
-+	if (vdev_priv != NULL)
-+		kfree(vdev_priv);
-+	v4l2_device_unregister(&dev->v4l2_dev);
-+out_free_idr:
-+	idr_remove(&v4l2loopback_index_idr, nr);
-+out_free_dev:
-+	kfree(dev);
-+	return err;
-+}
-+
-+static void v4l2_loopback_remove(struct v4l2_loopback_device *dev)
-+{
-+	free_buffers(dev);
-+	v4l2loopback_remove_sysfs(dev->vdev);
-+	kfree(video_get_drvdata(dev->vdev));
-+	video_unregister_device(dev->vdev);
-+	v4l2_device_unregister(&dev->v4l2_dev);
-+	v4l2_ctrl_handler_free(&dev->ctrl_handler);
-+	kfree(dev);
-+}
-+
-+static long v4l2loopback_control_ioctl(struct file *file, unsigned int cmd,
-+				       unsigned long parm)
-+{
-+	struct v4l2_loopback_device *dev;
-+	struct v4l2_loopback_config conf;
-+	struct v4l2_loopback_config *confptr = &conf;
-+	int device_nr, capture_nr, output_nr;
-+	int ret;
-+
-+	ret = mutex_lock_killable(&v4l2loopback_ctl_mutex);
-+	if (ret)
-+		return ret;
-+
-+	ret = -EINVAL;
-+	switch (cmd) {
-+	default:
-+		ret = -ENOSYS;
-+		break;
-+		/* add a v4l2loopback device (pair), based on the user-provided specs */
-+	case V4L2LOOPBACK_CTL_ADD:
-+		if (parm) {
-+			if ((ret = copy_from_user(&conf, (void *)parm,
-+						  sizeof(conf))) < 0)
-+				break;
-+		} else
-+			confptr = NULL;
-+		ret = v4l2_loopback_add(confptr, &device_nr);
-+		if (ret >= 0)
-+			ret = device_nr;
-+		break;
-+		/* remove a v4l2loopback device (both capture and output) */
-+	case V4L2LOOPBACK_CTL_REMOVE:
-+		ret = v4l2loopback_lookup((int)parm, &dev);
-+		if (ret >= 0 && dev) {
-+			int nr = ret;
-+			ret = -EBUSY;
-+			if (dev->open_count.counter > 0)
-+				break;
-+			idr_remove(&v4l2loopback_index_idr, nr);
-+			v4l2_loopback_remove(dev);
-+			ret = 0;
-+		};
-+		break;
-+		/* get information for a loopback device.
-+                 * this is mostly about limits (which cannot be queried directly with  VIDIOC_G_FMT and friends
-+                 */
-+	case V4L2LOOPBACK_CTL_QUERY:
-+		if (!parm)
-+			break;
-+		if ((ret = copy_from_user(&conf, (void *)parm, sizeof(conf))) <
-+		    0)
-+			break;
-+		capture_nr = output_nr = conf.output_nr;
-+#ifdef SPLIT_DEVICES
-+		capture_nr = conf.capture_nr;
-+#endif
-+		device_nr = (output_nr < 0) ? capture_nr : output_nr;
-+		MARK();
-+		/* get the device from either capture_nr or output_nr (whatever is valid) */
-+		if ((ret = v4l2loopback_lookup(device_nr, &dev)) < 0)
-+			break;
-+		MARK();
-+		/* if we got the device from output_nr and there is a valid capture_nr,
-+                 * make sure that both refer to the same device (or bail out)
-+                 */
-+		if ((device_nr != capture_nr) && (capture_nr >= 0) &&
-+		    ((ret = v4l2loopback_lookup(capture_nr, 0)) < 0))
-+			break;
-+		MARK();
-+		/* if otoh, we got the device from capture_nr and there is a valid output_nr,
-+                 * make sure that both refer to the same device (or bail out)
-+                 */
-+		if ((device_nr != output_nr) && (output_nr >= 0) &&
-+		    ((ret = v4l2loopback_lookup(output_nr, 0)) < 0))
-+			break;
-+		MARK();
-+
-+		/* v4l2_loopback_config identified a single device, so fetch the data */
-+		snprintf(conf.card_label, sizeof(conf.card_label), "%s",
-+			 dev->card_label);
-+		MARK();
-+		conf.output_nr = dev->vdev->num;
-+#ifdef SPLIT_DEVICES
-+		conf.capture_nr = dev->vdev->num;
-+#endif
-+		conf.min_width = dev->min_width;
-+		conf.min_height = dev->min_height;
-+		conf.max_width = dev->max_width;
-+		conf.max_height = dev->max_height;
-+		conf.announce_all_caps = dev->announce_all_caps;
-+		conf.max_buffers = dev->buffers_number;
-+		conf.max_openers = dev->max_openers;
-+		conf.debug = debug;
-+		MARK();
-+		if (copy_to_user((void *)parm, &conf, sizeof(conf))) {
-+			ret = -EFAULT;
-+			break;
-+		}
-+		MARK();
-+		ret = 0;
-+		;
-+		break;
-+	}
-+
-+	MARK();
-+	mutex_unlock(&v4l2loopback_ctl_mutex);
-+	MARK();
-+	return ret;
-+}
-+
-+/* LINUX KERNEL */
-+
-+static const struct file_operations v4l2loopback_ctl_fops = {
-+	// clang-format off
-+	.owner		= THIS_MODULE,
-+	.open		= nonseekable_open,
-+	.unlocked_ioctl	= v4l2loopback_control_ioctl,
-+	.compat_ioctl	= v4l2loopback_control_ioctl,
-+	.llseek		= noop_llseek,
-+	// clang-format on
-+};
-+
-+static struct miscdevice v4l2loopback_misc = {
-+	// clang-format off
-+	.minor		= MISC_DYNAMIC_MINOR,
-+	.name		= "v4l2loopback",
-+	.fops		= &v4l2loopback_ctl_fops,
-+	// clang-format on
-+};
-+
-+static const struct v4l2_file_operations v4l2_loopback_fops = {
-+	// clang-format off
-+	.owner		= THIS_MODULE,
-+	.open		= v4l2_loopback_open,
-+	.release	= v4l2_loopback_close,
-+	.read		= v4l2_loopback_read,
-+	.write		= v4l2_loopback_write,
-+	.poll		= v4l2_loopback_poll,
-+	.mmap		= v4l2_loopback_mmap,
-+	.unlocked_ioctl	= video_ioctl2,
-+	// clang-format on
-+};
-+
-+static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops = {
-+	// clang-format off
-+	.vidioc_querycap		= &vidioc_querycap,
-+	.vidioc_enum_framesizes		= &vidioc_enum_framesizes,
-+	.vidioc_enum_frameintervals	= &vidioc_enum_frameintervals,
-+
-+	.vidioc_enum_output		= &vidioc_enum_output,
-+	.vidioc_g_output		= &vidioc_g_output,
-+	.vidioc_s_output		= &vidioc_s_output,
-+
-+	.vidioc_enum_input		= &vidioc_enum_input,
-+	.vidioc_g_input			= &vidioc_g_input,
-+	.vidioc_s_input			= &vidioc_s_input,
-+
-+	.vidioc_enum_fmt_vid_cap	= &vidioc_enum_fmt_cap,
-+	.vidioc_g_fmt_vid_cap		= &vidioc_g_fmt_cap,
-+	.vidioc_s_fmt_vid_cap		= &vidioc_s_fmt_cap,
-+	.vidioc_try_fmt_vid_cap		= &vidioc_try_fmt_cap,
-+
-+	.vidioc_enum_fmt_vid_out	= &vidioc_enum_fmt_out,
-+	.vidioc_s_fmt_vid_out		= &vidioc_s_fmt_out,
-+	.vidioc_g_fmt_vid_out		= &vidioc_g_fmt_out,
-+	.vidioc_try_fmt_vid_out		= &vidioc_try_fmt_out,
-+
-+#ifdef V4L2L_OVERLAY
-+	.vidioc_s_fmt_vid_overlay	= &vidioc_s_fmt_overlay,
-+	.vidioc_g_fmt_vid_overlay	= &vidioc_g_fmt_overlay,
-+#endif
-+
-+#ifdef V4L2LOOPBACK_WITH_STD
-+	.vidioc_s_std			= &vidioc_s_std,
-+	.vidioc_g_std			= &vidioc_g_std,
-+	.vidioc_querystd		= &vidioc_querystd,
-+#endif /* V4L2LOOPBACK_WITH_STD */
-+
-+	.vidioc_g_parm			= &vidioc_g_parm,
-+	.vidioc_s_parm			= &vidioc_s_parm,
-+
-+	.vidioc_reqbufs			= &vidioc_reqbufs,
-+	.vidioc_querybuf		= &vidioc_querybuf,
-+	.vidioc_qbuf			= &vidioc_qbuf,
-+	.vidioc_dqbuf			= &vidioc_dqbuf,
-+
-+	.vidioc_streamon		= &vidioc_streamon,
-+	.vidioc_streamoff		= &vidioc_streamoff,
-+
-+#ifdef CONFIG_VIDEO_V4L1_COMPAT
-+	.vidiocgmbuf			= &vidiocgmbuf,
-+#endif
-+
-+	.vidioc_subscribe_event		= &vidioc_subscribe_event,
-+	.vidioc_unsubscribe_event	= &v4l2_event_unsubscribe,
-+	// clang-format on
-+};
-+
-+static int free_device_cb(int id, void *ptr, void *data)
-+{
-+	struct v4l2_loopback_device *dev = ptr;
-+	v4l2_loopback_remove(dev);
-+	return 0;
-+}
-+static void free_devices(void)
-+{
-+	idr_for_each(&v4l2loopback_index_idr, &free_device_cb, NULL);
-+	idr_destroy(&v4l2loopback_index_idr);
-+}
-+
-+static int __init v4l2loopback_init_module(void)
-+{
-+	const u32 min_width = V4L2LOOPBACK_SIZE_MIN_WIDTH;
-+	const u32 min_height = V4L2LOOPBACK_SIZE_MIN_HEIGHT;
-+	int err;
-+	int i;
-+	MARK();
-+
-+	err = misc_register(&v4l2loopback_misc);
-+	if (err < 0)
-+		return err;
-+
-+	if (devices < 0) {
-+		devices = 1;
-+
-+		/* try guessing the devices from the "video_nr" parameter */
-+		for (i = MAX_DEVICES - 1; i >= 0; i--) {
-+			if (video_nr[i] >= 0) {
-+				devices = i + 1;
-+				break;
-+			}
-+		}
-+	}
-+
-+	if (devices > MAX_DEVICES) {
-+		devices = MAX_DEVICES;
-+		printk(KERN_INFO
-+		       "v4l2loopback: number of initial devices is limited to: %d\n",
-+		       MAX_DEVICES);
-+	}
-+
-+	if (max_buffers > MAX_BUFFERS) {
-+		max_buffers = MAX_BUFFERS;
-+		printk(KERN_INFO
-+		       "v4l2loopback: number of buffers is limited to: %d\n",
-+		       MAX_BUFFERS);
-+	}
-+
-+	if (max_openers < 0) {
-+		printk(KERN_INFO
-+		       "v4l2loopback: allowing %d openers rather than %d\n",
-+		       2, max_openers);
-+		max_openers = 2;
-+	}
-+
-+	if (max_width < min_width) {
-+		max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH;
-+		printk(KERN_INFO "v4l2loopback: using max_width %d\n",
-+		       max_width);
-+	}
-+	if (max_height < min_height) {
-+		max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT;
-+		printk(KERN_INFO "v4l2loopback: using max_height %d\n",
-+		       max_height);
-+	}
-+
-+	for (i = 0; i < devices; i++) {
-+		struct v4l2_loopback_config cfg = {
-+			// clang-format off
-+			.output_nr		= video_nr[i],
-+#ifdef SPLIT_DEVICES
-+			.capture_nr		= video_nr[i],
-+#endif
-+			.min_width		= min_width,
-+			.min_height		= min_height,
-+			.max_width		= max_width,
-+			.max_height		= max_height,
-+			.announce_all_caps	= (!exclusive_caps[i]),
-+			.max_buffers		= max_buffers,
-+			.max_openers		= max_openers,
-+			.debug			= debug,
-+			// clang-format on
-+		};
-+		cfg.card_label[0] = 0;
-+		if (card_label[i])
-+			snprintf(cfg.card_label, sizeof(cfg.card_label), "%s",
-+				 card_label[i]);
-+		err = v4l2_loopback_add(&cfg, 0);
-+		if (err) {
-+			free_devices();
-+			goto error;
-+		}
-+	}
-+
-+	dprintk("module installed\n");
-+
-+	printk(KERN_INFO "v4l2loopback driver version %d.%d.%d%s loaded\n",
-+	       // clang-format off
-+	       (V4L2LOOPBACK_VERSION_CODE >> 16) & 0xff,
-+	       (V4L2LOOPBACK_VERSION_CODE >>  8) & 0xff,
-+	       (V4L2LOOPBACK_VERSION_CODE      ) & 0xff,
-+#ifdef SNAPSHOT_VERSION
-+	       " (" __stringify(SNAPSHOT_VERSION) ")"
-+#else
-+	       ""
-+#endif
-+	       );
-+	// clang-format on
-+
-+	return 0;
-+error:
-+	misc_deregister(&v4l2loopback_misc);
-+	return err;
-+}
-+
-+static void v4l2loopback_cleanup_module(void)
-+{
-+	MARK();
-+	/* unregister the device -> it deletes /dev/video* */
-+	free_devices();
-+	/* and get rid of /dev/v4l2loopback */
-+	misc_deregister(&v4l2loopback_misc);
-+	dprintk("module removed\n");
-+}
-+
-+MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR);
-+
-+module_init(v4l2loopback_init_module);
-+module_exit(v4l2loopback_cleanup_module);
-diff --git a/drivers/media/v4l2-core/v4l2loopback.h b/drivers/media/v4l2-core/v4l2loopback.h
-new file mode 100644
-index 000000000000..1bc7e6b747a4
---- /dev/null
-+++ b/drivers/media/v4l2-core/v4l2loopback.h
-@@ -0,0 +1,98 @@
-+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
-+/*
-+ * v4l2loopback.h
-+ *
-+ * Written by IOhannes m zmölnig, 7/1/20.
-+ *
-+ * Copyright 2020 by IOhannes m zmölnig.  Redistribution of this file is
-+ * permitted under the GNU General Public License.
-+ */
-+#ifndef _V4L2LOOPBACK_H
-+#define _V4L2LOOPBACK_H
-+
-+#define V4L2LOOPBACK_VERSION_MAJOR 0
-+#define V4L2LOOPBACK_VERSION_MINOR 13
-+#define V4L2LOOPBACK_VERSION_BUGFIX 1
-+
-+/* /dev/v4l2loopback interface */
-+
-+struct v4l2_loopback_config {
-+	/**
-+         * the device-number (/dev/video<nr>)
-+         * V4L2LOOPBACK_CTL_ADD:
-+         * setting this to a value<0, will allocate an available one
-+         * if nr>=0 and the device already exists, the ioctl will EEXIST
-+         * if output_nr and capture_nr are the same, only a single device will be created
-+	 * NOTE: currently split-devices (where output_nr and capture_nr differ)
-+	 *   are not implemented yet.
-+	 *   until then, requesting different device-IDs will result in EINVAL.
-+         *
-+         * V4L2LOOPBACK_CTL_QUERY:
-+         * either both output_nr and capture_nr must refer to the same loopback,
-+         * or one (and only one) of them must be -1
-+         *
-+         */
-+	int output_nr;
-+	int unused; /*capture_nr;*/
-+
-+	/**
-+         * a nice name for your device
-+         * if (*card_label)==0, an automatic name is assigned
-+         */
-+	char card_label[32];
-+
-+	/**
-+         * allowed frame size
-+         * if too low, default values are used
-+         */
-+	unsigned int min_width;
-+	unsigned int max_width;
-+	unsigned int min_height;
-+	unsigned int max_height;
-+
-+	/**
-+         * number of buffers to allocate for the queue
-+         * if set to <=0, default values are used
-+         */
-+	int max_buffers;
-+
-+	/**
-+         * how many consumers are allowed to open this device concurrently
-+         * if set to <=0, default values are used
-+         */
-+	int max_openers;
-+
-+	/**
-+         * set the debugging level for this device
-+         */
-+	int debug;
-+
-+	/**
-+         * whether to announce OUTPUT/CAPTURE capabilities exclusively
-+         * for this device or not
-+         * (!exclusive_caps)
-+	 * NOTE: this is going to be removed once separate output/capture
-+	 *       devices are implemented
-+         */
-+	int announce_all_caps;
-+};
-+
-+/* a pointer to a (struct v4l2_loopback_config) that has all values you wish to impose on the
-+ * to-be-created device set.
-+ * if the ptr is NULL, a new device is created with default values at the driver's discretion.
-+ *
-+ * returns the device_nr of the OUTPUT device (which can be used with V4L2LOOPBACK_CTL_QUERY,
-+ * to get more information on the device)
-+ */
-+#define V4L2LOOPBACK_CTL_ADD 0x4C80
-+
-+/* a pointer to a (struct v4l2_loopback_config) that has output_nr and/or capture_nr set
-+ * (the two values must either refer to video-devices associated with the same loopback device
-+ *  or exactly one of them must be <0
-+ */
-+#define V4L2LOOPBACK_CTL_QUERY 0x4C82
-+
-+/* the device-number (either CAPTURE or OUTPUT) associated with the loopback-device */
-+#define V4L2LOOPBACK_CTL_REMOVE 0x4C81
-+
-+#endif /* _V4L2LOOPBACK_H */
-diff --git a/drivers/media/v4l2-core/v4l2loopback_formats.h b/drivers/media/v4l2-core/v4l2loopback_formats.h
-new file mode 100644
-index 000000000000..d855a3796554
---- /dev/null
-+++ b/drivers/media/v4l2-core/v4l2loopback_formats.h
-@@ -0,0 +1,445 @@
-+static const struct v4l2l_format formats[] = {
-+#ifndef V4L2_PIX_FMT_VP9
-+#define V4L2_PIX_FMT_VP9 v4l2_fourcc('V', 'P', '9', '0')
-+#endif
-+#ifndef V4L2_PIX_FMT_HEVC
-+#define V4L2_PIX_FMT_HEVC v4l2_fourcc('H', 'E', 'V', 'C')
-+#endif
-+
-+	/* here come the packed formats */
-+	{
-+		.name = "32 bpp RGB, le",
-+		.fourcc = V4L2_PIX_FMT_BGR32,
-+		.depth = 32,
-+		.flags = 0,
-+	},
-+	{
-+		.name = "32 bpp RGB, be",
-+		.fourcc = V4L2_PIX_FMT_RGB32,
-+		.depth = 32,
-+		.flags = 0,
-+	},
-+	{
-+		.name = "24 bpp RGB, le",
-+		.fourcc = V4L2_PIX_FMT_BGR24,
-+		.depth = 24,
-+		.flags = 0,
-+	},
-+	{
-+		.name = "24 bpp RGB, be",
-+		.fourcc = V4L2_PIX_FMT_RGB24,
-+		.depth = 24,
-+		.flags = 0,
-+	},
-+#ifdef V4L2_PIX_FMT_ABGR32
-+	{
-+		.name = "32 bpp RGBA, le",
-+		.fourcc = V4L2_PIX_FMT_ABGR32,
-+		.depth = 32,
-+		.flags = 0,
-+	},
-+#endif
-+#ifdef V4L2_PIX_FMT_RGBA32
-+	{
-+		.name = "32 bpp RGBA",
-+		.fourcc = V4L2_PIX_FMT_RGBA32,
-+		.depth = 32,
-+		.flags = 0,
-+	},
-+#endif
-+#ifdef V4L2_PIX_FMT_RGB332
-+	{
-+		.name = "8 bpp RGB-3-3-2",
-+		.fourcc = V4L2_PIX_FMT_RGB332,
-+		.depth = 8,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_RGB332 */
-+#ifdef V4L2_PIX_FMT_RGB444
-+	{
-+		.name = "16 bpp RGB (xxxxrrrr ggggbbbb)",
-+		.fourcc = V4L2_PIX_FMT_RGB444,
-+		.depth = 16,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_RGB444 */
-+#ifdef V4L2_PIX_FMT_RGB555
-+	{
-+		.name = "16 bpp RGB-5-5-5",
-+		.fourcc = V4L2_PIX_FMT_RGB555,
-+		.depth = 16,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_RGB555 */
-+#ifdef V4L2_PIX_FMT_RGB565
-+	{
-+		.name = "16 bpp RGB-5-6-5",
-+		.fourcc = V4L2_PIX_FMT_RGB565,
-+		.depth = 16,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_RGB565 */
-+#ifdef V4L2_PIX_FMT_RGB555X
-+	{
-+		.name = "16 bpp RGB-5-5-5 BE",
-+		.fourcc = V4L2_PIX_FMT_RGB555X,
-+		.depth = 16,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_RGB555X */
-+#ifdef V4L2_PIX_FMT_RGB565X
-+	{
-+		.name = "16 bpp RGB-5-6-5 BE",
-+		.fourcc = V4L2_PIX_FMT_RGB565X,
-+		.depth = 16,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_RGB565X */
-+#ifdef V4L2_PIX_FMT_BGR666
-+	{
-+		.name = "18 bpp BGR-6-6-6",
-+		.fourcc = V4L2_PIX_FMT_BGR666,
-+		.depth = 18,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_BGR666 */
-+	{
-+		.name = "4:2:2, packed, YUYV",
-+		.fourcc = V4L2_PIX_FMT_YUYV,
-+		.depth = 16,
-+		.flags = 0,
-+	},
-+	{
-+		.name = "4:2:2, packed, UYVY",
-+		.fourcc = V4L2_PIX_FMT_UYVY,
-+		.depth = 16,
-+		.flags = 0,
-+	},
-+#ifdef V4L2_PIX_FMT_YVYU
-+	{
-+		.name = "4:2:2, packed YVYU",
-+		.fourcc = V4L2_PIX_FMT_YVYU,
-+		.depth = 16,
-+		.flags = 0,
-+	},
-+#endif
-+#ifdef V4L2_PIX_FMT_VYUY
-+	{
-+		.name = "4:2:2, packed VYUY",
-+		.fourcc = V4L2_PIX_FMT_VYUY,
-+		.depth = 16,
-+		.flags = 0,
-+	},
-+#endif
-+	{
-+		.name = "4:2:2, packed YYUV",
-+		.fourcc = V4L2_PIX_FMT_YYUV,
-+		.depth = 16,
-+		.flags = 0,
-+	},
-+	{
-+		.name = "YUV-8-8-8-8",
-+		.fourcc = V4L2_PIX_FMT_YUV32,
-+		.depth = 32,
-+		.flags = 0,
-+	},
-+	{
-+		.name = "8 bpp, Greyscale",
-+		.fourcc = V4L2_PIX_FMT_GREY,
-+		.depth = 8,
-+		.flags = 0,
-+	},
-+#ifdef V4L2_PIX_FMT_Y4
-+	{
-+		.name = "4 bpp Greyscale",
-+		.fourcc = V4L2_PIX_FMT_Y4,
-+		.depth = 4,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_Y4 */
-+#ifdef V4L2_PIX_FMT_Y6
-+	{
-+		.name = "6 bpp Greyscale",
-+		.fourcc = V4L2_PIX_FMT_Y6,
-+		.depth = 6,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_Y6 */
-+#ifdef V4L2_PIX_FMT_Y10
-+	{
-+		.name = "10 bpp Greyscale",
-+		.fourcc = V4L2_PIX_FMT_Y10,
-+		.depth = 10,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_Y10 */
-+#ifdef V4L2_PIX_FMT_Y12
-+	{
-+		.name = "12 bpp Greyscale",
-+		.fourcc = V4L2_PIX_FMT_Y12,
-+		.depth = 12,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_Y12 */
-+	{
-+		.name = "16 bpp, Greyscale",
-+		.fourcc = V4L2_PIX_FMT_Y16,
-+		.depth = 16,
-+		.flags = 0,
-+	},
-+#ifdef V4L2_PIX_FMT_YUV444
-+	{
-+		.name = "16 bpp xxxxyyyy uuuuvvvv",
-+		.fourcc = V4L2_PIX_FMT_YUV444,
-+		.depth = 16,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_YUV444 */
-+#ifdef V4L2_PIX_FMT_YUV555
-+	{
-+		.name = "16 bpp YUV-5-5-5",
-+		.fourcc = V4L2_PIX_FMT_YUV555,
-+		.depth = 16,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_YUV555 */
-+#ifdef V4L2_PIX_FMT_YUV565
-+	{
-+		.name = "16 bpp YUV-5-6-5",
-+		.fourcc = V4L2_PIX_FMT_YUV565,
-+		.depth = 16,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_YUV565 */
-+
-+/* bayer formats */
-+#ifdef V4L2_PIX_FMT_SRGGB8
-+	{
-+		.name = "Bayer RGGB 8bit",
-+		.fourcc = V4L2_PIX_FMT_SRGGB8,
-+		.depth = 8,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_SRGGB8 */
-+#ifdef V4L2_PIX_FMT_SGRBG8
-+	{
-+		.name = "Bayer GRBG 8bit",
-+		.fourcc = V4L2_PIX_FMT_SGRBG8,
-+		.depth = 8,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_SGRBG8 */
-+#ifdef V4L2_PIX_FMT_SGBRG8
-+	{
-+		.name = "Bayer GBRG 8bit",
-+		.fourcc = V4L2_PIX_FMT_SGBRG8,
-+		.depth = 8,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_SGBRG8 */
-+#ifdef V4L2_PIX_FMT_SBGGR8
-+	{
-+		.name = "Bayer BA81 8bit",
-+		.fourcc = V4L2_PIX_FMT_SBGGR8,
-+		.depth = 8,
-+		.flags = 0,
-+	},
-+#endif /* V4L2_PIX_FMT_SBGGR8 */
-+
-+	/* here come the planar formats */
-+	{
-+		.name = "4:1:0, planar, Y-Cr-Cb",
-+		.fourcc = V4L2_PIX_FMT_YVU410,
-+		.depth = 9,
-+		.flags = FORMAT_FLAGS_PLANAR,
-+	},
-+	{
-+		.name = "4:2:0, planar, Y-Cr-Cb",
-+		.fourcc = V4L2_PIX_FMT_YVU420,
-+		.depth = 12,
-+		.flags = FORMAT_FLAGS_PLANAR,
-+	},
-+	{
-+		.name = "4:1:0, planar, Y-Cb-Cr",
-+		.fourcc = V4L2_PIX_FMT_YUV410,
-+		.depth = 9,
-+		.flags = FORMAT_FLAGS_PLANAR,
-+	},
-+	{
-+		.name = "4:2:0, planar, Y-Cb-Cr",
-+		.fourcc = V4L2_PIX_FMT_YUV420,
-+		.depth = 12,
-+		.flags = FORMAT_FLAGS_PLANAR,
-+	},
-+#ifdef V4L2_PIX_FMT_YUV422P
-+	{
-+		.name = "16 bpp YVU422 planar",
-+		.fourcc = V4L2_PIX_FMT_YUV422P,
-+		.depth = 16,
-+		.flags = FORMAT_FLAGS_PLANAR,
-+	},
-+#endif /* V4L2_PIX_FMT_YUV422P */
-+#ifdef V4L2_PIX_FMT_YUV411P
-+	{
-+		.name = "16 bpp YVU411 planar",
-+		.fourcc = V4L2_PIX_FMT_YUV411P,
-+		.depth = 16,
-+		.flags = FORMAT_FLAGS_PLANAR,
-+	},
-+#endif /* V4L2_PIX_FMT_YUV411P */
-+#ifdef V4L2_PIX_FMT_Y41P
-+	{
-+		.name = "12 bpp YUV 4:1:1",
-+		.fourcc = V4L2_PIX_FMT_Y41P,
-+		.depth = 12,
-+		.flags = FORMAT_FLAGS_PLANAR,
-+	},
-+#endif /* V4L2_PIX_FMT_Y41P */
-+#ifdef V4L2_PIX_FMT_NV12
-+	{
-+		.name = "12 bpp Y/CbCr 4:2:0 ",
-+		.fourcc = V4L2_PIX_FMT_NV12,
-+		.depth = 12,
-+		.flags = FORMAT_FLAGS_PLANAR,
-+	},
-+#endif /* V4L2_PIX_FMT_NV12 */
-+
-+/* here come the compressed formats */
-+
-+#ifdef V4L2_PIX_FMT_MJPEG
-+	{
-+		.name = "Motion-JPEG",
-+		.fourcc = V4L2_PIX_FMT_MJPEG,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_MJPEG */
-+#ifdef V4L2_PIX_FMT_JPEG
-+	{
-+		.name = "JFIF JPEG",
-+		.fourcc = V4L2_PIX_FMT_JPEG,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_JPEG */
-+#ifdef V4L2_PIX_FMT_DV
-+	{
-+		.name = "DV1394",
-+		.fourcc = V4L2_PIX_FMT_DV,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_DV */
-+#ifdef V4L2_PIX_FMT_MPEG
-+	{
-+		.name = "MPEG-1/2/4 Multiplexed",
-+		.fourcc = V4L2_PIX_FMT_MPEG,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_MPEG */
-+#ifdef V4L2_PIX_FMT_H264
-+	{
-+		.name = "H264 with start codes",
-+		.fourcc = V4L2_PIX_FMT_H264,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_H264 */
-+#ifdef V4L2_PIX_FMT_H264_NO_SC
-+	{
-+		.name = "H264 without start codes",
-+		.fourcc = V4L2_PIX_FMT_H264_NO_SC,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_H264_NO_SC */
-+#ifdef V4L2_PIX_FMT_H264_MVC
-+	{
-+		.name = "H264 MVC",
-+		.fourcc = V4L2_PIX_FMT_H264_MVC,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_H264_MVC */
-+#ifdef V4L2_PIX_FMT_H263
-+	{
-+		.name = "H263",
-+		.fourcc = V4L2_PIX_FMT_H263,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_H263 */
-+#ifdef V4L2_PIX_FMT_MPEG1
-+	{
-+		.name = "MPEG-1 ES",
-+		.fourcc = V4L2_PIX_FMT_MPEG1,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_MPEG1 */
-+#ifdef V4L2_PIX_FMT_MPEG2
-+	{
-+		.name = "MPEG-2 ES",
-+		.fourcc = V4L2_PIX_FMT_MPEG2,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_MPEG2 */
-+#ifdef V4L2_PIX_FMT_MPEG4
-+	{
-+		.name = "MPEG-4 part 2 ES",
-+		.fourcc = V4L2_PIX_FMT_MPEG4,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_MPEG4 */
-+#ifdef V4L2_PIX_FMT_XVID
-+	{
-+		.name = "Xvid",
-+		.fourcc = V4L2_PIX_FMT_XVID,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_XVID */
-+#ifdef V4L2_PIX_FMT_VC1_ANNEX_G
-+	{
-+		.name = "SMPTE 421M Annex G compliant stream",
-+		.fourcc = V4L2_PIX_FMT_VC1_ANNEX_G,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_VC1_ANNEX_G */
-+#ifdef V4L2_PIX_FMT_VC1_ANNEX_L
-+	{
-+		.name = "SMPTE 421M Annex L compliant stream",
-+		.fourcc = V4L2_PIX_FMT_VC1_ANNEX_L,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_VC1_ANNEX_L */
-+#ifdef V4L2_PIX_FMT_VP8
-+	{
-+		.name = "VP8",
-+		.fourcc = V4L2_PIX_FMT_VP8,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_VP8 */
-+#ifdef V4L2_PIX_FMT_VP9
-+	{
-+		.name = "VP9",
-+		.fourcc = V4L2_PIX_FMT_VP9,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_VP9 */
-+#ifdef V4L2_PIX_FMT_HEVC
-+	{
-+		.name = "HEVC",
-+		.fourcc = V4L2_PIX_FMT_HEVC,
-+		.depth = 32,
-+		.flags = FORMAT_FLAGS_COMPRESSED,
-+	},
-+#endif /* V4L2_PIX_FMT_HEVC */
-+};
-diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile
-index f2b19e6174af..4fef4b174321 100644
---- a/drivers/pci/controller/Makefile
-+++ b/drivers/pci/controller/Makefile
-@@ -1,4 +1,10 @@
- # SPDX-License-Identifier: GPL-2.0
-+ifdef CONFIG_X86_64
-+ifdef CONFIG_SATA_AHCI
-+obj-y += intel-nvme-remap.o
-+endif
-+endif
-+
- obj-$(CONFIG_PCIE_CADENCE) += cadence/
- obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o
- obj-$(CONFIG_PCI_IXP4XX) += pci-ixp4xx.o
-diff --git a/drivers/pci/controller/intel-nvme-remap.c b/drivers/pci/controller/intel-nvme-remap.c
-new file mode 100644
-index 000000000000..e105e6f5cc91
---- /dev/null
-+++ b/drivers/pci/controller/intel-nvme-remap.c
-@@ -0,0 +1,462 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Intel remapped NVMe device support.
-+ *
-+ * Copyright (c) 2019 Endless Mobile, Inc.
-+ * Author: Daniel Drake <drake@endlessm.com>
-+ *
-+ * Some products ship by default with the SATA controller in "RAID" or
-+ * "Intel RST Premium With Intel Optane System Acceleration" mode. Under this
-+ * mode, which we refer to as "remapped NVMe" mode, any installed NVMe
-+ * devices disappear from the PCI bus, and instead their I/O memory becomes
-+ * available within the AHCI device BARs.
-+ *
-+ * This scheme is understood to be a way of avoiding usage of the standard
-+ * Windows NVMe driver under that OS, instead mandating usage of Intel's
-+ * driver instead, which has better power management, and presumably offers
-+ * some RAID/disk-caching solutions too.
-+ *
-+ * Here in this driver, we support the remapped NVMe mode by claiming the
-+ * AHCI device and creating a fake PCIe root port. On the new bus, the
-+ * original AHCI device is exposed with only minor tweaks. Then, fake PCI
-+ * devices corresponding to the remapped NVMe devices are created. The usual
-+ * ahci and nvme drivers are then expected to bind to these devices and
-+ * operate as normal.
-+ *
-+ * The PCI configuration space for the NVMe devices is completely
-+ * unavailable, so we fake a minimal one and hope for the best.
-+ *
-+ * Interrupts are shared between the AHCI and NVMe devices. For simplicity,
-+ * we only support the legacy interrupt here, although MSI support
-+ * could potentially be added later.
-+ */
-+
-+#define MODULE_NAME "intel-nvme-remap"
-+
-+#include <linux/ahci-remap.h>
-+#include <linux/irq.h>
-+#include <linux/kernel.h>
-+#include <linux/module.h>
-+#include <linux/pci.h>
-+
-+#define AHCI_PCI_BAR_STANDARD 5
-+
-+struct nvme_remap_dev {
-+	struct pci_dev		*dev;		/* AHCI device */
-+	struct pci_bus		*bus;		/* our fake PCI bus */
-+	struct pci_sysdata	sysdata;
-+	int			irq_base;	/* our fake interrupts */
-+
-+	/*
-+	 * When we detect an all-ones write to a BAR register, this flag
-+	 * is set, so that we return the BAR size on the next read (a
-+	 * standard PCI behaviour).
-+	 * This includes the assumption that an all-ones BAR write is
-+	 * immediately followed by a read of the same register.
-+	 */
-+	bool			bar_sizing;
-+
-+	/*
-+	 * Resources copied from the AHCI device, to be regarded as
-+	 * resources on our fake bus.
-+	 */
-+	struct resource		ahci_resources[PCI_NUM_RESOURCES];
-+
-+	/* Resources corresponding to the NVMe devices. */
-+	struct resource		remapped_dev_mem[AHCI_MAX_REMAP];
-+
-+	/* Number of remapped NVMe devices found. */
-+	int			num_remapped_devices;
-+};
-+
-+static inline struct nvme_remap_dev *nrdev_from_bus(struct pci_bus *bus)
-+{
-+	return container_of(bus->sysdata, struct nvme_remap_dev, sysdata);
-+}
-+
-+
-+/******** PCI configuration space **********/
-+
-+/*
-+ * Helper macros for tweaking returned contents of PCI configuration space.
-+ *
-+ * value contains len bytes of data read from reg.
-+ * If fixup_reg is included in that range, fix up the contents of that
-+ * register to fixed_value.
-+ */
-+#define NR_FIX8(fixup_reg, fixed_value) do { \
-+		if (reg <= fixup_reg && fixup_reg < reg + len) \
-+			((u8 *) value)[fixup_reg - reg] = (u8) (fixed_value); \
-+	} while (0)
-+
-+#define NR_FIX16(fixup_reg, fixed_value) do { \
-+		NR_FIX8(fixup_reg, fixed_value); \
-+		NR_FIX8(fixup_reg + 1, fixed_value >> 8); \
-+	} while (0)
-+
-+#define NR_FIX24(fixup_reg, fixed_value) do { \
-+		NR_FIX8(fixup_reg, fixed_value); \
-+		NR_FIX8(fixup_reg + 1, fixed_value >> 8); \
-+		NR_FIX8(fixup_reg + 2, fixed_value >> 16); \
-+	} while (0)
-+
-+#define NR_FIX32(fixup_reg, fixed_value) do { \
-+		NR_FIX16(fixup_reg, (u16) fixed_value); \
-+		NR_FIX16(fixup_reg + 2, fixed_value >> 16); \
-+	} while (0)
-+
-+/*
-+ * Read PCI config space of the slot 0 (AHCI) device.
-+ * We pass through the read request to the underlying device, but
-+ * tweak the results in some cases.
-+ */
-+static int nvme_remap_pci_read_slot0(struct pci_bus *bus, int reg,
-+				     int len, u32 *value)
-+{
-+	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
-+	struct pci_bus *ahci_dev_bus = nrdev->dev->bus;
-+	int ret;
-+
-+	ret = ahci_dev_bus->ops->read(ahci_dev_bus, nrdev->dev->devfn,
-+				      reg, len, value);
-+	if (ret)
-+		return ret;
-+
-+	/*
-+	 * Adjust the device class, to prevent this driver from attempting to
-+	 * additionally probe the device we're simulating here.
-+	 */
-+	NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_SATA_AHCI);
-+
-+	/*
-+	 * Unset interrupt pin, otherwise ACPI tries to find routing
-+	 * info for our virtual IRQ, fails, and complains.
-+	 */
-+	NR_FIX8(PCI_INTERRUPT_PIN, 0);
-+
-+	/*
-+	 * Truncate the AHCI BAR to not include the region that covers the
-+	 * hidden devices. This will cause the ahci driver to successfully
-+	 * probe th new device (instead of handing it over to this driver).
-+	 */
-+	if (nrdev->bar_sizing) {
-+		NR_FIX32(PCI_BASE_ADDRESS_5, ~(SZ_16K - 1));
-+		nrdev->bar_sizing = false;
-+	}
-+
-+	return PCIBIOS_SUCCESSFUL;
-+}
-+
-+/*
-+ * Read PCI config space of a remapped device.
-+ * Since the original PCI config space is inaccessible, we provide a minimal,
-+ * fake config space instead.
-+ */
-+static int nvme_remap_pci_read_remapped(struct pci_bus *bus, unsigned int port,
-+					int reg, int len, u32 *value)
-+{
-+	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
-+	struct resource *remapped_mem;
-+
-+	if (port > nrdev->num_remapped_devices)
-+		return PCIBIOS_DEVICE_NOT_FOUND;
-+
-+	*value = 0;
-+	remapped_mem = &nrdev->remapped_dev_mem[port - 1];
-+
-+	/* Set a Vendor ID, otherwise Linux assumes no device is present */
-+	NR_FIX16(PCI_VENDOR_ID, PCI_VENDOR_ID_INTEL);
-+
-+	/* Always appear on & bus mastering */
-+	NR_FIX16(PCI_COMMAND, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
-+
-+	/* Set class so that nvme driver probes us */
-+	NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_EXPRESS);
-+
-+	if (nrdev->bar_sizing) {
-+		NR_FIX32(PCI_BASE_ADDRESS_0,
-+			 ~(resource_size(remapped_mem) - 1));
-+		nrdev->bar_sizing = false;
-+	} else {
-+		resource_size_t mem_start = remapped_mem->start;
-+
-+		mem_start |= PCI_BASE_ADDRESS_MEM_TYPE_64;
-+		NR_FIX32(PCI_BASE_ADDRESS_0, mem_start);
-+		mem_start >>= 32;
-+		NR_FIX32(PCI_BASE_ADDRESS_1, mem_start);
-+	}
-+
-+	return PCIBIOS_SUCCESSFUL;
-+}
-+
-+/* Read PCI configuration space. */
-+static int nvme_remap_pci_read(struct pci_bus *bus, unsigned int devfn,
-+			       int reg, int len, u32 *value)
-+{
-+	if (PCI_SLOT(devfn) == 0)
-+		return nvme_remap_pci_read_slot0(bus, reg, len, value);
-+	else
-+		return nvme_remap_pci_read_remapped(bus, PCI_SLOT(devfn),
-+						    reg, len, value);
-+}
-+
-+/*
-+ * Write PCI config space of the slot 0 (AHCI) device.
-+ * Apart from the special case of BAR sizing, we disable all writes.
-+ * Otherwise, the ahci driver could make changes (e.g. unset PCI bus master)
-+ * that would affect the operation of the NVMe devices.
-+ */
-+static int nvme_remap_pci_write_slot0(struct pci_bus *bus, int reg,
-+				      int len, u32 value)
-+{
-+	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
-+	struct pci_bus *ahci_dev_bus = nrdev->dev->bus;
-+
-+	if (reg >= PCI_BASE_ADDRESS_0 && reg <= PCI_BASE_ADDRESS_5) {
-+		/*
-+		 * Writing all-ones to a BAR means that the size of the
-+		 * memory region is being checked. Flag this so that we can
-+		 * reply with an appropriate size on the next read.
-+		 */
-+		if (value == ~0)
-+			nrdev->bar_sizing = true;
-+
-+		return ahci_dev_bus->ops->write(ahci_dev_bus,
-+						nrdev->dev->devfn,
-+						reg, len, value);
-+	}
-+
-+	return PCIBIOS_SET_FAILED;
-+}
-+
-+/*
-+ * Write PCI config space of a remapped device.
-+ * Since the original PCI config space is inaccessible, we reject all
-+ * writes, except for the special case of BAR probing.
-+ */
-+static int nvme_remap_pci_write_remapped(struct pci_bus *bus,
-+					 unsigned int port,
-+					 int reg, int len, u32 value)
-+{
-+	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
-+
-+	if (port > nrdev->num_remapped_devices)
-+		return PCIBIOS_DEVICE_NOT_FOUND;
-+
-+	/*
-+	 * Writing all-ones to a BAR means that the size of the memory
-+	 * region is being checked. Flag this so that we can reply with
-+	 * an appropriate size on the next read.
-+	 */
-+	if (value == ~0 && reg >= PCI_BASE_ADDRESS_0
-+			&& reg <= PCI_BASE_ADDRESS_5) {
-+		nrdev->bar_sizing = true;
-+		return PCIBIOS_SUCCESSFUL;
-+	}
-+
-+	return PCIBIOS_SET_FAILED;
-+}
-+
-+/* Write PCI configuration space. */
-+static int nvme_remap_pci_write(struct pci_bus *bus, unsigned int devfn,
-+				int reg, int len, u32 value)
-+{
-+	if (PCI_SLOT(devfn) == 0)
-+		return nvme_remap_pci_write_slot0(bus, reg, len, value);
-+	else
-+		return nvme_remap_pci_write_remapped(bus, PCI_SLOT(devfn),
-+						     reg, len, value);
-+}
-+
-+static struct pci_ops nvme_remap_pci_ops = {
-+	.read	= nvme_remap_pci_read,
-+	.write	= nvme_remap_pci_write,
-+};
-+
-+
-+/******** Initialization & exit **********/
-+
-+/*
-+ * Find a PCI domain ID to use for our fake bus.
-+ * Start at 0x10000 to not clash with ACPI _SEG domains (16 bits).
-+ */
-+static int find_free_domain(void)
-+{
-+	int domain = 0xffff;
-+	struct pci_bus *bus = NULL;
-+
-+	while ((bus = pci_find_next_bus(bus)) != NULL)
-+		domain = max_t(int, domain, pci_domain_nr(bus));
-+
-+	return domain + 1;
-+}
-+
-+static int find_remapped_devices(struct nvme_remap_dev *nrdev,
-+				 struct list_head *resources)
-+{
-+	void __iomem *mmio;
-+	int i, count = 0;
-+	u32 cap;
-+
-+	mmio = pcim_iomap(nrdev->dev, AHCI_PCI_BAR_STANDARD,
-+			  pci_resource_len(nrdev->dev,
-+					   AHCI_PCI_BAR_STANDARD));
-+	if (!mmio)
-+		return -ENODEV;
-+
-+	/* Check if this device might have remapped nvme devices. */
-+	if (pci_resource_len(nrdev->dev, AHCI_PCI_BAR_STANDARD) < SZ_512K ||
-+	    !(readl(mmio + AHCI_VSCAP) & 1))
-+		return -ENODEV;
-+
-+	cap = readq(mmio + AHCI_REMAP_CAP);
-+	for (i = AHCI_MAX_REMAP-1; i >= 0; i--) {
-+		struct resource *remapped_mem;
-+
-+		if ((cap & (1 << i)) == 0)
-+			continue;
-+		if (readl(mmio + ahci_remap_dcc(i))
-+				!= PCI_CLASS_STORAGE_EXPRESS)
-+			continue;
-+
-+		/* We've found a remapped device */
-+		remapped_mem = &nrdev->remapped_dev_mem[count++];
-+		remapped_mem->start =
-+			pci_resource_start(nrdev->dev, AHCI_PCI_BAR_STANDARD)
-+			+ ahci_remap_base(i);
-+		remapped_mem->end = remapped_mem->start
-+			+ AHCI_REMAP_N_SIZE - 1;
-+		remapped_mem->flags = IORESOURCE_MEM | IORESOURCE_PCI_FIXED;
-+		pci_add_resource(resources, remapped_mem);
-+	}
-+
-+	pcim_iounmap(nrdev->dev, mmio);
-+
-+	if (count == 0)
-+		return -ENODEV;
-+
-+	nrdev->num_remapped_devices = count;
-+	dev_info(&nrdev->dev->dev, "Found %d remapped NVMe devices\n",
-+		 nrdev->num_remapped_devices);
-+	return 0;
-+}
-+
-+static void nvme_remap_remove_root_bus(void *data)
-+{
-+	struct pci_bus *bus = data;
-+
-+	pci_stop_root_bus(bus);
-+	pci_remove_root_bus(bus);
-+}
-+
-+static int nvme_remap_probe(struct pci_dev *dev,
-+			    const struct pci_device_id *id)
-+{
-+	struct nvme_remap_dev *nrdev;
-+	LIST_HEAD(resources);
-+	int i;
-+	int ret;
-+	struct pci_dev *child;
-+
-+	nrdev = devm_kzalloc(&dev->dev, sizeof(*nrdev), GFP_KERNEL);
-+	nrdev->sysdata.domain = find_free_domain();
-+	nrdev->sysdata.nvme_remap_dev = dev;
-+	nrdev->dev = dev;
-+	pci_set_drvdata(dev, nrdev);
-+
-+	ret = pcim_enable_device(dev);
-+	if (ret < 0)
-+		return ret;
-+
-+	pci_set_master(dev);
-+
-+	ret = find_remapped_devices(nrdev, &resources);
-+	if (ret)
-+		return ret;
-+
-+	/* Add resources from the original AHCI device */
-+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-+		struct resource *res = &dev->resource[i];
-+
-+		if (res->start) {
-+			struct resource *nr_res = &nrdev->ahci_resources[i];
-+
-+			nr_res->start = res->start;
-+			nr_res->end = res->end;
-+			nr_res->flags = res->flags;
-+			pci_add_resource(&resources, nr_res);
-+		}
-+	}
-+
-+	/* Create virtual interrupts */
-+	nrdev->irq_base = devm_irq_alloc_descs(&dev->dev, -1, 0,
-+					       nrdev->num_remapped_devices + 1,
-+					       0);
-+	if (nrdev->irq_base < 0)
-+		return nrdev->irq_base;
-+
-+	/* Create and populate PCI bus */
-+	nrdev->bus = pci_create_root_bus(&dev->dev, 0, &nvme_remap_pci_ops,
-+					 &nrdev->sysdata, &resources);
-+	if (!nrdev->bus)
-+		return -ENODEV;
-+
-+	if (devm_add_action_or_reset(&dev->dev, nvme_remap_remove_root_bus,
-+				     nrdev->bus))
-+		return -ENOMEM;
-+
-+	/* We don't support sharing MSI interrupts between these devices */
-+	nrdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
-+
-+	pci_scan_child_bus(nrdev->bus);
-+
-+	list_for_each_entry(child, &nrdev->bus->devices, bus_list) {
-+		/*
-+		 * Prevent PCI core from trying to move memory BARs around.
-+		 * The hidden NVMe devices are at fixed locations.
-+		 */
-+		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-+			struct resource *res = &child->resource[i];
-+
-+			if (res->flags & IORESOURCE_MEM)
-+				res->flags |= IORESOURCE_PCI_FIXED;
-+		}
-+
-+		/* Share the legacy IRQ between all devices */
-+		child->irq = dev->irq;
-+	}
-+
-+	pci_assign_unassigned_bus_resources(nrdev->bus);
-+	pci_bus_add_devices(nrdev->bus);
-+
-+	return 0;
-+}
-+
-+static const struct pci_device_id nvme_remap_ids[] = {
-+	/*
-+	 * Match all Intel RAID controllers.
-+	 *
-+	 * There's overlap here with the set of devices detected by the ahci
-+	 * driver, but ahci will only successfully probe when there
-+	 * *aren't* any remapped NVMe devices, and this driver will only
-+	 * successfully probe when there *are* remapped NVMe devices that
-+	 * need handling.
-+	 */
-+	{
-+		PCI_VDEVICE(INTEL, PCI_ANY_ID),
-+		.class = PCI_CLASS_STORAGE_RAID << 8,
-+		.class_mask = 0xffffff00,
-+	},
-+	{0,}
-+};
-+MODULE_DEVICE_TABLE(pci, nvme_remap_ids);
-+
-+static struct pci_driver nvme_remap_drv = {
-+	.name		= MODULE_NAME,
-+	.id_table	= nvme_remap_ids,
-+	.probe		= nvme_remap_probe,
-+};
-+module_pci_driver(nvme_remap_drv);
-+
-+MODULE_AUTHOR("Daniel Drake <drake@endlessm.com>");
-+MODULE_LICENSE("GPL v2");
-diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
-index 568410e64ce6..192d0557fb05 100644
---- a/drivers/pci/quirks.c
-+++ b/drivers/pci/quirks.c
-@@ -3732,6 +3732,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
- 	dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
- }
- 
-+static bool acs_on_downstream;
-+static bool acs_on_multifunction;
-+
-+#define NUM_ACS_IDS 16
-+struct acs_on_id {
-+	unsigned short vendor;
-+	unsigned short device;
-+};
-+static struct acs_on_id acs_on_ids[NUM_ACS_IDS];
-+static u8 max_acs_id;
-+
-+static __init int pcie_acs_override_setup(char *p)
-+{
-+	if (!p)
-+		return -EINVAL;
-+
-+	while (*p) {
-+		if (!strncmp(p, "downstream", 10))
-+			acs_on_downstream = true;
-+		if (!strncmp(p, "multifunction", 13))
-+			acs_on_multifunction = true;
-+		if (!strncmp(p, "id:", 3)) {
-+			char opt[5];
-+			int ret;
-+			long val;
-+
-+			if (max_acs_id >= NUM_ACS_IDS - 1) {
-+				pr_warn("Out of PCIe ACS override slots (%d)\n",
-+						NUM_ACS_IDS);
-+				goto next;
-+			}
-+
-+			p += 3;
-+			snprintf(opt, 5, "%s", p);
-+			ret = kstrtol(opt, 16, &val);
-+			if (ret) {
-+				pr_warn("PCIe ACS ID parse error %d\n", ret);
-+				goto next;
-+			}
-+			acs_on_ids[max_acs_id].vendor = val;
-+
-+			p += strcspn(p, ":");
-+			if (*p != ':') {
-+				pr_warn("PCIe ACS invalid ID\n");
-+				goto next;
-+			}
-+
-+			p++;
-+			snprintf(opt, 5, "%s", p);
-+			ret = kstrtol(opt, 16, &val);
-+			if (ret) {
-+				pr_warn("PCIe ACS ID parse error %d\n", ret);
-+				goto next;
-+			}
-+			acs_on_ids[max_acs_id].device = val;
-+			max_acs_id++;
-+		}
-+next:
-+		p += strcspn(p, ",");
-+		if (*p == ',')
-+			p++;
-+	}
-+
-+	if (acs_on_downstream || acs_on_multifunction || max_acs_id)
-+		pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n");
-+
-+	return 0;
-+}
-+early_param("pcie_acs_override", pcie_acs_override_setup);
-+
-+static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags)
-+{
-+	int i;
-+
-+	/* Never override ACS for legacy devices or devices with ACS caps */
-+	if (!pci_is_pcie(dev) ||
-+		pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS))
-+			return -ENOTTY;
-+
-+	for (i = 0; i < max_acs_id; i++)
-+		if (acs_on_ids[i].vendor == dev->vendor &&
-+			acs_on_ids[i].device == dev->device)
-+				return 1;
-+
-+	switch (pci_pcie_type(dev)) {
-+	case PCI_EXP_TYPE_DOWNSTREAM:
-+	case PCI_EXP_TYPE_ROOT_PORT:
-+		if (acs_on_downstream)
-+			return 1;
-+		break;
-+	case PCI_EXP_TYPE_ENDPOINT:
-+	case PCI_EXP_TYPE_UPSTREAM:
-+	case PCI_EXP_TYPE_LEG_END:
-+	case PCI_EXP_TYPE_RC_END:
-+		if (acs_on_multifunction && dev->multifunction)
-+			return 1;
-+	}
-+
-+	return -ENOTTY;
-+}
- /*
-  * Some NVIDIA GPU devices do not work with bus reset, SBR needs to be
-  * prevented for those affected devices.
-@@ -5143,6 +5243,7 @@ static const struct pci_dev_acs_enabled {
- 	{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
- 	/* Wangxun nics */
- 	{ PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs },
-+	{ PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides },
- 	{ 0 }
- };
- 
-diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
-index a0a026d2d244..8bece21a8998 100644
---- a/include/linux/pagemap.h
-+++ b/include/linux/pagemap.h
-@@ -1281,7 +1281,7 @@ struct readahead_control {
- 		._index = i,						\
- 	}
- 
--#define VM_READAHEAD_PAGES	(SZ_128K / PAGE_SIZE)
-+#define VM_READAHEAD_PAGES	(SZ_8M / PAGE_SIZE)
- 
- void page_cache_ra_unbounded(struct readahead_control *,
- 		unsigned long nr_to_read, unsigned long lookahead_count);
-diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
-index 6030a8235617..60b7fe5fa74a 100644
---- a/include/linux/user_namespace.h
-+++ b/include/linux/user_namespace.h
-@@ -156,6 +156,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns,
- 
- #ifdef CONFIG_USER_NS
- 
-+extern int unprivileged_userns_clone;
-+
- static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
- {
- 	if (ns)
-@@ -189,6 +191,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns);
- struct ns_common *ns_get_owner(struct ns_common *ns);
- #else
- 
-+#define unprivileged_userns_clone 0
-+
- static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
- {
- 	return &init_user_ns;
-diff --git a/init/Kconfig b/init/Kconfig
-index febdea2afc3b..3ba6142f2f42 100644
---- a/init/Kconfig
-+++ b/init/Kconfig
-@@ -132,6 +132,10 @@ config THREAD_INFO_IN_TASK
- 
- menu "General setup"
- 
-+config CACHY
-+    bool "Some kernel tweaks by CachyOS"
-+    default y
-+
- config BROKEN
- 	bool
- 
-@@ -1251,6 +1255,22 @@ config USER_NS
- 
- 	  If unsure, say N.
- 
-+config USER_NS_UNPRIVILEGED
-+	bool "Allow unprivileged users to create namespaces"
-+	default y
-+	depends on USER_NS
-+	help
-+	  When disabled, unprivileged users will not be able to create
-+	  new namespaces. Allowing users to create their own namespaces
-+	  has been part of several recent local privilege escalation
-+	  exploits, so if you need user namespaces but are
-+	  paranoid^Wsecurity-conscious you want to disable this.
-+
-+	  This setting can be overridden at runtime via the
-+	  kernel.unprivileged_userns_clone sysctl.
-+
-+	  If unsure, say Y.
-+
- config PID_NS
- 	bool "PID Namespaces"
- 	default y
-@@ -1393,6 +1413,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE
- 	  with the "-O2" compiler flag for best performance and most
- 	  helpful compile-time warnings.
- 
-+config CC_OPTIMIZE_FOR_PERFORMANCE_O3
-+	bool "Optimize more for performance (-O3)"
-+	help
-+	  Choosing this option will pass "-O3" to your compiler to optimize
-+	  the kernel yet more for performance.
-+
- config CC_OPTIMIZE_FOR_SIZE
- 	bool "Optimize for size (-Os)"
- 	help
-diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
-index 38ef6d06888e..0f78364efd4f 100644
---- a/kernel/Kconfig.hz
-+++ b/kernel/Kconfig.hz
-@@ -40,6 +40,27 @@ choice
- 	 on SMP and NUMA systems and exactly dividing by both PAL and
- 	 NTSC frame rates for video and multimedia work.
- 
-+	config HZ_500
-+		bool "500 HZ"
-+	help
-+	 500 Hz is a balanced timer frequency. Provides fast interactivity
-+	 on desktops with good smoothness without increasing CPU power
-+	 consumption and sacrificing the battery life on laptops.
-+
-+	config HZ_600
-+		bool "600 HZ"
-+	help
-+	 600 Hz is a balanced timer frequency. Provides fast interactivity
-+	 on desktops with good smoothness without increasing CPU power
-+	 consumption and sacrificing the battery life on laptops.
-+
-+	config HZ_750
-+		bool "750 HZ"
-+	help
-+	 750 Hz is a balanced timer frequency. Provides fast interactivity
-+	 on desktops with good smoothness without increasing CPU power
-+	 consumption and sacrificing the battery life on laptops.
-+
- 	config HZ_1000
- 		bool "1000 HZ"
- 	help
-@@ -53,6 +74,9 @@ config HZ
- 	default 100 if HZ_100
- 	default 250 if HZ_250
- 	default 300 if HZ_300
-+	default 500 if HZ_500
-+	default 600 if HZ_600
-+	default 750 if HZ_750
- 	default 1000 if HZ_1000
- 
- config SCHED_HRTICK
-diff --git a/kernel/fork.c b/kernel/fork.c
-index 99076dbe27d8..18750b83c564 100644
---- a/kernel/fork.c
-+++ b/kernel/fork.c
-@@ -104,6 +104,10 @@
- #include <uapi/linux/pidfd.h>
- #include <linux/pidfs.h>
- 
-+#ifdef CONFIG_USER_NS
-+#include <linux/user_namespace.h>
-+#endif
-+
- #include <asm/pgalloc.h>
- #include <linux/uaccess.h>
- #include <asm/mmu_context.h>
-@@ -2154,6 +2158,10 @@ __latent_entropy struct task_struct *copy_process(
- 	if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
- 		return ERR_PTR(-EINVAL);
- 
-+	if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
-+		if (!capable(CAP_SYS_ADMIN))
-+			return ERR_PTR(-EPERM);
-+
- 	/*
- 	 * Thread groups must share signals as well, and detached threads
- 	 * can only be started up within the thread group.
-@@ -3301,6 +3309,12 @@ int ksys_unshare(unsigned long unshare_flags)
- 	if (unshare_flags & CLONE_NEWNS)
- 		unshare_flags |= CLONE_FS;
- 
-+	if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
-+		err = -EPERM;
-+		if (!capable(CAP_SYS_ADMIN))
-+			goto bad_unshare_out;
-+	}
-+
- 	err = check_unshare_flags(unshare_flags);
- 	if (err)
- 		goto bad_unshare_out;
-diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
-index 24dda708b699..c2bb8eb1d6ba 100644
---- a/kernel/sched/fair.c
-+++ b/kernel/sched/fair.c
-@@ -73,10 +73,19 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
-  *
-  * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
-  */
-+#ifdef CONFIG_CACHY
-+unsigned int sysctl_sched_base_slice			= 350000ULL;
-+static unsigned int normalized_sysctl_sched_base_slice	= 350000ULL;
-+#else
- unsigned int sysctl_sched_base_slice			= 750000ULL;
- static unsigned int normalized_sysctl_sched_base_slice	= 750000ULL;
-+#endif
- 
-+#ifdef CONFIG_CACHY
-+const_debug unsigned int sysctl_sched_migration_cost	= 300000UL;
-+#else
- const_debug unsigned int sysctl_sched_migration_cost	= 500000UL;
-+#endif
- 
- static int __init setup_sched_thermal_decay_shift(char *str)
- {
-@@ -121,8 +130,12 @@ int __weak arch_asym_cpu_priority(int cpu)
-  *
-  * (default: 5 msec, units: microseconds)
-  */
-+#ifdef CONFIG_CACHY
-+static unsigned int sysctl_sched_cfs_bandwidth_slice		= 3000UL;
-+#else
- static unsigned int sysctl_sched_cfs_bandwidth_slice		= 5000UL;
- #endif
-+#endif
- 
- #ifdef CONFIG_NUMA_BALANCING
- /* Restrict the NUMA promotion throughput (MB/s) for each target node. */
-diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
-index ef20c61004eb..10c1caff5e06 100644
---- a/kernel/sched/sched.h
-+++ b/kernel/sched/sched.h
-@@ -2544,7 +2544,7 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
- 
- extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags);
- 
--#ifdef CONFIG_PREEMPT_RT
-+#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_CACHY)
- #define SCHED_NR_MIGRATE_BREAK 8
- #else
- #define SCHED_NR_MIGRATE_BREAK 32
-diff --git a/kernel/sysctl.c b/kernel/sysctl.c
-index e0b917328cf9..e70ae9c11dea 100644
---- a/kernel/sysctl.c
-+++ b/kernel/sysctl.c
-@@ -80,6 +80,9 @@
- #ifdef CONFIG_RT_MUTEXES
- #include <linux/rtmutex.h>
- #endif
-+#ifdef CONFIG_USER_NS
-+#include <linux/user_namespace.h>
-+#endif
- 
- /* shared constants to be used in various sysctls */
- const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
-@@ -1623,6 +1626,15 @@ static struct ctl_table kern_table[] = {
- 		.mode		= 0644,
- 		.proc_handler	= proc_dointvec,
- 	},
-+#ifdef CONFIG_USER_NS
-+	{
-+		.procname	= "unprivileged_userns_clone",
-+		.data		= &unprivileged_userns_clone,
-+		.maxlen		= sizeof(int),
-+		.mode		= 0644,
-+		.proc_handler	= proc_dointvec,
-+	},
-+#endif
- #ifdef CONFIG_PROC_SYSCTL
- 	{
- 		.procname	= "tainted",
-diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
-index 0b0b95418b16..c4b835b91fc0 100644
---- a/kernel/user_namespace.c
-+++ b/kernel/user_namespace.c
-@@ -22,6 +22,13 @@
- #include <linux/bsearch.h>
- #include <linux/sort.h>
- 
-+/* sysctl */
-+#ifdef CONFIG_USER_NS_UNPRIVILEGED
-+int unprivileged_userns_clone = 1;
-+#else
-+int unprivileged_userns_clone;
-+#endif
-+
- static struct kmem_cache *user_ns_cachep __ro_after_init;
- static DEFINE_MUTEX(userns_state_mutex);
- 
-diff --git a/mm/Kconfig b/mm/Kconfig
-index b4cb45255a54..8635b3b24739 100644
---- a/mm/Kconfig
-+++ b/mm/Kconfig
-@@ -613,7 +613,7 @@ config COMPACTION
- config COMPACT_UNEVICTABLE_DEFAULT
- 	int
- 	depends on COMPACTION
--	default 0 if PREEMPT_RT
-+	default 0 if PREEMPT_RT || CACHY
- 	default 1
- 
- #
-diff --git a/mm/compaction.c b/mm/compaction.c
-index 739b1bf3d637..3a4269c02fb2 100644
---- a/mm/compaction.c
-+++ b/mm/compaction.c
-@@ -1950,7 +1950,11 @@ static int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNE
-  * aggressively the kernel should compact memory in the
-  * background. It takes values in the range [0, 100].
-  */
-+#ifdef CONFIG_CACHY
-+static unsigned int __read_mostly sysctl_compaction_proactiveness;
-+#else
- static unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
-+#endif
- static int sysctl_extfrag_threshold = 500;
- static int __read_mostly sysctl_compact_memory;
- 
-diff --git a/mm/huge_memory.c b/mm/huge_memory.c
-index 2120f7478e55..765ea6197e1e 100644
---- a/mm/huge_memory.c
-+++ b/mm/huge_memory.c
-@@ -63,7 +63,11 @@ unsigned long transparent_hugepage_flags __read_mostly =
- #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
- 	(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
- #endif
-+#ifdef CONFIG_CACHY
-+	(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG)|
-+#else
- 	(1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG)|
-+#endif
- 	(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
- 	(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
- 
-diff --git a/mm/page-writeback.c b/mm/page-writeback.c
-index 8a1c92090129..a415623cde8f 100644
---- a/mm/page-writeback.c
-+++ b/mm/page-writeback.c
-@@ -71,7 +71,11 @@ static long ratelimit_pages = 32;
- /*
-  * Start background writeback (via writeback threads) at this percentage
-  */
-+#ifdef CONFIG_CACHY
-+static int dirty_background_ratio = 5;
-+#else
- static int dirty_background_ratio = 10;
-+#endif
- 
- /*
-  * dirty_background_bytes starts at 0 (disabled) so that it is a function of
-@@ -99,7 +103,11 @@ static unsigned long vm_dirty_bytes;
- /*
-  * The interval between `kupdate'-style writebacks
-  */
-+#ifdef CONFIG_CACHY
-+unsigned int dirty_writeback_interval = 10 * 100; /* centiseconds */
-+#else
- unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */
-+#endif
- 
- EXPORT_SYMBOL_GPL(dirty_writeback_interval);
- 
-diff --git a/mm/page_alloc.c b/mm/page_alloc.c
-index 9ecf99190ea2..6191be6c5de9 100644
---- a/mm/page_alloc.c
-+++ b/mm/page_alloc.c
-@@ -271,7 +271,11 @@ const char * const migratetype_names[MIGRATE_TYPES] = {
- 
- int min_free_kbytes = 1024;
- int user_min_free_kbytes = -1;
-+#ifdef CONFIG_CACHY
-+static int watermark_boost_factor __read_mostly;
-+#else
- static int watermark_boost_factor __read_mostly = 15000;
-+#endif
- static int watermark_scale_factor = 10;
- 
- /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
-diff --git a/mm/swap.c b/mm/swap.c
-index 67786cb77130..6a91db6f3302 100644
---- a/mm/swap.c
-+++ b/mm/swap.c
-@@ -1111,6 +1111,10 @@ void folio_batch_remove_exceptionals(struct folio_batch *fbatch)
-  */
- void __init swap_setup(void)
- {
-+#ifdef CONFIG_CACHY
-+	/* Only swap-in pages requested, avoid readahead */
-+	page_cluster = 0;
-+#else
- 	unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT);
- 
- 	/* Use a smaller cluster for small-memory machines */
-@@ -1122,4 +1126,5 @@ void __init swap_setup(void)
- 	 * Right now other parts of the system means that we
- 	 * _really_ don't want to cluster much more
- 	 */
-+#endif
- }
-diff --git a/mm/vmpressure.c b/mm/vmpressure.c
-index bd5183dfd879..3a410f53a07c 100644
---- a/mm/vmpressure.c
-+++ b/mm/vmpressure.c
-@@ -43,7 +43,11 @@ static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16;
-  * essence, they are percents: the higher the value, the more number
-  * unsuccessful reclaims there were.
-  */
-+#ifdef CONFIG_CACHY
-+static const unsigned int vmpressure_level_med = 65;
-+#else
- static const unsigned int vmpressure_level_med = 60;
-+#endif
- static const unsigned int vmpressure_level_critical = 95;
- 
- /*
-diff --git a/mm/vmscan.c b/mm/vmscan.c
-index 2e34de9cd0d4..be9e40acc93b 100644
---- a/mm/vmscan.c
-+++ b/mm/vmscan.c
-@@ -191,7 +191,11 @@ struct scan_control {
- /*
-  * From 0 .. 200.  Higher means more swappy.
-  */
-+#ifdef CONFIG_CACHY
-+int vm_swappiness = 20;
-+#else
- int vm_swappiness = 60;
-+#endif
- 
- #ifdef CONFIG_MEMCG
- 
-@@ -3949,7 +3953,11 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc
- }
- 
- /* to protect the working set of the last N jiffies */
-+#ifdef CONFIG_CACHY
-+static unsigned long lru_gen_min_ttl __read_mostly = 1000;
-+#else
- static unsigned long lru_gen_min_ttl __read_mostly;
-+#endif
- 
- static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
- {
--- 
-2.46.0.rc1
-
-From e91af07ae5c96cff206bbbe52c16edb871050bc9 Mon Sep 17 00:00:00 2001
-From: Peter Jung <admin@ptr1337.dev>
-Date: Mon, 15 Jul 2024 13:24:26 +0200
-Subject: [PATCH 05/11] crypto
-
-Signed-off-by: Peter Jung <admin@ptr1337.dev>
----
- arch/x86/crypto/Kconfig                  |    1 +
- arch/x86/crypto/Makefile                 |    8 +-
- arch/x86/crypto/aes-gcm-aesni-x86_64.S   | 1128 +++++++++
- arch/x86/crypto/aes-gcm-avx10-x86_64.S   | 1222 ++++++++++
- arch/x86/crypto/aesni-intel_asm.S        | 1503 +-----------
- arch/x86/crypto/aesni-intel_avx-x86_64.S | 2804 ----------------------
- arch/x86/crypto/aesni-intel_glue.c       | 1269 ++++++----
- 7 files changed, 3125 insertions(+), 4810 deletions(-)
- create mode 100644 arch/x86/crypto/aes-gcm-aesni-x86_64.S
- create mode 100644 arch/x86/crypto/aes-gcm-avx10-x86_64.S
- delete mode 100644 arch/x86/crypto/aesni-intel_avx-x86_64.S
-
-diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig
-index c9e59589a1ce..24875e6295f2 100644
---- a/arch/x86/crypto/Kconfig
-+++ b/arch/x86/crypto/Kconfig
-@@ -18,6 +18,7 @@ config CRYPTO_AES_NI_INTEL
- 	depends on X86
- 	select CRYPTO_AEAD
- 	select CRYPTO_LIB_AES
-+	select CRYPTO_LIB_GF128MUL
- 	select CRYPTO_ALGAPI
- 	select CRYPTO_SKCIPHER
- 	select CRYPTO_SIMD
-diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
-index 9c5ce5613738..53b4a277809e 100644
---- a/arch/x86/crypto/Makefile
-+++ b/arch/x86/crypto/Makefile
-@@ -48,8 +48,12 @@ chacha-x86_64-$(CONFIG_AS_AVX512) += chacha-avx512vl-x86_64.o
- 
- obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
- aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
--aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o \
--	aes_ctrby8_avx-x86_64.o aes-xts-avx-x86_64.o
-+aesni-intel-$(CONFIG_64BIT) += aes_ctrby8_avx-x86_64.o \
-+			       aes-gcm-aesni-x86_64.o \
-+			       aes-xts-avx-x86_64.o
-+ifeq ($(CONFIG_AS_VAES)$(CONFIG_AS_VPCLMULQDQ),yy)
-+aesni-intel-$(CONFIG_64BIT) += aes-gcm-avx10-x86_64.o
-+endif
- 
- obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
- sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ssse3_glue.o
-diff --git a/arch/x86/crypto/aes-gcm-aesni-x86_64.S b/arch/x86/crypto/aes-gcm-aesni-x86_64.S
-new file mode 100644
-index 000000000000..45940e2883a0
---- /dev/null
-+++ b/arch/x86/crypto/aes-gcm-aesni-x86_64.S
-@@ -0,0 +1,1128 @@
-+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
-+//
-+// AES-NI optimized AES-GCM for x86_64
-+//
-+// Copyright 2024 Google LLC
-+//
-+// Author: Eric Biggers <ebiggers@google.com>
-+//
-+//------------------------------------------------------------------------------
-+//
-+// This file is dual-licensed, meaning that you can use it under your choice of
-+// either of the following two licenses:
-+//
-+// Licensed under the Apache License 2.0 (the "License").  You may obtain a copy
-+// of the License at
-+//
-+//	http://www.apache.org/licenses/LICENSE-2.0
-+//
-+// Unless required by applicable law or agreed to in writing, software
-+// distributed under the License is distributed on an "AS IS" BASIS,
-+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-+// See the License for the specific language governing permissions and
-+// limitations under the License.
-+//
-+// or
-+//
-+// Redistribution and use in source and binary forms, with or without
-+// modification, are permitted provided that the following conditions are met:
-+//
-+// 1. Redistributions of source code must retain the above copyright notice,
-+//    this list of conditions and the following disclaimer.
-+//
-+// 2. Redistributions in binary form must reproduce the above copyright
-+//    notice, this list of conditions and the following disclaimer in the
-+//    documentation and/or other materials provided with the distribution.
-+//
-+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-+// POSSIBILITY OF SUCH DAMAGE.
-+//
-+//------------------------------------------------------------------------------
-+//
-+// This file implements AES-GCM (Galois/Counter Mode) for x86_64 CPUs that
-+// support the original set of AES instructions, i.e. AES-NI.  Two
-+// implementations are provided, one that uses AVX and one that doesn't.  They
-+// are very similar, being generated by the same macros.  The only difference is
-+// that the AVX implementation takes advantage of VEX-coded instructions in some
-+// places to avoid some 'movdqu' and 'movdqa' instructions.  The AVX
-+// implementation does *not* use 256-bit vectors, as AES is not supported on
-+// 256-bit vectors until the VAES feature (which this file doesn't target).
-+//
-+// The specific CPU feature prerequisites are AES-NI and PCLMULQDQ, plus SSE4.1
-+// for the *_aesni functions or AVX for the *_aesni_avx ones.  (But it seems
-+// there are no CPUs that support AES-NI without also PCLMULQDQ and SSE4.1.)
-+//
-+// The design generally follows that of aes-gcm-avx10-x86_64.S, and that file is
-+// more thoroughly commented.  This file has the following notable changes:
-+//
-+//    - The vector length is fixed at 128-bit, i.e. xmm registers.  This means
-+//      there is only one AES block (and GHASH block) per register.
-+//
-+//    - Without AVX512 / AVX10, only 16 SIMD registers are available instead of
-+//      32.  We work around this by being much more careful about using
-+//      registers, relying heavily on loads to load values as they are needed.
-+//
-+//    - Masking is not available either.  We work around this by implementing
-+//      partial block loads and stores using overlapping scalar loads and stores
-+//      combined with shifts and SSE4.1 insertion and extraction instructions.
-+//
-+//    - The main loop is organized differently due to the different design
-+//      constraints.  First, with just one AES block per SIMD register, on some
-+//      CPUs 4 registers don't saturate the 'aesenc' throughput.  We therefore
-+//      do an 8-register wide loop.  Considering that and the fact that we have
-+//      just 16 SIMD registers to work with, it's not feasible to cache AES
-+//      round keys and GHASH key powers in registers across loop iterations.
-+//      That's not ideal, but also not actually that bad, since loads can run in
-+//      parallel with other instructions.  Significantly, this also makes it
-+//      possible to roll up the inner loops, relying on hardware loop unrolling
-+//      instead of software loop unrolling, greatly reducing code size.
-+//
-+//    - We implement the GHASH multiplications in the main loop using Karatsuba
-+//      multiplication instead of schoolbook multiplication.  This saves one
-+//      pclmulqdq instruction per block, at the cost of one 64-bit load, one
-+//      pshufd, and 0.25 pxors per block.  (This is without the three-argument
-+//      XOR support that would be provided by AVX512 / AVX10, which would be
-+//      more beneficial to schoolbook than Karatsuba.)
-+//
-+//      As a rough approximation, we can assume that Karatsuba multiplication is
-+//      faster than schoolbook multiplication in this context if one pshufd and
-+//      0.25 pxors are cheaper than a pclmulqdq.  (We assume that the 64-bit
-+//      load is "free" due to running in parallel with arithmetic instructions.)
-+//      This is true on AMD CPUs, including all that support pclmulqdq up to at
-+//      least Zen 3.  It's also true on older Intel CPUs: Westmere through
-+//      Haswell on the Core side, and Silvermont through Goldmont Plus on the
-+//      low-power side.  On some of these CPUs, pclmulqdq is quite slow, and the
-+//      benefit of Karatsuba should be substantial.  On newer Intel CPUs,
-+//      schoolbook multiplication should be faster, but only marginally.
-+//
-+//      Not all these CPUs were available to be tested.  However, benchmarks on
-+//      available CPUs suggest that this approximation is plausible.  Switching
-+//      to Karatsuba showed negligible change (< 1%) on Intel Broadwell,
-+//      Skylake, and Cascade Lake, but it improved AMD Zen 1-3 by 6-7%.
-+//      Considering that and the fact that Karatsuba should be even more
-+//      beneficial on older Intel CPUs, it seems like the right choice here.
-+//
-+//      An additional 0.25 pclmulqdq per block (2 per 8 blocks) could be
-+//      saved by using a multiplication-less reduction method.  We don't do that
-+//      because it would require a large number of shift and xor instructions,
-+//      making it less worthwhile and likely harmful on newer CPUs.
-+//
-+//      It does make sense to sometimes use a different reduction optimization
-+//      that saves a pclmulqdq, though: precompute the hash key times x^64, and
-+//      multiply the low half of the data block by the hash key with the extra
-+//      factor of x^64.  This eliminates one step of the reduction.  However,
-+//      this is incompatible with Karatsuba multiplication.  Therefore, for
-+//      multi-block processing we use Karatsuba multiplication with a regular
-+//      reduction.  For single-block processing, we use the x^64 optimization.
-+
-+#include <linux/linkage.h>
-+
-+.section .rodata
-+.p2align 4
-+.Lbswap_mask:
-+	.octa   0x000102030405060708090a0b0c0d0e0f
-+.Lgfpoly:
-+	.quad	0xc200000000000000
-+.Lone:
-+	.quad	1
-+.Lgfpoly_and_internal_carrybit:
-+	.octa	0xc2000000000000010000000000000001
-+	// Loading 16 bytes from '.Lzeropad_mask + 16 - len' produces a mask of
-+	// 'len' 0xff bytes and the rest zeroes.
-+.Lzeropad_mask:
-+	.octa	0xffffffffffffffffffffffffffffffff
-+	.octa	0
-+
-+// Offsets in struct aes_gcm_key_aesni
-+#define OFFSETOF_AESKEYLEN	480
-+#define OFFSETOF_H_POWERS	496
-+#define OFFSETOF_H_POWERS_XORED	624
-+#define OFFSETOF_H_TIMES_X64	688
-+
-+.text
-+
-+// Do a vpclmulqdq, or fall back to a movdqa and a pclmulqdq.  The fallback
-+// assumes that all operands are distinct and that any mem operand is aligned.
-+.macro	_vpclmulqdq	imm, src1, src2, dst
-+.if USE_AVX
-+	vpclmulqdq	\imm, \src1, \src2, \dst
-+.else
-+	movdqa		\src2, \dst
-+	pclmulqdq	\imm, \src1, \dst
-+.endif
-+.endm
-+
-+// Do a vpshufb, or fall back to a movdqa and a pshufb.  The fallback assumes
-+// that all operands are distinct and that any mem operand is aligned.
-+.macro	_vpshufb	src1, src2, dst
-+.if USE_AVX
-+	vpshufb		\src1, \src2, \dst
-+.else
-+	movdqa		\src2, \dst
-+	pshufb		\src1, \dst
-+.endif
-+.endm
-+
-+// Do a vpand, or fall back to a movdqu and a pand.  The fallback assumes that
-+// all operands are distinct.
-+.macro	_vpand		src1, src2, dst
-+.if USE_AVX
-+	vpand		\src1, \src2, \dst
-+.else
-+	movdqu		\src1, \dst
-+	pand		\src2, \dst
-+.endif
-+.endm
-+
-+// XOR the unaligned memory operand \mem into the xmm register \reg.  \tmp must
-+// be a temporary xmm register.
-+.macro	_xor_mem_to_reg	mem, reg, tmp
-+.if USE_AVX
-+	vpxor		\mem, \reg, \reg
-+.else
-+	movdqu		\mem, \tmp
-+	pxor		\tmp, \reg
-+.endif
-+.endm
-+
-+// Test the unaligned memory operand \mem against the xmm register \reg.  \tmp
-+// must be a temporary xmm register.
-+.macro	_test_mem	mem, reg, tmp
-+.if USE_AVX
-+	vptest		\mem, \reg
-+.else
-+	movdqu		\mem, \tmp
-+	ptest		\tmp, \reg
-+.endif
-+.endm
-+
-+// Load 1 <= %ecx <= 15 bytes from the pointer \src into the xmm register \dst
-+// and zeroize any remaining bytes.  Clobbers %rax, %rcx, and \tmp{64,32}.
-+.macro	_load_partial_block	src, dst, tmp64, tmp32
-+	sub		$8, %ecx		// LEN - 8
-+	jle		.Lle8\@
-+
-+	// Load 9 <= LEN <= 15 bytes.
-+	movq		(\src), \dst		// Load first 8 bytes
-+	mov		(\src, %rcx), %rax	// Load last 8 bytes
-+	neg		%ecx
-+	shl		$3, %ecx
-+	shr		%cl, %rax		// Discard overlapping bytes
-+	pinsrq		$1, %rax, \dst
-+	jmp		.Ldone\@
-+
-+.Lle8\@:
-+	add		$4, %ecx		// LEN - 4
-+	jl		.Llt4\@
-+
-+	// Load 4 <= LEN <= 8 bytes.
-+	mov		(\src), %eax		// Load first 4 bytes
-+	mov		(\src, %rcx), \tmp32	// Load last 4 bytes
-+	jmp		.Lcombine\@
-+
-+.Llt4\@:
-+	// Load 1 <= LEN <= 3 bytes.
-+	add		$2, %ecx		// LEN - 2
-+	movzbl		(\src), %eax		// Load first byte
-+	jl		.Lmovq\@
-+	movzwl		(\src, %rcx), \tmp32	// Load last 2 bytes
-+.Lcombine\@:
-+	shl		$3, %ecx
-+	shl		%cl, \tmp64
-+	or		\tmp64, %rax		// Combine the two parts
-+.Lmovq\@:
-+	movq		%rax, \dst
-+.Ldone\@:
-+.endm
-+
-+// Store 1 <= %ecx <= 15 bytes from the xmm register \src to the pointer \dst.
-+// Clobbers %rax, %rcx, and %rsi.
-+.macro	_store_partial_block	src, dst
-+	sub		$8, %ecx		// LEN - 8
-+	jl		.Llt8\@
-+
-+	// Store 8 <= LEN <= 15 bytes.
-+	pextrq		$1, \src, %rax
-+	mov		%ecx, %esi
-+	shl		$3, %ecx
-+	ror		%cl, %rax
-+	mov		%rax, (\dst, %rsi)	// Store last LEN - 8 bytes
-+	movq		\src, (\dst)		// Store first 8 bytes
-+	jmp		.Ldone\@
-+
-+.Llt8\@:
-+	add		$4, %ecx		// LEN - 4
-+	jl		.Llt4\@
-+
-+	// Store 4 <= LEN <= 7 bytes.
-+	pextrd		$1, \src, %eax
-+	mov		%ecx, %esi
-+	shl		$3, %ecx
-+	ror		%cl, %eax
-+	mov		%eax, (\dst, %rsi)	// Store last LEN - 4 bytes
-+	movd		\src, (\dst)		// Store first 4 bytes
-+	jmp		.Ldone\@
-+
-+.Llt4\@:
-+	// Store 1 <= LEN <= 3 bytes.
-+	pextrb		$0, \src, 0(\dst)
-+	cmp		$-2, %ecx		// LEN - 4 == -2, i.e. LEN == 2?
-+	jl		.Ldone\@
-+	pextrb		$1, \src, 1(\dst)
-+	je		.Ldone\@
-+	pextrb		$2, \src, 2(\dst)
-+.Ldone\@:
-+.endm
-+
-+// Do one step of GHASH-multiplying \a by \b and storing the reduced product in
-+// \b.  To complete all steps, this must be invoked with \i=0 through \i=9.
-+// \a_times_x64 must contain \a * x^64 in reduced form, \gfpoly must contain the
-+// .Lgfpoly constant, and \t0-\t1 must be temporary registers.
-+.macro	_ghash_mul_step	i, a, a_times_x64, b, gfpoly, t0, t1
-+
-+	// MI = (a_L * b_H) + ((a*x^64)_L * b_L)
-+.if \i == 0
-+	_vpclmulqdq	$0x01, \a, \b, \t0
-+.elseif \i == 1
-+	_vpclmulqdq	$0x00, \a_times_x64, \b, \t1
-+.elseif \i == 2
-+	pxor		\t1, \t0
-+
-+	// HI = (a_H * b_H) + ((a*x^64)_H * b_L)
-+.elseif \i == 3
-+	_vpclmulqdq	$0x11, \a, \b, \t1
-+.elseif \i == 4
-+	pclmulqdq	$0x10, \a_times_x64, \b
-+.elseif \i == 5
-+	pxor		\t1, \b
-+.elseif \i == 6
-+
-+	// Fold MI into HI.
-+	pshufd		$0x4e, \t0, \t1		// Swap halves of MI
-+.elseif \i == 7
-+	pclmulqdq	$0x00, \gfpoly, \t0	// MI_L*(x^63 + x^62 + x^57)
-+.elseif \i == 8
-+	pxor		\t1, \b
-+.elseif \i == 9
-+	pxor		\t0, \b
-+.endif
-+.endm
-+
-+// GHASH-multiply \a by \b and store the reduced product in \b.
-+// See _ghash_mul_step for details.
-+.macro	_ghash_mul	a, a_times_x64, b, gfpoly, t0, t1
-+.irp i, 0,1,2,3,4,5,6,7,8,9
-+	_ghash_mul_step	\i, \a, \a_times_x64, \b, \gfpoly, \t0, \t1
-+.endr
-+.endm
-+
-+// GHASH-multiply \a by \b and add the unreduced product to \lo, \mi, and \hi.
-+// This does Karatsuba multiplication and must be paired with _ghash_reduce.  On
-+// the first call, \lo, \mi, and \hi must be zero.  \a_xored must contain the
-+// two halves of \a XOR'd together, i.e. a_L + a_H.  \b is clobbered.
-+.macro	_ghash_mul_noreduce	a, a_xored, b, lo, mi, hi, t0
-+
-+	// LO += a_L * b_L
-+	_vpclmulqdq	$0x00, \a, \b, \t0
-+	pxor		\t0, \lo
-+
-+	// b_L + b_H
-+	pshufd		$0x4e, \b, \t0
-+	pxor		\b, \t0
-+
-+	// HI += a_H * b_H
-+	pclmulqdq	$0x11, \a, \b
-+	pxor		\b, \hi
-+
-+	// MI += (a_L + a_H) * (b_L + b_H)
-+	pclmulqdq	$0x00, \a_xored, \t0
-+	pxor		\t0, \mi
-+.endm
-+
-+// Reduce the product from \lo, \mi, and \hi, and store the result in \dst.
-+// This assumes that _ghash_mul_noreduce was used.
-+.macro	_ghash_reduce	lo, mi, hi, dst, t0
-+
-+	movq		.Lgfpoly(%rip), \t0
-+
-+	// MI += LO + HI (needed because we used Karatsuba multiplication)
-+	pxor		\lo, \mi
-+	pxor		\hi, \mi
-+
-+	// Fold LO into MI.
-+	pshufd		$0x4e, \lo, \dst
-+	pclmulqdq	$0x00, \t0, \lo
-+	pxor		\dst, \mi
-+	pxor		\lo, \mi
-+
-+	// Fold MI into HI.
-+	pshufd		$0x4e, \mi, \dst
-+	pclmulqdq	$0x00, \t0, \mi
-+	pxor		\hi, \dst
-+	pxor		\mi, \dst
-+.endm
-+
-+// Do the first step of the GHASH update of a set of 8 ciphertext blocks.
-+//
-+// The whole GHASH update does:
-+//
-+//	GHASH_ACC = (blk0+GHASH_ACC)*H^8 + blk1*H^7 + blk2*H^6 + blk3*H^5 +
-+//				blk4*H^4 + blk5*H^3 + blk6*H^2 + blk7*H^1
-+//
-+// This macro just does the first step: it does the unreduced multiplication
-+// (blk0+GHASH_ACC)*H^8 and starts gathering the unreduced product in the xmm
-+// registers LO, MI, and GHASH_ACC a.k.a. HI.  It also zero-initializes the
-+// inner block counter in %rax, which is a value that counts up by 8 for each
-+// block in the set of 8 and is used later to index by 8*blknum and 16*blknum.
-+//
-+// To reduce the number of pclmulqdq instructions required, both this macro and
-+// _ghash_update_continue_8x use Karatsuba multiplication instead of schoolbook
-+// multiplication.  See the file comment for more details about this choice.
-+//
-+// Both macros expect the ciphertext blocks blk[0-7] to be available at DST if
-+// encrypting, or SRC if decrypting.  They also expect the precomputed hash key
-+// powers H^i and their XOR'd-together halves to be available in the struct
-+// pointed to by KEY.  Both macros clobber TMP[0-2].
-+.macro	_ghash_update_begin_8x	enc
-+
-+	// Initialize the inner block counter.
-+	xor		%eax, %eax
-+
-+	// Load the highest hash key power, H^8.
-+	movdqa		OFFSETOF_H_POWERS(KEY), TMP0
-+
-+	// Load the first ciphertext block and byte-reflect it.
-+.if \enc
-+	movdqu		(DST), TMP1
-+.else
-+	movdqu		(SRC), TMP1
-+.endif
-+	pshufb		BSWAP_MASK, TMP1
-+
-+	// Add the GHASH accumulator to the ciphertext block to get the block
-+	// 'b' that needs to be multiplied with the hash key power 'a'.
-+	pxor		TMP1, GHASH_ACC
-+
-+	// b_L + b_H
-+	pshufd		$0x4e, GHASH_ACC, MI
-+	pxor		GHASH_ACC, MI
-+
-+	// LO = a_L * b_L
-+	_vpclmulqdq	$0x00, TMP0, GHASH_ACC, LO
-+
-+	// HI = a_H * b_H
-+	pclmulqdq	$0x11, TMP0, GHASH_ACC
-+
-+	// MI = (a_L + a_H) * (b_L + b_H)
-+	pclmulqdq	$0x00, OFFSETOF_H_POWERS_XORED(KEY), MI
-+.endm
-+
-+// Continue the GHASH update of 8 ciphertext blocks as described above by doing
-+// an unreduced multiplication of the next ciphertext block by the next lowest
-+// key power and accumulating the result into LO, MI, and GHASH_ACC a.k.a. HI.
-+.macro	_ghash_update_continue_8x enc
-+	add		$8, %eax
-+
-+	// Load the next lowest key power.
-+	movdqa		OFFSETOF_H_POWERS(KEY,%rax,2), TMP0
-+
-+	// Load the next ciphertext block and byte-reflect it.
-+.if \enc
-+	movdqu		(DST,%rax,2), TMP1
-+.else
-+	movdqu		(SRC,%rax,2), TMP1
-+.endif
-+	pshufb		BSWAP_MASK, TMP1
-+
-+	// LO += a_L * b_L
-+	_vpclmulqdq	$0x00, TMP0, TMP1, TMP2
-+	pxor		TMP2, LO
-+
-+	// b_L + b_H
-+	pshufd		$0x4e, TMP1, TMP2
-+	pxor		TMP1, TMP2
-+
-+	// HI += a_H * b_H
-+	pclmulqdq	$0x11, TMP0, TMP1
-+	pxor		TMP1, GHASH_ACC
-+
-+	// MI += (a_L + a_H) * (b_L + b_H)
-+	movq		OFFSETOF_H_POWERS_XORED(KEY,%rax), TMP1
-+	pclmulqdq	$0x00, TMP1, TMP2
-+	pxor		TMP2, MI
-+.endm
-+
-+// Reduce LO, MI, and GHASH_ACC a.k.a. HI into GHASH_ACC.  This is similar to
-+// _ghash_reduce, but it's hardcoded to use the registers of the main loop and
-+// it uses the same register for HI and the destination.  It's also divided into
-+// two steps.  TMP1 must be preserved across steps.
-+//
-+// One pshufd could be saved by shuffling MI and XOR'ing LO into it, instead of
-+// shuffling LO, XOR'ing LO into MI, and shuffling MI.  However, this would
-+// increase the critical path length, and it seems to slightly hurt performance.
-+.macro	_ghash_update_end_8x_step	i
-+.if \i == 0
-+	movq		.Lgfpoly(%rip), TMP1
-+	pxor		LO, MI
-+	pxor		GHASH_ACC, MI
-+	pshufd		$0x4e, LO, TMP2
-+	pclmulqdq	$0x00, TMP1, LO
-+	pxor		TMP2, MI
-+	pxor		LO, MI
-+.elseif \i == 1
-+	pshufd		$0x4e, MI, TMP2
-+	pclmulqdq	$0x00, TMP1, MI
-+	pxor		TMP2, GHASH_ACC
-+	pxor		MI, GHASH_ACC
-+.endif
-+.endm
-+
-+// void aes_gcm_precompute_##suffix(struct aes_gcm_key_aesni *key);
-+//
-+// Given the expanded AES key, derive the GHASH subkey and initialize the GHASH
-+// related fields in the key struct.
-+.macro	_aes_gcm_precompute
-+
-+	// Function arguments
-+	.set	KEY,		%rdi
-+
-+	// Additional local variables.
-+	// %xmm0-%xmm1 and %rax are used as temporaries.
-+	.set	RNDKEYLAST_PTR,	%rsi
-+	.set	H_CUR,		%xmm2
-+	.set	H_POW1,		%xmm3	// H^1
-+	.set	H_POW1_X64,	%xmm4	// H^1 * x^64
-+	.set	GFPOLY,		%xmm5
-+
-+	// Encrypt an all-zeroes block to get the raw hash subkey.
-+	movl		OFFSETOF_AESKEYLEN(KEY), %eax
-+	lea		6*16(KEY,%rax,4), RNDKEYLAST_PTR
-+	movdqa		(KEY), H_POW1  // Zero-th round key XOR all-zeroes block
-+	lea		16(KEY), %rax
-+1:
-+	aesenc		(%rax), H_POW1
-+	add		$16, %rax
-+	cmp		%rax, RNDKEYLAST_PTR
-+	jne		1b
-+	aesenclast	(RNDKEYLAST_PTR), H_POW1
-+
-+	// Preprocess the raw hash subkey as needed to operate on GHASH's
-+	// bit-reflected values directly: reflect its bytes, then multiply it by
-+	// x^-1 (using the backwards interpretation of polynomial coefficients
-+	// from the GCM spec) or equivalently x^1 (using the alternative,
-+	// natural interpretation of polynomial coefficients).
-+	pshufb		.Lbswap_mask(%rip), H_POW1
-+	movdqa		H_POW1, %xmm0
-+	pshufd		$0xd3, %xmm0, %xmm0
-+	psrad		$31, %xmm0
-+	paddq		H_POW1, H_POW1
-+	pand		.Lgfpoly_and_internal_carrybit(%rip), %xmm0
-+	pxor		%xmm0, H_POW1
-+
-+	// Store H^1.
-+	movdqa		H_POW1, OFFSETOF_H_POWERS+7*16(KEY)
-+
-+	// Compute and store H^1 * x^64.
-+	movq		.Lgfpoly(%rip), GFPOLY
-+	pshufd		$0x4e, H_POW1, %xmm0
-+	_vpclmulqdq	$0x00, H_POW1, GFPOLY, H_POW1_X64
-+	pxor		%xmm0, H_POW1_X64
-+	movdqa		H_POW1_X64, OFFSETOF_H_TIMES_X64(KEY)
-+
-+	// Compute and store the halves of H^1 XOR'd together.
-+	pxor		H_POW1, %xmm0
-+	movq		%xmm0, OFFSETOF_H_POWERS_XORED+7*8(KEY)
-+
-+	// Compute and store the remaining key powers H^2 through H^8.
-+	movdqa		H_POW1, H_CUR
-+	mov		$6*8, %eax
-+.Lprecompute_next\@:
-+	// Compute H^i = H^{i-1} * H^1.
-+	_ghash_mul	H_POW1, H_POW1_X64, H_CUR, GFPOLY, %xmm0, %xmm1
-+	// Store H^i.
-+	movdqa		H_CUR, OFFSETOF_H_POWERS(KEY,%rax,2)
-+	// Compute and store the halves of H^i XOR'd together.
-+	pshufd		$0x4e, H_CUR, %xmm0
-+	pxor		H_CUR, %xmm0
-+	movq		%xmm0, OFFSETOF_H_POWERS_XORED(KEY,%rax)
-+	sub		$8, %eax
-+	jge		.Lprecompute_next\@
-+
-+	RET
-+.endm
-+
-+// void aes_gcm_aad_update_aesni(const struct aes_gcm_key_aesni *key,
-+//				 u8 ghash_acc[16], const u8 *aad, int aadlen);
-+//
-+// This function processes the AAD (Additional Authenticated Data) in GCM.
-+// Using the key |key|, it updates the GHASH accumulator |ghash_acc| with the
-+// data given by |aad| and |aadlen|.  On the first call, |ghash_acc| must be all
-+// zeroes.  |aadlen| must be a multiple of 16, except on the last call where it
-+// can be any length.  The caller must do any buffering needed to ensure this.
-+.macro	_aes_gcm_aad_update
-+
-+	// Function arguments
-+	.set	KEY,		%rdi
-+	.set	GHASH_ACC_PTR,	%rsi
-+	.set	AAD,		%rdx
-+	.set	AADLEN,		%ecx
-+	// Note: _load_partial_block relies on AADLEN being in %ecx.
-+
-+	// Additional local variables.
-+	// %rax, %r10, and %xmm0-%xmm1 are used as temporary registers.
-+	.set	BSWAP_MASK,	%xmm2
-+	.set	GHASH_ACC,	%xmm3
-+	.set	H_POW1,		%xmm4	// H^1
-+	.set	H_POW1_X64,	%xmm5	// H^1 * x^64
-+	.set	GFPOLY,		%xmm6
-+
-+	movdqa		.Lbswap_mask(%rip), BSWAP_MASK
-+	movdqu		(GHASH_ACC_PTR), GHASH_ACC
-+	movdqa		OFFSETOF_H_POWERS+7*16(KEY), H_POW1
-+	movdqa		OFFSETOF_H_TIMES_X64(KEY), H_POW1_X64
-+	movq		.Lgfpoly(%rip), GFPOLY
-+
-+	// Process the AAD one full block at a time.
-+	sub		$16, AADLEN
-+	jl		.Laad_loop_1x_done\@
-+.Laad_loop_1x\@:
-+	movdqu		(AAD), %xmm0
-+	pshufb		BSWAP_MASK, %xmm0
-+	pxor		%xmm0, GHASH_ACC
-+	_ghash_mul	H_POW1, H_POW1_X64, GHASH_ACC, GFPOLY, %xmm0, %xmm1
-+	add		$16, AAD
-+	sub		$16, AADLEN
-+	jge		.Laad_loop_1x\@
-+.Laad_loop_1x_done\@:
-+	// Check whether there is a partial block at the end.
-+	add		$16, AADLEN
-+	jz		.Laad_done\@
-+
-+	// Process a partial block of length 1 <= AADLEN <= 15.
-+	// _load_partial_block assumes that %ecx contains AADLEN.
-+	_load_partial_block	AAD, %xmm0, %r10, %r10d
-+	pshufb		BSWAP_MASK, %xmm0
-+	pxor		%xmm0, GHASH_ACC
-+	_ghash_mul	H_POW1, H_POW1_X64, GHASH_ACC, GFPOLY, %xmm0, %xmm1
-+
-+.Laad_done\@:
-+	movdqu		GHASH_ACC, (GHASH_ACC_PTR)
-+	RET
-+.endm
-+
-+// Increment LE_CTR eight times to generate eight little-endian counter blocks,
-+// swap each to big-endian, and store them in AESDATA[0-7].  Also XOR them with
-+// the zero-th AES round key.  Clobbers TMP0 and TMP1.
-+.macro	_ctr_begin_8x
-+	movq		.Lone(%rip), TMP0
-+	movdqa		(KEY), TMP1		// zero-th round key
-+.irp i, 0,1,2,3,4,5,6,7
-+	_vpshufb	BSWAP_MASK, LE_CTR, AESDATA\i
-+	pxor		TMP1, AESDATA\i
-+	paddd		TMP0, LE_CTR
-+.endr
-+.endm
-+
-+// Do a non-last round of AES on AESDATA[0-7] using \round_key.
-+.macro	_aesenc_8x	round_key
-+.irp i, 0,1,2,3,4,5,6,7
-+	aesenc		\round_key, AESDATA\i
-+.endr
-+.endm
-+
-+// Do the last round of AES on AESDATA[0-7] using \round_key.
-+.macro	_aesenclast_8x	round_key
-+.irp i, 0,1,2,3,4,5,6,7
-+	aesenclast	\round_key, AESDATA\i
-+.endr
-+.endm
-+
-+// XOR eight blocks from SRC with the keystream blocks in AESDATA[0-7], and
-+// store the result to DST.  Clobbers TMP0.
-+.macro	_xor_data_8x
-+.irp i, 0,1,2,3,4,5,6,7
-+	_xor_mem_to_reg	\i*16(SRC), AESDATA\i, tmp=TMP0
-+.endr
-+.irp i, 0,1,2,3,4,5,6,7
-+	movdqu		AESDATA\i, \i*16(DST)
-+.endr
-+.endm
-+
-+// void aes_gcm_{enc,dec}_update_##suffix(const struct aes_gcm_key_aesni *key,
-+//					  const u32 le_ctr[4], u8 ghash_acc[16],
-+//					  const u8 *src, u8 *dst, int datalen);
-+//
-+// This macro generates a GCM encryption or decryption update function with the
-+// above prototype (with \enc selecting which one).
-+//
-+// This function computes the next portion of the CTR keystream, XOR's it with
-+// |datalen| bytes from |src|, and writes the resulting encrypted or decrypted
-+// data to |dst|.  It also updates the GHASH accumulator |ghash_acc| using the
-+// next |datalen| ciphertext bytes.
-+//
-+// |datalen| must be a multiple of 16, except on the last call where it can be
-+// any length.  The caller must do any buffering needed to ensure this.  Both
-+// in-place and out-of-place en/decryption are supported.
-+//
-+// |le_ctr| must give the current counter in little-endian format.  For a new
-+// message, the low word of the counter must be 2.  This function loads the
-+// counter from |le_ctr| and increments the loaded counter as needed, but it
-+// does *not* store the updated counter back to |le_ctr|.  The caller must
-+// update |le_ctr| if any more data segments follow.  Internally, only the low
-+// 32-bit word of the counter is incremented, following the GCM standard.
-+.macro	_aes_gcm_update	enc
-+
-+	// Function arguments
-+	.set	KEY,		%rdi
-+	.set	LE_CTR_PTR,	%rsi	// Note: overlaps with usage as temp reg
-+	.set	GHASH_ACC_PTR,	%rdx
-+	.set	SRC,		%rcx
-+	.set	DST,		%r8
-+	.set	DATALEN,	%r9d
-+	.set	DATALEN64,	%r9	// Zero-extend DATALEN before using!
-+	// Note: the code setting up for _load_partial_block assumes that SRC is
-+	// in %rcx (and that DATALEN is *not* in %rcx).
-+
-+	// Additional local variables
-+
-+	// %rax and %rsi are used as temporary registers.  Note: %rsi overlaps
-+	// with LE_CTR_PTR, which is used only at the beginning.
-+
-+	.set	AESKEYLEN,	%r10d	// AES key length in bytes
-+	.set	AESKEYLEN64,	%r10
-+	.set	RNDKEYLAST_PTR,	%r11	// Pointer to last AES round key
-+
-+	// Put the most frequently used values in %xmm0-%xmm7 to reduce code
-+	// size.  (%xmm0-%xmm7 take fewer bytes to encode than %xmm8-%xmm15.)
-+	.set	TMP0,		%xmm0
-+	.set	TMP1,		%xmm1
-+	.set	TMP2,		%xmm2
-+	.set	LO,		%xmm3	// Low part of unreduced product
-+	.set	MI,		%xmm4	// Middle part of unreduced product
-+	.set	GHASH_ACC,	%xmm5	// GHASH accumulator; in main loop also
-+					// the high part of unreduced product
-+	.set	BSWAP_MASK,	%xmm6	// Shuffle mask for reflecting bytes
-+	.set	LE_CTR,		%xmm7	// Little-endian counter value
-+	.set	AESDATA0,	%xmm8
-+	.set	AESDATA1,	%xmm9
-+	.set	AESDATA2,	%xmm10
-+	.set	AESDATA3,	%xmm11
-+	.set	AESDATA4,	%xmm12
-+	.set	AESDATA5,	%xmm13
-+	.set	AESDATA6,	%xmm14
-+	.set	AESDATA7,	%xmm15
-+
-+	movdqa		.Lbswap_mask(%rip), BSWAP_MASK
-+	movdqu		(GHASH_ACC_PTR), GHASH_ACC
-+	movdqu		(LE_CTR_PTR), LE_CTR
-+
-+	movl		OFFSETOF_AESKEYLEN(KEY), AESKEYLEN
-+	lea		6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR
-+
-+	// If there are at least 8*16 bytes of data, then continue into the main
-+	// loop, which processes 8*16 bytes of data per iteration.
-+	//
-+	// The main loop interleaves AES and GHASH to improve performance on
-+	// CPUs that can execute these instructions in parallel.  When
-+	// decrypting, the GHASH input (the ciphertext) is immediately
-+	// available.  When encrypting, we instead encrypt a set of 8 blocks
-+	// first and then GHASH those blocks while encrypting the next set of 8,
-+	// repeat that as needed, and finally GHASH the last set of 8 blocks.
-+	//
-+	// Code size optimization: Prefer adding or subtracting -8*16 over 8*16,
-+	// as this makes the immediate fit in a signed byte, saving 3 bytes.
-+	add		$-8*16, DATALEN
-+	jl		.Lcrypt_loop_8x_done\@
-+.if \enc
-+	// Encrypt the first 8 plaintext blocks.
-+	_ctr_begin_8x
-+	lea		16(KEY), %rsi
-+	.p2align 4
-+1:
-+	movdqa		(%rsi), TMP0
-+	_aesenc_8x	TMP0
-+	add		$16, %rsi
-+	cmp		%rsi, RNDKEYLAST_PTR
-+	jne		1b
-+	movdqa		(%rsi), TMP0
-+	_aesenclast_8x	TMP0
-+	_xor_data_8x
-+	// Don't increment DST until the ciphertext blocks have been hashed.
-+	sub		$-8*16, SRC
-+	add		$-8*16, DATALEN
-+	jl		.Lghash_last_ciphertext_8x\@
-+.endif
-+
-+	.p2align 4
-+.Lcrypt_loop_8x\@:
-+
-+	// Generate the next set of 8 counter blocks and start encrypting them.
-+	_ctr_begin_8x
-+	lea		16(KEY), %rsi
-+
-+	// Do a round of AES, and start the GHASH update of 8 ciphertext blocks
-+	// by doing the unreduced multiplication for the first ciphertext block.
-+	movdqa		(%rsi), TMP0
-+	add		$16, %rsi
-+	_aesenc_8x	TMP0
-+	_ghash_update_begin_8x \enc
-+
-+	// Do 7 more rounds of AES, and continue the GHASH update by doing the
-+	// unreduced multiplication for the remaining ciphertext blocks.
-+	.p2align 4
-+1:
-+	movdqa		(%rsi), TMP0
-+	add		$16, %rsi
-+	_aesenc_8x	TMP0
-+	_ghash_update_continue_8x \enc
-+	cmp		$7*8, %eax
-+	jne		1b
-+
-+	// Do the remaining AES rounds.
-+	.p2align 4
-+1:
-+	movdqa		(%rsi), TMP0
-+	add		$16, %rsi
-+	_aesenc_8x	TMP0
-+	cmp		%rsi, RNDKEYLAST_PTR
-+	jne		1b
-+
-+	// Do the GHASH reduction and the last round of AES.
-+	movdqa		(RNDKEYLAST_PTR), TMP0
-+	_ghash_update_end_8x_step	0
-+	_aesenclast_8x	TMP0
-+	_ghash_update_end_8x_step	1
-+
-+	// XOR the data with the AES-CTR keystream blocks.
-+.if \enc
-+	sub		$-8*16, DST
-+.endif
-+	_xor_data_8x
-+	sub		$-8*16, SRC
-+.if !\enc
-+	sub		$-8*16, DST
-+.endif
-+	add		$-8*16, DATALEN
-+	jge		.Lcrypt_loop_8x\@
-+
-+.if \enc
-+.Lghash_last_ciphertext_8x\@:
-+	// Update GHASH with the last set of 8 ciphertext blocks.
-+	_ghash_update_begin_8x		\enc
-+	.p2align 4
-+1:
-+	_ghash_update_continue_8x	\enc
-+	cmp		$7*8, %eax
-+	jne		1b
-+	_ghash_update_end_8x_step	0
-+	_ghash_update_end_8x_step	1
-+	sub		$-8*16, DST
-+.endif
-+
-+.Lcrypt_loop_8x_done\@:
-+
-+	sub		$-8*16, DATALEN
-+	jz		.Ldone\@
-+
-+	// Handle the remainder of length 1 <= DATALEN < 8*16 bytes.  We keep
-+	// things simple and keep the code size down by just going one block at
-+	// a time, again taking advantage of hardware loop unrolling.  Since
-+	// there are enough key powers available for all remaining data, we do
-+	// the GHASH multiplications unreduced, and only reduce at the very end.
-+
-+	.set	HI,		TMP2
-+	.set	H_POW,		AESDATA0
-+	.set	H_POW_XORED,	AESDATA1
-+	.set	ONE,		AESDATA2
-+
-+	movq		.Lone(%rip), ONE
-+
-+	// Start collecting the unreduced GHASH intermediate value LO, MI, HI.
-+	pxor		LO, LO
-+	pxor		MI, MI
-+	pxor		HI, HI
-+
-+	// Set up a block counter %rax to contain 8*(8-n), where n is the number
-+	// of blocks that remain, counting any partial block.  This will be used
-+	// to access the key powers H^n through H^1.
-+	mov		DATALEN, %eax
-+	neg		%eax
-+	and		$~15, %eax
-+	sar		$1, %eax
-+	add		$64, %eax
-+
-+	sub		$16, DATALEN
-+	jl		.Lcrypt_loop_1x_done\@
-+
-+	// Process the data one full block at a time.
-+.Lcrypt_loop_1x\@:
-+
-+	// Encrypt the next counter block.
-+	_vpshufb	BSWAP_MASK, LE_CTR, TMP0
-+	paddd		ONE, LE_CTR
-+	pxor		(KEY), TMP0
-+	lea		-6*16(RNDKEYLAST_PTR), %rsi	// Reduce code size
-+	cmp		$24, AESKEYLEN
-+	jl		128f	// AES-128?
-+	je		192f	// AES-192?
-+	// AES-256
-+	aesenc		-7*16(%rsi), TMP0
-+	aesenc		-6*16(%rsi), TMP0
-+192:
-+	aesenc		-5*16(%rsi), TMP0
-+	aesenc		-4*16(%rsi), TMP0
-+128:
-+.irp i, -3,-2,-1,0,1,2,3,4,5
-+	aesenc		\i*16(%rsi), TMP0
-+.endr
-+	aesenclast	(RNDKEYLAST_PTR), TMP0
-+
-+	// Load the next key power H^i.
-+	movdqa		OFFSETOF_H_POWERS(KEY,%rax,2), H_POW
-+	movq		OFFSETOF_H_POWERS_XORED(KEY,%rax), H_POW_XORED
-+
-+	// XOR the keystream block that was just generated in TMP0 with the next
-+	// source data block and store the resulting en/decrypted data to DST.
-+.if \enc
-+	_xor_mem_to_reg	(SRC), TMP0, tmp=TMP1
-+	movdqu		TMP0, (DST)
-+.else
-+	movdqu		(SRC), TMP1
-+	pxor		TMP1, TMP0
-+	movdqu		TMP0, (DST)
-+.endif
-+
-+	// Update GHASH with the ciphertext block.
-+.if \enc
-+	pshufb		BSWAP_MASK, TMP0
-+	pxor		TMP0, GHASH_ACC
-+.else
-+	pshufb		BSWAP_MASK, TMP1
-+	pxor		TMP1, GHASH_ACC
-+.endif
-+	_ghash_mul_noreduce	H_POW, H_POW_XORED, GHASH_ACC, LO, MI, HI, TMP0
-+	pxor		GHASH_ACC, GHASH_ACC
-+
-+	add		$8, %eax
-+	add		$16, SRC
-+	add		$16, DST
-+	sub		$16, DATALEN
-+	jge		.Lcrypt_loop_1x\@
-+.Lcrypt_loop_1x_done\@:
-+	// Check whether there is a partial block at the end.
-+	add		$16, DATALEN
-+	jz		.Lghash_reduce\@
-+
-+	// Process a partial block of length 1 <= DATALEN <= 15.
-+
-+	// Encrypt a counter block for the last time.
-+	pshufb		BSWAP_MASK, LE_CTR
-+	pxor		(KEY), LE_CTR
-+	lea		16(KEY), %rsi
-+1:
-+	aesenc		(%rsi), LE_CTR
-+	add		$16, %rsi
-+	cmp		%rsi, RNDKEYLAST_PTR
-+	jne		1b
-+	aesenclast	(RNDKEYLAST_PTR), LE_CTR
-+
-+	// Load the lowest key power, H^1.
-+	movdqa		OFFSETOF_H_POWERS(KEY,%rax,2), H_POW
-+	movq		OFFSETOF_H_POWERS_XORED(KEY,%rax), H_POW_XORED
-+
-+	// Load and zero-pad 1 <= DATALEN <= 15 bytes of data from SRC.  SRC is
-+	// in %rcx, but _load_partial_block needs DATALEN in %rcx instead.
-+	// RNDKEYLAST_PTR is no longer needed, so reuse it for SRC.
-+	mov		SRC, RNDKEYLAST_PTR
-+	mov		DATALEN, %ecx
-+	_load_partial_block	RNDKEYLAST_PTR, TMP0, %rsi, %esi
-+
-+	// XOR the keystream block that was just generated in LE_CTR with the
-+	// source data block and store the resulting en/decrypted data to DST.
-+	pxor		TMP0, LE_CTR
-+	mov		DATALEN, %ecx
-+	_store_partial_block	LE_CTR, DST
-+
-+	// If encrypting, zero-pad the final ciphertext block for GHASH.  (If
-+	// decrypting, this was already done by _load_partial_block.)
-+.if \enc
-+	lea		.Lzeropad_mask+16(%rip), %rax
-+	sub		DATALEN64, %rax
-+	_vpand		(%rax), LE_CTR, TMP0
-+.endif
-+
-+	// Update GHASH with the final ciphertext block.
-+	pshufb		BSWAP_MASK, TMP0
-+	pxor		TMP0, GHASH_ACC
-+	_ghash_mul_noreduce	H_POW, H_POW_XORED, GHASH_ACC, LO, MI, HI, TMP0
-+
-+.Lghash_reduce\@:
-+	// Finally, do the GHASH reduction.
-+	_ghash_reduce	LO, MI, HI, GHASH_ACC, TMP0
-+
-+.Ldone\@:
-+	// Store the updated GHASH accumulator back to memory.
-+	movdqu		GHASH_ACC, (GHASH_ACC_PTR)
-+
-+	RET
-+.endm
-+
-+// void aes_gcm_enc_final_##suffix(const struct aes_gcm_key_aesni *key,
-+//				   const u32 le_ctr[4], u8 ghash_acc[16],
-+//				   u64 total_aadlen, u64 total_datalen);
-+// bool aes_gcm_dec_final_##suffix(const struct aes_gcm_key_aesni *key,
-+//				   const u32 le_ctr[4], const u8 ghash_acc[16],
-+//				   u64 total_aadlen, u64 total_datalen,
-+//				   const u8 tag[16], int taglen);
-+//
-+// This macro generates one of the above two functions (with \enc selecting
-+// which one).  Both functions finish computing the GCM authentication tag by
-+// updating GHASH with the lengths block and encrypting the GHASH accumulator.
-+// |total_aadlen| and |total_datalen| must be the total length of the additional
-+// authenticated data and the en/decrypted data in bytes, respectively.
-+//
-+// The encryption function then stores the full-length (16-byte) computed
-+// authentication tag to |ghash_acc|.  The decryption function instead loads the
-+// expected authentication tag (the one that was transmitted) from the 16-byte
-+// buffer |tag|, compares the first 4 <= |taglen| <= 16 bytes of it to the
-+// computed tag in constant time, and returns true if and only if they match.
-+.macro	_aes_gcm_final	enc
-+
-+	// Function arguments
-+	.set	KEY,		%rdi
-+	.set	LE_CTR_PTR,	%rsi
-+	.set	GHASH_ACC_PTR,	%rdx
-+	.set	TOTAL_AADLEN,	%rcx
-+	.set	TOTAL_DATALEN,	%r8
-+	.set	TAG,		%r9
-+	.set	TAGLEN,		%r10d	// Originally at 8(%rsp)
-+	.set	TAGLEN64,	%r10
-+
-+	// Additional local variables.
-+	// %rax and %xmm0-%xmm2 are used as temporary registers.
-+	.set	AESKEYLEN,	%r11d
-+	.set	AESKEYLEN64,	%r11
-+	.set	BSWAP_MASK,	%xmm3
-+	.set	GHASH_ACC,	%xmm4
-+	.set	H_POW1,		%xmm5	// H^1
-+	.set	H_POW1_X64,	%xmm6	// H^1 * x^64
-+	.set	GFPOLY,		%xmm7
-+
-+	movdqa		.Lbswap_mask(%rip), BSWAP_MASK
-+	movl		OFFSETOF_AESKEYLEN(KEY), AESKEYLEN
-+
-+	// Set up a counter block with 1 in the low 32-bit word.  This is the
-+	// counter that produces the ciphertext needed to encrypt the auth tag.
-+	movdqu		(LE_CTR_PTR), %xmm0
-+	mov		$1, %eax
-+	pinsrd		$0, %eax, %xmm0
-+
-+	// Build the lengths block and XOR it into the GHASH accumulator.
-+	movq		TOTAL_DATALEN, GHASH_ACC
-+	pinsrq		$1, TOTAL_AADLEN, GHASH_ACC
-+	psllq		$3, GHASH_ACC	// Bytes to bits
-+	_xor_mem_to_reg	(GHASH_ACC_PTR), GHASH_ACC, %xmm1
-+
-+	movdqa		OFFSETOF_H_POWERS+7*16(KEY), H_POW1
-+	movdqa		OFFSETOF_H_TIMES_X64(KEY), H_POW1_X64
-+	movq		.Lgfpoly(%rip), GFPOLY
-+
-+	// Make %rax point to the 6th from last AES round key.  (Using signed
-+	// byte offsets -7*16 through 6*16 decreases code size.)
-+	lea		(KEY,AESKEYLEN64,4), %rax
-+
-+	// AES-encrypt the counter block and also multiply GHASH_ACC by H^1.
-+	// Interleave the AES and GHASH instructions to improve performance.
-+	pshufb		BSWAP_MASK, %xmm0
-+	pxor		(KEY), %xmm0
-+	cmp		$24, AESKEYLEN
-+	jl		128f	// AES-128?
-+	je		192f	// AES-192?
-+	// AES-256
-+	aesenc		-7*16(%rax), %xmm0
-+	aesenc		-6*16(%rax), %xmm0
-+192:
-+	aesenc		-5*16(%rax), %xmm0
-+	aesenc		-4*16(%rax), %xmm0
-+128:
-+.irp i, 0,1,2,3,4,5,6,7,8
-+	aesenc		(\i-3)*16(%rax), %xmm0
-+	_ghash_mul_step	\i, H_POW1, H_POW1_X64, GHASH_ACC, GFPOLY, %xmm1, %xmm2
-+.endr
-+	aesenclast	6*16(%rax), %xmm0
-+	_ghash_mul_step	9, H_POW1, H_POW1_X64, GHASH_ACC, GFPOLY, %xmm1, %xmm2
-+
-+	// Undo the byte reflection of the GHASH accumulator.
-+	pshufb		BSWAP_MASK, GHASH_ACC
-+
-+	// Encrypt the GHASH accumulator.
-+	pxor		%xmm0, GHASH_ACC
-+
-+.if \enc
-+	// Return the computed auth tag.
-+	movdqu		GHASH_ACC, (GHASH_ACC_PTR)
-+.else
-+	.set		ZEROPAD_MASK_PTR, TOTAL_AADLEN // Reusing TOTAL_AADLEN!
-+
-+	// Verify the auth tag in constant time by XOR'ing the transmitted and
-+	// computed auth tags together and using the ptest instruction to check
-+	// whether the first TAGLEN bytes of the result are zero.
-+	_xor_mem_to_reg	(TAG), GHASH_ACC, tmp=%xmm0
-+	movl		8(%rsp), TAGLEN
-+	lea		.Lzeropad_mask+16(%rip), ZEROPAD_MASK_PTR
-+	sub		TAGLEN64, ZEROPAD_MASK_PTR
-+	xor		%eax, %eax
-+	_test_mem	(ZEROPAD_MASK_PTR), GHASH_ACC, tmp=%xmm0
-+	sete		%al
-+.endif
-+	RET
-+.endm
-+
-+.set	USE_AVX, 0
-+SYM_FUNC_START(aes_gcm_precompute_aesni)
-+	_aes_gcm_precompute
-+SYM_FUNC_END(aes_gcm_precompute_aesni)
-+SYM_FUNC_START(aes_gcm_aad_update_aesni)
-+	_aes_gcm_aad_update
-+SYM_FUNC_END(aes_gcm_aad_update_aesni)
-+SYM_FUNC_START(aes_gcm_enc_update_aesni)
-+	_aes_gcm_update	1
-+SYM_FUNC_END(aes_gcm_enc_update_aesni)
-+SYM_FUNC_START(aes_gcm_dec_update_aesni)
-+	_aes_gcm_update	0
-+SYM_FUNC_END(aes_gcm_dec_update_aesni)
-+SYM_FUNC_START(aes_gcm_enc_final_aesni)
-+	_aes_gcm_final	1
-+SYM_FUNC_END(aes_gcm_enc_final_aesni)
-+SYM_FUNC_START(aes_gcm_dec_final_aesni)
-+	_aes_gcm_final	0
-+SYM_FUNC_END(aes_gcm_dec_final_aesni)
-+
-+.set	USE_AVX, 1
-+SYM_FUNC_START(aes_gcm_precompute_aesni_avx)
-+	_aes_gcm_precompute
-+SYM_FUNC_END(aes_gcm_precompute_aesni_avx)
-+SYM_FUNC_START(aes_gcm_aad_update_aesni_avx)
-+	_aes_gcm_aad_update
-+SYM_FUNC_END(aes_gcm_aad_update_aesni_avx)
-+SYM_FUNC_START(aes_gcm_enc_update_aesni_avx)
-+	_aes_gcm_update	1
-+SYM_FUNC_END(aes_gcm_enc_update_aesni_avx)
-+SYM_FUNC_START(aes_gcm_dec_update_aesni_avx)
-+	_aes_gcm_update	0
-+SYM_FUNC_END(aes_gcm_dec_update_aesni_avx)
-+SYM_FUNC_START(aes_gcm_enc_final_aesni_avx)
-+	_aes_gcm_final	1
-+SYM_FUNC_END(aes_gcm_enc_final_aesni_avx)
-+SYM_FUNC_START(aes_gcm_dec_final_aesni_avx)
-+	_aes_gcm_final	0
-+SYM_FUNC_END(aes_gcm_dec_final_aesni_avx)
-diff --git a/arch/x86/crypto/aes-gcm-avx10-x86_64.S b/arch/x86/crypto/aes-gcm-avx10-x86_64.S
-new file mode 100644
-index 000000000000..97e0ee515fc5
---- /dev/null
-+++ b/arch/x86/crypto/aes-gcm-avx10-x86_64.S
-@@ -0,0 +1,1222 @@
-+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
-+//
-+// VAES and VPCLMULQDQ optimized AES-GCM for x86_64
-+//
-+// Copyright 2024 Google LLC
-+//
-+// Author: Eric Biggers <ebiggers@google.com>
-+//
-+//------------------------------------------------------------------------------
-+//
-+// This file is dual-licensed, meaning that you can use it under your choice of
-+// either of the following two licenses:
-+//
-+// Licensed under the Apache License 2.0 (the "License").  You may obtain a copy
-+// of the License at
-+//
-+//	http://www.apache.org/licenses/LICENSE-2.0
-+//
-+// Unless required by applicable law or agreed to in writing, software
-+// distributed under the License is distributed on an "AS IS" BASIS,
-+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-+// See the License for the specific language governing permissions and
-+// limitations under the License.
-+//
-+// or
-+//
-+// Redistribution and use in source and binary forms, with or without
-+// modification, are permitted provided that the following conditions are met:
-+//
-+// 1. Redistributions of source code must retain the above copyright notice,
-+//    this list of conditions and the following disclaimer.
-+//
-+// 2. Redistributions in binary form must reproduce the above copyright
-+//    notice, this list of conditions and the following disclaimer in the
-+//    documentation and/or other materials provided with the distribution.
-+//
-+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-+// POSSIBILITY OF SUCH DAMAGE.
-+//
-+//------------------------------------------------------------------------------
-+//
-+// This file implements AES-GCM (Galois/Counter Mode) for x86_64 CPUs that
-+// support VAES (vector AES), VPCLMULQDQ (vector carryless multiplication), and
-+// either AVX512 or AVX10.  Some of the functions, notably the encryption and
-+// decryption update functions which are the most performance-critical, are
-+// provided in two variants generated from a macro: one using 256-bit vectors
-+// (suffix: vaes_avx10_256) and one using 512-bit vectors (vaes_avx10_512).  The
-+// other, "shared" functions (vaes_avx10) use at most 256-bit vectors.
-+//
-+// The functions that use 512-bit vectors are intended for CPUs that support
-+// 512-bit vectors *and* where using them doesn't cause significant
-+// downclocking.  They require the following CPU features:
-+//
-+//	VAES && VPCLMULQDQ && BMI2 && ((AVX512BW && AVX512VL) || AVX10/512)
-+//
-+// The other functions require the following CPU features:
-+//
-+//	VAES && VPCLMULQDQ && BMI2 && ((AVX512BW && AVX512VL) || AVX10/256)
-+//
-+// All functions use the "System V" ABI.  The Windows ABI is not supported.
-+//
-+// Note that we use "avx10" in the names of the functions as a shorthand to
-+// really mean "AVX10 or a certain set of AVX512 features".  Due to Intel's
-+// introduction of AVX512 and then its replacement by AVX10, there doesn't seem
-+// to be a simple way to name things that makes sense on all CPUs.
-+//
-+// Note that the macros that support both 256-bit and 512-bit vectors could
-+// fairly easily be changed to support 128-bit too.  However, this would *not*
-+// be sufficient to allow the code to run on CPUs without AVX512 or AVX10,
-+// because the code heavily uses several features of these extensions other than
-+// the vector length: the increase in the number of SIMD registers from 16 to
-+// 32, masking support, and new instructions such as vpternlogd (which can do a
-+// three-argument XOR).  These features are very useful for AES-GCM.
-+
-+#include <linux/linkage.h>
-+
-+.section .rodata
-+.p2align 6
-+
-+	// A shuffle mask that reflects the bytes of 16-byte blocks
-+.Lbswap_mask:
-+	.octa   0x000102030405060708090a0b0c0d0e0f
-+
-+	// This is the GHASH reducing polynomial without its constant term, i.e.
-+	// x^128 + x^7 + x^2 + x, represented using the backwards mapping
-+	// between bits and polynomial coefficients.
-+	//
-+	// Alternatively, it can be interpreted as the naturally-ordered
-+	// representation of the polynomial x^127 + x^126 + x^121 + 1, i.e. the
-+	// "reversed" GHASH reducing polynomial without its x^128 term.
-+.Lgfpoly:
-+	.octa	0xc2000000000000000000000000000001
-+
-+	// Same as above, but with the (1 << 64) bit set.
-+.Lgfpoly_and_internal_carrybit:
-+	.octa	0xc2000000000000010000000000000001
-+
-+	// The below constants are used for incrementing the counter blocks.
-+	// ctr_pattern points to the four 128-bit values [0, 1, 2, 3].
-+	// inc_2blocks and inc_4blocks point to the single 128-bit values 2 and
-+	// 4.  Note that the same '2' is reused in ctr_pattern and inc_2blocks.
-+.Lctr_pattern:
-+	.octa	0
-+	.octa	1
-+.Linc_2blocks:
-+	.octa	2
-+	.octa	3
-+.Linc_4blocks:
-+	.octa	4
-+
-+// Number of powers of the hash key stored in the key struct.  The powers are
-+// stored from highest (H^NUM_H_POWERS) to lowest (H^1).
-+#define NUM_H_POWERS		16
-+
-+// Offset to AES key length (in bytes) in the key struct
-+#define OFFSETOF_AESKEYLEN	480
-+
-+// Offset to start of hash key powers array in the key struct
-+#define OFFSETOF_H_POWERS	512
-+
-+// Offset to end of hash key powers array in the key struct.
-+//
-+// This is immediately followed by three zeroized padding blocks, which are
-+// included so that partial vectors can be handled more easily.  E.g. if VL=64
-+// and two blocks remain, we load the 4 values [H^2, H^1, 0, 0].  The most
-+// padding blocks needed is 3, which occurs if [H^1, 0, 0, 0] is loaded.
-+#define OFFSETOFEND_H_POWERS	(OFFSETOF_H_POWERS + (NUM_H_POWERS * 16))
-+
-+.text
-+
-+// Set the vector length in bytes.  This sets the VL variable and defines
-+// register aliases V0-V31 that map to the ymm or zmm registers.
-+.macro	_set_veclen	vl
-+	.set	VL,	\vl
-+.irp i, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
-+	16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
-+.if VL == 32
-+	.set	V\i,	%ymm\i
-+.elseif VL == 64
-+	.set	V\i,	%zmm\i
-+.else
-+	.error "Unsupported vector length"
-+.endif
-+.endr
-+.endm
-+
-+// The _ghash_mul_step macro does one step of GHASH multiplication of the
-+// 128-bit lanes of \a by the corresponding 128-bit lanes of \b and storing the
-+// reduced products in \dst.  \t0, \t1, and \t2 are temporary registers of the
-+// same size as \a and \b.  To complete all steps, this must invoked with \i=0
-+// through \i=9.  The division into steps allows users of this macro to
-+// optionally interleave the computation with other instructions.  Users of this
-+// macro must preserve the parameter registers across steps.
-+//
-+// The multiplications are done in GHASH's representation of the finite field
-+// GF(2^128).  Elements of GF(2^128) are represented as binary polynomials
-+// (i.e. polynomials whose coefficients are bits) modulo a reducing polynomial
-+// G.  The GCM specification uses G = x^128 + x^7 + x^2 + x + 1.  Addition is
-+// just XOR, while multiplication is more complex and has two parts: (a) do
-+// carryless multiplication of two 128-bit input polynomials to get a 256-bit
-+// intermediate product polynomial, and (b) reduce the intermediate product to
-+// 128 bits by adding multiples of G that cancel out terms in it.  (Adding
-+// multiples of G doesn't change which field element the polynomial represents.)
-+//
-+// Unfortunately, the GCM specification maps bits to/from polynomial
-+// coefficients backwards from the natural order.  In each byte it specifies the
-+// highest bit to be the lowest order polynomial coefficient, *not* the highest!
-+// This makes it nontrivial to work with the GHASH polynomials.  We could
-+// reflect the bits, but x86 doesn't have an instruction that does that.
-+//
-+// Instead, we operate on the values without bit-reflecting them.  This *mostly*
-+// just works, since XOR and carryless multiplication are symmetric with respect
-+// to bit order, but it has some consequences.  First, due to GHASH's byte
-+// order, by skipping bit reflection, *byte* reflection becomes necessary to
-+// give the polynomial terms a consistent order.  E.g., considering an N-bit
-+// value interpreted using the G = x^128 + x^7 + x^2 + x + 1 convention, bits 0
-+// through N-1 of the byte-reflected value represent the coefficients of x^(N-1)
-+// through x^0, whereas bits 0 through N-1 of the non-byte-reflected value
-+// represent x^7...x^0, x^15...x^8, ..., x^(N-1)...x^(N-8) which can't be worked
-+// with.  Fortunately, x86's vpshufb instruction can do byte reflection.
-+//
-+// Second, forgoing the bit reflection causes an extra multiple of x (still
-+// using the G = x^128 + x^7 + x^2 + x + 1 convention) to be introduced by each
-+// multiplication.  This is because an M-bit by N-bit carryless multiplication
-+// really produces a (M+N-1)-bit product, but in practice it's zero-extended to
-+// M+N bits.  In the G = x^128 + x^7 + x^2 + x + 1 convention, which maps bits
-+// to polynomial coefficients backwards, this zero-extension actually changes
-+// the product by introducing an extra factor of x.  Therefore, users of this
-+// macro must ensure that one of the inputs has an extra factor of x^-1, i.e.
-+// the multiplicative inverse of x, to cancel out the extra x.
-+//
-+// Third, the backwards coefficients convention is just confusing to work with,
-+// since it makes "low" and "high" in the polynomial math mean the opposite of
-+// their normal meaning in computer programming.  This can be solved by using an
-+// alternative interpretation: the polynomial coefficients are understood to be
-+// in the natural order, and the multiplication is actually \a * \b * x^-128 mod
-+// x^128 + x^127 + x^126 + x^121 + 1.  This doesn't change the inputs, outputs,
-+// or the implementation at all; it just changes the mathematical interpretation
-+// of what each instruction is doing.  Starting from here, we'll use this
-+// alternative interpretation, as it's easier to understand the code that way.
-+//
-+// Moving onto the implementation, the vpclmulqdq instruction does 64 x 64 =>
-+// 128-bit carryless multiplication, so we break the 128 x 128 multiplication
-+// into parts as follows (the _L and _H suffixes denote low and high 64 bits):
-+//
-+//     LO = a_L * b_L
-+//     MI = (a_L * b_H) + (a_H * b_L)
-+//     HI = a_H * b_H
-+//
-+// The 256-bit product is x^128*HI + x^64*MI + LO.  LO, MI, and HI are 128-bit.
-+// Note that MI "overlaps" with LO and HI.  We don't consolidate MI into LO and
-+// HI right away, since the way the reduction works makes that unnecessary.
-+//
-+// For the reduction, we cancel out the low 128 bits by adding multiples of G =
-+// x^128 + x^127 + x^126 + x^121 + 1.  This is done by two iterations, each of
-+// which cancels out the next lowest 64 bits.  Consider a value x^64*A + B,
-+// where A and B are 128-bit.  Adding B_L*G to that value gives:
-+//
-+//       x^64*A + B + B_L*G
-+//     = x^64*A + x^64*B_H + B_L + B_L*(x^128 + x^127 + x^126 + x^121 + 1)
-+//     = x^64*A + x^64*B_H + B_L + x^128*B_L + x^64*B_L*(x^63 + x^62 + x^57) + B_L
-+//     = x^64*A + x^64*B_H + x^128*B_L + x^64*B_L*(x^63 + x^62 + x^57) + B_L + B_L
-+//     = x^64*(A + B_H + x^64*B_L + B_L*(x^63 + x^62 + x^57))
-+//
-+// So: if we sum A, B with its halves swapped, and the low half of B times x^63
-+// + x^62 + x^57, we get a 128-bit value C where x^64*C is congruent to the
-+// original value x^64*A + B.  I.e., the low 64 bits got canceled out.
-+//
-+// We just need to apply this twice: first to fold LO into MI, and second to
-+// fold the updated MI into HI.
-+//
-+// The needed three-argument XORs are done using the vpternlogd instruction with
-+// immediate 0x96, since this is faster than two vpxord instructions.
-+//
-+// A potential optimization, assuming that b is fixed per-key (if a is fixed
-+// per-key it would work the other way around), is to use one iteration of the
-+// reduction described above to precompute a value c such that x^64*c = b mod G,
-+// and then multiply a_L by c (and implicitly by x^64) instead of by b:
-+//
-+//     MI = (a_L * c_L) + (a_H * b_L)
-+//     HI = (a_L * c_H) + (a_H * b_H)
-+//
-+// This would eliminate the LO part of the intermediate product, which would
-+// eliminate the need to fold LO into MI.  This would save two instructions,
-+// including a vpclmulqdq.  However, we currently don't use this optimization
-+// because it would require twice as many per-key precomputed values.
-+//
-+// Using Karatsuba multiplication instead of "schoolbook" multiplication
-+// similarly would save a vpclmulqdq but does not seem to be worth it.
-+.macro	_ghash_mul_step	i, a, b, dst, gfpoly, t0, t1, t2
-+.if \i == 0
-+	vpclmulqdq	$0x00, \a, \b, \t0	  // LO = a_L * b_L
-+	vpclmulqdq	$0x01, \a, \b, \t1	  // MI_0 = a_L * b_H
-+.elseif \i == 1
-+	vpclmulqdq	$0x10, \a, \b, \t2	  // MI_1 = a_H * b_L
-+.elseif \i == 2
-+	vpxord		\t2, \t1, \t1		  // MI = MI_0 + MI_1
-+.elseif \i == 3
-+	vpclmulqdq	$0x01, \t0, \gfpoly, \t2  // LO_L*(x^63 + x^62 + x^57)
-+.elseif \i == 4
-+	vpshufd		$0x4e, \t0, \t0		  // Swap halves of LO
-+.elseif \i == 5
-+	vpternlogd	$0x96, \t2, \t0, \t1	  // Fold LO into MI
-+.elseif \i == 6
-+	vpclmulqdq	$0x11, \a, \b, \dst	  // HI = a_H * b_H
-+.elseif \i == 7
-+	vpclmulqdq	$0x01, \t1, \gfpoly, \t0  // MI_L*(x^63 + x^62 + x^57)
-+.elseif \i == 8
-+	vpshufd		$0x4e, \t1, \t1		  // Swap halves of MI
-+.elseif \i == 9
-+	vpternlogd	$0x96, \t0, \t1, \dst	  // Fold MI into HI
-+.endif
-+.endm
-+
-+// GHASH-multiply the 128-bit lanes of \a by the 128-bit lanes of \b and store
-+// the reduced products in \dst.  See _ghash_mul_step for full explanation.
-+.macro	_ghash_mul	a, b, dst, gfpoly, t0, t1, t2
-+.irp i, 0,1,2,3,4,5,6,7,8,9
-+	_ghash_mul_step	\i, \a, \b, \dst, \gfpoly, \t0, \t1, \t2
-+.endr
-+.endm
-+
-+// GHASH-multiply the 128-bit lanes of \a by the 128-bit lanes of \b and add the
-+// *unreduced* products to \lo, \mi, and \hi.
-+.macro	_ghash_mul_noreduce	a, b, lo, mi, hi, t0, t1, t2, t3
-+	vpclmulqdq	$0x00, \a, \b, \t0	// a_L * b_L
-+	vpclmulqdq	$0x01, \a, \b, \t1	// a_L * b_H
-+	vpclmulqdq	$0x10, \a, \b, \t2	// a_H * b_L
-+	vpclmulqdq	$0x11, \a, \b, \t3	// a_H * b_H
-+	vpxord		\t0, \lo, \lo
-+	vpternlogd	$0x96, \t2, \t1, \mi
-+	vpxord		\t3, \hi, \hi
-+.endm
-+
-+// Reduce the unreduced products from \lo, \mi, and \hi and store the 128-bit
-+// reduced products in \hi.  See _ghash_mul_step for explanation of reduction.
-+.macro	_ghash_reduce	lo, mi, hi, gfpoly, t0
-+	vpclmulqdq	$0x01, \lo, \gfpoly, \t0
-+	vpshufd		$0x4e, \lo, \lo
-+	vpternlogd	$0x96, \t0, \lo, \mi
-+	vpclmulqdq	$0x01, \mi, \gfpoly, \t0
-+	vpshufd		$0x4e, \mi, \mi
-+	vpternlogd	$0x96, \t0, \mi, \hi
-+.endm
-+
-+// void aes_gcm_precompute_##suffix(struct aes_gcm_key_avx10 *key);
-+//
-+// Given the expanded AES key |key->aes_key|, this function derives the GHASH
-+// subkey and initializes |key->ghash_key_powers| with powers of it.
-+//
-+// The number of key powers initialized is NUM_H_POWERS, and they are stored in
-+// the order H^NUM_H_POWERS to H^1.  The zeroized padding blocks after the key
-+// powers themselves are also initialized.
-+//
-+// This macro supports both VL=32 and VL=64.  _set_veclen must have been invoked
-+// with the desired length.  In the VL=32 case, the function computes twice as
-+// many key powers than are actually used by the VL=32 GCM update functions.
-+// This is done to keep the key format the same regardless of vector length.
-+.macro	_aes_gcm_precompute
-+
-+	// Function arguments
-+	.set	KEY,		%rdi
-+
-+	// Additional local variables.  V0-V2 and %rax are used as temporaries.
-+	.set	POWERS_PTR,	%rsi
-+	.set	RNDKEYLAST_PTR,	%rdx
-+	.set	H_CUR,		V3
-+	.set	H_CUR_YMM,	%ymm3
-+	.set	H_CUR_XMM,	%xmm3
-+	.set	H_INC,		V4
-+	.set	H_INC_YMM,	%ymm4
-+	.set	H_INC_XMM,	%xmm4
-+	.set	GFPOLY,		V5
-+	.set	GFPOLY_YMM,	%ymm5
-+	.set	GFPOLY_XMM,	%xmm5
-+
-+	// Get pointer to lowest set of key powers (located at end of array).
-+	lea		OFFSETOFEND_H_POWERS-VL(KEY), POWERS_PTR
-+
-+	// Encrypt an all-zeroes block to get the raw hash subkey.
-+	movl		OFFSETOF_AESKEYLEN(KEY), %eax
-+	lea		6*16(KEY,%rax,4), RNDKEYLAST_PTR
-+	vmovdqu		(KEY), %xmm0  // Zero-th round key XOR all-zeroes block
-+	add		$16, KEY
-+1:
-+	vaesenc		(KEY), %xmm0, %xmm0
-+	add		$16, KEY
-+	cmp		KEY, RNDKEYLAST_PTR
-+	jne		1b
-+	vaesenclast	(RNDKEYLAST_PTR), %xmm0, %xmm0
-+
-+	// Reflect the bytes of the raw hash subkey.
-+	vpshufb		.Lbswap_mask(%rip), %xmm0, H_CUR_XMM
-+
-+	// Zeroize the padding blocks.
-+	vpxor		%xmm0, %xmm0, %xmm0
-+	vmovdqu		%ymm0, VL(POWERS_PTR)
-+	vmovdqu		%xmm0, VL+2*16(POWERS_PTR)
-+
-+	// Finish preprocessing the first key power, H^1.  Since this GHASH
-+	// implementation operates directly on values with the backwards bit
-+	// order specified by the GCM standard, it's necessary to preprocess the
-+	// raw key as follows.  First, reflect its bytes.  Second, multiply it
-+	// by x^-1 mod x^128 + x^7 + x^2 + x + 1 (if using the backwards
-+	// interpretation of polynomial coefficients), which can also be
-+	// interpreted as multiplication by x mod x^128 + x^127 + x^126 + x^121
-+	// + 1 using the alternative, natural interpretation of polynomial
-+	// coefficients.  For details, see the comment above _ghash_mul_step.
-+	//
-+	// Either way, for the multiplication the concrete operation performed
-+	// is a left shift of the 128-bit value by 1 bit, then an XOR with (0xc2
-+	// << 120) | 1 if a 1 bit was carried out.  However, there's no 128-bit
-+	// wide shift instruction, so instead double each of the two 64-bit
-+	// halves and incorporate the internal carry bit into the value XOR'd.
-+	vpshufd		$0xd3, H_CUR_XMM, %xmm0
-+	vpsrad		$31, %xmm0, %xmm0
-+	vpaddq		H_CUR_XMM, H_CUR_XMM, H_CUR_XMM
-+	vpand		.Lgfpoly_and_internal_carrybit(%rip), %xmm0, %xmm0
-+	vpxor		%xmm0, H_CUR_XMM, H_CUR_XMM
-+
-+	// Load the gfpoly constant.
-+	vbroadcasti32x4	.Lgfpoly(%rip), GFPOLY
-+
-+	// Square H^1 to get H^2.
-+	//
-+	// Note that as with H^1, all higher key powers also need an extra
-+	// factor of x^-1 (or x using the natural interpretation).  Nothing
-+	// special needs to be done to make this happen, though: H^1 * H^1 would
-+	// end up with two factors of x^-1, but the multiplication consumes one.
-+	// So the product H^2 ends up with the desired one factor of x^-1.
-+	_ghash_mul	H_CUR_XMM, H_CUR_XMM, H_INC_XMM, GFPOLY_XMM, \
-+			%xmm0, %xmm1, %xmm2
-+
-+	// Create H_CUR_YMM = [H^2, H^1] and H_INC_YMM = [H^2, H^2].
-+	vinserti128	$1, H_CUR_XMM, H_INC_YMM, H_CUR_YMM
-+	vinserti128	$1, H_INC_XMM, H_INC_YMM, H_INC_YMM
-+
-+.if VL == 64
-+	// Create H_CUR = [H^4, H^3, H^2, H^1] and H_INC = [H^4, H^4, H^4, H^4].
-+	_ghash_mul	H_INC_YMM, H_CUR_YMM, H_INC_YMM, GFPOLY_YMM, \
-+			%ymm0, %ymm1, %ymm2
-+	vinserti64x4	$1, H_CUR_YMM, H_INC, H_CUR
-+	vshufi64x2	$0, H_INC, H_INC, H_INC
-+.endif
-+
-+	// Store the lowest set of key powers.
-+	vmovdqu8	H_CUR, (POWERS_PTR)
-+
-+	// Compute and store the remaining key powers.  With VL=32, repeatedly
-+	// multiply [H^(i+1), H^i] by [H^2, H^2] to get [H^(i+3), H^(i+2)].
-+	// With VL=64, repeatedly multiply [H^(i+3), H^(i+2), H^(i+1), H^i] by
-+	// [H^4, H^4, H^4, H^4] to get [H^(i+7), H^(i+6), H^(i+5), H^(i+4)].
-+	mov		$(NUM_H_POWERS*16/VL) - 1, %eax
-+.Lprecompute_next\@:
-+	sub		$VL, POWERS_PTR
-+	_ghash_mul	H_INC, H_CUR, H_CUR, GFPOLY, V0, V1, V2
-+	vmovdqu8	H_CUR, (POWERS_PTR)
-+	dec		%eax
-+	jnz		.Lprecompute_next\@
-+
-+	vzeroupper	// This is needed after using ymm or zmm registers.
-+	RET
-+.endm
-+
-+// XOR together the 128-bit lanes of \src (whose low lane is \src_xmm) and store
-+// the result in \dst_xmm.  This implicitly zeroizes the other lanes of dst.
-+.macro	_horizontal_xor	src, src_xmm, dst_xmm, t0_xmm, t1_xmm, t2_xmm
-+	vextracti32x4	$1, \src, \t0_xmm
-+.if VL == 32
-+	vpxord		\t0_xmm, \src_xmm, \dst_xmm
-+.elseif VL == 64
-+	vextracti32x4	$2, \src, \t1_xmm
-+	vextracti32x4	$3, \src, \t2_xmm
-+	vpxord		\t0_xmm, \src_xmm, \dst_xmm
-+	vpternlogd	$0x96, \t1_xmm, \t2_xmm, \dst_xmm
-+.else
-+	.error "Unsupported vector length"
-+.endif
-+.endm
-+
-+// Do one step of the GHASH update of the data blocks given in the vector
-+// registers GHASHDATA[0-3].  \i specifies the step to do, 0 through 9.  The
-+// division into steps allows users of this macro to optionally interleave the
-+// computation with other instructions.  This macro uses the vector register
-+// GHASH_ACC as input/output; GHASHDATA[0-3] as inputs that are clobbered;
-+// H_POW[4-1], GFPOLY, and BSWAP_MASK as inputs that aren't clobbered; and
-+// GHASHTMP[0-2] as temporaries.  This macro handles the byte-reflection of the
-+// data blocks.  The parameter registers must be preserved across steps.
-+//
-+// The GHASH update does: GHASH_ACC = H_POW4*(GHASHDATA0 + GHASH_ACC) +
-+// H_POW3*GHASHDATA1 + H_POW2*GHASHDATA2 + H_POW1*GHASHDATA3, where the
-+// operations are vectorized operations on vectors of 16-byte blocks.  E.g.,
-+// with VL=32 there are 2 blocks per vector and the vectorized terms correspond
-+// to the following non-vectorized terms:
-+//
-+//	H_POW4*(GHASHDATA0 + GHASH_ACC) => H^8*(blk0 + GHASH_ACC_XMM) and H^7*(blk1 + 0)
-+//	H_POW3*GHASHDATA1 => H^6*blk2 and H^5*blk3
-+//	H_POW2*GHASHDATA2 => H^4*blk4 and H^3*blk5
-+//	H_POW1*GHASHDATA3 => H^2*blk6 and H^1*blk7
-+//
-+// With VL=64, we use 4 blocks/vector, H^16 through H^1, and blk0 through blk15.
-+//
-+// More concretely, this code does:
-+//   - Do vectorized "schoolbook" multiplications to compute the intermediate
-+//     256-bit product of each block and its corresponding hash key power.
-+//     There are 4*VL/16 of these intermediate products.
-+//   - Sum (XOR) the intermediate 256-bit products across vectors.  This leaves
-+//     VL/16 256-bit intermediate values.
-+//   - Do a vectorized reduction of these 256-bit intermediate values to
-+//     128-bits each.  This leaves VL/16 128-bit intermediate values.
-+//   - Sum (XOR) these values and store the 128-bit result in GHASH_ACC_XMM.
-+//
-+// See _ghash_mul_step for the full explanation of the operations performed for
-+// each individual finite field multiplication and reduction.
-+.macro	_ghash_step_4x	i
-+.if \i == 0
-+	vpshufb		BSWAP_MASK, GHASHDATA0, GHASHDATA0
-+	vpxord		GHASH_ACC, GHASHDATA0, GHASHDATA0
-+	vpshufb		BSWAP_MASK, GHASHDATA1, GHASHDATA1
-+	vpshufb		BSWAP_MASK, GHASHDATA2, GHASHDATA2
-+.elseif \i == 1
-+	vpshufb		BSWAP_MASK, GHASHDATA3, GHASHDATA3
-+	vpclmulqdq	$0x00, H_POW4, GHASHDATA0, GHASH_ACC	// LO_0
-+	vpclmulqdq	$0x00, H_POW3, GHASHDATA1, GHASHTMP0	// LO_1
-+	vpclmulqdq	$0x00, H_POW2, GHASHDATA2, GHASHTMP1	// LO_2
-+.elseif \i == 2
-+	vpxord		GHASHTMP0, GHASH_ACC, GHASH_ACC		// sum(LO_{1,0})
-+	vpclmulqdq	$0x00, H_POW1, GHASHDATA3, GHASHTMP2	// LO_3
-+	vpternlogd	$0x96, GHASHTMP2, GHASHTMP1, GHASH_ACC	// LO = sum(LO_{3,2,1,0})
-+	vpclmulqdq	$0x01, H_POW4, GHASHDATA0, GHASHTMP0	// MI_0
-+.elseif \i == 3
-+	vpclmulqdq	$0x01, H_POW3, GHASHDATA1, GHASHTMP1	// MI_1
-+	vpclmulqdq	$0x01, H_POW2, GHASHDATA2, GHASHTMP2	// MI_2
-+	vpternlogd	$0x96, GHASHTMP2, GHASHTMP1, GHASHTMP0	// sum(MI_{2,1,0})
-+	vpclmulqdq	$0x01, H_POW1, GHASHDATA3, GHASHTMP1	// MI_3
-+.elseif \i == 4
-+	vpclmulqdq	$0x10, H_POW4, GHASHDATA0, GHASHTMP2	// MI_4
-+	vpternlogd	$0x96, GHASHTMP2, GHASHTMP1, GHASHTMP0	// sum(MI_{4,3,2,1,0})
-+	vpclmulqdq	$0x10, H_POW3, GHASHDATA1, GHASHTMP1	// MI_5
-+	vpclmulqdq	$0x10, H_POW2, GHASHDATA2, GHASHTMP2	// MI_6
-+.elseif \i == 5
-+	vpternlogd	$0x96, GHASHTMP2, GHASHTMP1, GHASHTMP0	// sum(MI_{6,5,4,3,2,1,0})
-+	vpclmulqdq	$0x01, GHASH_ACC, GFPOLY, GHASHTMP2	// LO_L*(x^63 + x^62 + x^57)
-+	vpclmulqdq	$0x10, H_POW1, GHASHDATA3, GHASHTMP1	// MI_7
-+	vpxord		GHASHTMP1, GHASHTMP0, GHASHTMP0		// MI = sum(MI_{7,6,5,4,3,2,1,0})
-+.elseif \i == 6
-+	vpshufd		$0x4e, GHASH_ACC, GHASH_ACC		// Swap halves of LO
-+	vpclmulqdq	$0x11, H_POW4, GHASHDATA0, GHASHDATA0	// HI_0
-+	vpclmulqdq	$0x11, H_POW3, GHASHDATA1, GHASHDATA1	// HI_1
-+	vpclmulqdq	$0x11, H_POW2, GHASHDATA2, GHASHDATA2	// HI_2
-+.elseif \i == 7
-+	vpternlogd	$0x96, GHASHTMP2, GHASH_ACC, GHASHTMP0	// Fold LO into MI
-+	vpclmulqdq	$0x11, H_POW1, GHASHDATA3, GHASHDATA3	// HI_3
-+	vpternlogd	$0x96, GHASHDATA2, GHASHDATA1, GHASHDATA0 // sum(HI_{2,1,0})
-+	vpclmulqdq	$0x01, GHASHTMP0, GFPOLY, GHASHTMP1	// MI_L*(x^63 + x^62 + x^57)
-+.elseif \i == 8
-+	vpxord		GHASHDATA3, GHASHDATA0, GHASH_ACC	// HI = sum(HI_{3,2,1,0})
-+	vpshufd		$0x4e, GHASHTMP0, GHASHTMP0		// Swap halves of MI
-+	vpternlogd	$0x96, GHASHTMP1, GHASHTMP0, GHASH_ACC	// Fold MI into HI
-+.elseif \i == 9
-+	_horizontal_xor	GHASH_ACC, GHASH_ACC_XMM, GHASH_ACC_XMM, \
-+			GHASHDATA0_XMM, GHASHDATA1_XMM, GHASHDATA2_XMM
-+.endif
-+.endm
-+
-+// Do one non-last round of AES encryption on the counter blocks in V0-V3 using
-+// the round key that has been broadcast to all 128-bit lanes of \round_key.
-+.macro	_vaesenc_4x	round_key
-+	vaesenc		\round_key, V0, V0
-+	vaesenc		\round_key, V1, V1
-+	vaesenc		\round_key, V2, V2
-+	vaesenc		\round_key, V3, V3
-+.endm
-+
-+// Start the AES encryption of four vectors of counter blocks.
-+.macro	_ctr_begin_4x
-+
-+	// Increment LE_CTR four times to generate four vectors of little-endian
-+	// counter blocks, swap each to big-endian, and store them in V0-V3.
-+	vpshufb		BSWAP_MASK, LE_CTR, V0
-+	vpaddd		LE_CTR_INC, LE_CTR, LE_CTR
-+	vpshufb		BSWAP_MASK, LE_CTR, V1
-+	vpaddd		LE_CTR_INC, LE_CTR, LE_CTR
-+	vpshufb		BSWAP_MASK, LE_CTR, V2
-+	vpaddd		LE_CTR_INC, LE_CTR, LE_CTR
-+	vpshufb		BSWAP_MASK, LE_CTR, V3
-+	vpaddd		LE_CTR_INC, LE_CTR, LE_CTR
-+
-+	// AES "round zero": XOR in the zero-th round key.
-+	vpxord		RNDKEY0, V0, V0
-+	vpxord		RNDKEY0, V1, V1
-+	vpxord		RNDKEY0, V2, V2
-+	vpxord		RNDKEY0, V3, V3
-+.endm
-+
-+// void aes_gcm_{enc,dec}_update_##suffix(const struct aes_gcm_key_avx10 *key,
-+//					  const u32 le_ctr[4], u8 ghash_acc[16],
-+//					  const u8 *src, u8 *dst, int datalen);
-+//
-+// This macro generates a GCM encryption or decryption update function with the
-+// above prototype (with \enc selecting which one).  This macro supports both
-+// VL=32 and VL=64.  _set_veclen must have been invoked with the desired length.
-+//
-+// This function computes the next portion of the CTR keystream, XOR's it with
-+// |datalen| bytes from |src|, and writes the resulting encrypted or decrypted
-+// data to |dst|.  It also updates the GHASH accumulator |ghash_acc| using the
-+// next |datalen| ciphertext bytes.
-+//
-+// |datalen| must be a multiple of 16, except on the last call where it can be
-+// any length.  The caller must do any buffering needed to ensure this.  Both
-+// in-place and out-of-place en/decryption are supported.
-+//
-+// |le_ctr| must give the current counter in little-endian format.  For a new
-+// message, the low word of the counter must be 2.  This function loads the
-+// counter from |le_ctr| and increments the loaded counter as needed, but it
-+// does *not* store the updated counter back to |le_ctr|.  The caller must
-+// update |le_ctr| if any more data segments follow.  Internally, only the low
-+// 32-bit word of the counter is incremented, following the GCM standard.
-+.macro	_aes_gcm_update	enc
-+
-+	// Function arguments
-+	.set	KEY,		%rdi
-+	.set	LE_CTR_PTR,	%rsi
-+	.set	GHASH_ACC_PTR,	%rdx
-+	.set	SRC,		%rcx
-+	.set	DST,		%r8
-+	.set	DATALEN,	%r9d
-+	.set	DATALEN64,	%r9	// Zero-extend DATALEN before using!
-+
-+	// Additional local variables
-+
-+	// %rax and %k1 are used as temporary registers.  LE_CTR_PTR is also
-+	// available as a temporary register after the counter is loaded.
-+
-+	// AES key length in bytes
-+	.set	AESKEYLEN,	%r10d
-+	.set	AESKEYLEN64,	%r10
-+
-+	// Pointer to the last AES round key for the chosen AES variant
-+	.set	RNDKEYLAST_PTR,	%r11
-+
-+	// In the main loop, V0-V3 are used as AES input and output.  Elsewhere
-+	// they are used as temporary registers.
-+
-+	// GHASHDATA[0-3] hold the ciphertext blocks and GHASH input data.
-+	.set	GHASHDATA0,	V4
-+	.set	GHASHDATA0_XMM,	%xmm4
-+	.set	GHASHDATA1,	V5
-+	.set	GHASHDATA1_XMM,	%xmm5
-+	.set	GHASHDATA2,	V6
-+	.set	GHASHDATA2_XMM,	%xmm6
-+	.set	GHASHDATA3,	V7
-+
-+	// BSWAP_MASK is the shuffle mask for byte-reflecting 128-bit values
-+	// using vpshufb, copied to all 128-bit lanes.
-+	.set	BSWAP_MASK,	V8
-+
-+	// RNDKEY temporarily holds the next AES round key.
-+	.set	RNDKEY,		V9
-+
-+	// GHASH_ACC is the accumulator variable for GHASH.  When fully reduced,
-+	// only the lowest 128-bit lane can be nonzero.  When not fully reduced,
-+	// more than one lane may be used, and they need to be XOR'd together.
-+	.set	GHASH_ACC,	V10
-+	.set	GHASH_ACC_XMM,	%xmm10
-+
-+	// LE_CTR_INC is the vector of 32-bit words that need to be added to a
-+	// vector of little-endian counter blocks to advance it forwards.
-+	.set	LE_CTR_INC,	V11
-+
-+	// LE_CTR contains the next set of little-endian counter blocks.
-+	.set	LE_CTR,		V12
-+
-+	// RNDKEY0, RNDKEYLAST, and RNDKEY_M[9-5] contain cached AES round keys,
-+	// copied to all 128-bit lanes.  RNDKEY0 is the zero-th round key,
-+	// RNDKEYLAST the last, and RNDKEY_M\i the one \i-th from the last.
-+	.set	RNDKEY0,	V13
-+	.set	RNDKEYLAST,	V14
-+	.set	RNDKEY_M9,	V15
-+	.set	RNDKEY_M8,	V16
-+	.set	RNDKEY_M7,	V17
-+	.set	RNDKEY_M6,	V18
-+	.set	RNDKEY_M5,	V19
-+
-+	// RNDKEYLAST[0-3] temporarily store the last AES round key XOR'd with
-+	// the corresponding block of source data.  This is useful because
-+	// vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a), and key ^ b can
-+	// be computed in parallel with the AES rounds.
-+	.set	RNDKEYLAST0,	V20
-+	.set	RNDKEYLAST1,	V21
-+	.set	RNDKEYLAST2,	V22
-+	.set	RNDKEYLAST3,	V23
-+
-+	// GHASHTMP[0-2] are temporary variables used by _ghash_step_4x.  These
-+	// cannot coincide with anything used for AES encryption, since for
-+	// performance reasons GHASH and AES encryption are interleaved.
-+	.set	GHASHTMP0,	V24
-+	.set	GHASHTMP1,	V25
-+	.set	GHASHTMP2,	V26
-+
-+	// H_POW[4-1] contain the powers of the hash key H^(4*VL/16)...H^1.  The
-+	// descending numbering reflects the order of the key powers.
-+	.set	H_POW4,		V27
-+	.set	H_POW3,		V28
-+	.set	H_POW2,		V29
-+	.set	H_POW1,		V30
-+
-+	// GFPOLY contains the .Lgfpoly constant, copied to all 128-bit lanes.
-+	.set	GFPOLY,		V31
-+
-+	// Load some constants.
-+	vbroadcasti32x4	.Lbswap_mask(%rip), BSWAP_MASK
-+	vbroadcasti32x4	.Lgfpoly(%rip), GFPOLY
-+
-+	// Load the GHASH accumulator and the starting counter.
-+	vmovdqu		(GHASH_ACC_PTR), GHASH_ACC_XMM
-+	vbroadcasti32x4	(LE_CTR_PTR), LE_CTR
-+
-+	// Load the AES key length in bytes.
-+	movl		OFFSETOF_AESKEYLEN(KEY), AESKEYLEN
-+
-+	// Make RNDKEYLAST_PTR point to the last AES round key.  This is the
-+	// round key with index 10, 12, or 14 for AES-128, AES-192, or AES-256
-+	// respectively.  Then load the zero-th and last round keys.
-+	lea		6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR
-+	vbroadcasti32x4	(KEY), RNDKEY0
-+	vbroadcasti32x4	(RNDKEYLAST_PTR), RNDKEYLAST
-+
-+	// Finish initializing LE_CTR by adding [0, 1, ...] to its low words.
-+	vpaddd		.Lctr_pattern(%rip), LE_CTR, LE_CTR
-+
-+	// Initialize LE_CTR_INC to contain VL/16 in all 128-bit lanes.
-+.if VL == 32
-+	vbroadcasti32x4	.Linc_2blocks(%rip), LE_CTR_INC
-+.elseif VL == 64
-+	vbroadcasti32x4	.Linc_4blocks(%rip), LE_CTR_INC
-+.else
-+	.error "Unsupported vector length"
-+.endif
-+
-+	// If there are at least 4*VL bytes of data, then continue into the loop
-+	// that processes 4*VL bytes of data at a time.  Otherwise skip it.
-+	//
-+	// Pre-subtracting 4*VL from DATALEN saves an instruction from the main
-+	// loop and also ensures that at least one write always occurs to
-+	// DATALEN, zero-extending it and allowing DATALEN64 to be used later.
-+	sub		$4*VL, DATALEN
-+	jl		.Lcrypt_loop_4x_done\@
-+
-+	// Load powers of the hash key.
-+	vmovdqu8	OFFSETOFEND_H_POWERS-4*VL(KEY), H_POW4
-+	vmovdqu8	OFFSETOFEND_H_POWERS-3*VL(KEY), H_POW3
-+	vmovdqu8	OFFSETOFEND_H_POWERS-2*VL(KEY), H_POW2
-+	vmovdqu8	OFFSETOFEND_H_POWERS-1*VL(KEY), H_POW1
-+
-+	// Main loop: en/decrypt and hash 4 vectors at a time.
-+	//
-+	// When possible, interleave the AES encryption of the counter blocks
-+	// with the GHASH update of the ciphertext blocks.  This improves
-+	// performance on many CPUs because the execution ports used by the VAES
-+	// instructions often differ from those used by vpclmulqdq and other
-+	// instructions used in GHASH.  For example, many Intel CPUs dispatch
-+	// vaesenc to ports 0 and 1 and vpclmulqdq to port 5.
-+	//
-+	// The interleaving is easiest to do during decryption, since during
-+	// decryption the ciphertext blocks are immediately available.  For
-+	// encryption, instead encrypt the first set of blocks, then hash those
-+	// blocks while encrypting the next set of blocks, repeat that as
-+	// needed, and finally hash the last set of blocks.
-+
-+.if \enc
-+	// Encrypt the first 4 vectors of plaintext blocks.  Leave the resulting
-+	// ciphertext in GHASHDATA[0-3] for GHASH.
-+	_ctr_begin_4x
-+	lea		16(KEY), %rax
-+1:
-+	vbroadcasti32x4	(%rax), RNDKEY
-+	_vaesenc_4x	RNDKEY
-+	add		$16, %rax
-+	cmp		%rax, RNDKEYLAST_PTR
-+	jne		1b
-+	vpxord		0*VL(SRC), RNDKEYLAST, RNDKEYLAST0
-+	vpxord		1*VL(SRC), RNDKEYLAST, RNDKEYLAST1
-+	vpxord		2*VL(SRC), RNDKEYLAST, RNDKEYLAST2
-+	vpxord		3*VL(SRC), RNDKEYLAST, RNDKEYLAST3
-+	vaesenclast	RNDKEYLAST0, V0, GHASHDATA0
-+	vaesenclast	RNDKEYLAST1, V1, GHASHDATA1
-+	vaesenclast	RNDKEYLAST2, V2, GHASHDATA2
-+	vaesenclast	RNDKEYLAST3, V3, GHASHDATA3
-+	vmovdqu8	GHASHDATA0, 0*VL(DST)
-+	vmovdqu8	GHASHDATA1, 1*VL(DST)
-+	vmovdqu8	GHASHDATA2, 2*VL(DST)
-+	vmovdqu8	GHASHDATA3, 3*VL(DST)
-+	add		$4*VL, SRC
-+	add		$4*VL, DST
-+	sub		$4*VL, DATALEN
-+	jl		.Lghash_last_ciphertext_4x\@
-+.endif
-+
-+	// Cache as many additional AES round keys as possible.
-+.irp i, 9,8,7,6,5
-+	vbroadcasti32x4	-\i*16(RNDKEYLAST_PTR), RNDKEY_M\i
-+.endr
-+
-+.Lcrypt_loop_4x\@:
-+
-+	// If decrypting, load more ciphertext blocks into GHASHDATA[0-3].  If
-+	// encrypting, GHASHDATA[0-3] already contain the previous ciphertext.
-+.if !\enc
-+	vmovdqu8	0*VL(SRC), GHASHDATA0
-+	vmovdqu8	1*VL(SRC), GHASHDATA1
-+	vmovdqu8	2*VL(SRC), GHASHDATA2
-+	vmovdqu8	3*VL(SRC), GHASHDATA3
-+.endif
-+
-+	// Start the AES encryption of the counter blocks.
-+	_ctr_begin_4x
-+	cmp		$24, AESKEYLEN
-+	jl		128f	// AES-128?
-+	je		192f	// AES-192?
-+	// AES-256
-+	vbroadcasti32x4	-13*16(RNDKEYLAST_PTR), RNDKEY
-+	_vaesenc_4x	RNDKEY
-+	vbroadcasti32x4	-12*16(RNDKEYLAST_PTR), RNDKEY
-+	_vaesenc_4x	RNDKEY
-+192:
-+	vbroadcasti32x4	-11*16(RNDKEYLAST_PTR), RNDKEY
-+	_vaesenc_4x	RNDKEY
-+	vbroadcasti32x4	-10*16(RNDKEYLAST_PTR), RNDKEY
-+	_vaesenc_4x	RNDKEY
-+128:
-+
-+	// XOR the source data with the last round key, saving the result in
-+	// RNDKEYLAST[0-3].  This reduces latency by taking advantage of the
-+	// property vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a).
-+.if \enc
-+	vpxord		0*VL(SRC), RNDKEYLAST, RNDKEYLAST0
-+	vpxord		1*VL(SRC), RNDKEYLAST, RNDKEYLAST1
-+	vpxord		2*VL(SRC), RNDKEYLAST, RNDKEYLAST2
-+	vpxord		3*VL(SRC), RNDKEYLAST, RNDKEYLAST3
-+.else
-+	vpxord		GHASHDATA0, RNDKEYLAST, RNDKEYLAST0
-+	vpxord		GHASHDATA1, RNDKEYLAST, RNDKEYLAST1
-+	vpxord		GHASHDATA2, RNDKEYLAST, RNDKEYLAST2
-+	vpxord		GHASHDATA3, RNDKEYLAST, RNDKEYLAST3
-+.endif
-+
-+	// Finish the AES encryption of the counter blocks in V0-V3, interleaved
-+	// with the GHASH update of the ciphertext blocks in GHASHDATA[0-3].
-+.irp i, 9,8,7,6,5
-+	_vaesenc_4x	RNDKEY_M\i
-+	_ghash_step_4x	(9 - \i)
-+.endr
-+.irp i, 4,3,2,1
-+	vbroadcasti32x4	-\i*16(RNDKEYLAST_PTR), RNDKEY
-+	_vaesenc_4x	RNDKEY
-+	_ghash_step_4x	(9 - \i)
-+.endr
-+	_ghash_step_4x	9
-+
-+	// Do the last AES round.  This handles the XOR with the source data
-+	// too, as per the optimization described above.
-+	vaesenclast	RNDKEYLAST0, V0, GHASHDATA0
-+	vaesenclast	RNDKEYLAST1, V1, GHASHDATA1
-+	vaesenclast	RNDKEYLAST2, V2, GHASHDATA2
-+	vaesenclast	RNDKEYLAST3, V3, GHASHDATA3
-+
-+	// Store the en/decrypted data to DST.
-+	vmovdqu8	GHASHDATA0, 0*VL(DST)
-+	vmovdqu8	GHASHDATA1, 1*VL(DST)
-+	vmovdqu8	GHASHDATA2, 2*VL(DST)
-+	vmovdqu8	GHASHDATA3, 3*VL(DST)
-+
-+	add		$4*VL, SRC
-+	add		$4*VL, DST
-+	sub		$4*VL, DATALEN
-+	jge		.Lcrypt_loop_4x\@
-+
-+.if \enc
-+.Lghash_last_ciphertext_4x\@:
-+	// Update GHASH with the last set of ciphertext blocks.
-+.irp i, 0,1,2,3,4,5,6,7,8,9
-+	_ghash_step_4x	\i
-+.endr
-+.endif
-+
-+.Lcrypt_loop_4x_done\@:
-+
-+	// Undo the extra subtraction by 4*VL and check whether data remains.
-+	add		$4*VL, DATALEN
-+	jz		.Ldone\@
-+
-+	// The data length isn't a multiple of 4*VL.  Process the remaining data
-+	// of length 1 <= DATALEN < 4*VL, up to one vector (VL bytes) at a time.
-+	// Going one vector at a time may seem inefficient compared to having
-+	// separate code paths for each possible number of vectors remaining.
-+	// However, using a loop keeps the code size down, and it performs
-+	// surprising well; modern CPUs will start executing the next iteration
-+	// before the previous one finishes and also predict the number of loop
-+	// iterations.  For a similar reason, we roll up the AES rounds.
-+	//
-+	// On the last iteration, the remaining length may be less than VL.
-+	// Handle this using masking.
-+	//
-+	// Since there are enough key powers available for all remaining data,
-+	// there is no need to do a GHASH reduction after each iteration.
-+	// Instead, multiply each remaining block by its own key power, and only
-+	// do a GHASH reduction at the very end.
-+
-+	// Make POWERS_PTR point to the key powers [H^N, H^(N-1), ...] where N
-+	// is the number of blocks that remain.
-+	.set		POWERS_PTR, LE_CTR_PTR	// LE_CTR_PTR is free to be reused.
-+	mov		DATALEN, %eax
-+	neg		%rax
-+	and		$~15, %rax  // -round_up(DATALEN, 16)
-+	lea		OFFSETOFEND_H_POWERS(KEY,%rax), POWERS_PTR
-+
-+	// Start collecting the unreduced GHASH intermediate value LO, MI, HI.
-+	.set		LO, GHASHDATA0
-+	.set		LO_XMM, GHASHDATA0_XMM
-+	.set		MI, GHASHDATA1
-+	.set		MI_XMM, GHASHDATA1_XMM
-+	.set		HI, GHASHDATA2
-+	.set		HI_XMM, GHASHDATA2_XMM
-+	vpxor		LO_XMM, LO_XMM, LO_XMM
-+	vpxor		MI_XMM, MI_XMM, MI_XMM
-+	vpxor		HI_XMM, HI_XMM, HI_XMM
-+
-+.Lcrypt_loop_1x\@:
-+
-+	// Select the appropriate mask for this iteration: all 1's if
-+	// DATALEN >= VL, otherwise DATALEN 1's.  Do this branchlessly using the
-+	// bzhi instruction from BMI2.  (This relies on DATALEN <= 255.)
-+.if VL < 64
-+	mov		$-1, %eax
-+	bzhi		DATALEN, %eax, %eax
-+	kmovd		%eax, %k1
-+.else
-+	mov		$-1, %rax
-+	bzhi		DATALEN64, %rax, %rax
-+	kmovq		%rax, %k1
-+.endif
-+
-+	// Encrypt a vector of counter blocks.  This does not need to be masked.
-+	vpshufb		BSWAP_MASK, LE_CTR, V0
-+	vpaddd		LE_CTR_INC, LE_CTR, LE_CTR
-+	vpxord		RNDKEY0, V0, V0
-+	lea		16(KEY), %rax
-+1:
-+	vbroadcasti32x4	(%rax), RNDKEY
-+	vaesenc		RNDKEY, V0, V0
-+	add		$16, %rax
-+	cmp		%rax, RNDKEYLAST_PTR
-+	jne		1b
-+	vaesenclast	RNDKEYLAST, V0, V0
-+
-+	// XOR the data with the appropriate number of keystream bytes.
-+	vmovdqu8	(SRC), V1{%k1}{z}
-+	vpxord		V1, V0, V0
-+	vmovdqu8	V0, (DST){%k1}
-+
-+	// Update GHASH with the ciphertext block(s), without reducing.
-+	//
-+	// In the case of DATALEN < VL, the ciphertext is zero-padded to VL.
-+	// (If decrypting, it's done by the above masked load.  If encrypting,
-+	// it's done by the below masked register-to-register move.)  Note that
-+	// if DATALEN <= VL - 16, there will be additional padding beyond the
-+	// padding of the last block specified by GHASH itself; i.e., there may
-+	// be whole block(s) that get processed by the GHASH multiplication and
-+	// reduction instructions but should not actually be included in the
-+	// GHASH.  However, any such blocks are all-zeroes, and the values that
-+	// they're multiplied with are also all-zeroes.  Therefore they just add
-+	// 0 * 0 = 0 to the final GHASH result, which makes no difference.
-+	vmovdqu8        (POWERS_PTR), H_POW1
-+.if \enc
-+	vmovdqu8	V0, V1{%k1}{z}
-+.endif
-+	vpshufb		BSWAP_MASK, V1, V0
-+	vpxord		GHASH_ACC, V0, V0
-+	_ghash_mul_noreduce	H_POW1, V0, LO, MI, HI, GHASHDATA3, V1, V2, V3
-+	vpxor		GHASH_ACC_XMM, GHASH_ACC_XMM, GHASH_ACC_XMM
-+
-+	add		$VL, POWERS_PTR
-+	add		$VL, SRC
-+	add		$VL, DST
-+	sub		$VL, DATALEN
-+	jg		.Lcrypt_loop_1x\@
-+
-+	// Finally, do the GHASH reduction.
-+	_ghash_reduce	LO, MI, HI, GFPOLY, V0
-+	_horizontal_xor	HI, HI_XMM, GHASH_ACC_XMM, %xmm0, %xmm1, %xmm2
-+
-+.Ldone\@:
-+	// Store the updated GHASH accumulator back to memory.
-+	vmovdqu		GHASH_ACC_XMM, (GHASH_ACC_PTR)
-+
-+	vzeroupper	// This is needed after using ymm or zmm registers.
-+	RET
-+.endm
-+
-+// void aes_gcm_enc_final_vaes_avx10(const struct aes_gcm_key_avx10 *key,
-+//				     const u32 le_ctr[4], u8 ghash_acc[16],
-+//				     u64 total_aadlen, u64 total_datalen);
-+// bool aes_gcm_dec_final_vaes_avx10(const struct aes_gcm_key_avx10 *key,
-+//				     const u32 le_ctr[4],
-+//				     const u8 ghash_acc[16],
-+//				     u64 total_aadlen, u64 total_datalen,
-+//				     const u8 tag[16], int taglen);
-+//
-+// This macro generates one of the above two functions (with \enc selecting
-+// which one).  Both functions finish computing the GCM authentication tag by
-+// updating GHASH with the lengths block and encrypting the GHASH accumulator.
-+// |total_aadlen| and |total_datalen| must be the total length of the additional
-+// authenticated data and the en/decrypted data in bytes, respectively.
-+//
-+// The encryption function then stores the full-length (16-byte) computed
-+// authentication tag to |ghash_acc|.  The decryption function instead loads the
-+// expected authentication tag (the one that was transmitted) from the 16-byte
-+// buffer |tag|, compares the first 4 <= |taglen| <= 16 bytes of it to the
-+// computed tag in constant time, and returns true if and only if they match.
-+.macro	_aes_gcm_final	enc
-+
-+	// Function arguments
-+	.set	KEY,		%rdi
-+	.set	LE_CTR_PTR,	%rsi
-+	.set	GHASH_ACC_PTR,	%rdx
-+	.set	TOTAL_AADLEN,	%rcx
-+	.set	TOTAL_DATALEN,	%r8
-+	.set	TAG,		%r9
-+	.set	TAGLEN,		%r10d	// Originally at 8(%rsp)
-+
-+	// Additional local variables.
-+	// %rax, %xmm0-%xmm3, and %k1 are used as temporary registers.
-+	.set	AESKEYLEN,	%r11d
-+	.set	AESKEYLEN64,	%r11
-+	.set	GFPOLY,		%xmm4
-+	.set	BSWAP_MASK,	%xmm5
-+	.set	LE_CTR,		%xmm6
-+	.set	GHASH_ACC,	%xmm7
-+	.set	H_POW1,		%xmm8
-+
-+	// Load some constants.
-+	vmovdqa		.Lgfpoly(%rip), GFPOLY
-+	vmovdqa		.Lbswap_mask(%rip), BSWAP_MASK
-+
-+	// Load the AES key length in bytes.
-+	movl		OFFSETOF_AESKEYLEN(KEY), AESKEYLEN
-+
-+	// Set up a counter block with 1 in the low 32-bit word.  This is the
-+	// counter that produces the ciphertext needed to encrypt the auth tag.
-+	// GFPOLY has 1 in the low word, so grab the 1 from there using a blend.
-+	vpblendd	$0xe, (LE_CTR_PTR), GFPOLY, LE_CTR
-+
-+	// Build the lengths block and XOR it with the GHASH accumulator.
-+	// Although the lengths block is defined as the AAD length followed by
-+	// the en/decrypted data length, both in big-endian byte order, a byte
-+	// reflection of the full block is needed because of the way we compute
-+	// GHASH (see _ghash_mul_step).  By using little-endian values in the
-+	// opposite order, we avoid having to reflect any bytes here.
-+	vmovq		TOTAL_DATALEN, %xmm0
-+	vpinsrq		$1, TOTAL_AADLEN, %xmm0, %xmm0
-+	vpsllq		$3, %xmm0, %xmm0	// Bytes to bits
-+	vpxor		(GHASH_ACC_PTR), %xmm0, GHASH_ACC
-+
-+	// Load the first hash key power (H^1), which is stored last.
-+	vmovdqu8	OFFSETOFEND_H_POWERS-16(KEY), H_POW1
-+
-+.if !\enc
-+	// Prepare a mask of TAGLEN one bits.
-+	movl		8(%rsp), TAGLEN
-+	mov		$-1, %eax
-+	bzhi		TAGLEN, %eax, %eax
-+	kmovd		%eax, %k1
-+.endif
-+
-+	// Make %rax point to the last AES round key for the chosen AES variant.
-+	lea		6*16(KEY,AESKEYLEN64,4), %rax
-+
-+	// Start the AES encryption of the counter block by swapping the counter
-+	// block to big-endian and XOR-ing it with the zero-th AES round key.
-+	vpshufb		BSWAP_MASK, LE_CTR, %xmm0
-+	vpxor		(KEY), %xmm0, %xmm0
-+
-+	// Complete the AES encryption and multiply GHASH_ACC by H^1.
-+	// Interleave the AES and GHASH instructions to improve performance.
-+	cmp		$24, AESKEYLEN
-+	jl		128f	// AES-128?
-+	je		192f	// AES-192?
-+	// AES-256
-+	vaesenc		-13*16(%rax), %xmm0, %xmm0
-+	vaesenc		-12*16(%rax), %xmm0, %xmm0
-+192:
-+	vaesenc		-11*16(%rax), %xmm0, %xmm0
-+	vaesenc		-10*16(%rax), %xmm0, %xmm0
-+128:
-+.irp i, 0,1,2,3,4,5,6,7,8
-+	_ghash_mul_step	\i, H_POW1, GHASH_ACC, GHASH_ACC, GFPOLY, \
-+			%xmm1, %xmm2, %xmm3
-+	vaesenc		(\i-9)*16(%rax), %xmm0, %xmm0
-+.endr
-+	_ghash_mul_step	9, H_POW1, GHASH_ACC, GHASH_ACC, GFPOLY, \
-+			%xmm1, %xmm2, %xmm3
-+
-+	// Undo the byte reflection of the GHASH accumulator.
-+	vpshufb		BSWAP_MASK, GHASH_ACC, GHASH_ACC
-+
-+	// Do the last AES round and XOR the resulting keystream block with the
-+	// GHASH accumulator to produce the full computed authentication tag.
-+	//
-+	// Reduce latency by taking advantage of the property vaesenclast(key,
-+	// a) ^ b == vaesenclast(key ^ b, a).  I.e., XOR GHASH_ACC into the last
-+	// round key, instead of XOR'ing the final AES output with GHASH_ACC.
-+	//
-+	// enc_final then returns the computed auth tag, while dec_final
-+	// compares it with the transmitted one and returns a bool.  To compare
-+	// the tags, dec_final XORs them together and uses vptest to check
-+	// whether the result is all-zeroes.  This should be constant-time.
-+	// dec_final applies the vaesenclast optimization to this additional
-+	// value XOR'd too, using vpternlogd to XOR the last round key, GHASH
-+	// accumulator, and transmitted auth tag together in one instruction.
-+.if \enc
-+	vpxor		(%rax), GHASH_ACC, %xmm1
-+	vaesenclast	%xmm1, %xmm0, GHASH_ACC
-+	vmovdqu		GHASH_ACC, (GHASH_ACC_PTR)
-+.else
-+	vmovdqu		(TAG), %xmm1
-+	vpternlogd	$0x96, (%rax), GHASH_ACC, %xmm1
-+	vaesenclast	%xmm1, %xmm0, %xmm0
-+	xor		%eax, %eax
-+	vmovdqu8	%xmm0, %xmm0{%k1}{z}	// Truncate to TAGLEN bytes
-+	vptest		%xmm0, %xmm0
-+	sete		%al
-+.endif
-+	// No need for vzeroupper here, since only used xmm registers were used.
-+	RET
-+.endm
-+
-+_set_veclen 32
-+SYM_FUNC_START(aes_gcm_precompute_vaes_avx10_256)
-+	_aes_gcm_precompute
-+SYM_FUNC_END(aes_gcm_precompute_vaes_avx10_256)
-+SYM_FUNC_START(aes_gcm_enc_update_vaes_avx10_256)
-+	_aes_gcm_update	1
-+SYM_FUNC_END(aes_gcm_enc_update_vaes_avx10_256)
-+SYM_FUNC_START(aes_gcm_dec_update_vaes_avx10_256)
-+	_aes_gcm_update	0
-+SYM_FUNC_END(aes_gcm_dec_update_vaes_avx10_256)
-+
-+_set_veclen 64
-+SYM_FUNC_START(aes_gcm_precompute_vaes_avx10_512)
-+	_aes_gcm_precompute
-+SYM_FUNC_END(aes_gcm_precompute_vaes_avx10_512)
-+SYM_FUNC_START(aes_gcm_enc_update_vaes_avx10_512)
-+	_aes_gcm_update	1
-+SYM_FUNC_END(aes_gcm_enc_update_vaes_avx10_512)
-+SYM_FUNC_START(aes_gcm_dec_update_vaes_avx10_512)
-+	_aes_gcm_update	0
-+SYM_FUNC_END(aes_gcm_dec_update_vaes_avx10_512)
-+
-+// void aes_gcm_aad_update_vaes_avx10(const struct aes_gcm_key_avx10 *key,
-+//				      u8 ghash_acc[16],
-+//				      const u8 *aad, int aadlen);
-+//
-+// This function processes the AAD (Additional Authenticated Data) in GCM.
-+// Using the key |key|, it updates the GHASH accumulator |ghash_acc| with the
-+// data given by |aad| and |aadlen|.  |key->ghash_key_powers| must have been
-+// initialized.  On the first call, |ghash_acc| must be all zeroes.  |aadlen|
-+// must be a multiple of 16, except on the last call where it can be any length.
-+// The caller must do any buffering needed to ensure this.
-+//
-+// AES-GCM is almost always used with small amounts of AAD, less than 32 bytes.
-+// Therefore, for AAD processing we currently only provide this implementation
-+// which uses 256-bit vectors (ymm registers) and only has a 1x-wide loop.  This
-+// keeps the code size down, and it enables some micro-optimizations, e.g. using
-+// VEX-coded instructions instead of EVEX-coded to save some instruction bytes.
-+// To optimize for large amounts of AAD, we could implement a 4x-wide loop and
-+// provide a version using 512-bit vectors, but that doesn't seem to be useful.
-+SYM_FUNC_START(aes_gcm_aad_update_vaes_avx10)
-+
-+	// Function arguments
-+	.set	KEY,		%rdi
-+	.set	GHASH_ACC_PTR,	%rsi
-+	.set	AAD,		%rdx
-+	.set	AADLEN,		%ecx
-+	.set	AADLEN64,	%rcx	// Zero-extend AADLEN before using!
-+
-+	// Additional local variables.
-+	// %rax, %ymm0-%ymm3, and %k1 are used as temporary registers.
-+	.set	BSWAP_MASK,	%ymm4
-+	.set	GFPOLY,		%ymm5
-+	.set	GHASH_ACC,	%ymm6
-+	.set	GHASH_ACC_XMM,	%xmm6
-+	.set	H_POW1,		%ymm7
-+
-+	// Load some constants.
-+	vbroadcasti128	.Lbswap_mask(%rip), BSWAP_MASK
-+	vbroadcasti128	.Lgfpoly(%rip), GFPOLY
-+
-+	// Load the GHASH accumulator.
-+	vmovdqu		(GHASH_ACC_PTR), GHASH_ACC_XMM
-+
-+	// Update GHASH with 32 bytes of AAD at a time.
-+	//
-+	// Pre-subtracting 32 from AADLEN saves an instruction from the loop and
-+	// also ensures that at least one write always occurs to AADLEN,
-+	// zero-extending it and allowing AADLEN64 to be used later.
-+	sub		$32, AADLEN
-+	jl		.Laad_loop_1x_done
-+	vmovdqu8	OFFSETOFEND_H_POWERS-32(KEY), H_POW1	// [H^2, H^1]
-+.Laad_loop_1x:
-+	vmovdqu		(AAD), %ymm0
-+	vpshufb		BSWAP_MASK, %ymm0, %ymm0
-+	vpxor		%ymm0, GHASH_ACC, GHASH_ACC
-+	_ghash_mul	H_POW1, GHASH_ACC, GHASH_ACC, GFPOLY, \
-+			%ymm0, %ymm1, %ymm2
-+	vextracti128	$1, GHASH_ACC, %xmm0
-+	vpxor		%xmm0, GHASH_ACC_XMM, GHASH_ACC_XMM
-+	add		$32, AAD
-+	sub		$32, AADLEN
-+	jge		.Laad_loop_1x
-+.Laad_loop_1x_done:
-+	add		$32, AADLEN
-+	jz		.Laad_done
-+
-+	// Update GHASH with the remaining 1 <= AADLEN < 32 bytes of AAD.
-+	mov		$-1, %eax
-+	bzhi		AADLEN, %eax, %eax
-+	kmovd		%eax, %k1
-+	vmovdqu8	(AAD), %ymm0{%k1}{z}
-+	neg		AADLEN64
-+	and		$~15, AADLEN64  // -round_up(AADLEN, 16)
-+	vmovdqu8	OFFSETOFEND_H_POWERS(KEY,AADLEN64), H_POW1
-+	vpshufb		BSWAP_MASK, %ymm0, %ymm0
-+	vpxor		%ymm0, GHASH_ACC, GHASH_ACC
-+	_ghash_mul	H_POW1, GHASH_ACC, GHASH_ACC, GFPOLY, \
-+			%ymm0, %ymm1, %ymm2
-+	vextracti128	$1, GHASH_ACC, %xmm0
-+	vpxor		%xmm0, GHASH_ACC_XMM, GHASH_ACC_XMM
-+
-+.Laad_done:
-+	// Store the updated GHASH accumulator back to memory.
-+	vmovdqu		GHASH_ACC_XMM, (GHASH_ACC_PTR)
-+
-+	vzeroupper	// This is needed after using ymm or zmm registers.
-+	RET
-+SYM_FUNC_END(aes_gcm_aad_update_vaes_avx10)
-+
-+SYM_FUNC_START(aes_gcm_enc_final_vaes_avx10)
-+	_aes_gcm_final	1
-+SYM_FUNC_END(aes_gcm_enc_final_vaes_avx10)
-+SYM_FUNC_START(aes_gcm_dec_final_vaes_avx10)
-+	_aes_gcm_final	0
-+SYM_FUNC_END(aes_gcm_dec_final_vaes_avx10)
-diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
-index 39066b57a70e..eb153eff9331 100644
---- a/arch/x86/crypto/aesni-intel_asm.S
-+++ b/arch/x86/crypto/aesni-intel_asm.S
-@@ -10,16 +10,7 @@
-  *            Vinodh Gopal <vinodh.gopal@intel.com>
-  *            Kahraman Akdemir
-  *
-- * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
-- * interface for 64-bit kernels.
-- *    Authors: Erdinc Ozturk (erdinc.ozturk@intel.com)
-- *             Aidan O'Mahony (aidan.o.mahony@intel.com)
-- *             Adrian Hoban <adrian.hoban@intel.com>
-- *             James Guilford (james.guilford@intel.com)
-- *             Gabriele Paoloni <gabriele.paoloni@intel.com>
-- *             Tadeusz Struk (tadeusz.struk@intel.com)
-- *             Wajdi Feghali (wajdi.k.feghali@intel.com)
-- *    Copyright (c) 2010, Intel Corporation.
-+ * Copyright (c) 2010, Intel Corporation.
-  *
-  * Ported x86_64 version to x86:
-  *    Author: Mathias Krause <minipli@googlemail.com>
-@@ -27,95 +18,6 @@
- 
- #include <linux/linkage.h>
- #include <asm/frame.h>
--#include <asm/nospec-branch.h>
--
--/*
-- * The following macros are used to move an (un)aligned 16 byte value to/from
-- * an XMM register.  This can done for either FP or integer values, for FP use
-- * movaps (move aligned packed single) or integer use movdqa (move double quad
-- * aligned).  It doesn't make a performance difference which instruction is used
-- * since Nehalem (original Core i7) was released.  However, the movaps is a byte
-- * shorter, so that is the one we'll use for now. (same for unaligned).
-- */
--#define MOVADQ	movaps
--#define MOVUDQ	movups
--
--#ifdef __x86_64__
--
--# constants in mergeable sections, linker can reorder and merge
--.section	.rodata.cst16.POLY, "aM", @progbits, 16
--.align 16
--POLY:   .octa 0xC2000000000000000000000000000001
--.section	.rodata.cst16.TWOONE, "aM", @progbits, 16
--.align 16
--TWOONE: .octa 0x00000001000000000000000000000001
--
--.section	.rodata.cst16.SHUF_MASK, "aM", @progbits, 16
--.align 16
--SHUF_MASK:  .octa 0x000102030405060708090A0B0C0D0E0F
--.section	.rodata.cst16.MASK1, "aM", @progbits, 16
--.align 16
--MASK1:      .octa 0x0000000000000000ffffffffffffffff
--.section	.rodata.cst16.MASK2, "aM", @progbits, 16
--.align 16
--MASK2:      .octa 0xffffffffffffffff0000000000000000
--.section	.rodata.cst16.ONE, "aM", @progbits, 16
--.align 16
--ONE:        .octa 0x00000000000000000000000000000001
--.section	.rodata.cst16.F_MIN_MASK, "aM", @progbits, 16
--.align 16
--F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
--.section	.rodata.cst16.dec, "aM", @progbits, 16
--.align 16
--dec:        .octa 0x1
--.section	.rodata.cst16.enc, "aM", @progbits, 16
--.align 16
--enc:        .octa 0x2
--
--# order of these constants should not change.
--# more specifically, ALL_F should follow SHIFT_MASK,
--# and zero should follow ALL_F
--.section	.rodata, "a", @progbits
--.align 16
--SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
--ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
--            .octa 0x00000000000000000000000000000000
--
--.text
--
--#define AadHash 16*0
--#define AadLen 16*1
--#define InLen (16*1)+8
--#define PBlockEncKey 16*2
--#define OrigIV 16*3
--#define CurCount 16*4
--#define PBlockLen 16*5
--#define	HashKey		16*6	// store HashKey <<1 mod poly here
--#define	HashKey_2	16*7	// store HashKey^2 <<1 mod poly here
--#define	HashKey_3	16*8	// store HashKey^3 <<1 mod poly here
--#define	HashKey_4	16*9	// store HashKey^4 <<1 mod poly here
--#define	HashKey_k	16*10	// store XOR of High 64 bits and Low 64
--				// bits of  HashKey <<1 mod poly here
--				//(for Karatsuba purposes)
--#define	HashKey_2_k	16*11	// store XOR of High 64 bits and Low 64
--				// bits of  HashKey^2 <<1 mod poly here
--				// (for Karatsuba purposes)
--#define	HashKey_3_k	16*12	// store XOR of High 64 bits and Low 64
--				// bits of  HashKey^3 <<1 mod poly here
--				// (for Karatsuba purposes)
--#define	HashKey_4_k	16*13	// store XOR of High 64 bits and Low 64
--				// bits of  HashKey^4 <<1 mod poly here
--				// (for Karatsuba purposes)
--
--#define arg1 rdi
--#define arg2 rsi
--#define arg3 rdx
--#define arg4 rcx
--#define arg5 r8
--#define arg6 r9
--#define keysize 2*15*16(%arg1)
--#endif
--
- 
- #define STATE1	%xmm0
- #define STATE2	%xmm4
-@@ -162,1409 +64,6 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
- #define TKEYP	T1
- #endif
- 
--.macro FUNC_SAVE
--	push	%r12
--	push	%r13
--	push	%r14
--#
--# states of %xmm registers %xmm6:%xmm15 not saved
--# all %xmm registers are clobbered
--#
--.endm
--
--
--.macro FUNC_RESTORE
--	pop	%r14
--	pop	%r13
--	pop	%r12
--.endm
--
--# Precompute hashkeys.
--# Input: Hash subkey.
--# Output: HashKeys stored in gcm_context_data.  Only needs to be called
--# once per key.
--# clobbers r12, and tmp xmm registers.
--.macro PRECOMPUTE SUBKEY TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7
--	mov	\SUBKEY, %r12
--	movdqu	(%r12), \TMP3
--	movdqa	SHUF_MASK(%rip), \TMP2
--	pshufb	\TMP2, \TMP3
--
--	# precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
--
--	movdqa	\TMP3, \TMP2
--	psllq	$1, \TMP3
--	psrlq	$63, \TMP2
--	movdqa	\TMP2, \TMP1
--	pslldq	$8, \TMP2
--	psrldq	$8, \TMP1
--	por	\TMP2, \TMP3
--
--	# reduce HashKey<<1
--
--	pshufd	$0x24, \TMP1, \TMP2
--	pcmpeqd TWOONE(%rip), \TMP2
--	pand	POLY(%rip), \TMP2
--	pxor	\TMP2, \TMP3
--	movdqu	\TMP3, HashKey(%arg2)
--
--	movdqa	   \TMP3, \TMP5
--	pshufd	   $78, \TMP3, \TMP1
--	pxor	   \TMP3, \TMP1
--	movdqu	   \TMP1, HashKey_k(%arg2)
--
--	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
--# TMP5 = HashKey^2<<1 (mod poly)
--	movdqu	   \TMP5, HashKey_2(%arg2)
--# HashKey_2 = HashKey^2<<1 (mod poly)
--	pshufd	   $78, \TMP5, \TMP1
--	pxor	   \TMP5, \TMP1
--	movdqu	   \TMP1, HashKey_2_k(%arg2)
--
--	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
--# TMP5 = HashKey^3<<1 (mod poly)
--	movdqu	   \TMP5, HashKey_3(%arg2)
--	pshufd	   $78, \TMP5, \TMP1
--	pxor	   \TMP5, \TMP1
--	movdqu	   \TMP1, HashKey_3_k(%arg2)
--
--	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
--# TMP5 = HashKey^3<<1 (mod poly)
--	movdqu	   \TMP5, HashKey_4(%arg2)
--	pshufd	   $78, \TMP5, \TMP1
--	pxor	   \TMP5, \TMP1
--	movdqu	   \TMP1, HashKey_4_k(%arg2)
--.endm
--
--# GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding.
--# Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13
--.macro GCM_INIT Iv SUBKEY AAD AADLEN
--	mov \AADLEN, %r11
--	mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length
--	xor %r11d, %r11d
--	mov %r11, InLen(%arg2) # ctx_data.in_length = 0
--	mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0
--	mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0
--	mov \Iv, %rax
--	movdqu (%rax), %xmm0
--	movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv
--
--	movdqa  SHUF_MASK(%rip), %xmm2
--	pshufb %xmm2, %xmm0
--	movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
--
--	PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7
--	movdqu HashKey(%arg2), %xmm13
--
--	CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
--	%xmm4, %xmm5, %xmm6
--.endm
--
--# GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context
--# struct has been initialized by GCM_INIT.
--# Requires the input data be at least 1 byte long because of READ_PARTIAL_BLOCK
--# Clobbers rax, r10-r13, and xmm0-xmm15
--.macro GCM_ENC_DEC operation
--	movdqu AadHash(%arg2), %xmm8
--	movdqu HashKey(%arg2), %xmm13
--	add %arg5, InLen(%arg2)
--
--	xor %r11d, %r11d # initialise the data pointer offset as zero
--	PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
--
--	sub %r11, %arg5		# sub partial block data used
--	mov %arg5, %r13		# save the number of bytes
--
--	and $-16, %r13		# %r13 = %r13 - (%r13 mod 16)
--	mov %r13, %r12
--	# Encrypt/Decrypt first few blocks
--
--	and	$(3<<4), %r12
--	jz	.L_initial_num_blocks_is_0_\@
--	cmp	$(2<<4), %r12
--	jb	.L_initial_num_blocks_is_1_\@
--	je	.L_initial_num_blocks_is_2_\@
--.L_initial_num_blocks_is_3_\@:
--	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
--%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation
--	sub	$48, %r13
--	jmp	.L_initial_blocks_\@
--.L_initial_num_blocks_is_2_\@:
--	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
--%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation
--	sub	$32, %r13
--	jmp	.L_initial_blocks_\@
--.L_initial_num_blocks_is_1_\@:
--	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
--%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation
--	sub	$16, %r13
--	jmp	.L_initial_blocks_\@
--.L_initial_num_blocks_is_0_\@:
--	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
--%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation
--.L_initial_blocks_\@:
--
--	# Main loop - Encrypt/Decrypt remaining blocks
--
--	test	%r13, %r13
--	je	.L_zero_cipher_left_\@
--	sub	$64, %r13
--	je	.L_four_cipher_left_\@
--.L_crypt_by_4_\@:
--	GHASH_4_ENCRYPT_4_PARALLEL_\operation	%xmm9, %xmm10, %xmm11, %xmm12, \
--	%xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \
--	%xmm7, %xmm8, enc
--	add	$64, %r11
--	sub	$64, %r13
--	jne	.L_crypt_by_4_\@
--.L_four_cipher_left_\@:
--	GHASH_LAST_4	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
--%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
--.L_zero_cipher_left_\@:
--	movdqu %xmm8, AadHash(%arg2)
--	movdqu %xmm0, CurCount(%arg2)
--
--	mov	%arg5, %r13
--	and	$15, %r13			# %r13 = arg5 (mod 16)
--	je	.L_multiple_of_16_bytes_\@
--
--	mov %r13, PBlockLen(%arg2)
--
--	# Handle the last <16 Byte block separately
--	paddd ONE(%rip), %xmm0                # INCR CNT to get Yn
--	movdqu %xmm0, CurCount(%arg2)
--	movdqa SHUF_MASK(%rip), %xmm10
--	pshufb %xmm10, %xmm0
--
--	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm1        # Encrypt(K, Yn)
--	movdqu %xmm0, PBlockEncKey(%arg2)
--
--	cmp	$16, %arg5
--	jge	.L_large_enough_update_\@
--
--	lea (%arg4,%r11,1), %r10
--	mov %r13, %r12
--	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
--	jmp	.L_data_read_\@
--
--.L_large_enough_update_\@:
--	sub	$16, %r11
--	add	%r13, %r11
--
--	# receive the last <16 Byte block
--	movdqu	(%arg4, %r11, 1), %xmm1
--
--	sub	%r13, %r11
--	add	$16, %r11
--
--	lea	SHIFT_MASK+16(%rip), %r12
--	# adjust the shuffle mask pointer to be able to shift 16-r13 bytes
--	# (r13 is the number of bytes in plaintext mod 16)
--	sub	%r13, %r12
--	# get the appropriate shuffle mask
--	movdqu	(%r12), %xmm2
--	# shift right 16-r13 bytes
--	pshufb  %xmm2, %xmm1
--
--.L_data_read_\@:
--	lea ALL_F+16(%rip), %r12
--	sub %r13, %r12
--
--.ifc \operation, dec
--	movdqa  %xmm1, %xmm2
--.endif
--	pxor	%xmm1, %xmm0            # XOR Encrypt(K, Yn)
--	movdqu	(%r12), %xmm1
--	# get the appropriate mask to mask out top 16-r13 bytes of xmm0
--	pand	%xmm1, %xmm0            # mask out top 16-r13 bytes of xmm0
--.ifc \operation, dec
--	pand    %xmm1, %xmm2
--	movdqa SHUF_MASK(%rip), %xmm10
--	pshufb %xmm10 ,%xmm2
--
--	pxor %xmm2, %xmm8
--.else
--	movdqa SHUF_MASK(%rip), %xmm10
--	pshufb %xmm10,%xmm0
--
--	pxor	%xmm0, %xmm8
--.endif
--
--	movdqu %xmm8, AadHash(%arg2)
--.ifc \operation, enc
--	# GHASH computation for the last <16 byte block
--	movdqa SHUF_MASK(%rip), %xmm10
--	# shuffle xmm0 back to output as ciphertext
--	pshufb %xmm10, %xmm0
--.endif
--
--	# Output %r13 bytes
--	movq %xmm0, %rax
--	cmp $8, %r13
--	jle .L_less_than_8_bytes_left_\@
--	mov %rax, (%arg3 , %r11, 1)
--	add $8, %r11
--	psrldq $8, %xmm0
--	movq %xmm0, %rax
--	sub $8, %r13
--.L_less_than_8_bytes_left_\@:
--	mov %al,  (%arg3, %r11, 1)
--	add $1, %r11
--	shr $8, %rax
--	sub $1, %r13
--	jne .L_less_than_8_bytes_left_\@
--.L_multiple_of_16_bytes_\@:
--.endm
--
--# GCM_COMPLETE Finishes update of tag of last partial block
--# Output: Authorization Tag (AUTH_TAG)
--# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
--.macro GCM_COMPLETE AUTHTAG AUTHTAGLEN
--	movdqu AadHash(%arg2), %xmm8
--	movdqu HashKey(%arg2), %xmm13
--
--	mov PBlockLen(%arg2), %r12
--
--	test %r12, %r12
--	je .L_partial_done\@
--
--	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
--
--.L_partial_done\@:
--	mov AadLen(%arg2), %r12  # %r13 = aadLen (number of bytes)
--	shl	$3, %r12		  # convert into number of bits
--	movd	%r12d, %xmm15		  # len(A) in %xmm15
--	mov InLen(%arg2), %r12
--	shl     $3, %r12                  # len(C) in bits (*128)
--	movq    %r12, %xmm1
--
--	pslldq	$8, %xmm15		  # %xmm15 = len(A)||0x0000000000000000
--	pxor	%xmm1, %xmm15		  # %xmm15 = len(A)||len(C)
--	pxor	%xmm15, %xmm8
--	GHASH_MUL	%xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
--	# final GHASH computation
--	movdqa SHUF_MASK(%rip), %xmm10
--	pshufb %xmm10, %xmm8
--
--	movdqu OrigIV(%arg2), %xmm0       # %xmm0 = Y0
--	ENCRYPT_SINGLE_BLOCK	%xmm0,  %xmm1	  # E(K, Y0)
--	pxor	%xmm8, %xmm0
--.L_return_T_\@:
--	mov	\AUTHTAG, %r10                     # %r10 = authTag
--	mov	\AUTHTAGLEN, %r11                    # %r11 = auth_tag_len
--	cmp	$16, %r11
--	je	.L_T_16_\@
--	cmp	$8, %r11
--	jl	.L_T_4_\@
--.L_T_8_\@:
--	movq	%xmm0, %rax
--	mov	%rax, (%r10)
--	add	$8, %r10
--	sub	$8, %r11
--	psrldq	$8, %xmm0
--	test	%r11, %r11
--	je	.L_return_T_done_\@
--.L_T_4_\@:
--	movd	%xmm0, %eax
--	mov	%eax, (%r10)
--	add	$4, %r10
--	sub	$4, %r11
--	psrldq	$4, %xmm0
--	test	%r11, %r11
--	je	.L_return_T_done_\@
--.L_T_123_\@:
--	movd	%xmm0, %eax
--	cmp	$2, %r11
--	jl	.L_T_1_\@
--	mov	%ax, (%r10)
--	cmp	$2, %r11
--	je	.L_return_T_done_\@
--	add	$2, %r10
--	sar	$16, %eax
--.L_T_1_\@:
--	mov	%al, (%r10)
--	jmp	.L_return_T_done_\@
--.L_T_16_\@:
--	movdqu	%xmm0, (%r10)
--.L_return_T_done_\@:
--.endm
--
--#ifdef __x86_64__
--/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
--*
--*
--* Input: A and B (128-bits each, bit-reflected)
--* Output: C = A*B*x mod poly, (i.e. >>1 )
--* To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
--* GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
--*
--*/
--.macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5
--	movdqa	  \GH, \TMP1
--	pshufd	  $78, \GH, \TMP2
--	pshufd	  $78, \HK, \TMP3
--	pxor	  \GH, \TMP2            # TMP2 = a1+a0
--	pxor	  \HK, \TMP3            # TMP3 = b1+b0
--	pclmulqdq $0x11, \HK, \TMP1     # TMP1 = a1*b1
--	pclmulqdq $0x00, \HK, \GH       # GH = a0*b0
--	pclmulqdq $0x00, \TMP3, \TMP2   # TMP2 = (a0+a1)*(b1+b0)
--	pxor	  \GH, \TMP2
--	pxor	  \TMP1, \TMP2          # TMP2 = (a0*b0)+(a1*b0)
--	movdqa	  \TMP2, \TMP3
--	pslldq	  $8, \TMP3             # left shift TMP3 2 DWs
--	psrldq	  $8, \TMP2             # right shift TMP2 2 DWs
--	pxor	  \TMP3, \GH
--	pxor	  \TMP2, \TMP1          # TMP2:GH holds the result of GH*HK
--
--        # first phase of the reduction
--
--	movdqa    \GH, \TMP2
--	movdqa    \GH, \TMP3
--	movdqa    \GH, \TMP4            # copy GH into TMP2,TMP3 and TMP4
--					# in in order to perform
--					# independent shifts
--	pslld     $31, \TMP2            # packed right shift <<31
--	pslld     $30, \TMP3            # packed right shift <<30
--	pslld     $25, \TMP4            # packed right shift <<25
--	pxor      \TMP3, \TMP2          # xor the shifted versions
--	pxor      \TMP4, \TMP2
--	movdqa    \TMP2, \TMP5
--	psrldq    $4, \TMP5             # right shift TMP5 1 DW
--	pslldq    $12, \TMP2            # left shift TMP2 3 DWs
--	pxor      \TMP2, \GH
--
--        # second phase of the reduction
--
--	movdqa    \GH,\TMP2             # copy GH into TMP2,TMP3 and TMP4
--					# in in order to perform
--					# independent shifts
--	movdqa    \GH,\TMP3
--	movdqa    \GH,\TMP4
--	psrld     $1,\TMP2              # packed left shift >>1
--	psrld     $2,\TMP3              # packed left shift >>2
--	psrld     $7,\TMP4              # packed left shift >>7
--	pxor      \TMP3,\TMP2		# xor the shifted versions
--	pxor      \TMP4,\TMP2
--	pxor      \TMP5, \TMP2
--	pxor      \TMP2, \GH
--	pxor      \TMP1, \GH            # result is in TMP1
--.endm
--
--# Reads DLEN bytes starting at DPTR and stores in XMMDst
--# where 0 < DLEN < 16
--# Clobbers %rax, DLEN and XMM1
--.macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst
--        cmp $8, \DLEN
--        jl .L_read_lt8_\@
--        mov (\DPTR), %rax
--        movq %rax, \XMMDst
--        sub $8, \DLEN
--        jz .L_done_read_partial_block_\@
--	xor %eax, %eax
--.L_read_next_byte_\@:
--        shl $8, %rax
--        mov 7(\DPTR, \DLEN, 1), %al
--        dec \DLEN
--        jnz .L_read_next_byte_\@
--        movq %rax, \XMM1
--	pslldq $8, \XMM1
--        por \XMM1, \XMMDst
--	jmp .L_done_read_partial_block_\@
--.L_read_lt8_\@:
--	xor %eax, %eax
--.L_read_next_byte_lt8_\@:
--        shl $8, %rax
--        mov -1(\DPTR, \DLEN, 1), %al
--        dec \DLEN
--        jnz .L_read_next_byte_lt8_\@
--        movq %rax, \XMMDst
--.L_done_read_partial_block_\@:
--.endm
--
--# CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted.
--# clobbers r10-11, xmm14
--.macro CALC_AAD_HASH HASHKEY AAD AADLEN TMP1 TMP2 TMP3 TMP4 TMP5 \
--	TMP6 TMP7
--	MOVADQ	   SHUF_MASK(%rip), %xmm14
--	mov	   \AAD, %r10		# %r10 = AAD
--	mov	   \AADLEN, %r11		# %r11 = aadLen
--	pxor	   \TMP7, \TMP7
--	pxor	   \TMP6, \TMP6
--
--	cmp	   $16, %r11
--	jl	   .L_get_AAD_rest\@
--.L_get_AAD_blocks\@:
--	movdqu	   (%r10), \TMP7
--	pshufb	   %xmm14, \TMP7 # byte-reflect the AAD data
--	pxor	   \TMP7, \TMP6
--	GHASH_MUL  \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
--	add	   $16, %r10
--	sub	   $16, %r11
--	cmp	   $16, %r11
--	jge	   .L_get_AAD_blocks\@
--
--	movdqu	   \TMP6, \TMP7
--
--	/* read the last <16B of AAD */
--.L_get_AAD_rest\@:
--	test	   %r11, %r11
--	je	   .L_get_AAD_done\@
--
--	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
--	pshufb	   %xmm14, \TMP7 # byte-reflect the AAD data
--	pxor	   \TMP6, \TMP7
--	GHASH_MUL  \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
--	movdqu \TMP7, \TMP6
--
--.L_get_AAD_done\@:
--	movdqu \TMP6, AadHash(%arg2)
--.endm
--
--# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks
--# between update calls.
--# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK
--# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context
--# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13
--.macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
--	AAD_HASH operation
--	mov 	PBlockLen(%arg2), %r13
--	test	%r13, %r13
--	je	.L_partial_block_done_\@	# Leave Macro if no partial blocks
--	# Read in input data without over reading
--	cmp	$16, \PLAIN_CYPH_LEN
--	jl	.L_fewer_than_16_bytes_\@
--	movups	(\PLAIN_CYPH_IN), %xmm1	# If more than 16 bytes, just fill xmm
--	jmp	.L_data_read_\@
--
--.L_fewer_than_16_bytes_\@:
--	lea	(\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
--	mov	\PLAIN_CYPH_LEN, %r12
--	READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1
--
--	mov PBlockLen(%arg2), %r13
--
--.L_data_read_\@:				# Finished reading in data
--
--	movdqu	PBlockEncKey(%arg2), %xmm9
--	movdqu	HashKey(%arg2), %xmm13
--
--	lea	SHIFT_MASK(%rip), %r12
--
--	# adjust the shuffle mask pointer to be able to shift r13 bytes
--	# r16-r13 is the number of bytes in plaintext mod 16)
--	add	%r13, %r12
--	movdqu	(%r12), %xmm2		# get the appropriate shuffle mask
--	pshufb	%xmm2, %xmm9		# shift right r13 bytes
--
--.ifc \operation, dec
--	movdqa	%xmm1, %xmm3
--	pxor	%xmm1, %xmm9		# Ciphertext XOR E(K, Yn)
--
--	mov	\PLAIN_CYPH_LEN, %r10
--	add	%r13, %r10
--	# Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
--	sub	$16, %r10
--	# Determine if partial block is not being filled and
--	# shift mask accordingly
--	jge	.L_no_extra_mask_1_\@
--	sub	%r10, %r12
--.L_no_extra_mask_1_\@:
--
--	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
--	# get the appropriate mask to mask out bottom r13 bytes of xmm9
--	pand	%xmm1, %xmm9		# mask out bottom r13 bytes of xmm9
--
--	pand	%xmm1, %xmm3
--	movdqa	SHUF_MASK(%rip), %xmm10
--	pshufb	%xmm10, %xmm3
--	pshufb	%xmm2, %xmm3
--	pxor	%xmm3, \AAD_HASH
--
--	test	%r10, %r10
--	jl	.L_partial_incomplete_1_\@
--
--	# GHASH computation for the last <16 Byte block
--	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
--	xor	%eax, %eax
--
--	mov	%rax, PBlockLen(%arg2)
--	jmp	.L_dec_done_\@
--.L_partial_incomplete_1_\@:
--	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)
--.L_dec_done_\@:
--	movdqu	\AAD_HASH, AadHash(%arg2)
--.else
--	pxor	%xmm1, %xmm9			# Plaintext XOR E(K, Yn)
--
--	mov	\PLAIN_CYPH_LEN, %r10
--	add	%r13, %r10
--	# Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
--	sub	$16, %r10
--	# Determine if partial block is not being filled and
--	# shift mask accordingly
--	jge	.L_no_extra_mask_2_\@
--	sub	%r10, %r12
--.L_no_extra_mask_2_\@:
--
--	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
--	# get the appropriate mask to mask out bottom r13 bytes of xmm9
--	pand	%xmm1, %xmm9
--
--	movdqa	SHUF_MASK(%rip), %xmm1
--	pshufb	%xmm1, %xmm9
--	pshufb	%xmm2, %xmm9
--	pxor	%xmm9, \AAD_HASH
--
--	test	%r10, %r10
--	jl	.L_partial_incomplete_2_\@
--
--	# GHASH computation for the last <16 Byte block
--	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
--	xor	%eax, %eax
--
--	mov	%rax, PBlockLen(%arg2)
--	jmp	.L_encode_done_\@
--.L_partial_incomplete_2_\@:
--	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)
--.L_encode_done_\@:
--	movdqu	\AAD_HASH, AadHash(%arg2)
--
--	movdqa	SHUF_MASK(%rip), %xmm10
--	# shuffle xmm9 back to output as ciphertext
--	pshufb	%xmm10, %xmm9
--	pshufb	%xmm2, %xmm9
--.endif
--	# output encrypted Bytes
--	test	%r10, %r10
--	jl	.L_partial_fill_\@
--	mov	%r13, %r12
--	mov	$16, %r13
--	# Set r13 to be the number of bytes to write out
--	sub	%r12, %r13
--	jmp	.L_count_set_\@
--.L_partial_fill_\@:
--	mov	\PLAIN_CYPH_LEN, %r13
--.L_count_set_\@:
--	movdqa	%xmm9, %xmm0
--	movq	%xmm0, %rax
--	cmp	$8, %r13
--	jle	.L_less_than_8_bytes_left_\@
--
--	mov	%rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
--	add	$8, \DATA_OFFSET
--	psrldq	$8, %xmm0
--	movq	%xmm0, %rax
--	sub	$8, %r13
--.L_less_than_8_bytes_left_\@:
--	movb	%al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
--	add	$1, \DATA_OFFSET
--	shr	$8, %rax
--	sub	$1, %r13
--	jne	.L_less_than_8_bytes_left_\@
--.L_partial_block_done_\@:
--.endm # PARTIAL_BLOCK
--
--/*
--* if a = number of total plaintext bytes
--* b = floor(a/16)
--* num_initial_blocks = b mod 4
--* encrypt the initial num_initial_blocks blocks and apply ghash on
--* the ciphertext
--* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
--* are clobbered
--* arg1, %arg2, %arg3 are used as a pointer only, not modified
--*/
--
--
--.macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
--	XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
--	MOVADQ		SHUF_MASK(%rip), %xmm14
--
--	movdqu AadHash(%arg2), %xmm\i		    # XMM0 = Y0
--
--	# start AES for num_initial_blocks blocks
--
--	movdqu CurCount(%arg2), \XMM0                # XMM0 = Y0
--
--.if (\i == 5) || (\i == 6) || (\i == 7)
--
--	MOVADQ		ONE(%RIP),\TMP1
--	MOVADQ		0(%arg1),\TMP2
--.irpc index, \i_seq
--	paddd		\TMP1, \XMM0                 # INCR Y0
--.ifc \operation, dec
--        movdqa     \XMM0, %xmm\index
--.else
--	MOVADQ		\XMM0, %xmm\index
--.endif
--	pshufb	%xmm14, %xmm\index      # perform a 16 byte swap
--	pxor		\TMP2, %xmm\index
--.endr
--	lea	0x10(%arg1),%r10
--	mov	keysize,%eax
--	shr	$2,%eax				# 128->4, 192->6, 256->8
--	add	$5,%eax			      # 128->9, 192->11, 256->13
--
--.Laes_loop_initial_\@:
--	MOVADQ	(%r10),\TMP1
--.irpc	index, \i_seq
--	aesenc	\TMP1, %xmm\index
--.endr
--	add	$16,%r10
--	sub	$1,%eax
--	jnz	.Laes_loop_initial_\@
--
--	MOVADQ	(%r10), \TMP1
--.irpc index, \i_seq
--	aesenclast \TMP1, %xmm\index         # Last Round
--.endr
--.irpc index, \i_seq
--	movdqu	   (%arg4 , %r11, 1), \TMP1
--	pxor	   \TMP1, %xmm\index
--	movdqu	   %xmm\index, (%arg3 , %r11, 1)
--	# write back plaintext/ciphertext for num_initial_blocks
--	add	   $16, %r11
--
--.ifc \operation, dec
--	movdqa     \TMP1, %xmm\index
--.endif
--	pshufb	   %xmm14, %xmm\index
--
--		# prepare plaintext/ciphertext for GHASH computation
--.endr
--.endif
--
--        # apply GHASH on num_initial_blocks blocks
--
--.if \i == 5
--        pxor       %xmm5, %xmm6
--	GHASH_MUL  %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
--        pxor       %xmm6, %xmm7
--	GHASH_MUL  %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
--        pxor       %xmm7, %xmm8
--	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
--.elseif \i == 6
--        pxor       %xmm6, %xmm7
--	GHASH_MUL  %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
--        pxor       %xmm7, %xmm8
--	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
--.elseif \i == 7
--        pxor       %xmm7, %xmm8
--	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
--.endif
--	cmp	   $64, %r13
--	jl	.L_initial_blocks_done\@
--	# no need for precomputed values
--/*
--*
--* Precomputations for HashKey parallel with encryption of first 4 blocks.
--* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
--*/
--	MOVADQ	   ONE(%RIP),\TMP1
--	paddd	   \TMP1, \XMM0              # INCR Y0
--	MOVADQ	   \XMM0, \XMM1
--	pshufb  %xmm14, \XMM1        # perform a 16 byte swap
--
--	paddd	   \TMP1, \XMM0              # INCR Y0
--	MOVADQ	   \XMM0, \XMM2
--	pshufb  %xmm14, \XMM2        # perform a 16 byte swap
--
--	paddd	   \TMP1, \XMM0              # INCR Y0
--	MOVADQ	   \XMM0, \XMM3
--	pshufb %xmm14, \XMM3        # perform a 16 byte swap
--
--	paddd	   \TMP1, \XMM0              # INCR Y0
--	MOVADQ	   \XMM0, \XMM4
--	pshufb %xmm14, \XMM4        # perform a 16 byte swap
--
--	MOVADQ	   0(%arg1),\TMP1
--	pxor	   \TMP1, \XMM1
--	pxor	   \TMP1, \XMM2
--	pxor	   \TMP1, \XMM3
--	pxor	   \TMP1, \XMM4
--.irpc index, 1234 # do 4 rounds
--	movaps 0x10*\index(%arg1), \TMP1
--	aesenc	   \TMP1, \XMM1
--	aesenc	   \TMP1, \XMM2
--	aesenc	   \TMP1, \XMM3
--	aesenc	   \TMP1, \XMM4
--.endr
--.irpc index, 56789 # do next 5 rounds
--	movaps 0x10*\index(%arg1), \TMP1
--	aesenc	   \TMP1, \XMM1
--	aesenc	   \TMP1, \XMM2
--	aesenc	   \TMP1, \XMM3
--	aesenc	   \TMP1, \XMM4
--.endr
--	lea	   0xa0(%arg1),%r10
--	mov	   keysize,%eax
--	shr	   $2,%eax			# 128->4, 192->6, 256->8
--	sub	   $4,%eax			# 128->0, 192->2, 256->4
--	jz	   .Laes_loop_pre_done\@
--
--.Laes_loop_pre_\@:
--	MOVADQ	   (%r10),\TMP2
--.irpc	index, 1234
--	aesenc	   \TMP2, %xmm\index
--.endr
--	add	   $16,%r10
--	sub	   $1,%eax
--	jnz	   .Laes_loop_pre_\@
--
--.Laes_loop_pre_done\@:
--	MOVADQ	   (%r10), \TMP2
--	aesenclast \TMP2, \XMM1
--	aesenclast \TMP2, \XMM2
--	aesenclast \TMP2, \XMM3
--	aesenclast \TMP2, \XMM4
--	movdqu	   16*0(%arg4 , %r11 , 1), \TMP1
--	pxor	   \TMP1, \XMM1
--.ifc \operation, dec
--	movdqu     \XMM1, 16*0(%arg3 , %r11 , 1)
--	movdqa     \TMP1, \XMM1
--.endif
--	movdqu	   16*1(%arg4 , %r11 , 1), \TMP1
--	pxor	   \TMP1, \XMM2
--.ifc \operation, dec
--	movdqu     \XMM2, 16*1(%arg3 , %r11 , 1)
--	movdqa     \TMP1, \XMM2
--.endif
--	movdqu	   16*2(%arg4 , %r11 , 1), \TMP1
--	pxor	   \TMP1, \XMM3
--.ifc \operation, dec
--	movdqu     \XMM3, 16*2(%arg3 , %r11 , 1)
--	movdqa     \TMP1, \XMM3
--.endif
--	movdqu	   16*3(%arg4 , %r11 , 1), \TMP1
--	pxor	   \TMP1, \XMM4
--.ifc \operation, dec
--	movdqu     \XMM4, 16*3(%arg3 , %r11 , 1)
--	movdqa     \TMP1, \XMM4
--.else
--	movdqu     \XMM1, 16*0(%arg3 , %r11 , 1)
--	movdqu     \XMM2, 16*1(%arg3 , %r11 , 1)
--	movdqu     \XMM3, 16*2(%arg3 , %r11 , 1)
--	movdqu     \XMM4, 16*3(%arg3 , %r11 , 1)
--.endif
--
--	add	   $64, %r11
--	pshufb %xmm14, \XMM1 # perform a 16 byte swap
--	pxor	   \XMMDst, \XMM1
--# combine GHASHed value with the corresponding ciphertext
--	pshufb %xmm14, \XMM2 # perform a 16 byte swap
--	pshufb %xmm14, \XMM3 # perform a 16 byte swap
--	pshufb %xmm14, \XMM4 # perform a 16 byte swap
--
--.L_initial_blocks_done\@:
--
--.endm
--
--/*
--* encrypt 4 blocks at a time
--* ghash the 4 previously encrypted ciphertext blocks
--* arg1, %arg3, %arg4 are used as pointers only, not modified
--* %r11 is the data offset value
--*/
--.macro GHASH_4_ENCRYPT_4_PARALLEL_enc TMP1 TMP2 TMP3 TMP4 TMP5 \
--TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
--
--	movdqa	  \XMM1, \XMM5
--	movdqa	  \XMM2, \XMM6
--	movdqa	  \XMM3, \XMM7
--	movdqa	  \XMM4, \XMM8
--
--        movdqa    SHUF_MASK(%rip), %xmm15
--        # multiply TMP5 * HashKey using karatsuba
--
--	movdqa	  \XMM5, \TMP4
--	pshufd	  $78, \XMM5, \TMP6
--	pxor	  \XMM5, \TMP6
--	paddd     ONE(%rip), \XMM0		# INCR CNT
--	movdqu	  HashKey_4(%arg2), \TMP5
--	pclmulqdq $0x11, \TMP5, \TMP4           # TMP4 = a1*b1
--	movdqa    \XMM0, \XMM1
--	paddd     ONE(%rip), \XMM0		# INCR CNT
--	movdqa    \XMM0, \XMM2
--	paddd     ONE(%rip), \XMM0		# INCR CNT
--	movdqa    \XMM0, \XMM3
--	paddd     ONE(%rip), \XMM0		# INCR CNT
--	movdqa    \XMM0, \XMM4
--	pshufb %xmm15, \XMM1	# perform a 16 byte swap
--	pclmulqdq $0x00, \TMP5, \XMM5           # XMM5 = a0*b0
--	pshufb %xmm15, \XMM2	# perform a 16 byte swap
--	pshufb %xmm15, \XMM3	# perform a 16 byte swap
--	pshufb %xmm15, \XMM4	# perform a 16 byte swap
--
--	pxor	  (%arg1), \XMM1
--	pxor	  (%arg1), \XMM2
--	pxor	  (%arg1), \XMM3
--	pxor	  (%arg1), \XMM4
--	movdqu	  HashKey_4_k(%arg2), \TMP5
--	pclmulqdq $0x00, \TMP5, \TMP6       # TMP6 = (a1+a0)*(b1+b0)
--	movaps 0x10(%arg1), \TMP1
--	aesenc	  \TMP1, \XMM1              # Round 1
--	aesenc	  \TMP1, \XMM2
--	aesenc	  \TMP1, \XMM3
--	aesenc	  \TMP1, \XMM4
--	movaps 0x20(%arg1), \TMP1
--	aesenc	  \TMP1, \XMM1              # Round 2
--	aesenc	  \TMP1, \XMM2
--	aesenc	  \TMP1, \XMM3
--	aesenc	  \TMP1, \XMM4
--	movdqa	  \XMM6, \TMP1
--	pshufd	  $78, \XMM6, \TMP2
--	pxor	  \XMM6, \TMP2
--	movdqu	  HashKey_3(%arg2), \TMP5
--	pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1 * b1
--	movaps 0x30(%arg1), \TMP3
--	aesenc    \TMP3, \XMM1              # Round 3
--	aesenc    \TMP3, \XMM2
--	aesenc    \TMP3, \XMM3
--	aesenc    \TMP3, \XMM4
--	pclmulqdq $0x00, \TMP5, \XMM6       # XMM6 = a0*b0
--	movaps 0x40(%arg1), \TMP3
--	aesenc	  \TMP3, \XMM1              # Round 4
--	aesenc	  \TMP3, \XMM2
--	aesenc	  \TMP3, \XMM3
--	aesenc	  \TMP3, \XMM4
--	movdqu	  HashKey_3_k(%arg2), \TMP5
--	pclmulqdq $0x00, \TMP5, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
--	movaps 0x50(%arg1), \TMP3
--	aesenc	  \TMP3, \XMM1              # Round 5
--	aesenc	  \TMP3, \XMM2
--	aesenc	  \TMP3, \XMM3
--	aesenc	  \TMP3, \XMM4
--	pxor	  \TMP1, \TMP4
--# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
--	pxor	  \XMM6, \XMM5
--	pxor	  \TMP2, \TMP6
--	movdqa	  \XMM7, \TMP1
--	pshufd	  $78, \XMM7, \TMP2
--	pxor	  \XMM7, \TMP2
--	movdqu	  HashKey_2(%arg2), \TMP5
--
--        # Multiply TMP5 * HashKey using karatsuba
--
--	pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1*b1
--	movaps 0x60(%arg1), \TMP3
--	aesenc	  \TMP3, \XMM1              # Round 6
--	aesenc	  \TMP3, \XMM2
--	aesenc	  \TMP3, \XMM3
--	aesenc	  \TMP3, \XMM4
--	pclmulqdq $0x00, \TMP5, \XMM7       # XMM7 = a0*b0
--	movaps 0x70(%arg1), \TMP3
--	aesenc	  \TMP3, \XMM1              # Round 7
--	aesenc	  \TMP3, \XMM2
--	aesenc	  \TMP3, \XMM3
--	aesenc	  \TMP3, \XMM4
--	movdqu	  HashKey_2_k(%arg2), \TMP5
--	pclmulqdq $0x00, \TMP5, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
--	movaps 0x80(%arg1), \TMP3
--	aesenc	  \TMP3, \XMM1              # Round 8
--	aesenc	  \TMP3, \XMM2
--	aesenc	  \TMP3, \XMM3
--	aesenc	  \TMP3, \XMM4
--	pxor	  \TMP1, \TMP4
--# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
--	pxor	  \XMM7, \XMM5
--	pxor	  \TMP2, \TMP6
--
--        # Multiply XMM8 * HashKey
--        # XMM8 and TMP5 hold the values for the two operands
--
--	movdqa	  \XMM8, \TMP1
--	pshufd	  $78, \XMM8, \TMP2
--	pxor	  \XMM8, \TMP2
--	movdqu	  HashKey(%arg2), \TMP5
--	pclmulqdq $0x11, \TMP5, \TMP1      # TMP1 = a1*b1
--	movaps 0x90(%arg1), \TMP3
--	aesenc	  \TMP3, \XMM1             # Round 9
--	aesenc	  \TMP3, \XMM2
--	aesenc	  \TMP3, \XMM3
--	aesenc	  \TMP3, \XMM4
--	pclmulqdq $0x00, \TMP5, \XMM8      # XMM8 = a0*b0
--	lea	  0xa0(%arg1),%r10
--	mov	  keysize,%eax
--	shr	  $2,%eax			# 128->4, 192->6, 256->8
--	sub	  $4,%eax			# 128->0, 192->2, 256->4
--	jz	  .Laes_loop_par_enc_done\@
--
--.Laes_loop_par_enc\@:
--	MOVADQ	  (%r10),\TMP3
--.irpc	index, 1234
--	aesenc	  \TMP3, %xmm\index
--.endr
--	add	  $16,%r10
--	sub	  $1,%eax
--	jnz	  .Laes_loop_par_enc\@
--
--.Laes_loop_par_enc_done\@:
--	MOVADQ	  (%r10), \TMP3
--	aesenclast \TMP3, \XMM1           # Round 10
--	aesenclast \TMP3, \XMM2
--	aesenclast \TMP3, \XMM3
--	aesenclast \TMP3, \XMM4
--	movdqu    HashKey_k(%arg2), \TMP5
--	pclmulqdq $0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
--	movdqu	  (%arg4,%r11,1), \TMP3
--	pxor	  \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
--	movdqu	  16(%arg4,%r11,1), \TMP3
--	pxor	  \TMP3, \XMM2                 # Ciphertext/Plaintext XOR EK
--	movdqu	  32(%arg4,%r11,1), \TMP3
--	pxor	  \TMP3, \XMM3                 # Ciphertext/Plaintext XOR EK
--	movdqu	  48(%arg4,%r11,1), \TMP3
--	pxor	  \TMP3, \XMM4                 # Ciphertext/Plaintext XOR EK
--        movdqu    \XMM1, (%arg3,%r11,1)        # Write to the ciphertext buffer
--        movdqu    \XMM2, 16(%arg3,%r11,1)      # Write to the ciphertext buffer
--        movdqu    \XMM3, 32(%arg3,%r11,1)      # Write to the ciphertext buffer
--        movdqu    \XMM4, 48(%arg3,%r11,1)      # Write to the ciphertext buffer
--	pshufb %xmm15, \XMM1        # perform a 16 byte swap
--	pshufb %xmm15, \XMM2	# perform a 16 byte swap
--	pshufb %xmm15, \XMM3	# perform a 16 byte swap
--	pshufb %xmm15, \XMM4	# perform a 16 byte swap
--
--	pxor	  \TMP4, \TMP1
--	pxor	  \XMM8, \XMM5
--	pxor	  \TMP6, \TMP2
--	pxor	  \TMP1, \TMP2
--	pxor	  \XMM5, \TMP2
--	movdqa	  \TMP2, \TMP3
--	pslldq	  $8, \TMP3                    # left shift TMP3 2 DWs
--	psrldq	  $8, \TMP2                    # right shift TMP2 2 DWs
--	pxor	  \TMP3, \XMM5
--	pxor	  \TMP2, \TMP1	  # accumulate the results in TMP1:XMM5
--
--        # first phase of reduction
--
--	movdqa    \XMM5, \TMP2
--	movdqa    \XMM5, \TMP3
--	movdqa    \XMM5, \TMP4
--# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
--	pslld     $31, \TMP2                   # packed right shift << 31
--	pslld     $30, \TMP3                   # packed right shift << 30
--	pslld     $25, \TMP4                   # packed right shift << 25
--	pxor      \TMP3, \TMP2	               # xor the shifted versions
--	pxor      \TMP4, \TMP2
--	movdqa    \TMP2, \TMP5
--	psrldq    $4, \TMP5                    # right shift T5 1 DW
--	pslldq    $12, \TMP2                   # left shift T2 3 DWs
--	pxor      \TMP2, \XMM5
--
--        # second phase of reduction
--
--	movdqa    \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4
--	movdqa    \XMM5,\TMP3
--	movdqa    \XMM5,\TMP4
--	psrld     $1, \TMP2                    # packed left shift >>1
--	psrld     $2, \TMP3                    # packed left shift >>2
--	psrld     $7, \TMP4                    # packed left shift >>7
--	pxor      \TMP3,\TMP2		       # xor the shifted versions
--	pxor      \TMP4,\TMP2
--	pxor      \TMP5, \TMP2
--	pxor      \TMP2, \XMM5
--	pxor      \TMP1, \XMM5                 # result is in TMP1
--
--	pxor	  \XMM5, \XMM1
--.endm
--
--/*
--* decrypt 4 blocks at a time
--* ghash the 4 previously decrypted ciphertext blocks
--* arg1, %arg3, %arg4 are used as pointers only, not modified
--* %r11 is the data offset value
--*/
--.macro GHASH_4_ENCRYPT_4_PARALLEL_dec TMP1 TMP2 TMP3 TMP4 TMP5 \
--TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
--
--	movdqa	  \XMM1, \XMM5
--	movdqa	  \XMM2, \XMM6
--	movdqa	  \XMM3, \XMM7
--	movdqa	  \XMM4, \XMM8
--
--        movdqa    SHUF_MASK(%rip), %xmm15
--        # multiply TMP5 * HashKey using karatsuba
--
--	movdqa	  \XMM5, \TMP4
--	pshufd	  $78, \XMM5, \TMP6
--	pxor	  \XMM5, \TMP6
--	paddd     ONE(%rip), \XMM0		# INCR CNT
--	movdqu	  HashKey_4(%arg2), \TMP5
--	pclmulqdq $0x11, \TMP5, \TMP4           # TMP4 = a1*b1
--	movdqa    \XMM0, \XMM1
--	paddd     ONE(%rip), \XMM0		# INCR CNT
--	movdqa    \XMM0, \XMM2
--	paddd     ONE(%rip), \XMM0		# INCR CNT
--	movdqa    \XMM0, \XMM3
--	paddd     ONE(%rip), \XMM0		# INCR CNT
--	movdqa    \XMM0, \XMM4
--	pshufb %xmm15, \XMM1	# perform a 16 byte swap
--	pclmulqdq $0x00, \TMP5, \XMM5           # XMM5 = a0*b0
--	pshufb %xmm15, \XMM2	# perform a 16 byte swap
--	pshufb %xmm15, \XMM3	# perform a 16 byte swap
--	pshufb %xmm15, \XMM4	# perform a 16 byte swap
--
--	pxor	  (%arg1), \XMM1
--	pxor	  (%arg1), \XMM2
--	pxor	  (%arg1), \XMM3
--	pxor	  (%arg1), \XMM4
--	movdqu	  HashKey_4_k(%arg2), \TMP5
--	pclmulqdq $0x00, \TMP5, \TMP6       # TMP6 = (a1+a0)*(b1+b0)
--	movaps 0x10(%arg1), \TMP1
--	aesenc	  \TMP1, \XMM1              # Round 1
--	aesenc	  \TMP1, \XMM2
--	aesenc	  \TMP1, \XMM3
--	aesenc	  \TMP1, \XMM4
--	movaps 0x20(%arg1), \TMP1
--	aesenc	  \TMP1, \XMM1              # Round 2
--	aesenc	  \TMP1, \XMM2
--	aesenc	  \TMP1, \XMM3
--	aesenc	  \TMP1, \XMM4
--	movdqa	  \XMM6, \TMP1
--	pshufd	  $78, \XMM6, \TMP2
--	pxor	  \XMM6, \TMP2
--	movdqu	  HashKey_3(%arg2), \TMP5
--	pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1 * b1
--	movaps 0x30(%arg1), \TMP3
--	aesenc    \TMP3, \XMM1              # Round 3
--	aesenc    \TMP3, \XMM2
--	aesenc    \TMP3, \XMM3
--	aesenc    \TMP3, \XMM4
--	pclmulqdq $0x00, \TMP5, \XMM6       # XMM6 = a0*b0
--	movaps 0x40(%arg1), \TMP3
--	aesenc	  \TMP3, \XMM1              # Round 4
--	aesenc	  \TMP3, \XMM2
--	aesenc	  \TMP3, \XMM3
--	aesenc	  \TMP3, \XMM4
--	movdqu	  HashKey_3_k(%arg2), \TMP5
--	pclmulqdq $0x00, \TMP5, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
--	movaps 0x50(%arg1), \TMP3
--	aesenc	  \TMP3, \XMM1              # Round 5
--	aesenc	  \TMP3, \XMM2
--	aesenc	  \TMP3, \XMM3
--	aesenc	  \TMP3, \XMM4
--	pxor	  \TMP1, \TMP4
--# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
--	pxor	  \XMM6, \XMM5
--	pxor	  \TMP2, \TMP6
--	movdqa	  \XMM7, \TMP1
--	pshufd	  $78, \XMM7, \TMP2
--	pxor	  \XMM7, \TMP2
--	movdqu	  HashKey_2(%arg2), \TMP5
--
--        # Multiply TMP5 * HashKey using karatsuba
--
--	pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1*b1
--	movaps 0x60(%arg1), \TMP3
--	aesenc	  \TMP3, \XMM1              # Round 6
--	aesenc	  \TMP3, \XMM2
--	aesenc	  \TMP3, \XMM3
--	aesenc	  \TMP3, \XMM4
--	pclmulqdq $0x00, \TMP5, \XMM7       # XMM7 = a0*b0
--	movaps 0x70(%arg1), \TMP3
--	aesenc	  \TMP3, \XMM1              # Round 7
--	aesenc	  \TMP3, \XMM2
--	aesenc	  \TMP3, \XMM3
--	aesenc	  \TMP3, \XMM4
--	movdqu	  HashKey_2_k(%arg2), \TMP5
--	pclmulqdq $0x00, \TMP5, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
--	movaps 0x80(%arg1), \TMP3
--	aesenc	  \TMP3, \XMM1              # Round 8
--	aesenc	  \TMP3, \XMM2
--	aesenc	  \TMP3, \XMM3
--	aesenc	  \TMP3, \XMM4
--	pxor	  \TMP1, \TMP4
--# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
--	pxor	  \XMM7, \XMM5
--	pxor	  \TMP2, \TMP6
--
--        # Multiply XMM8 * HashKey
--        # XMM8 and TMP5 hold the values for the two operands
--
--	movdqa	  \XMM8, \TMP1
--	pshufd	  $78, \XMM8, \TMP2
--	pxor	  \XMM8, \TMP2
--	movdqu	  HashKey(%arg2), \TMP5
--	pclmulqdq $0x11, \TMP5, \TMP1      # TMP1 = a1*b1
--	movaps 0x90(%arg1), \TMP3
--	aesenc	  \TMP3, \XMM1             # Round 9
--	aesenc	  \TMP3, \XMM2
--	aesenc	  \TMP3, \XMM3
--	aesenc	  \TMP3, \XMM4
--	pclmulqdq $0x00, \TMP5, \XMM8      # XMM8 = a0*b0
--	lea	  0xa0(%arg1),%r10
--	mov	  keysize,%eax
--	shr	  $2,%eax		        # 128->4, 192->6, 256->8
--	sub	  $4,%eax			# 128->0, 192->2, 256->4
--	jz	  .Laes_loop_par_dec_done\@
--
--.Laes_loop_par_dec\@:
--	MOVADQ	  (%r10),\TMP3
--.irpc	index, 1234
--	aesenc	  \TMP3, %xmm\index
--.endr
--	add	  $16,%r10
--	sub	  $1,%eax
--	jnz	  .Laes_loop_par_dec\@
--
--.Laes_loop_par_dec_done\@:
--	MOVADQ	  (%r10), \TMP3
--	aesenclast \TMP3, \XMM1           # last round
--	aesenclast \TMP3, \XMM2
--	aesenclast \TMP3, \XMM3
--	aesenclast \TMP3, \XMM4
--	movdqu    HashKey_k(%arg2), \TMP5
--	pclmulqdq $0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
--	movdqu	  (%arg4,%r11,1), \TMP3
--	pxor	  \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
--	movdqu	  \XMM1, (%arg3,%r11,1)        # Write to plaintext buffer
--	movdqa    \TMP3, \XMM1
--	movdqu	  16(%arg4,%r11,1), \TMP3
--	pxor	  \TMP3, \XMM2                 # Ciphertext/Plaintext XOR EK
--	movdqu	  \XMM2, 16(%arg3,%r11,1)      # Write to plaintext buffer
--	movdqa    \TMP3, \XMM2
--	movdqu	  32(%arg4,%r11,1), \TMP3
--	pxor	  \TMP3, \XMM3                 # Ciphertext/Plaintext XOR EK
--	movdqu	  \XMM3, 32(%arg3,%r11,1)      # Write to plaintext buffer
--	movdqa    \TMP3, \XMM3
--	movdqu	  48(%arg4,%r11,1), \TMP3
--	pxor	  \TMP3, \XMM4                 # Ciphertext/Plaintext XOR EK
--	movdqu	  \XMM4, 48(%arg3,%r11,1)      # Write to plaintext buffer
--	movdqa    \TMP3, \XMM4
--	pshufb %xmm15, \XMM1        # perform a 16 byte swap
--	pshufb %xmm15, \XMM2	# perform a 16 byte swap
--	pshufb %xmm15, \XMM3	# perform a 16 byte swap
--	pshufb %xmm15, \XMM4	# perform a 16 byte swap
--
--	pxor	  \TMP4, \TMP1
--	pxor	  \XMM8, \XMM5
--	pxor	  \TMP6, \TMP2
--	pxor	  \TMP1, \TMP2
--	pxor	  \XMM5, \TMP2
--	movdqa	  \TMP2, \TMP3
--	pslldq	  $8, \TMP3                    # left shift TMP3 2 DWs
--	psrldq	  $8, \TMP2                    # right shift TMP2 2 DWs
--	pxor	  \TMP3, \XMM5
--	pxor	  \TMP2, \TMP1	  # accumulate the results in TMP1:XMM5
--
--        # first phase of reduction
--
--	movdqa    \XMM5, \TMP2
--	movdqa    \XMM5, \TMP3
--	movdqa    \XMM5, \TMP4
--# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
--	pslld     $31, \TMP2                   # packed right shift << 31
--	pslld     $30, \TMP3                   # packed right shift << 30
--	pslld     $25, \TMP4                   # packed right shift << 25
--	pxor      \TMP3, \TMP2	               # xor the shifted versions
--	pxor      \TMP4, \TMP2
--	movdqa    \TMP2, \TMP5
--	psrldq    $4, \TMP5                    # right shift T5 1 DW
--	pslldq    $12, \TMP2                   # left shift T2 3 DWs
--	pxor      \TMP2, \XMM5
--
--        # second phase of reduction
--
--	movdqa    \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4
--	movdqa    \XMM5,\TMP3
--	movdqa    \XMM5,\TMP4
--	psrld     $1, \TMP2                    # packed left shift >>1
--	psrld     $2, \TMP3                    # packed left shift >>2
--	psrld     $7, \TMP4                    # packed left shift >>7
--	pxor      \TMP3,\TMP2		       # xor the shifted versions
--	pxor      \TMP4,\TMP2
--	pxor      \TMP5, \TMP2
--	pxor      \TMP2, \XMM5
--	pxor      \TMP1, \XMM5                 # result is in TMP1
--
--	pxor	  \XMM5, \XMM1
--.endm
--
--/* GHASH the last 4 ciphertext blocks. */
--.macro	GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \
--TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
--
--        # Multiply TMP6 * HashKey (using Karatsuba)
--
--	movdqa	  \XMM1, \TMP6
--	pshufd	  $78, \XMM1, \TMP2
--	pxor	  \XMM1, \TMP2
--	movdqu	  HashKey_4(%arg2), \TMP5
--	pclmulqdq $0x11, \TMP5, \TMP6       # TMP6 = a1*b1
--	pclmulqdq $0x00, \TMP5, \XMM1       # XMM1 = a0*b0
--	movdqu	  HashKey_4_k(%arg2), \TMP4
--	pclmulqdq $0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
--	movdqa	  \XMM1, \XMMDst
--	movdqa	  \TMP2, \XMM1              # result in TMP6, XMMDst, XMM1
--
--        # Multiply TMP1 * HashKey (using Karatsuba)
--
--	movdqa	  \XMM2, \TMP1
--	pshufd	  $78, \XMM2, \TMP2
--	pxor	  \XMM2, \TMP2
--	movdqu	  HashKey_3(%arg2), \TMP5
--	pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1*b1
--	pclmulqdq $0x00, \TMP5, \XMM2       # XMM2 = a0*b0
--	movdqu	  HashKey_3_k(%arg2), \TMP4
--	pclmulqdq $0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
--	pxor	  \TMP1, \TMP6
--	pxor	  \XMM2, \XMMDst
--	pxor	  \TMP2, \XMM1
--# results accumulated in TMP6, XMMDst, XMM1
--
--        # Multiply TMP1 * HashKey (using Karatsuba)
--
--	movdqa	  \XMM3, \TMP1
--	pshufd	  $78, \XMM3, \TMP2
--	pxor	  \XMM3, \TMP2
--	movdqu	  HashKey_2(%arg2), \TMP5
--	pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1*b1
--	pclmulqdq $0x00, \TMP5, \XMM3       # XMM3 = a0*b0
--	movdqu	  HashKey_2_k(%arg2), \TMP4
--	pclmulqdq $0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
--	pxor	  \TMP1, \TMP6
--	pxor	  \XMM3, \XMMDst
--	pxor	  \TMP2, \XMM1   # results accumulated in TMP6, XMMDst, XMM1
--
--        # Multiply TMP1 * HashKey (using Karatsuba)
--	movdqa	  \XMM4, \TMP1
--	pshufd	  $78, \XMM4, \TMP2
--	pxor	  \XMM4, \TMP2
--	movdqu	  HashKey(%arg2), \TMP5
--	pclmulqdq $0x11, \TMP5, \TMP1	    # TMP1 = a1*b1
--	pclmulqdq $0x00, \TMP5, \XMM4       # XMM4 = a0*b0
--	movdqu	  HashKey_k(%arg2), \TMP4
--	pclmulqdq $0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
--	pxor	  \TMP1, \TMP6
--	pxor	  \XMM4, \XMMDst
--	pxor	  \XMM1, \TMP2
--	pxor	  \TMP6, \TMP2
--	pxor	  \XMMDst, \TMP2
--	# middle section of the temp results combined as in karatsuba algorithm
--	movdqa	  \TMP2, \TMP4
--	pslldq	  $8, \TMP4                 # left shift TMP4 2 DWs
--	psrldq	  $8, \TMP2                 # right shift TMP2 2 DWs
--	pxor	  \TMP4, \XMMDst
--	pxor	  \TMP2, \TMP6
--# TMP6:XMMDst holds the result of the accumulated carry-less multiplications
--	# first phase of the reduction
--	movdqa    \XMMDst, \TMP2
--	movdqa    \XMMDst, \TMP3
--	movdqa    \XMMDst, \TMP4
--# move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently
--	pslld     $31, \TMP2                # packed right shifting << 31
--	pslld     $30, \TMP3                # packed right shifting << 30
--	pslld     $25, \TMP4                # packed right shifting << 25
--	pxor      \TMP3, \TMP2              # xor the shifted versions
--	pxor      \TMP4, \TMP2
--	movdqa    \TMP2, \TMP7
--	psrldq    $4, \TMP7                 # right shift TMP7 1 DW
--	pslldq    $12, \TMP2                # left shift TMP2 3 DWs
--	pxor      \TMP2, \XMMDst
--
--        # second phase of the reduction
--	movdqa    \XMMDst, \TMP2
--	# make 3 copies of XMMDst for doing 3 shift operations
--	movdqa    \XMMDst, \TMP3
--	movdqa    \XMMDst, \TMP4
--	psrld     $1, \TMP2                 # packed left shift >> 1
--	psrld     $2, \TMP3                 # packed left shift >> 2
--	psrld     $7, \TMP4                 # packed left shift >> 7
--	pxor      \TMP3, \TMP2              # xor the shifted versions
--	pxor      \TMP4, \TMP2
--	pxor      \TMP7, \TMP2
--	pxor      \TMP2, \XMMDst
--	pxor      \TMP6, \XMMDst            # reduced result is in XMMDst
--.endm
--
--
--/* Encryption of a single block
--* uses eax & r10
--*/
--
--.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1
--
--	pxor		(%arg1), \XMM0
--	mov		keysize,%eax
--	shr		$2,%eax			# 128->4, 192->6, 256->8
--	add		$5,%eax			# 128->9, 192->11, 256->13
--	lea		16(%arg1), %r10	  # get first expanded key address
--
--_esb_loop_\@:
--	MOVADQ		(%r10),\TMP1
--	aesenc		\TMP1,\XMM0
--	add		$16,%r10
--	sub		$1,%eax
--	jnz		_esb_loop_\@
--
--	MOVADQ		(%r10),\TMP1
--	aesenclast	\TMP1,\XMM0
--.endm
--
--/*****************************************************************************
--* void aesni_gcm_init(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
--*                     struct gcm_context_data *data,
--*                                         // context data
--*                     u8 *iv,             // Pre-counter block j0: 4 byte salt (from Security Association)
--*                                         // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
--*                                         // concatenated with 0x00000001. 16-byte aligned pointer.
--*                     u8 *hash_subkey,    // H, the Hash sub key input. Data starts on a 16-byte boundary.
--*                     const u8 *aad,      // Additional Authentication Data (AAD)
--*                     u64 aad_len)        // Length of AAD in bytes.
--*/
--SYM_FUNC_START(aesni_gcm_init)
--	FUNC_SAVE
--	GCM_INIT %arg3, %arg4,%arg5, %arg6
--	FUNC_RESTORE
--	RET
--SYM_FUNC_END(aesni_gcm_init)
--
--/*****************************************************************************
--* void aesni_gcm_enc_update(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
--*                    struct gcm_context_data *data,
--*                                        // context data
--*                    u8 *out,            // Ciphertext output. Encrypt in-place is allowed.
--*                    const u8 *in,       // Plaintext input
--*                    u64 plaintext_len,  // Length of data in bytes for encryption.
--*/
--SYM_FUNC_START(aesni_gcm_enc_update)
--	FUNC_SAVE
--	GCM_ENC_DEC enc
--	FUNC_RESTORE
--	RET
--SYM_FUNC_END(aesni_gcm_enc_update)
--
--/*****************************************************************************
--* void aesni_gcm_dec_update(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
--*                    struct gcm_context_data *data,
--*                                        // context data
--*                    u8 *out,            // Ciphertext output. Encrypt in-place is allowed.
--*                    const u8 *in,       // Plaintext input
--*                    u64 plaintext_len,  // Length of data in bytes for encryption.
--*/
--SYM_FUNC_START(aesni_gcm_dec_update)
--	FUNC_SAVE
--	GCM_ENC_DEC dec
--	FUNC_RESTORE
--	RET
--SYM_FUNC_END(aesni_gcm_dec_update)
--
--/*****************************************************************************
--* void aesni_gcm_finalize(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
--*                    struct gcm_context_data *data,
--*                                        // context data
--*                    u8 *auth_tag,       // Authenticated Tag output.
--*                    u64 auth_tag_len);  // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
--*                                        // 12 or 8.
--*/
--SYM_FUNC_START(aesni_gcm_finalize)
--	FUNC_SAVE
--	GCM_COMPLETE %arg3 %arg4
--	FUNC_RESTORE
--	RET
--SYM_FUNC_END(aesni_gcm_finalize)
--
--#endif
--
- SYM_FUNC_START_LOCAL(_key_expansion_256a)
- 	pshufd $0b11111111, %xmm1, %xmm1
- 	shufps $0b00010000, %xmm0, %xmm4
-diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
-deleted file mode 100644
-index 8c9749ed0651..000000000000
---- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
-+++ /dev/null
-@@ -1,2804 +0,0 @@
--########################################################################
--# Copyright (c) 2013, Intel Corporation
--#
--# This software is available to you under a choice of one of two
--# licenses.  You may choose to be licensed under the terms of the GNU
--# General Public License (GPL) Version 2, available from the file
--# COPYING in the main directory of this source tree, or the
--# OpenIB.org BSD license below:
--#
--# Redistribution and use in source and binary forms, with or without
--# modification, are permitted provided that the following conditions are
--# met:
--#
--# * Redistributions of source code must retain the above copyright
--#   notice, this list of conditions and the following disclaimer.
--#
--# * Redistributions in binary form must reproduce the above copyright
--#   notice, this list of conditions and the following disclaimer in the
--#   documentation and/or other materials provided with the
--#   distribution.
--#
--# * Neither the name of the Intel Corporation nor the names of its
--#   contributors may be used to endorse or promote products derived from
--#   this software without specific prior written permission.
--#
--#
--# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
--# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
--# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
--# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
--# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
--# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
--# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES# LOSS OF USE, DATA, OR
--# PROFITS# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
--# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
--# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
--# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--########################################################################
--##
--## Authors:
--##	Erdinc Ozturk <erdinc.ozturk@intel.com>
--##	Vinodh Gopal <vinodh.gopal@intel.com>
--##	James Guilford <james.guilford@intel.com>
--##	Tim Chen <tim.c.chen@linux.intel.com>
--##
--## References:
--##       This code was derived and highly optimized from the code described in paper:
--##               Vinodh Gopal et. al. Optimized Galois-Counter-Mode Implementation
--##			on Intel Architecture Processors. August, 2010
--##       The details of the implementation is explained in:
--##               Erdinc Ozturk et. al. Enabling High-Performance Galois-Counter-Mode
--##			on Intel Architecture Processors. October, 2012.
--##
--## Assumptions:
--##
--##
--##
--## iv:
--##       0                   1                   2                   3
--##       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
--##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
--##       |                             Salt  (From the SA)               |
--##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
--##       |                     Initialization Vector                     |
--##       |         (This is the sequence number from IPSec header)       |
--##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
--##       |                              0x1                              |
--##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
--##
--##
--##
--## AAD:
--##       AAD padded to 128 bits with 0
--##       for example, assume AAD is a u32 vector
--##
--##       if AAD is 8 bytes:
--##       AAD[3] = {A0, A1}#
--##       padded AAD in xmm register = {A1 A0 0 0}
--##
--##       0                   1                   2                   3
--##       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
--##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
--##       |                               SPI (A1)                        |
--##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
--##       |                     32-bit Sequence Number (A0)               |
--##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
--##       |                              0x0                              |
--##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
--##
--##                                       AAD Format with 32-bit Sequence Number
--##
--##       if AAD is 12 bytes:
--##       AAD[3] = {A0, A1, A2}#
--##       padded AAD in xmm register = {A2 A1 A0 0}
--##
--##       0                   1                   2                   3
--##       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
--##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
--##       |                               SPI (A2)                        |
--##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
--##       |                 64-bit Extended Sequence Number {A1,A0}       |
--##       |                                                               |
--##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
--##       |                              0x0                              |
--##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
--##
--##        AAD Format with 64-bit Extended Sequence Number
--##
--##
--## aadLen:
--##       from the definition of the spec, aadLen can only be 8 or 12 bytes.
--##	 The code additionally supports aadLen of length 16 bytes.
--##
--## TLen:
--##       from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
--##
--## poly = x^128 + x^127 + x^126 + x^121 + 1
--## throughout the code, one tab and two tab indentations are used. one tab is
--## for GHASH part, two tabs is for AES part.
--##
--
--#include <linux/linkage.h>
--
--# constants in mergeable sections, linker can reorder and merge
--.section	.rodata.cst16.POLY, "aM", @progbits, 16
--.align 16
--POLY:            .octa     0xC2000000000000000000000000000001
--
--.section	.rodata.cst16.POLY2, "aM", @progbits, 16
--.align 16
--POLY2:           .octa     0xC20000000000000000000001C2000000
--
--.section	.rodata.cst16.TWOONE, "aM", @progbits, 16
--.align 16
--TWOONE:          .octa     0x00000001000000000000000000000001
--
--.section	.rodata.cst16.SHUF_MASK, "aM", @progbits, 16
--.align 16
--SHUF_MASK:       .octa     0x000102030405060708090A0B0C0D0E0F
--
--.section	.rodata.cst16.ONE, "aM", @progbits, 16
--.align 16
--ONE:             .octa     0x00000000000000000000000000000001
--
--.section	.rodata.cst16.ONEf, "aM", @progbits, 16
--.align 16
--ONEf:            .octa     0x01000000000000000000000000000000
--
--# order of these constants should not change.
--# more specifically, ALL_F should follow SHIFT_MASK, and zero should follow ALL_F
--.section	.rodata, "a", @progbits
--.align 16
--SHIFT_MASK:      .octa     0x0f0e0d0c0b0a09080706050403020100
--ALL_F:           .octa     0xffffffffffffffffffffffffffffffff
--                 .octa     0x00000000000000000000000000000000
--
--.text
--
--
--#define AadHash 16*0
--#define AadLen 16*1
--#define InLen (16*1)+8
--#define PBlockEncKey 16*2
--#define OrigIV 16*3
--#define CurCount 16*4
--#define PBlockLen 16*5
--
--HashKey        = 16*6   # store HashKey <<1 mod poly here
--HashKey_2      = 16*7   # store HashKey^2 <<1 mod poly here
--HashKey_3      = 16*8   # store HashKey^3 <<1 mod poly here
--HashKey_4      = 16*9   # store HashKey^4 <<1 mod poly here
--HashKey_5      = 16*10   # store HashKey^5 <<1 mod poly here
--HashKey_6      = 16*11   # store HashKey^6 <<1 mod poly here
--HashKey_7      = 16*12   # store HashKey^7 <<1 mod poly here
--HashKey_8      = 16*13   # store HashKey^8 <<1 mod poly here
--HashKey_k      = 16*14   # store XOR of HashKey <<1 mod poly here (for Karatsuba purposes)
--HashKey_2_k    = 16*15   # store XOR of HashKey^2 <<1 mod poly here (for Karatsuba purposes)
--HashKey_3_k    = 16*16   # store XOR of HashKey^3 <<1 mod poly here (for Karatsuba purposes)
--HashKey_4_k    = 16*17   # store XOR of HashKey^4 <<1 mod poly here (for Karatsuba purposes)
--HashKey_5_k    = 16*18   # store XOR of HashKey^5 <<1 mod poly here (for Karatsuba purposes)
--HashKey_6_k    = 16*19   # store XOR of HashKey^6 <<1 mod poly here (for Karatsuba purposes)
--HashKey_7_k    = 16*20   # store XOR of HashKey^7 <<1 mod poly here (for Karatsuba purposes)
--HashKey_8_k    = 16*21   # store XOR of HashKey^8 <<1 mod poly here (for Karatsuba purposes)
--
--#define arg1 %rdi
--#define arg2 %rsi
--#define arg3 %rdx
--#define arg4 %rcx
--#define arg5 %r8
--#define arg6 %r9
--#define keysize 2*15*16(arg1)
--
--i = 0
--j = 0
--
--out_order = 0
--in_order = 1
--DEC = 0
--ENC = 1
--
--.macro define_reg r n
--reg_\r = %xmm\n
--.endm
--
--.macro setreg
--.altmacro
--define_reg i %i
--define_reg j %j
--.noaltmacro
--.endm
--
--TMP1 =   16*0    # Temporary storage for AAD
--TMP2 =   16*1    # Temporary storage for AES State 2 (State 1 is stored in an XMM register)
--TMP3 =   16*2    # Temporary storage for AES State 3
--TMP4 =   16*3    # Temporary storage for AES State 4
--TMP5 =   16*4    # Temporary storage for AES State 5
--TMP6 =   16*5    # Temporary storage for AES State 6
--TMP7 =   16*6    # Temporary storage for AES State 7
--TMP8 =   16*7    # Temporary storage for AES State 8
--
--VARIABLE_OFFSET = 16*8
--
--################################
--# Utility Macros
--################################
--
--.macro FUNC_SAVE
--        push    %r12
--        push    %r13
--        push    %r15
--
--	push	%rbp
--	mov	%rsp, %rbp
--
--        sub     $VARIABLE_OFFSET, %rsp
--        and     $~63, %rsp                    # align rsp to 64 bytes
--.endm
--
--.macro FUNC_RESTORE
--        mov     %rbp, %rsp
--	pop	%rbp
--
--        pop     %r15
--        pop     %r13
--        pop     %r12
--.endm
--
--# Encryption of a single block
--.macro ENCRYPT_SINGLE_BLOCK REP XMM0
--                vpxor    (arg1), \XMM0, \XMM0
--               i = 1
--               setreg
--.rep \REP
--                vaesenc  16*i(arg1), \XMM0, \XMM0
--               i = (i+1)
--               setreg
--.endr
--                vaesenclast 16*i(arg1), \XMM0, \XMM0
--.endm
--
--# combined for GCM encrypt and decrypt functions
--# clobbering all xmm registers
--# clobbering r10, r11, r12, r13, r15, rax
--.macro  GCM_ENC_DEC INITIAL_BLOCKS GHASH_8_ENCRYPT_8_PARALLEL GHASH_LAST_8 GHASH_MUL ENC_DEC REP
--        vmovdqu AadHash(arg2), %xmm8
--        vmovdqu  HashKey(arg2), %xmm13      # xmm13 = HashKey
--        add arg5, InLen(arg2)
--
--        # initialize the data pointer offset as zero
--        xor     %r11d, %r11d
--
--        PARTIAL_BLOCK \GHASH_MUL, arg3, arg4, arg5, %r11, %xmm8, \ENC_DEC
--        sub %r11, arg5
--
--        mov     arg5, %r13                  # save the number of bytes of plaintext/ciphertext
--        and     $-16, %r13                  # r13 = r13 - (r13 mod 16)
--
--        mov     %r13, %r12
--        shr     $4, %r12
--        and     $7, %r12
--        jz      .L_initial_num_blocks_is_0\@
--
--        cmp     $7, %r12
--        je      .L_initial_num_blocks_is_7\@
--        cmp     $6, %r12
--        je      .L_initial_num_blocks_is_6\@
--        cmp     $5, %r12
--        je      .L_initial_num_blocks_is_5\@
--        cmp     $4, %r12
--        je      .L_initial_num_blocks_is_4\@
--        cmp     $3, %r12
--        je      .L_initial_num_blocks_is_3\@
--        cmp     $2, %r12
--        je      .L_initial_num_blocks_is_2\@
--
--        jmp     .L_initial_num_blocks_is_1\@
--
--.L_initial_num_blocks_is_7\@:
--        \INITIAL_BLOCKS  \REP, 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
--        sub     $16*7, %r13
--        jmp     .L_initial_blocks_encrypted\@
--
--.L_initial_num_blocks_is_6\@:
--        \INITIAL_BLOCKS  \REP, 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
--        sub     $16*6, %r13
--        jmp     .L_initial_blocks_encrypted\@
--
--.L_initial_num_blocks_is_5\@:
--        \INITIAL_BLOCKS  \REP, 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
--        sub     $16*5, %r13
--        jmp     .L_initial_blocks_encrypted\@
--
--.L_initial_num_blocks_is_4\@:
--        \INITIAL_BLOCKS  \REP, 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
--        sub     $16*4, %r13
--        jmp     .L_initial_blocks_encrypted\@
--
--.L_initial_num_blocks_is_3\@:
--        \INITIAL_BLOCKS  \REP, 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
--        sub     $16*3, %r13
--        jmp     .L_initial_blocks_encrypted\@
--
--.L_initial_num_blocks_is_2\@:
--        \INITIAL_BLOCKS  \REP, 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
--        sub     $16*2, %r13
--        jmp     .L_initial_blocks_encrypted\@
--
--.L_initial_num_blocks_is_1\@:
--        \INITIAL_BLOCKS  \REP, 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
--        sub     $16*1, %r13
--        jmp     .L_initial_blocks_encrypted\@
--
--.L_initial_num_blocks_is_0\@:
--        \INITIAL_BLOCKS  \REP, 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
--
--
--.L_initial_blocks_encrypted\@:
--        test    %r13, %r13
--        je      .L_zero_cipher_left\@
--
--        sub     $128, %r13
--        je      .L_eight_cipher_left\@
--
--
--
--
--        vmovd   %xmm9, %r15d
--        and     $255, %r15d
--        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
--
--
--.L_encrypt_by_8_new\@:
--        cmp     $(255-8), %r15d
--        jg      .L_encrypt_by_8\@
--
--
--
--        add     $8, %r15b
--        \GHASH_8_ENCRYPT_8_PARALLEL      \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC
--        add     $128, %r11
--        sub     $128, %r13
--        jne     .L_encrypt_by_8_new\@
--
--        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
--        jmp     .L_eight_cipher_left\@
--
--.L_encrypt_by_8\@:
--        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
--        add     $8, %r15b
--        \GHASH_8_ENCRYPT_8_PARALLEL      \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC
--        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
--        add     $128, %r11
--        sub     $128, %r13
--        jne     .L_encrypt_by_8_new\@
--
--        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
--
--
--
--
--.L_eight_cipher_left\@:
--        \GHASH_LAST_8    %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8
--
--
--.L_zero_cipher_left\@:
--        vmovdqu %xmm14, AadHash(arg2)
--        vmovdqu %xmm9, CurCount(arg2)
--
--        # check for 0 length
--        mov     arg5, %r13
--        and     $15, %r13                            # r13 = (arg5 mod 16)
--
--        je      .L_multiple_of_16_bytes\@
--
--        # handle the last <16 Byte block separately
--
--        mov %r13, PBlockLen(arg2)
--
--        vpaddd  ONE(%rip), %xmm9, %xmm9              # INCR CNT to get Yn
--        vmovdqu %xmm9, CurCount(arg2)
--        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
--
--        ENCRYPT_SINGLE_BLOCK    \REP, %xmm9                # E(K, Yn)
--        vmovdqu %xmm9, PBlockEncKey(arg2)
--
--        cmp $16, arg5
--        jge .L_large_enough_update\@
--
--        lea (arg4,%r11,1), %r10
--        mov %r13, %r12
--
--        READ_PARTIAL_BLOCK %r10 %r12 %xmm1
--
--        lea     SHIFT_MASK+16(%rip), %r12
--        sub     %r13, %r12                           # adjust the shuffle mask pointer to be
--						     # able to shift 16-r13 bytes (r13 is the
--	# number of bytes in plaintext mod 16)
--
--        jmp .L_final_ghash_mul\@
--
--.L_large_enough_update\@:
--        sub $16, %r11
--        add %r13, %r11
--
--        # receive the last <16 Byte block
--        vmovdqu	(arg4, %r11, 1), %xmm1
--
--        sub	%r13, %r11
--        add	$16, %r11
--
--        lea	SHIFT_MASK+16(%rip), %r12
--        # adjust the shuffle mask pointer to be able to shift 16-r13 bytes
--        # (r13 is the number of bytes in plaintext mod 16)
--        sub	%r13, %r12
--        # get the appropriate shuffle mask
--        vmovdqu	(%r12), %xmm2
--        # shift right 16-r13 bytes
--        vpshufb  %xmm2, %xmm1, %xmm1
--
--.L_final_ghash_mul\@:
--        .if  \ENC_DEC ==  DEC
--        vmovdqa %xmm1, %xmm2
--        vpxor   %xmm1, %xmm9, %xmm9                  # Plaintext XOR E(K, Yn)
--        vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1        # get the appropriate mask to
--						     # mask out top 16-r13 bytes of xmm9
--        vpand   %xmm1, %xmm9, %xmm9                  # mask out top 16-r13 bytes of xmm9
--        vpand   %xmm1, %xmm2, %xmm2
--        vpshufb SHUF_MASK(%rip), %xmm2, %xmm2
--        vpxor   %xmm2, %xmm14, %xmm14
--
--        vmovdqu %xmm14, AadHash(arg2)
--        .else
--        vpxor   %xmm1, %xmm9, %xmm9                  # Plaintext XOR E(K, Yn)
--        vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1        # get the appropriate mask to
--						     # mask out top 16-r13 bytes of xmm9
--        vpand   %xmm1, %xmm9, %xmm9                  # mask out top 16-r13 bytes of xmm9
--        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
--        vpxor   %xmm9, %xmm14, %xmm14
--
--        vmovdqu %xmm14, AadHash(arg2)
--        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9        # shuffle xmm9 back to output as ciphertext
--        .endif
--
--
--        #############################
--        # output r13 Bytes
--        vmovq   %xmm9, %rax
--        cmp     $8, %r13
--        jle     .L_less_than_8_bytes_left\@
--
--        mov     %rax, (arg3 , %r11)
--        add     $8, %r11
--        vpsrldq $8, %xmm9, %xmm9
--        vmovq   %xmm9, %rax
--        sub     $8, %r13
--
--.L_less_than_8_bytes_left\@:
--        movb    %al, (arg3 , %r11)
--        add     $1, %r11
--        shr     $8, %rax
--        sub     $1, %r13
--        jne     .L_less_than_8_bytes_left\@
--        #############################
--
--.L_multiple_of_16_bytes\@:
--.endm
--
--
--# GCM_COMPLETE Finishes update of tag of last partial block
--# Output: Authorization Tag (AUTH_TAG)
--# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
--.macro GCM_COMPLETE GHASH_MUL REP AUTH_TAG AUTH_TAG_LEN
--        vmovdqu AadHash(arg2), %xmm14
--        vmovdqu HashKey(arg2), %xmm13
--
--        mov PBlockLen(arg2), %r12
--        test %r12, %r12
--        je .L_partial_done\@
--
--	#GHASH computation for the last <16 Byte block
--        \GHASH_MUL       %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
--
--.L_partial_done\@:
--        mov AadLen(arg2), %r12                          # r12 = aadLen (number of bytes)
--        shl     $3, %r12                             # convert into number of bits
--        vmovd   %r12d, %xmm15                        # len(A) in xmm15
--
--        mov InLen(arg2), %r12
--        shl     $3, %r12                        # len(C) in bits  (*128)
--        vmovq   %r12, %xmm1
--        vpslldq $8, %xmm15, %xmm15                   # xmm15 = len(A)|| 0x0000000000000000
--        vpxor   %xmm1, %xmm15, %xmm15                # xmm15 = len(A)||len(C)
--
--        vpxor   %xmm15, %xmm14, %xmm14
--        \GHASH_MUL       %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6    # final GHASH computation
--        vpshufb SHUF_MASK(%rip), %xmm14, %xmm14      # perform a 16Byte swap
--
--        vmovdqu OrigIV(arg2), %xmm9
--
--        ENCRYPT_SINGLE_BLOCK    \REP, %xmm9                # E(K, Y0)
--
--        vpxor   %xmm14, %xmm9, %xmm9
--
--
--
--.L_return_T\@:
--        mov     \AUTH_TAG, %r10              # r10 = authTag
--        mov     \AUTH_TAG_LEN, %r11              # r11 = auth_tag_len
--
--        cmp     $16, %r11
--        je      .L_T_16\@
--
--        cmp     $8, %r11
--        jl      .L_T_4\@
--
--.L_T_8\@:
--        vmovq   %xmm9, %rax
--        mov     %rax, (%r10)
--        add     $8, %r10
--        sub     $8, %r11
--        vpsrldq $8, %xmm9, %xmm9
--        test    %r11, %r11
--        je     .L_return_T_done\@
--.L_T_4\@:
--        vmovd   %xmm9, %eax
--        mov     %eax, (%r10)
--        add     $4, %r10
--        sub     $4, %r11
--        vpsrldq     $4, %xmm9, %xmm9
--        test    %r11, %r11
--        je     .L_return_T_done\@
--.L_T_123\@:
--        vmovd     %xmm9, %eax
--        cmp     $2, %r11
--        jl     .L_T_1\@
--        mov     %ax, (%r10)
--        cmp     $2, %r11
--        je     .L_return_T_done\@
--        add     $2, %r10
--        sar     $16, %eax
--.L_T_1\@:
--        mov     %al, (%r10)
--        jmp     .L_return_T_done\@
--
--.L_T_16\@:
--        vmovdqu %xmm9, (%r10)
--
--.L_return_T_done\@:
--.endm
--
--.macro CALC_AAD_HASH GHASH_MUL AAD AADLEN T1 T2 T3 T4 T5 T6 T7 T8
--
--	mov     \AAD, %r10                      # r10 = AAD
--	mov     \AADLEN, %r12                      # r12 = aadLen
--
--
--	mov     %r12, %r11
--
--	vpxor   \T8, \T8, \T8
--	vpxor   \T7, \T7, \T7
--	cmp     $16, %r11
--	jl      .L_get_AAD_rest8\@
--.L_get_AAD_blocks\@:
--	vmovdqu (%r10), \T7
--	vpshufb SHUF_MASK(%rip), \T7, \T7
--	vpxor   \T7, \T8, \T8
--	\GHASH_MUL       \T8, \T2, \T1, \T3, \T4, \T5, \T6
--	add     $16, %r10
--	sub     $16, %r12
--	sub     $16, %r11
--	cmp     $16, %r11
--	jge     .L_get_AAD_blocks\@
--	vmovdqu \T8, \T7
--	test    %r11, %r11
--	je      .L_get_AAD_done\@
--
--	vpxor   \T7, \T7, \T7
--
--	/* read the last <16B of AAD. since we have at least 4B of
--	data right after the AAD (the ICV, and maybe some CT), we can
--	read 4B/8B blocks safely, and then get rid of the extra stuff */
--.L_get_AAD_rest8\@:
--	cmp     $4, %r11
--	jle     .L_get_AAD_rest4\@
--	movq    (%r10), \T1
--	add     $8, %r10
--	sub     $8, %r11
--	vpslldq $8, \T1, \T1
--	vpsrldq $8, \T7, \T7
--	vpxor   \T1, \T7, \T7
--	jmp     .L_get_AAD_rest8\@
--.L_get_AAD_rest4\@:
--	test    %r11, %r11
--	jle     .L_get_AAD_rest0\@
--	mov     (%r10), %eax
--	movq    %rax, \T1
--	add     $4, %r10
--	sub     $4, %r11
--	vpslldq $12, \T1, \T1
--	vpsrldq $4, \T7, \T7
--	vpxor   \T1, \T7, \T7
--.L_get_AAD_rest0\@:
--	/* finalize: shift out the extra bytes we read, and align
--	left. since pslldq can only shift by an immediate, we use
--	vpshufb and a pair of shuffle masks */
--	leaq	ALL_F(%rip), %r11
--	subq	%r12, %r11
--	vmovdqu	16(%r11), \T1
--	andq	$~3, %r11
--	vpshufb (%r11), \T7, \T7
--	vpand	\T1, \T7, \T7
--.L_get_AAD_rest_final\@:
--	vpshufb SHUF_MASK(%rip), \T7, \T7
--	vpxor   \T8, \T7, \T7
--	\GHASH_MUL       \T7, \T2, \T1, \T3, \T4, \T5, \T6
--
--.L_get_AAD_done\@:
--        vmovdqu \T7, AadHash(arg2)
--.endm
--
--.macro INIT GHASH_MUL PRECOMPUTE
--        mov arg6, %r11
--        mov %r11, AadLen(arg2) # ctx_data.aad_length = aad_length
--        xor %r11d, %r11d
--        mov %r11, InLen(arg2) # ctx_data.in_length = 0
--
--        mov %r11, PBlockLen(arg2) # ctx_data.partial_block_length = 0
--        mov %r11, PBlockEncKey(arg2) # ctx_data.partial_block_enc_key = 0
--        mov arg3, %rax
--        movdqu (%rax), %xmm0
--        movdqu %xmm0, OrigIV(arg2) # ctx_data.orig_IV = iv
--
--        vpshufb SHUF_MASK(%rip), %xmm0, %xmm0
--        movdqu %xmm0, CurCount(arg2) # ctx_data.current_counter = iv
--
--        vmovdqu  (arg4), %xmm6              # xmm6 = HashKey
--
--        vpshufb  SHUF_MASK(%rip), %xmm6, %xmm6
--        ###############  PRECOMPUTATION of HashKey<<1 mod poly from the HashKey
--        vmovdqa  %xmm6, %xmm2
--        vpsllq   $1, %xmm6, %xmm6
--        vpsrlq   $63, %xmm2, %xmm2
--        vmovdqa  %xmm2, %xmm1
--        vpslldq  $8, %xmm2, %xmm2
--        vpsrldq  $8, %xmm1, %xmm1
--        vpor     %xmm2, %xmm6, %xmm6
--        #reduction
--        vpshufd  $0b00100100, %xmm1, %xmm2
--        vpcmpeqd TWOONE(%rip), %xmm2, %xmm2
--        vpand    POLY(%rip), %xmm2, %xmm2
--        vpxor    %xmm2, %xmm6, %xmm6        # xmm6 holds the HashKey<<1 mod poly
--        #######################################################################
--        vmovdqu  %xmm6, HashKey(arg2)       # store HashKey<<1 mod poly
--
--        CALC_AAD_HASH \GHASH_MUL, arg5, arg6, %xmm2, %xmm6, %xmm3, %xmm4, %xmm5, %xmm7, %xmm1, %xmm0
--
--        \PRECOMPUTE  %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5
--.endm
--
--
--# Reads DLEN bytes starting at DPTR and stores in XMMDst
--# where 0 < DLEN < 16
--# Clobbers %rax, DLEN
--.macro READ_PARTIAL_BLOCK DPTR DLEN XMMDst
--        vpxor \XMMDst, \XMMDst, \XMMDst
--
--        cmp $8, \DLEN
--        jl .L_read_lt8_\@
--        mov (\DPTR), %rax
--        vpinsrq $0, %rax, \XMMDst, \XMMDst
--        sub $8, \DLEN
--        jz .L_done_read_partial_block_\@
--        xor %eax, %eax
--.L_read_next_byte_\@:
--        shl $8, %rax
--        mov 7(\DPTR, \DLEN, 1), %al
--        dec \DLEN
--        jnz .L_read_next_byte_\@
--        vpinsrq $1, %rax, \XMMDst, \XMMDst
--        jmp .L_done_read_partial_block_\@
--.L_read_lt8_\@:
--        xor %eax, %eax
--.L_read_next_byte_lt8_\@:
--        shl $8, %rax
--        mov -1(\DPTR, \DLEN, 1), %al
--        dec \DLEN
--        jnz .L_read_next_byte_lt8_\@
--        vpinsrq $0, %rax, \XMMDst, \XMMDst
--.L_done_read_partial_block_\@:
--.endm
--
--# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks
--# between update calls.
--# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK
--# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context
--# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13
--.macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
--        AAD_HASH ENC_DEC
--        mov 	PBlockLen(arg2), %r13
--        test	%r13, %r13
--        je	.L_partial_block_done_\@	# Leave Macro if no partial blocks
--        # Read in input data without over reading
--        cmp	$16, \PLAIN_CYPH_LEN
--        jl	.L_fewer_than_16_bytes_\@
--        vmovdqu	(\PLAIN_CYPH_IN), %xmm1	# If more than 16 bytes, just fill xmm
--        jmp	.L_data_read_\@
--
--.L_fewer_than_16_bytes_\@:
--        lea	(\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
--        mov	\PLAIN_CYPH_LEN, %r12
--        READ_PARTIAL_BLOCK %r10 %r12 %xmm1
--
--        mov PBlockLen(arg2), %r13
--
--.L_data_read_\@:				# Finished reading in data
--
--        vmovdqu	PBlockEncKey(arg2), %xmm9
--        vmovdqu	HashKey(arg2), %xmm13
--
--        lea	SHIFT_MASK(%rip), %r12
--
--        # adjust the shuffle mask pointer to be able to shift r13 bytes
--        # r16-r13 is the number of bytes in plaintext mod 16)
--        add	%r13, %r12
--        vmovdqu	(%r12), %xmm2		# get the appropriate shuffle mask
--        vpshufb %xmm2, %xmm9, %xmm9		# shift right r13 bytes
--
--.if  \ENC_DEC ==  DEC
--        vmovdqa	%xmm1, %xmm3
--        pxor	%xmm1, %xmm9		# Ciphertext XOR E(K, Yn)
--
--        mov	\PLAIN_CYPH_LEN, %r10
--        add	%r13, %r10
--        # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
--        sub	$16, %r10
--        # Determine if partial block is not being filled and
--        # shift mask accordingly
--        jge	.L_no_extra_mask_1_\@
--        sub	%r10, %r12
--.L_no_extra_mask_1_\@:
--
--        vmovdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
--        # get the appropriate mask to mask out bottom r13 bytes of xmm9
--        vpand	%xmm1, %xmm9, %xmm9		# mask out bottom r13 bytes of xmm9
--
--        vpand	%xmm1, %xmm3, %xmm3
--        vmovdqa	SHUF_MASK(%rip), %xmm10
--        vpshufb	%xmm10, %xmm3, %xmm3
--        vpshufb	%xmm2, %xmm3, %xmm3
--        vpxor	%xmm3, \AAD_HASH, \AAD_HASH
--
--        test	%r10, %r10
--        jl	.L_partial_incomplete_1_\@
--
--        # GHASH computation for the last <16 Byte block
--        \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
--        xor	%eax,%eax
--
--        mov	%rax, PBlockLen(arg2)
--        jmp	.L_dec_done_\@
--.L_partial_incomplete_1_\@:
--        add	\PLAIN_CYPH_LEN, PBlockLen(arg2)
--.L_dec_done_\@:
--        vmovdqu	\AAD_HASH, AadHash(arg2)
--.else
--        vpxor	%xmm1, %xmm9, %xmm9			# Plaintext XOR E(K, Yn)
--
--        mov	\PLAIN_CYPH_LEN, %r10
--        add	%r13, %r10
--        # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
--        sub	$16, %r10
--        # Determine if partial block is not being filled and
--        # shift mask accordingly
--        jge	.L_no_extra_mask_2_\@
--        sub	%r10, %r12
--.L_no_extra_mask_2_\@:
--
--        vmovdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
--        # get the appropriate mask to mask out bottom r13 bytes of xmm9
--        vpand	%xmm1, %xmm9, %xmm9
--
--        vmovdqa	SHUF_MASK(%rip), %xmm1
--        vpshufb %xmm1, %xmm9, %xmm9
--        vpshufb %xmm2, %xmm9, %xmm9
--        vpxor	%xmm9, \AAD_HASH, \AAD_HASH
--
--        test	%r10, %r10
--        jl	.L_partial_incomplete_2_\@
--
--        # GHASH computation for the last <16 Byte block
--        \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
--        xor	%eax,%eax
--
--        mov	%rax, PBlockLen(arg2)
--        jmp	.L_encode_done_\@
--.L_partial_incomplete_2_\@:
--        add	\PLAIN_CYPH_LEN, PBlockLen(arg2)
--.L_encode_done_\@:
--        vmovdqu	\AAD_HASH, AadHash(arg2)
--
--        vmovdqa	SHUF_MASK(%rip), %xmm10
--        # shuffle xmm9 back to output as ciphertext
--        vpshufb	%xmm10, %xmm9, %xmm9
--        vpshufb	%xmm2, %xmm9, %xmm9
--.endif
--        # output encrypted Bytes
--        test	%r10, %r10
--        jl	.L_partial_fill_\@
--        mov	%r13, %r12
--        mov	$16, %r13
--        # Set r13 to be the number of bytes to write out
--        sub	%r12, %r13
--        jmp	.L_count_set_\@
--.L_partial_fill_\@:
--        mov	\PLAIN_CYPH_LEN, %r13
--.L_count_set_\@:
--        vmovdqa	%xmm9, %xmm0
--        vmovq	%xmm0, %rax
--        cmp	$8, %r13
--        jle	.L_less_than_8_bytes_left_\@
--
--        mov	%rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
--        add	$8, \DATA_OFFSET
--        psrldq	$8, %xmm0
--        vmovq	%xmm0, %rax
--        sub	$8, %r13
--.L_less_than_8_bytes_left_\@:
--        movb	%al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
--        add	$1, \DATA_OFFSET
--        shr	$8, %rax
--        sub	$1, %r13
--        jne	.L_less_than_8_bytes_left_\@
--.L_partial_block_done_\@:
--.endm # PARTIAL_BLOCK
--
--###############################################################################
--# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
--# Input: A and B (128-bits each, bit-reflected)
--# Output: C = A*B*x mod poly, (i.e. >>1 )
--# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
--# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
--###############################################################################
--.macro  GHASH_MUL_AVX GH HK T1 T2 T3 T4 T5
--
--        vpshufd         $0b01001110, \GH, \T2
--        vpshufd         $0b01001110, \HK, \T3
--        vpxor           \GH     , \T2, \T2      # T2 = (a1+a0)
--        vpxor           \HK     , \T3, \T3      # T3 = (b1+b0)
--
--        vpclmulqdq      $0x11, \HK, \GH, \T1    # T1 = a1*b1
--        vpclmulqdq      $0x00, \HK, \GH, \GH    # GH = a0*b0
--        vpclmulqdq      $0x00, \T3, \T2, \T2    # T2 = (a1+a0)*(b1+b0)
--        vpxor           \GH, \T2,\T2
--        vpxor           \T1, \T2,\T2            # T2 = a0*b1+a1*b0
--
--        vpslldq         $8, \T2,\T3             # shift-L T3 2 DWs
--        vpsrldq         $8, \T2,\T2             # shift-R T2 2 DWs
--        vpxor           \T3, \GH, \GH
--        vpxor           \T2, \T1, \T1           # <T1:GH> = GH x HK
--
--        #first phase of the reduction
--        vpslld  $31, \GH, \T2                   # packed right shifting << 31
--        vpslld  $30, \GH, \T3                   # packed right shifting shift << 30
--        vpslld  $25, \GH, \T4                   # packed right shifting shift << 25
--
--        vpxor   \T3, \T2, \T2                   # xor the shifted versions
--        vpxor   \T4, \T2, \T2
--
--        vpsrldq $4, \T2, \T5                    # shift-R T5 1 DW
--
--        vpslldq $12, \T2, \T2                   # shift-L T2 3 DWs
--        vpxor   \T2, \GH, \GH                   # first phase of the reduction complete
--
--        #second phase of the reduction
--
--        vpsrld  $1,\GH, \T2                     # packed left shifting >> 1
--        vpsrld  $2,\GH, \T3                     # packed left shifting >> 2
--        vpsrld  $7,\GH, \T4                     # packed left shifting >> 7
--        vpxor   \T3, \T2, \T2                   # xor the shifted versions
--        vpxor   \T4, \T2, \T2
--
--        vpxor   \T5, \T2, \T2
--        vpxor   \T2, \GH, \GH
--        vpxor   \T1, \GH, \GH                   # the result is in GH
--
--
--.endm
--
--.macro PRECOMPUTE_AVX HK T1 T2 T3 T4 T5 T6
--
--        # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
--        vmovdqa  \HK, \T5
--
--        vpshufd  $0b01001110, \T5, \T1
--        vpxor    \T5, \T1, \T1
--        vmovdqu  \T1, HashKey_k(arg2)
--
--        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2  #  T5 = HashKey^2<<1 mod poly
--        vmovdqu  \T5, HashKey_2(arg2)                    #  [HashKey_2] = HashKey^2<<1 mod poly
--        vpshufd  $0b01001110, \T5, \T1
--        vpxor    \T5, \T1, \T1
--        vmovdqu  \T1, HashKey_2_k(arg2)
--
--        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2  #  T5 = HashKey^3<<1 mod poly
--        vmovdqu  \T5, HashKey_3(arg2)
--        vpshufd  $0b01001110, \T5, \T1
--        vpxor    \T5, \T1, \T1
--        vmovdqu  \T1, HashKey_3_k(arg2)
--
--        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2  #  T5 = HashKey^4<<1 mod poly
--        vmovdqu  \T5, HashKey_4(arg2)
--        vpshufd  $0b01001110, \T5, \T1
--        vpxor    \T5, \T1, \T1
--        vmovdqu  \T1, HashKey_4_k(arg2)
--
--        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2  #  T5 = HashKey^5<<1 mod poly
--        vmovdqu  \T5, HashKey_5(arg2)
--        vpshufd  $0b01001110, \T5, \T1
--        vpxor    \T5, \T1, \T1
--        vmovdqu  \T1, HashKey_5_k(arg2)
--
--        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2  #  T5 = HashKey^6<<1 mod poly
--        vmovdqu  \T5, HashKey_6(arg2)
--        vpshufd  $0b01001110, \T5, \T1
--        vpxor    \T5, \T1, \T1
--        vmovdqu  \T1, HashKey_6_k(arg2)
--
--        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2  #  T5 = HashKey^7<<1 mod poly
--        vmovdqu  \T5, HashKey_7(arg2)
--        vpshufd  $0b01001110, \T5, \T1
--        vpxor    \T5, \T1, \T1
--        vmovdqu  \T1, HashKey_7_k(arg2)
--
--        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2  #  T5 = HashKey^8<<1 mod poly
--        vmovdqu  \T5, HashKey_8(arg2)
--        vpshufd  $0b01001110, \T5, \T1
--        vpxor    \T5, \T1, \T1
--        vmovdqu  \T1, HashKey_8_k(arg2)
--
--.endm
--
--## if a = number of total plaintext bytes
--## b = floor(a/16)
--## num_initial_blocks = b mod 4#
--## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
--## r10, r11, r12, rax are clobbered
--## arg1, arg2, arg3, arg4 are used as pointers only, not modified
--
--.macro INITIAL_BLOCKS_AVX REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC
--	i = (8-\num_initial_blocks)
--	setreg
--        vmovdqu AadHash(arg2), reg_i
--
--	# start AES for num_initial_blocks blocks
--	vmovdqu CurCount(arg2), \CTR
--
--	i = (9-\num_initial_blocks)
--	setreg
--.rep \num_initial_blocks
--                vpaddd  ONE(%rip), \CTR, \CTR		# INCR Y0
--                vmovdqa \CTR, reg_i
--                vpshufb SHUF_MASK(%rip), reg_i, reg_i   # perform a 16Byte swap
--	i = (i+1)
--	setreg
--.endr
--
--	vmovdqa  (arg1), \T_key
--	i = (9-\num_initial_blocks)
--	setreg
--.rep \num_initial_blocks
--                vpxor   \T_key, reg_i, reg_i
--	i = (i+1)
--	setreg
--.endr
--
--       j = 1
--       setreg
--.rep \REP
--       vmovdqa  16*j(arg1), \T_key
--	i = (9-\num_initial_blocks)
--	setreg
--.rep \num_initial_blocks
--        vaesenc \T_key, reg_i, reg_i
--	i = (i+1)
--	setreg
--.endr
--
--       j = (j+1)
--       setreg
--.endr
--
--	vmovdqa  16*j(arg1), \T_key
--	i = (9-\num_initial_blocks)
--	setreg
--.rep \num_initial_blocks
--        vaesenclast      \T_key, reg_i, reg_i
--	i = (i+1)
--	setreg
--.endr
--
--	i = (9-\num_initial_blocks)
--	setreg
--.rep \num_initial_blocks
--                vmovdqu (arg4, %r11), \T1
--                vpxor   \T1, reg_i, reg_i
--                vmovdqu reg_i, (arg3 , %r11)           # write back ciphertext for num_initial_blocks blocks
--                add     $16, %r11
--.if  \ENC_DEC == DEC
--                vmovdqa \T1, reg_i
--.endif
--                vpshufb SHUF_MASK(%rip), reg_i, reg_i  # prepare ciphertext for GHASH computations
--	i = (i+1)
--	setreg
--.endr
--
--
--	i = (8-\num_initial_blocks)
--	j = (9-\num_initial_blocks)
--	setreg
--
--.rep \num_initial_blocks
--        vpxor    reg_i, reg_j, reg_j
--        GHASH_MUL_AVX       reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks
--	i = (i+1)
--	j = (j+1)
--	setreg
--.endr
--        # XMM8 has the combined result here
--
--        vmovdqa  \XMM8, TMP1(%rsp)
--        vmovdqa  \XMM8, \T3
--
--        cmp     $128, %r13
--        jl      .L_initial_blocks_done\@                  # no need for precomputed constants
--
--###############################################################################
--# Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
--                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
--                vmovdqa  \CTR, \XMM1
--                vpshufb  SHUF_MASK(%rip), \XMM1, \XMM1  # perform a 16Byte swap
--
--                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
--                vmovdqa  \CTR, \XMM2
--                vpshufb  SHUF_MASK(%rip), \XMM2, \XMM2  # perform a 16Byte swap
--
--                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
--                vmovdqa  \CTR, \XMM3
--                vpshufb  SHUF_MASK(%rip), \XMM3, \XMM3  # perform a 16Byte swap
--
--                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
--                vmovdqa  \CTR, \XMM4
--                vpshufb  SHUF_MASK(%rip), \XMM4, \XMM4  # perform a 16Byte swap
--
--                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
--                vmovdqa  \CTR, \XMM5
--                vpshufb  SHUF_MASK(%rip), \XMM5, \XMM5  # perform a 16Byte swap
--
--                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
--                vmovdqa  \CTR, \XMM6
--                vpshufb  SHUF_MASK(%rip), \XMM6, \XMM6  # perform a 16Byte swap
--
--                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
--                vmovdqa  \CTR, \XMM7
--                vpshufb  SHUF_MASK(%rip), \XMM7, \XMM7  # perform a 16Byte swap
--
--                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
--                vmovdqa  \CTR, \XMM8
--                vpshufb  SHUF_MASK(%rip), \XMM8, \XMM8  # perform a 16Byte swap
--
--                vmovdqa  (arg1), \T_key
--                vpxor    \T_key, \XMM1, \XMM1
--                vpxor    \T_key, \XMM2, \XMM2
--                vpxor    \T_key, \XMM3, \XMM3
--                vpxor    \T_key, \XMM4, \XMM4
--                vpxor    \T_key, \XMM5, \XMM5
--                vpxor    \T_key, \XMM6, \XMM6
--                vpxor    \T_key, \XMM7, \XMM7
--                vpxor    \T_key, \XMM8, \XMM8
--
--               i = 1
--               setreg
--.rep    \REP       # do REP rounds
--                vmovdqa  16*i(arg1), \T_key
--                vaesenc  \T_key, \XMM1, \XMM1
--                vaesenc  \T_key, \XMM2, \XMM2
--                vaesenc  \T_key, \XMM3, \XMM3
--                vaesenc  \T_key, \XMM4, \XMM4
--                vaesenc  \T_key, \XMM5, \XMM5
--                vaesenc  \T_key, \XMM6, \XMM6
--                vaesenc  \T_key, \XMM7, \XMM7
--                vaesenc  \T_key, \XMM8, \XMM8
--               i = (i+1)
--               setreg
--.endr
--
--                vmovdqa  16*i(arg1), \T_key
--                vaesenclast  \T_key, \XMM1, \XMM1
--                vaesenclast  \T_key, \XMM2, \XMM2
--                vaesenclast  \T_key, \XMM3, \XMM3
--                vaesenclast  \T_key, \XMM4, \XMM4
--                vaesenclast  \T_key, \XMM5, \XMM5
--                vaesenclast  \T_key, \XMM6, \XMM6
--                vaesenclast  \T_key, \XMM7, \XMM7
--                vaesenclast  \T_key, \XMM8, \XMM8
--
--                vmovdqu  (arg4, %r11), \T1
--                vpxor    \T1, \XMM1, \XMM1
--                vmovdqu  \XMM1, (arg3 , %r11)
--                .if   \ENC_DEC == DEC
--                vmovdqa  \T1, \XMM1
--                .endif
--
--                vmovdqu  16*1(arg4, %r11), \T1
--                vpxor    \T1, \XMM2, \XMM2
--                vmovdqu  \XMM2, 16*1(arg3 , %r11)
--                .if   \ENC_DEC == DEC
--                vmovdqa  \T1, \XMM2
--                .endif
--
--                vmovdqu  16*2(arg4, %r11), \T1
--                vpxor    \T1, \XMM3, \XMM3
--                vmovdqu  \XMM3, 16*2(arg3 , %r11)
--                .if   \ENC_DEC == DEC
--                vmovdqa  \T1, \XMM3
--                .endif
--
--                vmovdqu  16*3(arg4, %r11), \T1
--                vpxor    \T1, \XMM4, \XMM4
--                vmovdqu  \XMM4, 16*3(arg3 , %r11)
--                .if   \ENC_DEC == DEC
--                vmovdqa  \T1, \XMM4
--                .endif
--
--                vmovdqu  16*4(arg4, %r11), \T1
--                vpxor    \T1, \XMM5, \XMM5
--                vmovdqu  \XMM5, 16*4(arg3 , %r11)
--                .if   \ENC_DEC == DEC
--                vmovdqa  \T1, \XMM5
--                .endif
--
--                vmovdqu  16*5(arg4, %r11), \T1
--                vpxor    \T1, \XMM6, \XMM6
--                vmovdqu  \XMM6, 16*5(arg3 , %r11)
--                .if   \ENC_DEC == DEC
--                vmovdqa  \T1, \XMM6
--                .endif
--
--                vmovdqu  16*6(arg4, %r11), \T1
--                vpxor    \T1, \XMM7, \XMM7
--                vmovdqu  \XMM7, 16*6(arg3 , %r11)
--                .if   \ENC_DEC == DEC
--                vmovdqa  \T1, \XMM7
--                .endif
--
--                vmovdqu  16*7(arg4, %r11), \T1
--                vpxor    \T1, \XMM8, \XMM8
--                vmovdqu  \XMM8, 16*7(arg3 , %r11)
--                .if   \ENC_DEC == DEC
--                vmovdqa  \T1, \XMM8
--                .endif
--
--                add     $128, %r11
--
--                vpshufb  SHUF_MASK(%rip), \XMM1, \XMM1     # perform a 16Byte swap
--                vpxor    TMP1(%rsp), \XMM1, \XMM1          # combine GHASHed value with the corresponding ciphertext
--                vpshufb  SHUF_MASK(%rip), \XMM2, \XMM2     # perform a 16Byte swap
--                vpshufb  SHUF_MASK(%rip), \XMM3, \XMM3     # perform a 16Byte swap
--                vpshufb  SHUF_MASK(%rip), \XMM4, \XMM4     # perform a 16Byte swap
--                vpshufb  SHUF_MASK(%rip), \XMM5, \XMM5     # perform a 16Byte swap
--                vpshufb  SHUF_MASK(%rip), \XMM6, \XMM6     # perform a 16Byte swap
--                vpshufb  SHUF_MASK(%rip), \XMM7, \XMM7     # perform a 16Byte swap
--                vpshufb  SHUF_MASK(%rip), \XMM8, \XMM8     # perform a 16Byte swap
--
--###############################################################################
--
--.L_initial_blocks_done\@:
--
--.endm
--
--# encrypt 8 blocks at a time
--# ghash the 8 previously encrypted ciphertext blocks
--# arg1, arg2, arg3, arg4 are used as pointers only, not modified
--# r11 is the data offset value
--.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
--
--        vmovdqa \XMM1, \T2
--        vmovdqa \XMM2, TMP2(%rsp)
--        vmovdqa \XMM3, TMP3(%rsp)
--        vmovdqa \XMM4, TMP4(%rsp)
--        vmovdqa \XMM5, TMP5(%rsp)
--        vmovdqa \XMM6, TMP6(%rsp)
--        vmovdqa \XMM7, TMP7(%rsp)
--        vmovdqa \XMM8, TMP8(%rsp)
--
--.if \loop_idx == in_order
--                vpaddd  ONE(%rip), \CTR, \XMM1           # INCR CNT
--                vpaddd  ONE(%rip), \XMM1, \XMM2
--                vpaddd  ONE(%rip), \XMM2, \XMM3
--                vpaddd  ONE(%rip), \XMM3, \XMM4
--                vpaddd  ONE(%rip), \XMM4, \XMM5
--                vpaddd  ONE(%rip), \XMM5, \XMM6
--                vpaddd  ONE(%rip), \XMM6, \XMM7
--                vpaddd  ONE(%rip), \XMM7, \XMM8
--                vmovdqa \XMM8, \CTR
--
--                vpshufb SHUF_MASK(%rip), \XMM1, \XMM1    # perform a 16Byte swap
--                vpshufb SHUF_MASK(%rip), \XMM2, \XMM2    # perform a 16Byte swap
--                vpshufb SHUF_MASK(%rip), \XMM3, \XMM3    # perform a 16Byte swap
--                vpshufb SHUF_MASK(%rip), \XMM4, \XMM4    # perform a 16Byte swap
--                vpshufb SHUF_MASK(%rip), \XMM5, \XMM5    # perform a 16Byte swap
--                vpshufb SHUF_MASK(%rip), \XMM6, \XMM6    # perform a 16Byte swap
--                vpshufb SHUF_MASK(%rip), \XMM7, \XMM7    # perform a 16Byte swap
--                vpshufb SHUF_MASK(%rip), \XMM8, \XMM8    # perform a 16Byte swap
--.else
--                vpaddd  ONEf(%rip), \CTR, \XMM1           # INCR CNT
--                vpaddd  ONEf(%rip), \XMM1, \XMM2
--                vpaddd  ONEf(%rip), \XMM2, \XMM3
--                vpaddd  ONEf(%rip), \XMM3, \XMM4
--                vpaddd  ONEf(%rip), \XMM4, \XMM5
--                vpaddd  ONEf(%rip), \XMM5, \XMM6
--                vpaddd  ONEf(%rip), \XMM6, \XMM7
--                vpaddd  ONEf(%rip), \XMM7, \XMM8
--                vmovdqa \XMM8, \CTR
--.endif
--
--
--        #######################################################################
--
--                vmovdqu (arg1), \T1
--                vpxor   \T1, \XMM1, \XMM1
--                vpxor   \T1, \XMM2, \XMM2
--                vpxor   \T1, \XMM3, \XMM3
--                vpxor   \T1, \XMM4, \XMM4
--                vpxor   \T1, \XMM5, \XMM5
--                vpxor   \T1, \XMM6, \XMM6
--                vpxor   \T1, \XMM7, \XMM7
--                vpxor   \T1, \XMM8, \XMM8
--
--        #######################################################################
--
--
--
--
--
--                vmovdqu 16*1(arg1), \T1
--                vaesenc \T1, \XMM1, \XMM1
--                vaesenc \T1, \XMM2, \XMM2
--                vaesenc \T1, \XMM3, \XMM3
--                vaesenc \T1, \XMM4, \XMM4
--                vaesenc \T1, \XMM5, \XMM5
--                vaesenc \T1, \XMM6, \XMM6
--                vaesenc \T1, \XMM7, \XMM7
--                vaesenc \T1, \XMM8, \XMM8
--
--                vmovdqu 16*2(arg1), \T1
--                vaesenc \T1, \XMM1, \XMM1
--                vaesenc \T1, \XMM2, \XMM2
--                vaesenc \T1, \XMM3, \XMM3
--                vaesenc \T1, \XMM4, \XMM4
--                vaesenc \T1, \XMM5, \XMM5
--                vaesenc \T1, \XMM6, \XMM6
--                vaesenc \T1, \XMM7, \XMM7
--                vaesenc \T1, \XMM8, \XMM8
--
--
--        #######################################################################
--
--        vmovdqu         HashKey_8(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \T2, \T4             # T4 = a1*b1
--        vpclmulqdq      $0x00, \T5, \T2, \T7             # T7 = a0*b0
--
--        vpshufd         $0b01001110, \T2, \T6
--        vpxor           \T2, \T6, \T6
--
--        vmovdqu         HashKey_8_k(arg2), \T5
--        vpclmulqdq      $0x00, \T5, \T6, \T6
--
--                vmovdqu 16*3(arg1), \T1
--                vaesenc \T1, \XMM1, \XMM1
--                vaesenc \T1, \XMM2, \XMM2
--                vaesenc \T1, \XMM3, \XMM3
--                vaesenc \T1, \XMM4, \XMM4
--                vaesenc \T1, \XMM5, \XMM5
--                vaesenc \T1, \XMM6, \XMM6
--                vaesenc \T1, \XMM7, \XMM7
--                vaesenc \T1, \XMM8, \XMM8
--
--        vmovdqa         TMP2(%rsp), \T1
--        vmovdqu         HashKey_7(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \T1, \T3
--        vpxor           \T3, \T4, \T4
--        vpclmulqdq      $0x00, \T5, \T1, \T3
--        vpxor           \T3, \T7, \T7
--
--        vpshufd         $0b01001110, \T1, \T3
--        vpxor           \T1, \T3, \T3
--        vmovdqu         HashKey_7_k(arg2), \T5
--        vpclmulqdq      $0x10, \T5, \T3, \T3
--        vpxor           \T3, \T6, \T6
--
--                vmovdqu 16*4(arg1), \T1
--                vaesenc \T1, \XMM1, \XMM1
--                vaesenc \T1, \XMM2, \XMM2
--                vaesenc \T1, \XMM3, \XMM3
--                vaesenc \T1, \XMM4, \XMM4
--                vaesenc \T1, \XMM5, \XMM5
--                vaesenc \T1, \XMM6, \XMM6
--                vaesenc \T1, \XMM7, \XMM7
--                vaesenc \T1, \XMM8, \XMM8
--
--        #######################################################################
--
--        vmovdqa         TMP3(%rsp), \T1
--        vmovdqu         HashKey_6(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \T1, \T3
--        vpxor           \T3, \T4, \T4
--        vpclmulqdq      $0x00, \T5, \T1, \T3
--        vpxor           \T3, \T7, \T7
--
--        vpshufd         $0b01001110, \T1, \T3
--        vpxor           \T1, \T3, \T3
--        vmovdqu         HashKey_6_k(arg2), \T5
--        vpclmulqdq      $0x10, \T5, \T3, \T3
--        vpxor           \T3, \T6, \T6
--
--                vmovdqu 16*5(arg1), \T1
--                vaesenc \T1, \XMM1, \XMM1
--                vaesenc \T1, \XMM2, \XMM2
--                vaesenc \T1, \XMM3, \XMM3
--                vaesenc \T1, \XMM4, \XMM4
--                vaesenc \T1, \XMM5, \XMM5
--                vaesenc \T1, \XMM6, \XMM6
--                vaesenc \T1, \XMM7, \XMM7
--                vaesenc \T1, \XMM8, \XMM8
--
--        vmovdqa         TMP4(%rsp), \T1
--        vmovdqu         HashKey_5(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \T1, \T3
--        vpxor           \T3, \T4, \T4
--        vpclmulqdq      $0x00, \T5, \T1, \T3
--        vpxor           \T3, \T7, \T7
--
--        vpshufd         $0b01001110, \T1, \T3
--        vpxor           \T1, \T3, \T3
--        vmovdqu         HashKey_5_k(arg2), \T5
--        vpclmulqdq      $0x10, \T5, \T3, \T3
--        vpxor           \T3, \T6, \T6
--
--                vmovdqu 16*6(arg1), \T1
--                vaesenc \T1, \XMM1, \XMM1
--                vaesenc \T1, \XMM2, \XMM2
--                vaesenc \T1, \XMM3, \XMM3
--                vaesenc \T1, \XMM4, \XMM4
--                vaesenc \T1, \XMM5, \XMM5
--                vaesenc \T1, \XMM6, \XMM6
--                vaesenc \T1, \XMM7, \XMM7
--                vaesenc \T1, \XMM8, \XMM8
--
--
--        vmovdqa         TMP5(%rsp), \T1
--        vmovdqu         HashKey_4(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \T1, \T3
--        vpxor           \T3, \T4, \T4
--        vpclmulqdq      $0x00, \T5, \T1, \T3
--        vpxor           \T3, \T7, \T7
--
--        vpshufd         $0b01001110, \T1, \T3
--        vpxor           \T1, \T3, \T3
--        vmovdqu         HashKey_4_k(arg2), \T5
--        vpclmulqdq      $0x10, \T5, \T3, \T3
--        vpxor           \T3, \T6, \T6
--
--                vmovdqu 16*7(arg1), \T1
--                vaesenc \T1, \XMM1, \XMM1
--                vaesenc \T1, \XMM2, \XMM2
--                vaesenc \T1, \XMM3, \XMM3
--                vaesenc \T1, \XMM4, \XMM4
--                vaesenc \T1, \XMM5, \XMM5
--                vaesenc \T1, \XMM6, \XMM6
--                vaesenc \T1, \XMM7, \XMM7
--                vaesenc \T1, \XMM8, \XMM8
--
--        vmovdqa         TMP6(%rsp), \T1
--        vmovdqu         HashKey_3(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \T1, \T3
--        vpxor           \T3, \T4, \T4
--        vpclmulqdq      $0x00, \T5, \T1, \T3
--        vpxor           \T3, \T7, \T7
--
--        vpshufd         $0b01001110, \T1, \T3
--        vpxor           \T1, \T3, \T3
--        vmovdqu         HashKey_3_k(arg2), \T5
--        vpclmulqdq      $0x10, \T5, \T3, \T3
--        vpxor           \T3, \T6, \T6
--
--
--                vmovdqu 16*8(arg1), \T1
--                vaesenc \T1, \XMM1, \XMM1
--                vaesenc \T1, \XMM2, \XMM2
--                vaesenc \T1, \XMM3, \XMM3
--                vaesenc \T1, \XMM4, \XMM4
--                vaesenc \T1, \XMM5, \XMM5
--                vaesenc \T1, \XMM6, \XMM6
--                vaesenc \T1, \XMM7, \XMM7
--                vaesenc \T1, \XMM8, \XMM8
--
--        vmovdqa         TMP7(%rsp), \T1
--        vmovdqu         HashKey_2(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \T1, \T3
--        vpxor           \T3, \T4, \T4
--        vpclmulqdq      $0x00, \T5, \T1, \T3
--        vpxor           \T3, \T7, \T7
--
--        vpshufd         $0b01001110, \T1, \T3
--        vpxor           \T1, \T3, \T3
--        vmovdqu         HashKey_2_k(arg2), \T5
--        vpclmulqdq      $0x10, \T5, \T3, \T3
--        vpxor           \T3, \T6, \T6
--
--        #######################################################################
--
--                vmovdqu 16*9(arg1), \T5
--                vaesenc \T5, \XMM1, \XMM1
--                vaesenc \T5, \XMM2, \XMM2
--                vaesenc \T5, \XMM3, \XMM3
--                vaesenc \T5, \XMM4, \XMM4
--                vaesenc \T5, \XMM5, \XMM5
--                vaesenc \T5, \XMM6, \XMM6
--                vaesenc \T5, \XMM7, \XMM7
--                vaesenc \T5, \XMM8, \XMM8
--
--        vmovdqa         TMP8(%rsp), \T1
--        vmovdqu         HashKey(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \T1, \T3
--        vpxor           \T3, \T4, \T4
--        vpclmulqdq      $0x00, \T5, \T1, \T3
--        vpxor           \T3, \T7, \T7
--
--        vpshufd         $0b01001110, \T1, \T3
--        vpxor           \T1, \T3, \T3
--        vmovdqu         HashKey_k(arg2), \T5
--        vpclmulqdq      $0x10, \T5, \T3, \T3
--        vpxor           \T3, \T6, \T6
--
--        vpxor           \T4, \T6, \T6
--        vpxor           \T7, \T6, \T6
--
--                vmovdqu 16*10(arg1), \T5
--
--        i = 11
--        setreg
--.rep (\REP-9)
--
--        vaesenc \T5, \XMM1, \XMM1
--        vaesenc \T5, \XMM2, \XMM2
--        vaesenc \T5, \XMM3, \XMM3
--        vaesenc \T5, \XMM4, \XMM4
--        vaesenc \T5, \XMM5, \XMM5
--        vaesenc \T5, \XMM6, \XMM6
--        vaesenc \T5, \XMM7, \XMM7
--        vaesenc \T5, \XMM8, \XMM8
--
--        vmovdqu 16*i(arg1), \T5
--        i = i + 1
--        setreg
--.endr
--
--	i = 0
--	j = 1
--	setreg
--.rep 8
--		vpxor	16*i(arg4, %r11), \T5, \T2
--                .if \ENC_DEC == ENC
--                vaesenclast     \T2, reg_j, reg_j
--                .else
--                vaesenclast     \T2, reg_j, \T3
--                vmovdqu 16*i(arg4, %r11), reg_j
--                vmovdqu \T3, 16*i(arg3, %r11)
--                .endif
--	i = (i+1)
--	j = (j+1)
--	setreg
--.endr
--	#######################################################################
--
--
--	vpslldq	$8, \T6, \T3				# shift-L T3 2 DWs
--	vpsrldq	$8, \T6, \T6				# shift-R T2 2 DWs
--	vpxor	\T3, \T7, \T7
--	vpxor	\T4, \T6, \T6				# accumulate the results in T6:T7
--
--
--
--	#######################################################################
--	#first phase of the reduction
--	#######################################################################
--        vpslld  $31, \T7, \T2                           # packed right shifting << 31
--        vpslld  $30, \T7, \T3                           # packed right shifting shift << 30
--        vpslld  $25, \T7, \T4                           # packed right shifting shift << 25
--
--        vpxor   \T3, \T2, \T2                           # xor the shifted versions
--        vpxor   \T4, \T2, \T2
--
--        vpsrldq $4, \T2, \T1                            # shift-R T1 1 DW
--
--        vpslldq $12, \T2, \T2                           # shift-L T2 3 DWs
--        vpxor   \T2, \T7, \T7                           # first phase of the reduction complete
--	#######################################################################
--                .if \ENC_DEC == ENC
--		vmovdqu	 \XMM1,	16*0(arg3,%r11)		# Write to the Ciphertext buffer
--		vmovdqu	 \XMM2,	16*1(arg3,%r11)		# Write to the Ciphertext buffer
--		vmovdqu	 \XMM3,	16*2(arg3,%r11)		# Write to the Ciphertext buffer
--		vmovdqu	 \XMM4,	16*3(arg3,%r11)		# Write to the Ciphertext buffer
--		vmovdqu	 \XMM5,	16*4(arg3,%r11)		# Write to the Ciphertext buffer
--		vmovdqu	 \XMM6,	16*5(arg3,%r11)		# Write to the Ciphertext buffer
--		vmovdqu	 \XMM7,	16*6(arg3,%r11)		# Write to the Ciphertext buffer
--		vmovdqu	 \XMM8,	16*7(arg3,%r11)		# Write to the Ciphertext buffer
--                .endif
--
--	#######################################################################
--	#second phase of the reduction
--        vpsrld  $1, \T7, \T2                            # packed left shifting >> 1
--        vpsrld  $2, \T7, \T3                            # packed left shifting >> 2
--        vpsrld  $7, \T7, \T4                            # packed left shifting >> 7
--        vpxor   \T3, \T2, \T2                           # xor the shifted versions
--        vpxor   \T4, \T2, \T2
--
--        vpxor   \T1, \T2, \T2
--        vpxor   \T2, \T7, \T7
--        vpxor   \T7, \T6, \T6                           # the result is in T6
--	#######################################################################
--
--		vpshufb	SHUF_MASK(%rip), \XMM1, \XMM1	# perform a 16Byte swap
--		vpshufb	SHUF_MASK(%rip), \XMM2, \XMM2	# perform a 16Byte swap
--		vpshufb	SHUF_MASK(%rip), \XMM3, \XMM3	# perform a 16Byte swap
--		vpshufb	SHUF_MASK(%rip), \XMM4, \XMM4	# perform a 16Byte swap
--		vpshufb	SHUF_MASK(%rip), \XMM5, \XMM5	# perform a 16Byte swap
--		vpshufb	SHUF_MASK(%rip), \XMM6, \XMM6	# perform a 16Byte swap
--		vpshufb	SHUF_MASK(%rip), \XMM7, \XMM7	# perform a 16Byte swap
--		vpshufb	SHUF_MASK(%rip), \XMM8, \XMM8	# perform a 16Byte swap
--
--
--	vpxor	\T6, \XMM1, \XMM1
--
--
--
--.endm
--
--
--# GHASH the last 4 ciphertext blocks.
--.macro  GHASH_LAST_8_AVX T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8
--
--        ## Karatsuba Method
--
--
--        vpshufd         $0b01001110, \XMM1, \T2
--        vpxor           \XMM1, \T2, \T2
--        vmovdqu         HashKey_8(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \XMM1, \T6
--        vpclmulqdq      $0x00, \T5, \XMM1, \T7
--
--        vmovdqu         HashKey_8_k(arg2), \T3
--        vpclmulqdq      $0x00, \T3, \T2, \XMM1
--
--        ######################
--
--        vpshufd         $0b01001110, \XMM2, \T2
--        vpxor           \XMM2, \T2, \T2
--        vmovdqu         HashKey_7(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \XMM2, \T4
--        vpxor           \T4, \T6, \T6
--
--        vpclmulqdq      $0x00, \T5, \XMM2, \T4
--        vpxor           \T4, \T7, \T7
--
--        vmovdqu         HashKey_7_k(arg2), \T3
--        vpclmulqdq      $0x00, \T3, \T2, \T2
--        vpxor           \T2, \XMM1, \XMM1
--
--        ######################
--
--        vpshufd         $0b01001110, \XMM3, \T2
--        vpxor           \XMM3, \T2, \T2
--        vmovdqu         HashKey_6(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \XMM3, \T4
--        vpxor           \T4, \T6, \T6
--
--        vpclmulqdq      $0x00, \T5, \XMM3, \T4
--        vpxor           \T4, \T7, \T7
--
--        vmovdqu         HashKey_6_k(arg2), \T3
--        vpclmulqdq      $0x00, \T3, \T2, \T2
--        vpxor           \T2, \XMM1, \XMM1
--
--        ######################
--
--        vpshufd         $0b01001110, \XMM4, \T2
--        vpxor           \XMM4, \T2, \T2
--        vmovdqu         HashKey_5(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \XMM4, \T4
--        vpxor           \T4, \T6, \T6
--
--        vpclmulqdq      $0x00, \T5, \XMM4, \T4
--        vpxor           \T4, \T7, \T7
--
--        vmovdqu         HashKey_5_k(arg2), \T3
--        vpclmulqdq      $0x00, \T3, \T2, \T2
--        vpxor           \T2, \XMM1, \XMM1
--
--        ######################
--
--        vpshufd         $0b01001110, \XMM5, \T2
--        vpxor           \XMM5, \T2, \T2
--        vmovdqu         HashKey_4(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \XMM5, \T4
--        vpxor           \T4, \T6, \T6
--
--        vpclmulqdq      $0x00, \T5, \XMM5, \T4
--        vpxor           \T4, \T7, \T7
--
--        vmovdqu         HashKey_4_k(arg2), \T3
--        vpclmulqdq      $0x00, \T3, \T2, \T2
--        vpxor           \T2, \XMM1, \XMM1
--
--        ######################
--
--        vpshufd         $0b01001110, \XMM6, \T2
--        vpxor           \XMM6, \T2, \T2
--        vmovdqu         HashKey_3(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \XMM6, \T4
--        vpxor           \T4, \T6, \T6
--
--        vpclmulqdq      $0x00, \T5, \XMM6, \T4
--        vpxor           \T4, \T7, \T7
--
--        vmovdqu         HashKey_3_k(arg2), \T3
--        vpclmulqdq      $0x00, \T3, \T2, \T2
--        vpxor           \T2, \XMM1, \XMM1
--
--        ######################
--
--        vpshufd         $0b01001110, \XMM7, \T2
--        vpxor           \XMM7, \T2, \T2
--        vmovdqu         HashKey_2(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \XMM7, \T4
--        vpxor           \T4, \T6, \T6
--
--        vpclmulqdq      $0x00, \T5, \XMM7, \T4
--        vpxor           \T4, \T7, \T7
--
--        vmovdqu         HashKey_2_k(arg2), \T3
--        vpclmulqdq      $0x00, \T3, \T2, \T2
--        vpxor           \T2, \XMM1, \XMM1
--
--        ######################
--
--        vpshufd         $0b01001110, \XMM8, \T2
--        vpxor           \XMM8, \T2, \T2
--        vmovdqu         HashKey(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \XMM8, \T4
--        vpxor           \T4, \T6, \T6
--
--        vpclmulqdq      $0x00, \T5, \XMM8, \T4
--        vpxor           \T4, \T7, \T7
--
--        vmovdqu         HashKey_k(arg2), \T3
--        vpclmulqdq      $0x00, \T3, \T2, \T2
--
--        vpxor           \T2, \XMM1, \XMM1
--        vpxor           \T6, \XMM1, \XMM1
--        vpxor           \T7, \XMM1, \T2
--
--
--
--
--        vpslldq $8, \T2, \T4
--        vpsrldq $8, \T2, \T2
--
--        vpxor   \T4, \T7, \T7
--        vpxor   \T2, \T6, \T6   # <T6:T7> holds the result of
--				# the accumulated carry-less multiplications
--
--        #######################################################################
--        #first phase of the reduction
--        vpslld  $31, \T7, \T2   # packed right shifting << 31
--        vpslld  $30, \T7, \T3   # packed right shifting shift << 30
--        vpslld  $25, \T7, \T4   # packed right shifting shift << 25
--
--        vpxor   \T3, \T2, \T2   # xor the shifted versions
--        vpxor   \T4, \T2, \T2
--
--        vpsrldq $4, \T2, \T1    # shift-R T1 1 DW
--
--        vpslldq $12, \T2, \T2   # shift-L T2 3 DWs
--        vpxor   \T2, \T7, \T7   # first phase of the reduction complete
--        #######################################################################
--
--
--        #second phase of the reduction
--        vpsrld  $1, \T7, \T2    # packed left shifting >> 1
--        vpsrld  $2, \T7, \T3    # packed left shifting >> 2
--        vpsrld  $7, \T7, \T4    # packed left shifting >> 7
--        vpxor   \T3, \T2, \T2   # xor the shifted versions
--        vpxor   \T4, \T2, \T2
--
--        vpxor   \T1, \T2, \T2
--        vpxor   \T2, \T7, \T7
--        vpxor   \T7, \T6, \T6   # the result is in T6
--
--.endm
--
--#############################################################
--#void   aesni_gcm_precomp_avx_gen2
--#        (gcm_data     *my_ctx_data,
--#         gcm_context_data *data,
--#        u8     *hash_subkey# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
--#        u8      *iv, /* Pre-counter block j0: 4 byte salt
--#			(from Security Association) concatenated with 8 byte
--#			Initialisation Vector (from IPSec ESP Payload)
--#			concatenated with 0x00000001. 16-byte aligned pointer. */
--#        const   u8 *aad, /* Additional Authentication Data (AAD)*/
--#        u64     aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
--#############################################################
--SYM_FUNC_START(aesni_gcm_init_avx_gen2)
--        FUNC_SAVE
--        INIT GHASH_MUL_AVX, PRECOMPUTE_AVX
--        FUNC_RESTORE
--        RET
--SYM_FUNC_END(aesni_gcm_init_avx_gen2)
--
--###############################################################################
--#void   aesni_gcm_enc_update_avx_gen2(
--#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
--#        gcm_context_data *data,
--#        u8      *out, /* Ciphertext output. Encrypt in-place is allowed.  */
--#        const   u8 *in, /* Plaintext input */
--#        u64     plaintext_len) /* Length of data in Bytes for encryption. */
--###############################################################################
--SYM_FUNC_START(aesni_gcm_enc_update_avx_gen2)
--        FUNC_SAVE
--        mov     keysize, %eax
--        cmp     $32, %eax
--        je      key_256_enc_update
--        cmp     $16, %eax
--        je      key_128_enc_update
--        # must be 192
--        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 11
--        FUNC_RESTORE
--        RET
--key_128_enc_update:
--        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 9
--        FUNC_RESTORE
--        RET
--key_256_enc_update:
--        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13
--        FUNC_RESTORE
--        RET
--SYM_FUNC_END(aesni_gcm_enc_update_avx_gen2)
--
--###############################################################################
--#void   aesni_gcm_dec_update_avx_gen2(
--#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
--#        gcm_context_data *data,
--#        u8      *out, /* Plaintext output. Decrypt in-place is allowed.  */
--#        const   u8 *in, /* Ciphertext input */
--#        u64     plaintext_len) /* Length of data in Bytes for encryption. */
--###############################################################################
--SYM_FUNC_START(aesni_gcm_dec_update_avx_gen2)
--        FUNC_SAVE
--        mov     keysize,%eax
--        cmp     $32, %eax
--        je      key_256_dec_update
--        cmp     $16, %eax
--        je      key_128_dec_update
--        # must be 192
--        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 11
--        FUNC_RESTORE
--        RET
--key_128_dec_update:
--        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 9
--        FUNC_RESTORE
--        RET
--key_256_dec_update:
--        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13
--        FUNC_RESTORE
--        RET
--SYM_FUNC_END(aesni_gcm_dec_update_avx_gen2)
--
--###############################################################################
--#void   aesni_gcm_finalize_avx_gen2(
--#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
--#        gcm_context_data *data,
--#        u8      *auth_tag, /* Authenticated Tag output. */
--#        u64     auth_tag_len)# /* Authenticated Tag Length in bytes.
--#				Valid values are 16 (most likely), 12 or 8. */
--###############################################################################
--SYM_FUNC_START(aesni_gcm_finalize_avx_gen2)
--        FUNC_SAVE
--        mov	keysize,%eax
--        cmp     $32, %eax
--        je      key_256_finalize
--        cmp     $16, %eax
--        je      key_128_finalize
--        # must be 192
--        GCM_COMPLETE GHASH_MUL_AVX, 11, arg3, arg4
--        FUNC_RESTORE
--        RET
--key_128_finalize:
--        GCM_COMPLETE GHASH_MUL_AVX, 9, arg3, arg4
--        FUNC_RESTORE
--        RET
--key_256_finalize:
--        GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4
--        FUNC_RESTORE
--        RET
--SYM_FUNC_END(aesni_gcm_finalize_avx_gen2)
--
--###############################################################################
--# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
--# Input: A and B (128-bits each, bit-reflected)
--# Output: C = A*B*x mod poly, (i.e. >>1 )
--# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
--# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
--###############################################################################
--.macro  GHASH_MUL_AVX2 GH HK T1 T2 T3 T4 T5
--
--        vpclmulqdq      $0x11,\HK,\GH,\T1      # T1 = a1*b1
--        vpclmulqdq      $0x00,\HK,\GH,\T2      # T2 = a0*b0
--        vpclmulqdq      $0x01,\HK,\GH,\T3      # T3 = a1*b0
--        vpclmulqdq      $0x10,\HK,\GH,\GH      # GH = a0*b1
--        vpxor           \T3, \GH, \GH
--
--
--        vpsrldq         $8 , \GH, \T3          # shift-R GH 2 DWs
--        vpslldq         $8 , \GH, \GH          # shift-L GH 2 DWs
--
--        vpxor           \T3, \T1, \T1
--        vpxor           \T2, \GH, \GH
--
--        #######################################################################
--        #first phase of the reduction
--        vmovdqa         POLY2(%rip), \T3
--
--        vpclmulqdq      $0x01, \GH, \T3, \T2
--        vpslldq         $8, \T2, \T2           # shift-L T2 2 DWs
--
--        vpxor           \T2, \GH, \GH          # first phase of the reduction complete
--        #######################################################################
--        #second phase of the reduction
--        vpclmulqdq      $0x00, \GH, \T3, \T2
--        vpsrldq         $4, \T2, \T2           # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)
--
--        vpclmulqdq      $0x10, \GH, \T3, \GH
--        vpslldq         $4, \GH, \GH           # shift-L GH 1 DW (Shift-L 1-DW to obtain result with no shifts)
--
--        vpxor           \T2, \GH, \GH          # second phase of the reduction complete
--        #######################################################################
--        vpxor           \T1, \GH, \GH          # the result is in GH
--
--
--.endm
--
--.macro PRECOMPUTE_AVX2 HK T1 T2 T3 T4 T5 T6
--
--        # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
--        vmovdqa  \HK, \T5
--        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2    #  T5 = HashKey^2<<1 mod poly
--        vmovdqu  \T5, HashKey_2(arg2)                       #  [HashKey_2] = HashKey^2<<1 mod poly
--
--        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2    #  T5 = HashKey^3<<1 mod poly
--        vmovdqu  \T5, HashKey_3(arg2)
--
--        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2    #  T5 = HashKey^4<<1 mod poly
--        vmovdqu  \T5, HashKey_4(arg2)
--
--        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2    #  T5 = HashKey^5<<1 mod poly
--        vmovdqu  \T5, HashKey_5(arg2)
--
--        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2    #  T5 = HashKey^6<<1 mod poly
--        vmovdqu  \T5, HashKey_6(arg2)
--
--        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2    #  T5 = HashKey^7<<1 mod poly
--        vmovdqu  \T5, HashKey_7(arg2)
--
--        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2    #  T5 = HashKey^8<<1 mod poly
--        vmovdqu  \T5, HashKey_8(arg2)
--
--.endm
--
--## if a = number of total plaintext bytes
--## b = floor(a/16)
--## num_initial_blocks = b mod 4#
--## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
--## r10, r11, r12, rax are clobbered
--## arg1, arg2, arg3, arg4 are used as pointers only, not modified
--
--.macro INITIAL_BLOCKS_AVX2 REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
--	i = (8-\num_initial_blocks)
--	setreg
--	vmovdqu AadHash(arg2), reg_i
--
--	# start AES for num_initial_blocks blocks
--	vmovdqu CurCount(arg2), \CTR
--
--	i = (9-\num_initial_blocks)
--	setreg
--.rep \num_initial_blocks
--                vpaddd  ONE(%rip), \CTR, \CTR   # INCR Y0
--                vmovdqa \CTR, reg_i
--                vpshufb SHUF_MASK(%rip), reg_i, reg_i     # perform a 16Byte swap
--	i = (i+1)
--	setreg
--.endr
--
--	vmovdqa  (arg1), \T_key
--	i = (9-\num_initial_blocks)
--	setreg
--.rep \num_initial_blocks
--                vpxor   \T_key, reg_i, reg_i
--	i = (i+1)
--	setreg
--.endr
--
--	j = 1
--	setreg
--.rep \REP
--	vmovdqa  16*j(arg1), \T_key
--	i = (9-\num_initial_blocks)
--	setreg
--.rep \num_initial_blocks
--        vaesenc \T_key, reg_i, reg_i
--	i = (i+1)
--	setreg
--.endr
--
--	j = (j+1)
--	setreg
--.endr
--
--
--	vmovdqa  16*j(arg1), \T_key
--	i = (9-\num_initial_blocks)
--	setreg
--.rep \num_initial_blocks
--        vaesenclast      \T_key, reg_i, reg_i
--	i = (i+1)
--	setreg
--.endr
--
--	i = (9-\num_initial_blocks)
--	setreg
--.rep \num_initial_blocks
--                vmovdqu (arg4, %r11), \T1
--                vpxor   \T1, reg_i, reg_i
--                vmovdqu reg_i, (arg3 , %r11)           # write back ciphertext for
--						       # num_initial_blocks blocks
--                add     $16, %r11
--.if  \ENC_DEC == DEC
--                vmovdqa \T1, reg_i
--.endif
--                vpshufb SHUF_MASK(%rip), reg_i, reg_i  # prepare ciphertext for GHASH computations
--	i = (i+1)
--	setreg
--.endr
--
--
--	i = (8-\num_initial_blocks)
--	j = (9-\num_initial_blocks)
--	setreg
--
--.rep \num_initial_blocks
--        vpxor    reg_i, reg_j, reg_j
--        GHASH_MUL_AVX2       reg_j, \T2, \T1, \T3, \T4, \T5, \T6  # apply GHASH on num_initial_blocks blocks
--	i = (i+1)
--	j = (j+1)
--	setreg
--.endr
--        # XMM8 has the combined result here
--
--        vmovdqa  \XMM8, TMP1(%rsp)
--        vmovdqa  \XMM8, \T3
--
--        cmp     $128, %r13
--        jl      .L_initial_blocks_done\@                  # no need for precomputed constants
--
--###############################################################################
--# Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
--                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
--                vmovdqa  \CTR, \XMM1
--                vpshufb  SHUF_MASK(%rip), \XMM1, \XMM1  # perform a 16Byte swap
--
--                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
--                vmovdqa  \CTR, \XMM2
--                vpshufb  SHUF_MASK(%rip), \XMM2, \XMM2  # perform a 16Byte swap
--
--                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
--                vmovdqa  \CTR, \XMM3
--                vpshufb  SHUF_MASK(%rip), \XMM3, \XMM3  # perform a 16Byte swap
--
--                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
--                vmovdqa  \CTR, \XMM4
--                vpshufb  SHUF_MASK(%rip), \XMM4, \XMM4  # perform a 16Byte swap
--
--                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
--                vmovdqa  \CTR, \XMM5
--                vpshufb  SHUF_MASK(%rip), \XMM5, \XMM5  # perform a 16Byte swap
--
--                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
--                vmovdqa  \CTR, \XMM6
--                vpshufb  SHUF_MASK(%rip), \XMM6, \XMM6  # perform a 16Byte swap
--
--                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
--                vmovdqa  \CTR, \XMM7
--                vpshufb  SHUF_MASK(%rip), \XMM7, \XMM7  # perform a 16Byte swap
--
--                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
--                vmovdqa  \CTR, \XMM8
--                vpshufb  SHUF_MASK(%rip), \XMM8, \XMM8  # perform a 16Byte swap
--
--                vmovdqa  (arg1), \T_key
--                vpxor    \T_key, \XMM1, \XMM1
--                vpxor    \T_key, \XMM2, \XMM2
--                vpxor    \T_key, \XMM3, \XMM3
--                vpxor    \T_key, \XMM4, \XMM4
--                vpxor    \T_key, \XMM5, \XMM5
--                vpxor    \T_key, \XMM6, \XMM6
--                vpxor    \T_key, \XMM7, \XMM7
--                vpxor    \T_key, \XMM8, \XMM8
--
--		i = 1
--		setreg
--.rep    \REP       # do REP rounds
--                vmovdqa  16*i(arg1), \T_key
--                vaesenc  \T_key, \XMM1, \XMM1
--                vaesenc  \T_key, \XMM2, \XMM2
--                vaesenc  \T_key, \XMM3, \XMM3
--                vaesenc  \T_key, \XMM4, \XMM4
--                vaesenc  \T_key, \XMM5, \XMM5
--                vaesenc  \T_key, \XMM6, \XMM6
--                vaesenc  \T_key, \XMM7, \XMM7
--                vaesenc  \T_key, \XMM8, \XMM8
--		i = (i+1)
--		setreg
--.endr
--
--
--                vmovdqa  16*i(arg1), \T_key
--                vaesenclast  \T_key, \XMM1, \XMM1
--                vaesenclast  \T_key, \XMM2, \XMM2
--                vaesenclast  \T_key, \XMM3, \XMM3
--                vaesenclast  \T_key, \XMM4, \XMM4
--                vaesenclast  \T_key, \XMM5, \XMM5
--                vaesenclast  \T_key, \XMM6, \XMM6
--                vaesenclast  \T_key, \XMM7, \XMM7
--                vaesenclast  \T_key, \XMM8, \XMM8
--
--                vmovdqu  (arg4, %r11), \T1
--                vpxor    \T1, \XMM1, \XMM1
--                vmovdqu  \XMM1, (arg3 , %r11)
--                .if   \ENC_DEC == DEC
--                vmovdqa  \T1, \XMM1
--                .endif
--
--                vmovdqu  16*1(arg4, %r11), \T1
--                vpxor    \T1, \XMM2, \XMM2
--                vmovdqu  \XMM2, 16*1(arg3 , %r11)
--                .if   \ENC_DEC == DEC
--                vmovdqa  \T1, \XMM2
--                .endif
--
--                vmovdqu  16*2(arg4, %r11), \T1
--                vpxor    \T1, \XMM3, \XMM3
--                vmovdqu  \XMM3, 16*2(arg3 , %r11)
--                .if   \ENC_DEC == DEC
--                vmovdqa  \T1, \XMM3
--                .endif
--
--                vmovdqu  16*3(arg4, %r11), \T1
--                vpxor    \T1, \XMM4, \XMM4
--                vmovdqu  \XMM4, 16*3(arg3 , %r11)
--                .if   \ENC_DEC == DEC
--                vmovdqa  \T1, \XMM4
--                .endif
--
--                vmovdqu  16*4(arg4, %r11), \T1
--                vpxor    \T1, \XMM5, \XMM5
--                vmovdqu  \XMM5, 16*4(arg3 , %r11)
--                .if   \ENC_DEC == DEC
--                vmovdqa  \T1, \XMM5
--                .endif
--
--                vmovdqu  16*5(arg4, %r11), \T1
--                vpxor    \T1, \XMM6, \XMM6
--                vmovdqu  \XMM6, 16*5(arg3 , %r11)
--                .if   \ENC_DEC == DEC
--                vmovdqa  \T1, \XMM6
--                .endif
--
--                vmovdqu  16*6(arg4, %r11), \T1
--                vpxor    \T1, \XMM7, \XMM7
--                vmovdqu  \XMM7, 16*6(arg3 , %r11)
--                .if   \ENC_DEC == DEC
--                vmovdqa  \T1, \XMM7
--                .endif
--
--                vmovdqu  16*7(arg4, %r11), \T1
--                vpxor    \T1, \XMM8, \XMM8
--                vmovdqu  \XMM8, 16*7(arg3 , %r11)
--                .if   \ENC_DEC == DEC
--                vmovdqa  \T1, \XMM8
--                .endif
--
--                add     $128, %r11
--
--                vpshufb  SHUF_MASK(%rip), \XMM1, \XMM1     # perform a 16Byte swap
--                vpxor    TMP1(%rsp), \XMM1, \XMM1          # combine GHASHed value with
--							   # the corresponding ciphertext
--                vpshufb  SHUF_MASK(%rip), \XMM2, \XMM2     # perform a 16Byte swap
--                vpshufb  SHUF_MASK(%rip), \XMM3, \XMM3     # perform a 16Byte swap
--                vpshufb  SHUF_MASK(%rip), \XMM4, \XMM4     # perform a 16Byte swap
--                vpshufb  SHUF_MASK(%rip), \XMM5, \XMM5     # perform a 16Byte swap
--                vpshufb  SHUF_MASK(%rip), \XMM6, \XMM6     # perform a 16Byte swap
--                vpshufb  SHUF_MASK(%rip), \XMM7, \XMM7     # perform a 16Byte swap
--                vpshufb  SHUF_MASK(%rip), \XMM8, \XMM8     # perform a 16Byte swap
--
--###############################################################################
--
--.L_initial_blocks_done\@:
--
--
--.endm
--
--
--
--# encrypt 8 blocks at a time
--# ghash the 8 previously encrypted ciphertext blocks
--# arg1, arg2, arg3, arg4 are used as pointers only, not modified
--# r11 is the data offset value
--.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
--
--        vmovdqa \XMM1, \T2
--        vmovdqa \XMM2, TMP2(%rsp)
--        vmovdqa \XMM3, TMP3(%rsp)
--        vmovdqa \XMM4, TMP4(%rsp)
--        vmovdqa \XMM5, TMP5(%rsp)
--        vmovdqa \XMM6, TMP6(%rsp)
--        vmovdqa \XMM7, TMP7(%rsp)
--        vmovdqa \XMM8, TMP8(%rsp)
--
--.if \loop_idx == in_order
--                vpaddd  ONE(%rip), \CTR, \XMM1            # INCR CNT
--                vpaddd  ONE(%rip), \XMM1, \XMM2
--                vpaddd  ONE(%rip), \XMM2, \XMM3
--                vpaddd  ONE(%rip), \XMM3, \XMM4
--                vpaddd  ONE(%rip), \XMM4, \XMM5
--                vpaddd  ONE(%rip), \XMM5, \XMM6
--                vpaddd  ONE(%rip), \XMM6, \XMM7
--                vpaddd  ONE(%rip), \XMM7, \XMM8
--                vmovdqa \XMM8, \CTR
--
--                vpshufb SHUF_MASK(%rip), \XMM1, \XMM1     # perform a 16Byte swap
--                vpshufb SHUF_MASK(%rip), \XMM2, \XMM2     # perform a 16Byte swap
--                vpshufb SHUF_MASK(%rip), \XMM3, \XMM3     # perform a 16Byte swap
--                vpshufb SHUF_MASK(%rip), \XMM4, \XMM4     # perform a 16Byte swap
--                vpshufb SHUF_MASK(%rip), \XMM5, \XMM5     # perform a 16Byte swap
--                vpshufb SHUF_MASK(%rip), \XMM6, \XMM6     # perform a 16Byte swap
--                vpshufb SHUF_MASK(%rip), \XMM7, \XMM7     # perform a 16Byte swap
--                vpshufb SHUF_MASK(%rip), \XMM8, \XMM8     # perform a 16Byte swap
--.else
--                vpaddd  ONEf(%rip), \CTR, \XMM1            # INCR CNT
--                vpaddd  ONEf(%rip), \XMM1, \XMM2
--                vpaddd  ONEf(%rip), \XMM2, \XMM3
--                vpaddd  ONEf(%rip), \XMM3, \XMM4
--                vpaddd  ONEf(%rip), \XMM4, \XMM5
--                vpaddd  ONEf(%rip), \XMM5, \XMM6
--                vpaddd  ONEf(%rip), \XMM6, \XMM7
--                vpaddd  ONEf(%rip), \XMM7, \XMM8
--                vmovdqa \XMM8, \CTR
--.endif
--
--
--        #######################################################################
--
--                vmovdqu (arg1), \T1
--                vpxor   \T1, \XMM1, \XMM1
--                vpxor   \T1, \XMM2, \XMM2
--                vpxor   \T1, \XMM3, \XMM3
--                vpxor   \T1, \XMM4, \XMM4
--                vpxor   \T1, \XMM5, \XMM5
--                vpxor   \T1, \XMM6, \XMM6
--                vpxor   \T1, \XMM7, \XMM7
--                vpxor   \T1, \XMM8, \XMM8
--
--        #######################################################################
--
--
--
--
--
--                vmovdqu 16*1(arg1), \T1
--                vaesenc \T1, \XMM1, \XMM1
--                vaesenc \T1, \XMM2, \XMM2
--                vaesenc \T1, \XMM3, \XMM3
--                vaesenc \T1, \XMM4, \XMM4
--                vaesenc \T1, \XMM5, \XMM5
--                vaesenc \T1, \XMM6, \XMM6
--                vaesenc \T1, \XMM7, \XMM7
--                vaesenc \T1, \XMM8, \XMM8
--
--                vmovdqu 16*2(arg1), \T1
--                vaesenc \T1, \XMM1, \XMM1
--                vaesenc \T1, \XMM2, \XMM2
--                vaesenc \T1, \XMM3, \XMM3
--                vaesenc \T1, \XMM4, \XMM4
--                vaesenc \T1, \XMM5, \XMM5
--                vaesenc \T1, \XMM6, \XMM6
--                vaesenc \T1, \XMM7, \XMM7
--                vaesenc \T1, \XMM8, \XMM8
--
--
--        #######################################################################
--
--        vmovdqu         HashKey_8(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \T2, \T4              # T4 = a1*b1
--        vpclmulqdq      $0x00, \T5, \T2, \T7              # T7 = a0*b0
--        vpclmulqdq      $0x01, \T5, \T2, \T6              # T6 = a1*b0
--        vpclmulqdq      $0x10, \T5, \T2, \T5              # T5 = a0*b1
--        vpxor           \T5, \T6, \T6
--
--                vmovdqu 16*3(arg1), \T1
--                vaesenc \T1, \XMM1, \XMM1
--                vaesenc \T1, \XMM2, \XMM2
--                vaesenc \T1, \XMM3, \XMM3
--                vaesenc \T1, \XMM4, \XMM4
--                vaesenc \T1, \XMM5, \XMM5
--                vaesenc \T1, \XMM6, \XMM6
--                vaesenc \T1, \XMM7, \XMM7
--                vaesenc \T1, \XMM8, \XMM8
--
--        vmovdqa         TMP2(%rsp), \T1
--        vmovdqu         HashKey_7(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \T1, \T3
--        vpxor           \T3, \T4, \T4
--
--        vpclmulqdq      $0x00, \T5, \T1, \T3
--        vpxor           \T3, \T7, \T7
--
--        vpclmulqdq      $0x01, \T5, \T1, \T3
--        vpxor           \T3, \T6, \T6
--
--        vpclmulqdq      $0x10, \T5, \T1, \T3
--        vpxor           \T3, \T6, \T6
--
--                vmovdqu 16*4(arg1), \T1
--                vaesenc \T1, \XMM1, \XMM1
--                vaesenc \T1, \XMM2, \XMM2
--                vaesenc \T1, \XMM3, \XMM3
--                vaesenc \T1, \XMM4, \XMM4
--                vaesenc \T1, \XMM5, \XMM5
--                vaesenc \T1, \XMM6, \XMM6
--                vaesenc \T1, \XMM7, \XMM7
--                vaesenc \T1, \XMM8, \XMM8
--
--        #######################################################################
--
--        vmovdqa         TMP3(%rsp), \T1
--        vmovdqu         HashKey_6(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \T1, \T3
--        vpxor           \T3, \T4, \T4
--
--        vpclmulqdq      $0x00, \T5, \T1, \T3
--        vpxor           \T3, \T7, \T7
--
--        vpclmulqdq      $0x01, \T5, \T1, \T3
--        vpxor           \T3, \T6, \T6
--
--        vpclmulqdq      $0x10, \T5, \T1, \T3
--        vpxor           \T3, \T6, \T6
--
--                vmovdqu 16*5(arg1), \T1
--                vaesenc \T1, \XMM1, \XMM1
--                vaesenc \T1, \XMM2, \XMM2
--                vaesenc \T1, \XMM3, \XMM3
--                vaesenc \T1, \XMM4, \XMM4
--                vaesenc \T1, \XMM5, \XMM5
--                vaesenc \T1, \XMM6, \XMM6
--                vaesenc \T1, \XMM7, \XMM7
--                vaesenc \T1, \XMM8, \XMM8
--
--        vmovdqa         TMP4(%rsp), \T1
--        vmovdqu         HashKey_5(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \T1, \T3
--        vpxor           \T3, \T4, \T4
--
--        vpclmulqdq      $0x00, \T5, \T1, \T3
--        vpxor           \T3, \T7, \T7
--
--        vpclmulqdq      $0x01, \T5, \T1, \T3
--        vpxor           \T3, \T6, \T6
--
--        vpclmulqdq      $0x10, \T5, \T1, \T3
--        vpxor           \T3, \T6, \T6
--
--                vmovdqu 16*6(arg1), \T1
--                vaesenc \T1, \XMM1, \XMM1
--                vaesenc \T1, \XMM2, \XMM2
--                vaesenc \T1, \XMM3, \XMM3
--                vaesenc \T1, \XMM4, \XMM4
--                vaesenc \T1, \XMM5, \XMM5
--                vaesenc \T1, \XMM6, \XMM6
--                vaesenc \T1, \XMM7, \XMM7
--                vaesenc \T1, \XMM8, \XMM8
--
--
--        vmovdqa         TMP5(%rsp), \T1
--        vmovdqu         HashKey_4(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \T1, \T3
--        vpxor           \T3, \T4, \T4
--
--        vpclmulqdq      $0x00, \T5, \T1, \T3
--        vpxor           \T3, \T7, \T7
--
--        vpclmulqdq      $0x01, \T5, \T1, \T3
--        vpxor           \T3, \T6, \T6
--
--        vpclmulqdq      $0x10, \T5, \T1, \T3
--        vpxor           \T3, \T6, \T6
--
--                vmovdqu 16*7(arg1), \T1
--                vaesenc \T1, \XMM1, \XMM1
--                vaesenc \T1, \XMM2, \XMM2
--                vaesenc \T1, \XMM3, \XMM3
--                vaesenc \T1, \XMM4, \XMM4
--                vaesenc \T1, \XMM5, \XMM5
--                vaesenc \T1, \XMM6, \XMM6
--                vaesenc \T1, \XMM7, \XMM7
--                vaesenc \T1, \XMM8, \XMM8
--
--        vmovdqa         TMP6(%rsp), \T1
--        vmovdqu         HashKey_3(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \T1, \T3
--        vpxor           \T3, \T4, \T4
--
--        vpclmulqdq      $0x00, \T5, \T1, \T3
--        vpxor           \T3, \T7, \T7
--
--        vpclmulqdq      $0x01, \T5, \T1, \T3
--        vpxor           \T3, \T6, \T6
--
--        vpclmulqdq      $0x10, \T5, \T1, \T3
--        vpxor           \T3, \T6, \T6
--
--                vmovdqu 16*8(arg1), \T1
--                vaesenc \T1, \XMM1, \XMM1
--                vaesenc \T1, \XMM2, \XMM2
--                vaesenc \T1, \XMM3, \XMM3
--                vaesenc \T1, \XMM4, \XMM4
--                vaesenc \T1, \XMM5, \XMM5
--                vaesenc \T1, \XMM6, \XMM6
--                vaesenc \T1, \XMM7, \XMM7
--                vaesenc \T1, \XMM8, \XMM8
--
--        vmovdqa         TMP7(%rsp), \T1
--        vmovdqu         HashKey_2(arg2), \T5
--        vpclmulqdq      $0x11, \T5, \T1, \T3
--        vpxor           \T3, \T4, \T4
--
--        vpclmulqdq      $0x00, \T5, \T1, \T3
--        vpxor           \T3, \T7, \T7
--
--        vpclmulqdq      $0x01, \T5, \T1, \T3
--        vpxor           \T3, \T6, \T6
--
--        vpclmulqdq      $0x10, \T5, \T1, \T3
--        vpxor           \T3, \T6, \T6
--
--
--        #######################################################################
--
--                vmovdqu 16*9(arg1), \T5
--                vaesenc \T5, \XMM1, \XMM1
--                vaesenc \T5, \XMM2, \XMM2
--                vaesenc \T5, \XMM3, \XMM3
--                vaesenc \T5, \XMM4, \XMM4
--                vaesenc \T5, \XMM5, \XMM5
--                vaesenc \T5, \XMM6, \XMM6
--                vaesenc \T5, \XMM7, \XMM7
--                vaesenc \T5, \XMM8, \XMM8
--
--        vmovdqa         TMP8(%rsp), \T1
--        vmovdqu         HashKey(arg2), \T5
--
--        vpclmulqdq      $0x00, \T5, \T1, \T3
--        vpxor           \T3, \T7, \T7
--
--        vpclmulqdq      $0x01, \T5, \T1, \T3
--        vpxor           \T3, \T6, \T6
--
--        vpclmulqdq      $0x10, \T5, \T1, \T3
--        vpxor           \T3, \T6, \T6
--
--        vpclmulqdq      $0x11, \T5, \T1, \T3
--        vpxor           \T3, \T4, \T1
--
--
--                vmovdqu 16*10(arg1), \T5
--
--        i = 11
--        setreg
--.rep (\REP-9)
--        vaesenc \T5, \XMM1, \XMM1
--        vaesenc \T5, \XMM2, \XMM2
--        vaesenc \T5, \XMM3, \XMM3
--        vaesenc \T5, \XMM4, \XMM4
--        vaesenc \T5, \XMM5, \XMM5
--        vaesenc \T5, \XMM6, \XMM6
--        vaesenc \T5, \XMM7, \XMM7
--        vaesenc \T5, \XMM8, \XMM8
--
--        vmovdqu 16*i(arg1), \T5
--        i = i + 1
--        setreg
--.endr
--
--	i = 0
--	j = 1
--	setreg
--.rep 8
--		vpxor	16*i(arg4, %r11), \T5, \T2
--                .if \ENC_DEC == ENC
--                vaesenclast     \T2, reg_j, reg_j
--                .else
--                vaesenclast     \T2, reg_j, \T3
--                vmovdqu 16*i(arg4, %r11), reg_j
--                vmovdqu \T3, 16*i(arg3, %r11)
--                .endif
--	i = (i+1)
--	j = (j+1)
--	setreg
--.endr
--	#######################################################################
--
--
--	vpslldq	$8, \T6, \T3				# shift-L T3 2 DWs
--	vpsrldq	$8, \T6, \T6				# shift-R T2 2 DWs
--	vpxor	\T3, \T7, \T7
--	vpxor	\T6, \T1, \T1				# accumulate the results in T1:T7
--
--
--
--	#######################################################################
--	#first phase of the reduction
--	vmovdqa         POLY2(%rip), \T3
--
--	vpclmulqdq	$0x01, \T7, \T3, \T2
--	vpslldq		$8, \T2, \T2			# shift-L xmm2 2 DWs
--
--	vpxor		\T2, \T7, \T7			# first phase of the reduction complete
--	#######################################################################
--                .if \ENC_DEC == ENC
--		vmovdqu	 \XMM1,	16*0(arg3,%r11)		# Write to the Ciphertext buffer
--		vmovdqu	 \XMM2,	16*1(arg3,%r11)		# Write to the Ciphertext buffer
--		vmovdqu	 \XMM3,	16*2(arg3,%r11)		# Write to the Ciphertext buffer
--		vmovdqu	 \XMM4,	16*3(arg3,%r11)		# Write to the Ciphertext buffer
--		vmovdqu	 \XMM5,	16*4(arg3,%r11)		# Write to the Ciphertext buffer
--		vmovdqu	 \XMM6,	16*5(arg3,%r11)		# Write to the Ciphertext buffer
--		vmovdqu	 \XMM7,	16*6(arg3,%r11)		# Write to the Ciphertext buffer
--		vmovdqu	 \XMM8,	16*7(arg3,%r11)		# Write to the Ciphertext buffer
--                .endif
--
--	#######################################################################
--	#second phase of the reduction
--	vpclmulqdq	$0x00, \T7, \T3, \T2
--	vpsrldq		$4, \T2, \T2			# shift-R xmm2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)
--
--	vpclmulqdq	$0x10, \T7, \T3, \T4
--	vpslldq		$4, \T4, \T4			# shift-L xmm0 1 DW (Shift-L 1-DW to obtain result with no shifts)
--
--	vpxor		\T2, \T4, \T4			# second phase of the reduction complete
--	#######################################################################
--	vpxor		\T4, \T1, \T1			# the result is in T1
--
--		vpshufb	SHUF_MASK(%rip), \XMM1, \XMM1	# perform a 16Byte swap
--		vpshufb	SHUF_MASK(%rip), \XMM2, \XMM2	# perform a 16Byte swap
--		vpshufb	SHUF_MASK(%rip), \XMM3, \XMM3	# perform a 16Byte swap
--		vpshufb	SHUF_MASK(%rip), \XMM4, \XMM4	# perform a 16Byte swap
--		vpshufb	SHUF_MASK(%rip), \XMM5, \XMM5	# perform a 16Byte swap
--		vpshufb	SHUF_MASK(%rip), \XMM6, \XMM6	# perform a 16Byte swap
--		vpshufb	SHUF_MASK(%rip), \XMM7, \XMM7	# perform a 16Byte swap
--		vpshufb	SHUF_MASK(%rip), \XMM8, \XMM8	# perform a 16Byte swap
--
--
--	vpxor	\T1, \XMM1, \XMM1
--
--
--
--.endm
--
--
--# GHASH the last 4 ciphertext blocks.
--.macro  GHASH_LAST_8_AVX2 T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8
--
--        ## Karatsuba Method
--
--        vmovdqu         HashKey_8(arg2), \T5
--
--        vpshufd         $0b01001110, \XMM1, \T2
--        vpshufd         $0b01001110, \T5, \T3
--        vpxor           \XMM1, \T2, \T2
--        vpxor           \T5, \T3, \T3
--
--        vpclmulqdq      $0x11, \T5, \XMM1, \T6
--        vpclmulqdq      $0x00, \T5, \XMM1, \T7
--
--        vpclmulqdq      $0x00, \T3, \T2, \XMM1
--
--        ######################
--
--        vmovdqu         HashKey_7(arg2), \T5
--        vpshufd         $0b01001110, \XMM2, \T2
--        vpshufd         $0b01001110, \T5, \T3
--        vpxor           \XMM2, \T2, \T2
--        vpxor           \T5, \T3, \T3
--
--        vpclmulqdq      $0x11, \T5, \XMM2, \T4
--        vpxor           \T4, \T6, \T6
--
--        vpclmulqdq      $0x00, \T5, \XMM2, \T4
--        vpxor           \T4, \T7, \T7
--
--        vpclmulqdq      $0x00, \T3, \T2, \T2
--
--        vpxor           \T2, \XMM1, \XMM1
--
--        ######################
--
--        vmovdqu         HashKey_6(arg2), \T5
--        vpshufd         $0b01001110, \XMM3, \T2
--        vpshufd         $0b01001110, \T5, \T3
--        vpxor           \XMM3, \T2, \T2
--        vpxor           \T5, \T3, \T3
--
--        vpclmulqdq      $0x11, \T5, \XMM3, \T4
--        vpxor           \T4, \T6, \T6
--
--        vpclmulqdq      $0x00, \T5, \XMM3, \T4
--        vpxor           \T4, \T7, \T7
--
--        vpclmulqdq      $0x00, \T3, \T2, \T2
--
--        vpxor           \T2, \XMM1, \XMM1
--
--        ######################
--
--        vmovdqu         HashKey_5(arg2), \T5
--        vpshufd         $0b01001110, \XMM4, \T2
--        vpshufd         $0b01001110, \T5, \T3
--        vpxor           \XMM4, \T2, \T2
--        vpxor           \T5, \T3, \T3
--
--        vpclmulqdq      $0x11, \T5, \XMM4, \T4
--        vpxor           \T4, \T6, \T6
--
--        vpclmulqdq      $0x00, \T5, \XMM4, \T4
--        vpxor           \T4, \T7, \T7
--
--        vpclmulqdq      $0x00, \T3, \T2, \T2
--
--        vpxor           \T2, \XMM1, \XMM1
--
--        ######################
--
--        vmovdqu         HashKey_4(arg2), \T5
--        vpshufd         $0b01001110, \XMM5, \T2
--        vpshufd         $0b01001110, \T5, \T3
--        vpxor           \XMM5, \T2, \T2
--        vpxor           \T5, \T3, \T3
--
--        vpclmulqdq      $0x11, \T5, \XMM5, \T4
--        vpxor           \T4, \T6, \T6
--
--        vpclmulqdq      $0x00, \T5, \XMM5, \T4
--        vpxor           \T4, \T7, \T7
--
--        vpclmulqdq      $0x00, \T3, \T2, \T2
--
--        vpxor           \T2, \XMM1, \XMM1
--
--        ######################
--
--        vmovdqu         HashKey_3(arg2), \T5
--        vpshufd         $0b01001110, \XMM6, \T2
--        vpshufd         $0b01001110, \T5, \T3
--        vpxor           \XMM6, \T2, \T2
--        vpxor           \T5, \T3, \T3
--
--        vpclmulqdq      $0x11, \T5, \XMM6, \T4
--        vpxor           \T4, \T6, \T6
--
--        vpclmulqdq      $0x00, \T5, \XMM6, \T4
--        vpxor           \T4, \T7, \T7
--
--        vpclmulqdq      $0x00, \T3, \T2, \T2
--
--        vpxor           \T2, \XMM1, \XMM1
--
--        ######################
--
--        vmovdqu         HashKey_2(arg2), \T5
--        vpshufd         $0b01001110, \XMM7, \T2
--        vpshufd         $0b01001110, \T5, \T3
--        vpxor           \XMM7, \T2, \T2
--        vpxor           \T5, \T3, \T3
--
--        vpclmulqdq      $0x11, \T5, \XMM7, \T4
--        vpxor           \T4, \T6, \T6
--
--        vpclmulqdq      $0x00, \T5, \XMM7, \T4
--        vpxor           \T4, \T7, \T7
--
--        vpclmulqdq      $0x00, \T3, \T2, \T2
--
--        vpxor           \T2, \XMM1, \XMM1
--
--        ######################
--
--        vmovdqu         HashKey(arg2), \T5
--        vpshufd         $0b01001110, \XMM8, \T2
--        vpshufd         $0b01001110, \T5, \T3
--        vpxor           \XMM8, \T2, \T2
--        vpxor           \T5, \T3, \T3
--
--        vpclmulqdq      $0x11, \T5, \XMM8, \T4
--        vpxor           \T4, \T6, \T6
--
--        vpclmulqdq      $0x00, \T5, \XMM8, \T4
--        vpxor           \T4, \T7, \T7
--
--        vpclmulqdq      $0x00, \T3, \T2, \T2
--
--        vpxor           \T2, \XMM1, \XMM1
--        vpxor           \T6, \XMM1, \XMM1
--        vpxor           \T7, \XMM1, \T2
--
--
--
--
--        vpslldq $8, \T2, \T4
--        vpsrldq $8, \T2, \T2
--
--        vpxor   \T4, \T7, \T7
--        vpxor   \T2, \T6, \T6                      # <T6:T7> holds the result of the
--						   # accumulated carry-less multiplications
--
--        #######################################################################
--        #first phase of the reduction
--        vmovdqa         POLY2(%rip), \T3
--
--        vpclmulqdq      $0x01, \T7, \T3, \T2
--        vpslldq         $8, \T2, \T2               # shift-L xmm2 2 DWs
--
--        vpxor           \T2, \T7, \T7              # first phase of the reduction complete
--        #######################################################################
--
--
--        #second phase of the reduction
--        vpclmulqdq      $0x00, \T7, \T3, \T2
--        vpsrldq         $4, \T2, \T2               # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)
--
--        vpclmulqdq      $0x10, \T7, \T3, \T4
--        vpslldq         $4, \T4, \T4               # shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts)
--
--        vpxor           \T2, \T4, \T4              # second phase of the reduction complete
--        #######################################################################
--        vpxor           \T4, \T6, \T6              # the result is in T6
--.endm
--
--
--
--#############################################################
--#void   aesni_gcm_init_avx_gen4
--#        (gcm_data     *my_ctx_data,
--#         gcm_context_data *data,
--#        u8      *iv, /* Pre-counter block j0: 4 byte salt
--#			(from Security Association) concatenated with 8 byte
--#			Initialisation Vector (from IPSec ESP Payload)
--#			concatenated with 0x00000001. 16-byte aligned pointer. */
--#        u8     *hash_subkey# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
--#        const   u8 *aad, /* Additional Authentication Data (AAD)*/
--#        u64     aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
--#############################################################
--SYM_FUNC_START(aesni_gcm_init_avx_gen4)
--        FUNC_SAVE
--        INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2
--        FUNC_RESTORE
--        RET
--SYM_FUNC_END(aesni_gcm_init_avx_gen4)
--
--###############################################################################
--#void   aesni_gcm_enc_avx_gen4(
--#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
--#        gcm_context_data *data,
--#        u8      *out, /* Ciphertext output. Encrypt in-place is allowed.  */
--#        const   u8 *in, /* Plaintext input */
--#        u64     plaintext_len) /* Length of data in Bytes for encryption. */
--###############################################################################
--SYM_FUNC_START(aesni_gcm_enc_update_avx_gen4)
--        FUNC_SAVE
--        mov     keysize,%eax
--        cmp     $32, %eax
--        je      key_256_enc_update4
--        cmp     $16, %eax
--        je      key_128_enc_update4
--        # must be 192
--        GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 11
--        FUNC_RESTORE
--	RET
--key_128_enc_update4:
--        GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 9
--        FUNC_RESTORE
--	RET
--key_256_enc_update4:
--        GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13
--        FUNC_RESTORE
--	RET
--SYM_FUNC_END(aesni_gcm_enc_update_avx_gen4)
--
--###############################################################################
--#void   aesni_gcm_dec_update_avx_gen4(
--#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
--#        gcm_context_data *data,
--#        u8      *out, /* Plaintext output. Decrypt in-place is allowed.  */
--#        const   u8 *in, /* Ciphertext input */
--#        u64     plaintext_len) /* Length of data in Bytes for encryption. */
--###############################################################################
--SYM_FUNC_START(aesni_gcm_dec_update_avx_gen4)
--        FUNC_SAVE
--        mov     keysize,%eax
--        cmp     $32, %eax
--        je      key_256_dec_update4
--        cmp     $16, %eax
--        je      key_128_dec_update4
--        # must be 192
--        GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 11
--        FUNC_RESTORE
--        RET
--key_128_dec_update4:
--        GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 9
--        FUNC_RESTORE
--        RET
--key_256_dec_update4:
--        GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13
--        FUNC_RESTORE
--        RET
--SYM_FUNC_END(aesni_gcm_dec_update_avx_gen4)
--
--###############################################################################
--#void   aesni_gcm_finalize_avx_gen4(
--#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
--#        gcm_context_data *data,
--#        u8      *auth_tag, /* Authenticated Tag output. */
--#        u64     auth_tag_len)# /* Authenticated Tag Length in bytes.
--#                              Valid values are 16 (most likely), 12 or 8. */
--###############################################################################
--SYM_FUNC_START(aesni_gcm_finalize_avx_gen4)
--        FUNC_SAVE
--        mov	keysize,%eax
--        cmp     $32, %eax
--        je      key_256_finalize4
--        cmp     $16, %eax
--        je      key_128_finalize4
--        # must be 192
--        GCM_COMPLETE GHASH_MUL_AVX2, 11, arg3, arg4
--        FUNC_RESTORE
--        RET
--key_128_finalize4:
--        GCM_COMPLETE GHASH_MUL_AVX2, 9, arg3, arg4
--        FUNC_RESTORE
--        RET
--key_256_finalize4:
--        GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4
--        FUNC_RESTORE
--        RET
--SYM_FUNC_END(aesni_gcm_finalize_avx_gen4)
-diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
-index ef031655b2d3..cd37de5ec404 100644
---- a/arch/x86/crypto/aesni-intel_glue.c
-+++ b/arch/x86/crypto/aesni-intel_glue.c
-@@ -1,7 +1,7 @@
- // SPDX-License-Identifier: GPL-2.0-or-later
- /*
-- * Support for Intel AES-NI instructions. This file contains glue
-- * code, the real AES implementation is in intel-aes_asm.S.
-+ * Support for AES-NI and VAES instructions.  This file contains glue code.
-+ * The real AES implementations are in aesni-intel_asm.S and other .S files.
-  *
-  * Copyright (C) 2008, Intel Corp.
-  *    Author: Huang Ying <ying.huang@intel.com>
-@@ -13,6 +13,8 @@
-  *             Tadeusz Struk (tadeusz.struk@intel.com)
-  *             Aidan O'Mahony (aidan.o.mahony@intel.com)
-  *    Copyright (c) 2010, Intel Corporation.
-+ *
-+ * Copyright 2024 Google LLC
-  */
- 
- #include <linux/hardirq.h>
-@@ -44,41 +46,11 @@
- #define CRYPTO_AES_CTX_SIZE (sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA)
- #define XTS_AES_CTX_SIZE (sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA)
- 
--/* This data is stored at the end of the crypto_tfm struct.
-- * It's a type of per "session" data storage location.
-- * This needs to be 16 byte aligned.
-- */
--struct aesni_rfc4106_gcm_ctx {
--	u8 hash_subkey[16] AESNI_ALIGN_ATTR;
--	struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR;
--	u8 nonce[4];
--};
--
--struct generic_gcmaes_ctx {
--	u8 hash_subkey[16] AESNI_ALIGN_ATTR;
--	struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR;
--};
--
- struct aesni_xts_ctx {
- 	struct crypto_aes_ctx tweak_ctx AESNI_ALIGN_ATTR;
- 	struct crypto_aes_ctx crypt_ctx AESNI_ALIGN_ATTR;
- };
- 
--#define GCM_BLOCK_LEN 16
--
--struct gcm_context_data {
--	/* init, update and finalize context data */
--	u8 aad_hash[GCM_BLOCK_LEN];
--	u64 aad_length;
--	u64 in_length;
--	u8 partial_block_enc_key[GCM_BLOCK_LEN];
--	u8 orig_IV[GCM_BLOCK_LEN];
--	u8 current_counter[GCM_BLOCK_LEN];
--	u64 partial_block_len;
--	u64 unused;
--	u8 hash_keys[GCM_BLOCK_LEN * 16];
--};
--
- static inline void *aes_align_addr(void *addr)
- {
- 	if (crypto_tfm_ctx_alignment() >= AESNI_ALIGN)
-@@ -103,9 +75,6 @@ asmlinkage void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
- asmlinkage void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
- 				  const u8 *in, unsigned int len, u8 *iv);
- 
--#define AVX_GEN2_OPTSIZE 640
--#define AVX_GEN4_OPTSIZE 4096
--
- asmlinkage void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *out,
- 			      const u8 *in, unsigned int len, u8 *iv);
- 
-@@ -118,23 +87,6 @@ asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
- 			      const u8 *in, unsigned int len, u8 *iv);
- DEFINE_STATIC_CALL(aesni_ctr_enc_tfm, aesni_ctr_enc);
- 
--/* Scatter / Gather routines, with args similar to above */
--asmlinkage void aesni_gcm_init(void *ctx,
--			       struct gcm_context_data *gdata,
--			       u8 *iv,
--			       u8 *hash_subkey, const u8 *aad,
--			       unsigned long aad_len);
--asmlinkage void aesni_gcm_enc_update(void *ctx,
--				     struct gcm_context_data *gdata, u8 *out,
--				     const u8 *in, unsigned long plaintext_len);
--asmlinkage void aesni_gcm_dec_update(void *ctx,
--				     struct gcm_context_data *gdata, u8 *out,
--				     const u8 *in,
--				     unsigned long ciphertext_len);
--asmlinkage void aesni_gcm_finalize(void *ctx,
--				   struct gcm_context_data *gdata,
--				   u8 *auth_tag, unsigned long auth_tag_len);
--
- asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
- 		void *keys, u8 *out, unsigned int num_bytes);
- asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
-@@ -154,67 +106,6 @@ asmlinkage void aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv,
- asmlinkage void aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv,
- 	const void *keys, u8 *out, unsigned int num_bytes,
- 	unsigned int byte_ctr);
--
--/*
-- * asmlinkage void aesni_gcm_init_avx_gen2()
-- * gcm_data *my_ctx_data, context data
-- * u8 *hash_subkey,  the Hash sub key input. Data starts on a 16-byte boundary.
-- */
--asmlinkage void aesni_gcm_init_avx_gen2(void *my_ctx_data,
--					struct gcm_context_data *gdata,
--					u8 *iv,
--					u8 *hash_subkey,
--					const u8 *aad,
--					unsigned long aad_len);
--
--asmlinkage void aesni_gcm_enc_update_avx_gen2(void *ctx,
--				     struct gcm_context_data *gdata, u8 *out,
--				     const u8 *in, unsigned long plaintext_len);
--asmlinkage void aesni_gcm_dec_update_avx_gen2(void *ctx,
--				     struct gcm_context_data *gdata, u8 *out,
--				     const u8 *in,
--				     unsigned long ciphertext_len);
--asmlinkage void aesni_gcm_finalize_avx_gen2(void *ctx,
--				   struct gcm_context_data *gdata,
--				   u8 *auth_tag, unsigned long auth_tag_len);
--
--/*
-- * asmlinkage void aesni_gcm_init_avx_gen4()
-- * gcm_data *my_ctx_data, context data
-- * u8 *hash_subkey,  the Hash sub key input. Data starts on a 16-byte boundary.
-- */
--asmlinkage void aesni_gcm_init_avx_gen4(void *my_ctx_data,
--					struct gcm_context_data *gdata,
--					u8 *iv,
--					u8 *hash_subkey,
--					const u8 *aad,
--					unsigned long aad_len);
--
--asmlinkage void aesni_gcm_enc_update_avx_gen4(void *ctx,
--				     struct gcm_context_data *gdata, u8 *out,
--				     const u8 *in, unsigned long plaintext_len);
--asmlinkage void aesni_gcm_dec_update_avx_gen4(void *ctx,
--				     struct gcm_context_data *gdata, u8 *out,
--				     const u8 *in,
--				     unsigned long ciphertext_len);
--asmlinkage void aesni_gcm_finalize_avx_gen4(void *ctx,
--				   struct gcm_context_data *gdata,
--				   u8 *auth_tag, unsigned long auth_tag_len);
--
--static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx);
--static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx2);
--
--static inline struct
--aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
--{
--	return aes_align_addr(crypto_aead_ctx(tfm));
--}
--
--static inline struct
--generic_gcmaes_ctx *generic_gcmaes_ctx_get(struct crypto_aead *tfm)
--{
--	return aes_align_addr(crypto_aead_ctx(tfm));
--}
- #endif
- 
- static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
-@@ -588,280 +479,6 @@ static int xctr_crypt(struct skcipher_request *req)
- 	}
- 	return err;
- }
--
--static int aes_gcm_derive_hash_subkey(const struct crypto_aes_ctx *aes_key,
--				      u8 hash_subkey[AES_BLOCK_SIZE])
--{
--	static const u8 zeroes[AES_BLOCK_SIZE];
--
--	aes_encrypt(aes_key, hash_subkey, zeroes);
--	return 0;
--}
--
--static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
--				  unsigned int key_len)
--{
--	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead);
--
--	if (key_len < 4)
--		return -EINVAL;
--
--	/*Account for 4 byte nonce at the end.*/
--	key_len -= 4;
--
--	memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce));
--
--	return aes_set_key_common(&ctx->aes_key_expanded, key, key_len) ?:
--	       aes_gcm_derive_hash_subkey(&ctx->aes_key_expanded,
--					  ctx->hash_subkey);
--}
--
--/* This is the Integrity Check Value (aka the authentication tag) length and can
-- * be 8, 12 or 16 bytes long. */
--static int common_rfc4106_set_authsize(struct crypto_aead *aead,
--				       unsigned int authsize)
--{
--	switch (authsize) {
--	case 8:
--	case 12:
--	case 16:
--		break;
--	default:
--		return -EINVAL;
--	}
--
--	return 0;
--}
--
--static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
--				       unsigned int authsize)
--{
--	switch (authsize) {
--	case 4:
--	case 8:
--	case 12:
--	case 13:
--	case 14:
--	case 15:
--	case 16:
--		break;
--	default:
--		return -EINVAL;
--	}
--
--	return 0;
--}
--
--static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
--			      unsigned int assoclen, u8 *hash_subkey,
--			      u8 *iv, void *aes_ctx, u8 *auth_tag,
--			      unsigned long auth_tag_len)
--{
--	u8 databuf[sizeof(struct gcm_context_data) + (AESNI_ALIGN - 8)] __aligned(8);
--	struct gcm_context_data *data = PTR_ALIGN((void *)databuf, AESNI_ALIGN);
--	unsigned long left = req->cryptlen;
--	struct scatter_walk assoc_sg_walk;
--	struct skcipher_walk walk;
--	bool do_avx, do_avx2;
--	u8 *assocmem = NULL;
--	u8 *assoc;
--	int err;
--
--	if (!enc)
--		left -= auth_tag_len;
--
--	do_avx = (left >= AVX_GEN2_OPTSIZE);
--	do_avx2 = (left >= AVX_GEN4_OPTSIZE);
--
--	/* Linearize assoc, if not already linear */
--	if (req->src->length >= assoclen && req->src->length) {
--		scatterwalk_start(&assoc_sg_walk, req->src);
--		assoc = scatterwalk_map(&assoc_sg_walk);
--	} else {
--		gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
--			      GFP_KERNEL : GFP_ATOMIC;
--
--		/* assoc can be any length, so must be on heap */
--		assocmem = kmalloc(assoclen, flags);
--		if (unlikely(!assocmem))
--			return -ENOMEM;
--		assoc = assocmem;
--
--		scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
--	}
--
--	kernel_fpu_begin();
--	if (static_branch_likely(&gcm_use_avx2) && do_avx2)
--		aesni_gcm_init_avx_gen4(aes_ctx, data, iv, hash_subkey, assoc,
--					assoclen);
--	else if (static_branch_likely(&gcm_use_avx) && do_avx)
--		aesni_gcm_init_avx_gen2(aes_ctx, data, iv, hash_subkey, assoc,
--					assoclen);
--	else
--		aesni_gcm_init(aes_ctx, data, iv, hash_subkey, assoc, assoclen);
--	kernel_fpu_end();
--
--	if (!assocmem)
--		scatterwalk_unmap(assoc);
--	else
--		kfree(assocmem);
--
--	err = enc ? skcipher_walk_aead_encrypt(&walk, req, false)
--		  : skcipher_walk_aead_decrypt(&walk, req, false);
--
--	while (walk.nbytes > 0) {
--		kernel_fpu_begin();
--		if (static_branch_likely(&gcm_use_avx2) && do_avx2) {
--			if (enc)
--				aesni_gcm_enc_update_avx_gen4(aes_ctx, data,
--							      walk.dst.virt.addr,
--							      walk.src.virt.addr,
--							      walk.nbytes);
--			else
--				aesni_gcm_dec_update_avx_gen4(aes_ctx, data,
--							      walk.dst.virt.addr,
--							      walk.src.virt.addr,
--							      walk.nbytes);
--		} else if (static_branch_likely(&gcm_use_avx) && do_avx) {
--			if (enc)
--				aesni_gcm_enc_update_avx_gen2(aes_ctx, data,
--							      walk.dst.virt.addr,
--							      walk.src.virt.addr,
--							      walk.nbytes);
--			else
--				aesni_gcm_dec_update_avx_gen2(aes_ctx, data,
--							      walk.dst.virt.addr,
--							      walk.src.virt.addr,
--							      walk.nbytes);
--		} else if (enc) {
--			aesni_gcm_enc_update(aes_ctx, data, walk.dst.virt.addr,
--					     walk.src.virt.addr, walk.nbytes);
--		} else {
--			aesni_gcm_dec_update(aes_ctx, data, walk.dst.virt.addr,
--					     walk.src.virt.addr, walk.nbytes);
--		}
--		kernel_fpu_end();
--
--		err = skcipher_walk_done(&walk, 0);
--	}
--
--	if (err)
--		return err;
--
--	kernel_fpu_begin();
--	if (static_branch_likely(&gcm_use_avx2) && do_avx2)
--		aesni_gcm_finalize_avx_gen4(aes_ctx, data, auth_tag,
--					    auth_tag_len);
--	else if (static_branch_likely(&gcm_use_avx) && do_avx)
--		aesni_gcm_finalize_avx_gen2(aes_ctx, data, auth_tag,
--					    auth_tag_len);
--	else
--		aesni_gcm_finalize(aes_ctx, data, auth_tag, auth_tag_len);
--	kernel_fpu_end();
--
--	return 0;
--}
--
--static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
--			  u8 *hash_subkey, u8 *iv, void *aes_ctx)
--{
--	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
--	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
--	u8 auth_tag[16];
--	int err;
--
--	err = gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv, aes_ctx,
--				 auth_tag, auth_tag_len);
--	if (err)
--		return err;
--
--	scatterwalk_map_and_copy(auth_tag, req->dst,
--				 req->assoclen + req->cryptlen,
--				 auth_tag_len, 1);
--	return 0;
--}
--
--static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
--			  u8 *hash_subkey, u8 *iv, void *aes_ctx)
--{
--	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
--	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
--	u8 auth_tag_msg[16];
--	u8 auth_tag[16];
--	int err;
--
--	err = gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv, aes_ctx,
--				 auth_tag, auth_tag_len);
--	if (err)
--		return err;
--
--	/* Copy out original auth_tag */
--	scatterwalk_map_and_copy(auth_tag_msg, req->src,
--				 req->assoclen + req->cryptlen - auth_tag_len,
--				 auth_tag_len, 0);
--
--	/* Compare generated tag with passed in tag. */
--	if (crypto_memneq(auth_tag_msg, auth_tag, auth_tag_len)) {
--		memzero_explicit(auth_tag, sizeof(auth_tag));
--		return -EBADMSG;
--	}
--	return 0;
--}
--
--static int helper_rfc4106_encrypt(struct aead_request *req)
--{
--	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
--	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
--	void *aes_ctx = &(ctx->aes_key_expanded);
--	u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
--	u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
--	unsigned int i;
--	__be32 counter = cpu_to_be32(1);
--
--	/* Assuming we are supporting rfc4106 64-bit extended */
--	/* sequence numbers We need to have the AAD length equal */
--	/* to 16 or 20 bytes */
--	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
--		return -EINVAL;
--
--	/* IV below built */
--	for (i = 0; i < 4; i++)
--		*(iv+i) = ctx->nonce[i];
--	for (i = 0; i < 8; i++)
--		*(iv+4+i) = req->iv[i];
--	*((__be32 *)(iv+12)) = counter;
--
--	return gcmaes_encrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
--			      aes_ctx);
--}
--
--static int helper_rfc4106_decrypt(struct aead_request *req)
--{
--	__be32 counter = cpu_to_be32(1);
--	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
--	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
--	void *aes_ctx = &(ctx->aes_key_expanded);
--	u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
--	u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
--	unsigned int i;
--
--	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
--		return -EINVAL;
--
--	/* Assuming we are supporting rfc4106 64-bit extended */
--	/* sequence numbers We need to have the AAD length */
--	/* equal to 16 or 20 bytes */
--
--	/* IV below built */
--	for (i = 0; i < 4; i++)
--		*(iv+i) = ctx->nonce[i];
--	for (i = 0; i < 8; i++)
--		*(iv+4+i) = req->iv[i];
--	*((__be32 *)(iv+12)) = counter;
--
--	return gcmaes_decrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
--			      aes_ctx);
--}
- #endif
- 
- static int xts_setkey_aesni(struct crypto_skcipher *tfm, const u8 *key,
-@@ -1216,11 +833,717 @@ DEFINE_XTS_ALG(vaes_avx10_256, "xts-aes-vaes-avx10_256", 700);
- DEFINE_XTS_ALG(vaes_avx10_512, "xts-aes-vaes-avx10_512", 800);
- #endif
- 
-+/* The common part of the x86_64 AES-GCM key struct */
-+struct aes_gcm_key {
-+	/* Expanded AES key and the AES key length in bytes */
-+	struct crypto_aes_ctx aes_key;
-+
-+	/* RFC4106 nonce (used only by the rfc4106 algorithms) */
-+	u32 rfc4106_nonce;
-+};
-+
-+/* Key struct used by the AES-NI implementations of AES-GCM */
-+struct aes_gcm_key_aesni {
-+	/*
-+	 * Common part of the key.  The assembly code requires 16-byte alignment
-+	 * for the round keys; we get this by them being located at the start of
-+	 * the struct and the whole struct being 16-byte aligned.
-+	 */
-+	struct aes_gcm_key base;
-+
-+	/*
-+	 * Powers of the hash key H^8 through H^1.  These are 128-bit values.
-+	 * They all have an extra factor of x^-1 and are byte-reversed.  16-byte
-+	 * alignment is required by the assembly code.
-+	 */
-+	u64 h_powers[8][2] __aligned(16);
-+
-+	/*
-+	 * h_powers_xored[i] contains the two 64-bit halves of h_powers[i] XOR'd
-+	 * together.  It's used for Karatsuba multiplication.  16-byte alignment
-+	 * is required by the assembly code.
-+	 */
-+	u64 h_powers_xored[8] __aligned(16);
-+
-+	/*
-+	 * H^1 times x^64 (and also the usual extra factor of x^-1).  16-byte
-+	 * alignment is required by the assembly code.
-+	 */
-+	u64 h_times_x64[2] __aligned(16);
-+};
-+#define AES_GCM_KEY_AESNI(key)	\
-+	container_of((key), struct aes_gcm_key_aesni, base)
-+#define AES_GCM_KEY_AESNI_SIZE	\
-+	(sizeof(struct aes_gcm_key_aesni) + (15 & ~(CRYPTO_MINALIGN - 1)))
-+
-+/* Key struct used by the VAES + AVX10 implementations of AES-GCM */
-+struct aes_gcm_key_avx10 {
-+	/*
-+	 * Common part of the key.  The assembly code prefers 16-byte alignment
-+	 * for the round keys; we get this by them being located at the start of
-+	 * the struct and the whole struct being 64-byte aligned.
-+	 */
-+	struct aes_gcm_key base;
-+
-+	/*
-+	 * Powers of the hash key H^16 through H^1.  These are 128-bit values.
-+	 * They all have an extra factor of x^-1 and are byte-reversed.  This
-+	 * array is aligned to a 64-byte boundary to make it naturally aligned
-+	 * for 512-bit loads, which can improve performance.  (The assembly code
-+	 * doesn't *need* the alignment; this is just an optimization.)
-+	 */
-+	u64 h_powers[16][2] __aligned(64);
-+
-+	/* Three padding blocks required by the assembly code */
-+	u64 padding[3][2];
-+};
-+#define AES_GCM_KEY_AVX10(key)	\
-+	container_of((key), struct aes_gcm_key_avx10, base)
-+#define AES_GCM_KEY_AVX10_SIZE	\
-+	(sizeof(struct aes_gcm_key_avx10) + (63 & ~(CRYPTO_MINALIGN - 1)))
-+
-+/*
-+ * These flags are passed to the AES-GCM helper functions to specify the
-+ * specific version of AES-GCM (RFC4106 or not), whether it's encryption or
-+ * decryption, and which assembly functions should be called.  Assembly
-+ * functions are selected using flags instead of function pointers to avoid
-+ * indirect calls (which are very expensive on x86) regardless of inlining.
-+ */
-+#define FLAG_RFC4106	BIT(0)
-+#define FLAG_ENC	BIT(1)
-+#define FLAG_AVX	BIT(2)
-+#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
-+#  define FLAG_AVX10_256	BIT(3)
-+#  define FLAG_AVX10_512	BIT(4)
-+#else
-+   /*
-+    * This should cause all calls to the AVX10 assembly functions to be
-+    * optimized out, avoiding the need to ifdef each call individually.
-+    */
-+#  define FLAG_AVX10_256	0
-+#  define FLAG_AVX10_512	0
-+#endif
-+
-+static inline struct aes_gcm_key *
-+aes_gcm_key_get(struct crypto_aead *tfm, int flags)
-+{
-+	if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
-+		return PTR_ALIGN(crypto_aead_ctx(tfm), 64);
-+	else
-+		return PTR_ALIGN(crypto_aead_ctx(tfm), 16);
-+}
-+
-+asmlinkage void
-+aes_gcm_precompute_aesni(struct aes_gcm_key_aesni *key);
-+asmlinkage void
-+aes_gcm_precompute_aesni_avx(struct aes_gcm_key_aesni *key);
-+asmlinkage void
-+aes_gcm_precompute_vaes_avx10_256(struct aes_gcm_key_avx10 *key);
-+asmlinkage void
-+aes_gcm_precompute_vaes_avx10_512(struct aes_gcm_key_avx10 *key);
-+
-+static void aes_gcm_precompute(struct aes_gcm_key *key, int flags)
-+{
-+	/*
-+	 * To make things a bit easier on the assembly side, the AVX10
-+	 * implementations use the same key format.  Therefore, a single
-+	 * function using 256-bit vectors would suffice here.  However, it's
-+	 * straightforward to provide a 512-bit one because of how the assembly
-+	 * code is structured, and it works nicely because the total size of the
-+	 * key powers is a multiple of 512 bits.  So we take advantage of that.
-+	 *
-+	 * A similar situation applies to the AES-NI implementations.
-+	 */
-+	if (flags & FLAG_AVX10_512)
-+		aes_gcm_precompute_vaes_avx10_512(AES_GCM_KEY_AVX10(key));
-+	else if (flags & FLAG_AVX10_256)
-+		aes_gcm_precompute_vaes_avx10_256(AES_GCM_KEY_AVX10(key));
-+	else if (flags & FLAG_AVX)
-+		aes_gcm_precompute_aesni_avx(AES_GCM_KEY_AESNI(key));
-+	else
-+		aes_gcm_precompute_aesni(AES_GCM_KEY_AESNI(key));
-+}
-+
-+asmlinkage void
-+aes_gcm_aad_update_aesni(const struct aes_gcm_key_aesni *key,
-+			 u8 ghash_acc[16], const u8 *aad, int aadlen);
-+asmlinkage void
-+aes_gcm_aad_update_aesni_avx(const struct aes_gcm_key_aesni *key,
-+			     u8 ghash_acc[16], const u8 *aad, int aadlen);
-+asmlinkage void
-+aes_gcm_aad_update_vaes_avx10(const struct aes_gcm_key_avx10 *key,
-+			      u8 ghash_acc[16], const u8 *aad, int aadlen);
-+
-+static void aes_gcm_aad_update(const struct aes_gcm_key *key, u8 ghash_acc[16],
-+			       const u8 *aad, int aadlen, int flags)
-+{
-+	if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
-+		aes_gcm_aad_update_vaes_avx10(AES_GCM_KEY_AVX10(key), ghash_acc,
-+					      aad, aadlen);
-+	else if (flags & FLAG_AVX)
-+		aes_gcm_aad_update_aesni_avx(AES_GCM_KEY_AESNI(key), ghash_acc,
-+					     aad, aadlen);
-+	else
-+		aes_gcm_aad_update_aesni(AES_GCM_KEY_AESNI(key), ghash_acc,
-+					 aad, aadlen);
-+}
-+
-+asmlinkage void
-+aes_gcm_enc_update_aesni(const struct aes_gcm_key_aesni *key,
-+			 const u32 le_ctr[4], u8 ghash_acc[16],
-+			 const u8 *src, u8 *dst, int datalen);
-+asmlinkage void
-+aes_gcm_enc_update_aesni_avx(const struct aes_gcm_key_aesni *key,
-+			     const u32 le_ctr[4], u8 ghash_acc[16],
-+			     const u8 *src, u8 *dst, int datalen);
-+asmlinkage void
-+aes_gcm_enc_update_vaes_avx10_256(const struct aes_gcm_key_avx10 *key,
-+				  const u32 le_ctr[4], u8 ghash_acc[16],
-+				  const u8 *src, u8 *dst, int datalen);
-+asmlinkage void
-+aes_gcm_enc_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key,
-+				  const u32 le_ctr[4], u8 ghash_acc[16],
-+				  const u8 *src, u8 *dst, int datalen);
-+
-+asmlinkage void
-+aes_gcm_dec_update_aesni(const struct aes_gcm_key_aesni *key,
-+			 const u32 le_ctr[4], u8 ghash_acc[16],
-+			 const u8 *src, u8 *dst, int datalen);
-+asmlinkage void
-+aes_gcm_dec_update_aesni_avx(const struct aes_gcm_key_aesni *key,
-+			     const u32 le_ctr[4], u8 ghash_acc[16],
-+			     const u8 *src, u8 *dst, int datalen);
-+asmlinkage void
-+aes_gcm_dec_update_vaes_avx10_256(const struct aes_gcm_key_avx10 *key,
-+				  const u32 le_ctr[4], u8 ghash_acc[16],
-+				  const u8 *src, u8 *dst, int datalen);
-+asmlinkage void
-+aes_gcm_dec_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key,
-+				  const u32 le_ctr[4], u8 ghash_acc[16],
-+				  const u8 *src, u8 *dst, int datalen);
-+
-+/* __always_inline to optimize out the branches based on @flags */
-+static __always_inline void
-+aes_gcm_update(const struct aes_gcm_key *key,
-+	       const u32 le_ctr[4], u8 ghash_acc[16],
-+	       const u8 *src, u8 *dst, int datalen, int flags)
-+{
-+	if (flags & FLAG_ENC) {
-+		if (flags & FLAG_AVX10_512)
-+			aes_gcm_enc_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key),
-+							  le_ctr, ghash_acc,
-+							  src, dst, datalen);
-+		else if (flags & FLAG_AVX10_256)
-+			aes_gcm_enc_update_vaes_avx10_256(AES_GCM_KEY_AVX10(key),
-+							  le_ctr, ghash_acc,
-+							  src, dst, datalen);
-+		else if (flags & FLAG_AVX)
-+			aes_gcm_enc_update_aesni_avx(AES_GCM_KEY_AESNI(key),
-+						     le_ctr, ghash_acc,
-+						     src, dst, datalen);
-+		else
-+			aes_gcm_enc_update_aesni(AES_GCM_KEY_AESNI(key), le_ctr,
-+						 ghash_acc, src, dst, datalen);
-+	} else {
-+		if (flags & FLAG_AVX10_512)
-+			aes_gcm_dec_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key),
-+							  le_ctr, ghash_acc,
-+							  src, dst, datalen);
-+		else if (flags & FLAG_AVX10_256)
-+			aes_gcm_dec_update_vaes_avx10_256(AES_GCM_KEY_AVX10(key),
-+							  le_ctr, ghash_acc,
-+							  src, dst, datalen);
-+		else if (flags & FLAG_AVX)
-+			aes_gcm_dec_update_aesni_avx(AES_GCM_KEY_AESNI(key),
-+						     le_ctr, ghash_acc,
-+						     src, dst, datalen);
-+		else
-+			aes_gcm_dec_update_aesni(AES_GCM_KEY_AESNI(key),
-+						 le_ctr, ghash_acc,
-+						 src, dst, datalen);
-+	}
-+}
-+
-+asmlinkage void
-+aes_gcm_enc_final_aesni(const struct aes_gcm_key_aesni *key,
-+			const u32 le_ctr[4], u8 ghash_acc[16],
-+			u64 total_aadlen, u64 total_datalen);
-+asmlinkage void
-+aes_gcm_enc_final_aesni_avx(const struct aes_gcm_key_aesni *key,
-+			    const u32 le_ctr[4], u8 ghash_acc[16],
-+			    u64 total_aadlen, u64 total_datalen);
-+asmlinkage void
-+aes_gcm_enc_final_vaes_avx10(const struct aes_gcm_key_avx10 *key,
-+			     const u32 le_ctr[4], u8 ghash_acc[16],
-+			     u64 total_aadlen, u64 total_datalen);
-+
-+/* __always_inline to optimize out the branches based on @flags */
-+static __always_inline void
-+aes_gcm_enc_final(const struct aes_gcm_key *key,
-+		  const u32 le_ctr[4], u8 ghash_acc[16],
-+		  u64 total_aadlen, u64 total_datalen, int flags)
-+{
-+	if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
-+		aes_gcm_enc_final_vaes_avx10(AES_GCM_KEY_AVX10(key),
-+					     le_ctr, ghash_acc,
-+					     total_aadlen, total_datalen);
-+	else if (flags & FLAG_AVX)
-+		aes_gcm_enc_final_aesni_avx(AES_GCM_KEY_AESNI(key),
-+					    le_ctr, ghash_acc,
-+					    total_aadlen, total_datalen);
-+	else
-+		aes_gcm_enc_final_aesni(AES_GCM_KEY_AESNI(key),
-+					le_ctr, ghash_acc,
-+					total_aadlen, total_datalen);
-+}
-+
-+asmlinkage bool __must_check
-+aes_gcm_dec_final_aesni(const struct aes_gcm_key_aesni *key,
-+			const u32 le_ctr[4], const u8 ghash_acc[16],
-+			u64 total_aadlen, u64 total_datalen,
-+			const u8 tag[16], int taglen);
-+asmlinkage bool __must_check
-+aes_gcm_dec_final_aesni_avx(const struct aes_gcm_key_aesni *key,
-+			    const u32 le_ctr[4], const u8 ghash_acc[16],
-+			    u64 total_aadlen, u64 total_datalen,
-+			    const u8 tag[16], int taglen);
-+asmlinkage bool __must_check
-+aes_gcm_dec_final_vaes_avx10(const struct aes_gcm_key_avx10 *key,
-+			     const u32 le_ctr[4], const u8 ghash_acc[16],
-+			     u64 total_aadlen, u64 total_datalen,
-+			     const u8 tag[16], int taglen);
-+
-+/* __always_inline to optimize out the branches based on @flags */
-+static __always_inline bool __must_check
-+aes_gcm_dec_final(const struct aes_gcm_key *key, const u32 le_ctr[4],
-+		  u8 ghash_acc[16], u64 total_aadlen, u64 total_datalen,
-+		  u8 tag[16], int taglen, int flags)
-+{
-+	if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
-+		return aes_gcm_dec_final_vaes_avx10(AES_GCM_KEY_AVX10(key),
-+						    le_ctr, ghash_acc,
-+						    total_aadlen, total_datalen,
-+						    tag, taglen);
-+	else if (flags & FLAG_AVX)
-+		return aes_gcm_dec_final_aesni_avx(AES_GCM_KEY_AESNI(key),
-+						   le_ctr, ghash_acc,
-+						   total_aadlen, total_datalen,
-+						   tag, taglen);
-+	else
-+		return aes_gcm_dec_final_aesni(AES_GCM_KEY_AESNI(key),
-+					       le_ctr, ghash_acc,
-+					       total_aadlen, total_datalen,
-+					       tag, taglen);
-+}
-+
-+/*
-+ * This is the Integrity Check Value (aka the authentication tag) length and can
-+ * be 8, 12 or 16 bytes long.
-+ */
-+static int common_rfc4106_set_authsize(struct crypto_aead *aead,
-+				       unsigned int authsize)
-+{
-+	switch (authsize) {
-+	case 8:
-+	case 12:
-+	case 16:
-+		break;
-+	default:
-+		return -EINVAL;
-+	}
-+
-+	return 0;
-+}
-+
-+static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
-+				       unsigned int authsize)
-+{
-+	switch (authsize) {
-+	case 4:
-+	case 8:
-+	case 12:
-+	case 13:
-+	case 14:
-+	case 15:
-+	case 16:
-+		break;
-+	default:
-+		return -EINVAL;
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * This is the setkey function for the x86_64 implementations of AES-GCM.  It
-+ * saves the RFC4106 nonce if applicable, expands the AES key, and precomputes
-+ * powers of the hash key.
-+ *
-+ * To comply with the crypto_aead API, this has to be usable in no-SIMD context.
-+ * For that reason, this function includes a portable C implementation of the
-+ * needed logic.  However, the portable C implementation is very slow, taking
-+ * about the same time as encrypting 37 KB of data.  To be ready for users that
-+ * may set a key even somewhat frequently, we therefore also include a SIMD
-+ * assembly implementation, expanding the AES key using AES-NI and precomputing
-+ * the hash key powers using PCLMULQDQ or VPCLMULQDQ.
-+ */
-+static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key,
-+		      unsigned int keylen, int flags)
-+{
-+	struct aes_gcm_key *key = aes_gcm_key_get(tfm, flags);
-+	int err;
-+
-+	if (flags & FLAG_RFC4106) {
-+		if (keylen < 4)
-+			return -EINVAL;
-+		keylen -= 4;
-+		key->rfc4106_nonce = get_unaligned_be32(raw_key + keylen);
-+	}
-+
-+	/* The assembly code assumes the following offsets. */
-+	BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, base.aes_key.key_enc) != 0);
-+	BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, base.aes_key.key_length) != 480);
-+	BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers) != 496);
-+	BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers_xored) != 624);
-+	BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_times_x64) != 688);
-+	BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_enc) != 0);
-+	BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_length) != 480);
-+	BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, h_powers) != 512);
-+	BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, padding) != 768);
-+
-+	if (likely(crypto_simd_usable())) {
-+		err = aes_check_keylen(keylen);
-+		if (err)
-+			return err;
-+		kernel_fpu_begin();
-+		aesni_set_key(&key->aes_key, raw_key, keylen);
-+		aes_gcm_precompute(key, flags);
-+		kernel_fpu_end();
-+	} else {
-+		static const u8 x_to_the_minus1[16] __aligned(__alignof__(be128)) = {
-+			[0] = 0xc2, [15] = 1
-+		};
-+		static const u8 x_to_the_63[16] __aligned(__alignof__(be128)) = {
-+			[7] = 1,
-+		};
-+		be128 h1 = {};
-+		be128 h;
-+		int i;
-+
-+		err = aes_expandkey(&key->aes_key, raw_key, keylen);
-+		if (err)
-+			return err;
-+
-+		/* Encrypt the all-zeroes block to get the hash key H^1 */
-+		aes_encrypt(&key->aes_key, (u8 *)&h1, (u8 *)&h1);
-+
-+		/* Compute H^1 * x^-1 */
-+		h = h1;
-+		gf128mul_lle(&h, (const be128 *)x_to_the_minus1);
-+
-+		/* Compute the needed key powers */
-+		if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) {
-+			struct aes_gcm_key_avx10 *k = AES_GCM_KEY_AVX10(key);
-+
-+			for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) {
-+				k->h_powers[i][0] = be64_to_cpu(h.b);
-+				k->h_powers[i][1] = be64_to_cpu(h.a);
-+				gf128mul_lle(&h, &h1);
-+			}
-+			memset(k->padding, 0, sizeof(k->padding));
-+		} else {
-+			struct aes_gcm_key_aesni *k = AES_GCM_KEY_AESNI(key);
-+
-+			for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) {
-+				k->h_powers[i][0] = be64_to_cpu(h.b);
-+				k->h_powers[i][1] = be64_to_cpu(h.a);
-+				k->h_powers_xored[i] = k->h_powers[i][0] ^
-+						       k->h_powers[i][1];
-+				gf128mul_lle(&h, &h1);
-+			}
-+			gf128mul_lle(&h1, (const be128 *)x_to_the_63);
-+			k->h_times_x64[0] = be64_to_cpu(h1.b);
-+			k->h_times_x64[1] = be64_to_cpu(h1.a);
-+		}
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Initialize @ghash_acc, then pass all @assoclen bytes of associated data
-+ * (a.k.a. additional authenticated data) from @sg_src through the GHASH update
-+ * assembly function.  kernel_fpu_begin() must have already been called.
-+ */
-+static void gcm_process_assoc(const struct aes_gcm_key *key, u8 ghash_acc[16],
-+			      struct scatterlist *sg_src, unsigned int assoclen,
-+			      int flags)
-+{
-+	struct scatter_walk walk;
-+	/*
-+	 * The assembly function requires that the length of any non-last
-+	 * segment of associated data be a multiple of 16 bytes, so this
-+	 * function does the buffering needed to achieve that.
-+	 */
-+	unsigned int pos = 0;
-+	u8 buf[16];
-+
-+	memset(ghash_acc, 0, 16);
-+	scatterwalk_start(&walk, sg_src);
-+
-+	while (assoclen) {
-+		unsigned int len_this_page = scatterwalk_clamp(&walk, assoclen);
-+		void *mapped = scatterwalk_map(&walk);
-+		const void *src = mapped;
-+		unsigned int len;
-+
-+		assoclen -= len_this_page;
-+		scatterwalk_advance(&walk, len_this_page);
-+		if (unlikely(pos)) {
-+			len = min(len_this_page, 16 - pos);
-+			memcpy(&buf[pos], src, len);
-+			pos += len;
-+			src += len;
-+			len_this_page -= len;
-+			if (pos < 16)
-+				goto next;
-+			aes_gcm_aad_update(key, ghash_acc, buf, 16, flags);
-+			pos = 0;
-+		}
-+		len = len_this_page;
-+		if (unlikely(assoclen)) /* Not the last segment yet? */
-+			len = round_down(len, 16);
-+		aes_gcm_aad_update(key, ghash_acc, src, len, flags);
-+		src += len;
-+		len_this_page -= len;
-+		if (unlikely(len_this_page)) {
-+			memcpy(buf, src, len_this_page);
-+			pos = len_this_page;
-+		}
-+next:
-+		scatterwalk_unmap(mapped);
-+		scatterwalk_pagedone(&walk, 0, assoclen);
-+		if (need_resched()) {
-+			kernel_fpu_end();
-+			kernel_fpu_begin();
-+		}
-+	}
-+	if (unlikely(pos))
-+		aes_gcm_aad_update(key, ghash_acc, buf, pos, flags);
-+}
-+
-+
-+/* __always_inline to optimize out the branches based on @flags */
-+static __always_inline int
-+gcm_crypt(struct aead_request *req, int flags)
-+{
-+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-+	const struct aes_gcm_key *key = aes_gcm_key_get(tfm, flags);
-+	unsigned int assoclen = req->assoclen;
-+	struct skcipher_walk walk;
-+	unsigned int nbytes;
-+	u8 ghash_acc[16]; /* GHASH accumulator */
-+	u32 le_ctr[4]; /* Counter in little-endian format */
-+	int taglen;
-+	int err;
-+
-+	/* Initialize the counter and determine the associated data length. */
-+	le_ctr[0] = 2;
-+	if (flags & FLAG_RFC4106) {
-+		if (unlikely(assoclen != 16 && assoclen != 20))
-+			return -EINVAL;
-+		assoclen -= 8;
-+		le_ctr[1] = get_unaligned_be32(req->iv + 4);
-+		le_ctr[2] = get_unaligned_be32(req->iv + 0);
-+		le_ctr[3] = key->rfc4106_nonce; /* already byte-swapped */
-+	} else {
-+		le_ctr[1] = get_unaligned_be32(req->iv + 8);
-+		le_ctr[2] = get_unaligned_be32(req->iv + 4);
-+		le_ctr[3] = get_unaligned_be32(req->iv + 0);
-+	}
-+
-+	/* Begin walking through the plaintext or ciphertext. */
-+	if (flags & FLAG_ENC)
-+		err = skcipher_walk_aead_encrypt(&walk, req, false);
-+	else
-+		err = skcipher_walk_aead_decrypt(&walk, req, false);
-+
-+	/*
-+	 * Since the AES-GCM assembly code requires that at least three assembly
-+	 * functions be called to process any message (this is needed to support
-+	 * incremental updates cleanly), to reduce overhead we try to do all
-+	 * three calls in the same kernel FPU section if possible.  We close the
-+	 * section and start a new one if there are multiple data segments or if
-+	 * rescheduling is needed while processing the associated data.
-+	 */
-+	kernel_fpu_begin();
-+
-+	/* Pass the associated data through GHASH. */
-+	gcm_process_assoc(key, ghash_acc, req->src, assoclen, flags);
-+
-+	/* En/decrypt the data and pass the ciphertext through GHASH. */
-+	while ((nbytes = walk.nbytes) != 0) {
-+		if (unlikely(nbytes < walk.total)) {
-+			/*
-+			 * Non-last segment.  In this case, the assembly
-+			 * function requires that the length be a multiple of 16
-+			 * (AES_BLOCK_SIZE) bytes.  The needed buffering of up
-+			 * to 16 bytes is handled by the skcipher_walk.  Here we
-+			 * just need to round down to a multiple of 16.
-+			 */
-+			nbytes = round_down(nbytes, AES_BLOCK_SIZE);
-+			aes_gcm_update(key, le_ctr, ghash_acc,
-+				       walk.src.virt.addr, walk.dst.virt.addr,
-+				       nbytes, flags);
-+			le_ctr[0] += nbytes / AES_BLOCK_SIZE;
-+			kernel_fpu_end();
-+			err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
-+			kernel_fpu_begin();
-+		} else {
-+			/* Last segment: process all remaining data. */
-+			aes_gcm_update(key, le_ctr, ghash_acc,
-+				       walk.src.virt.addr, walk.dst.virt.addr,
-+				       nbytes, flags);
-+			err = skcipher_walk_done(&walk, 0);
-+			/*
-+			 * The low word of the counter isn't used by the
-+			 * finalize, so there's no need to increment it here.
-+			 */
-+		}
-+	}
-+	if (err)
-+		goto out;
-+
-+	/* Finalize */
-+	taglen = crypto_aead_authsize(tfm);
-+	if (flags & FLAG_ENC) {
-+		/* Finish computing the auth tag. */
-+		aes_gcm_enc_final(key, le_ctr, ghash_acc, assoclen,
-+				  req->cryptlen, flags);
-+
-+		/* Store the computed auth tag in the dst scatterlist. */
-+		scatterwalk_map_and_copy(ghash_acc, req->dst, req->assoclen +
-+					 req->cryptlen, taglen, 1);
-+	} else {
-+		unsigned int datalen = req->cryptlen - taglen;
-+		u8 tag[16];
-+
-+		/* Get the transmitted auth tag from the src scatterlist. */
-+		scatterwalk_map_and_copy(tag, req->src, req->assoclen + datalen,
-+					 taglen, 0);
-+		/*
-+		 * Finish computing the auth tag and compare it to the
-+		 * transmitted one.  The assembly function does the actual tag
-+		 * comparison.  Here, just check the boolean result.
-+		 */
-+		if (!aes_gcm_dec_final(key, le_ctr, ghash_acc, assoclen,
-+				       datalen, tag, taglen, flags))
-+			err = -EBADMSG;
-+	}
-+out:
-+	kernel_fpu_end();
-+	return err;
-+}
-+
-+#define DEFINE_GCM_ALGS(suffix, flags, generic_driver_name, rfc_driver_name,   \
-+			ctxsize, priority)				       \
-+									       \
-+static int gcm_setkey_##suffix(struct crypto_aead *tfm, const u8 *raw_key,     \
-+			       unsigned int keylen)			       \
-+{									       \
-+	return gcm_setkey(tfm, raw_key, keylen, (flags));		       \
-+}									       \
-+									       \
-+static int gcm_encrypt_##suffix(struct aead_request *req)		       \
-+{									       \
-+	return gcm_crypt(req, (flags) | FLAG_ENC);			       \
-+}									       \
-+									       \
-+static int gcm_decrypt_##suffix(struct aead_request *req)		       \
-+{									       \
-+	return gcm_crypt(req, (flags));					       \
-+}									       \
-+									       \
-+static int rfc4106_setkey_##suffix(struct crypto_aead *tfm, const u8 *raw_key, \
-+				   unsigned int keylen)			       \
-+{									       \
-+	return gcm_setkey(tfm, raw_key, keylen, (flags) | FLAG_RFC4106);       \
-+}									       \
-+									       \
-+static int rfc4106_encrypt_##suffix(struct aead_request *req)		       \
-+{									       \
-+	return gcm_crypt(req, (flags) | FLAG_RFC4106 | FLAG_ENC);	       \
-+}									       \
-+									       \
-+static int rfc4106_decrypt_##suffix(struct aead_request *req)		       \
-+{									       \
-+	return gcm_crypt(req, (flags) | FLAG_RFC4106);			       \
-+}									       \
-+									       \
-+static struct aead_alg aes_gcm_algs_##suffix[] = { {			       \
-+	.setkey			= gcm_setkey_##suffix,			       \
-+	.setauthsize		= generic_gcmaes_set_authsize,		       \
-+	.encrypt		= gcm_encrypt_##suffix,			       \
-+	.decrypt		= gcm_decrypt_##suffix,			       \
-+	.ivsize			= GCM_AES_IV_SIZE,			       \
-+	.chunksize		= AES_BLOCK_SIZE,			       \
-+	.maxauthsize		= 16,					       \
-+	.base = {							       \
-+		.cra_name		= "__gcm(aes)",			       \
-+		.cra_driver_name	= "__" generic_driver_name,	       \
-+		.cra_priority		= (priority),			       \
-+		.cra_flags		= CRYPTO_ALG_INTERNAL,		       \
-+		.cra_blocksize		= 1,				       \
-+		.cra_ctxsize		= (ctxsize),			       \
-+		.cra_module		= THIS_MODULE,			       \
-+	},								       \
-+}, {									       \
-+	.setkey			= rfc4106_setkey_##suffix,		       \
-+	.setauthsize		= common_rfc4106_set_authsize,		       \
-+	.encrypt		= rfc4106_encrypt_##suffix,		       \
-+	.decrypt		= rfc4106_decrypt_##suffix,		       \
-+	.ivsize			= GCM_RFC4106_IV_SIZE,			       \
-+	.chunksize		= AES_BLOCK_SIZE,			       \
-+	.maxauthsize		= 16,					       \
-+	.base = {							       \
-+		.cra_name		= "__rfc4106(gcm(aes))",	       \
-+		.cra_driver_name	= "__" rfc_driver_name,		       \
-+		.cra_priority		= (priority),			       \
-+		.cra_flags		= CRYPTO_ALG_INTERNAL,		       \
-+		.cra_blocksize		= 1,				       \
-+		.cra_ctxsize		= (ctxsize),			       \
-+		.cra_module		= THIS_MODULE,			       \
-+	},								       \
-+} };									       \
-+									       \
-+static struct simd_aead_alg *aes_gcm_simdalgs_##suffix[2]		       \
-+
-+/* aes_gcm_algs_aesni */
-+DEFINE_GCM_ALGS(aesni, /* no flags */ 0,
-+		"generic-gcm-aesni", "rfc4106-gcm-aesni",
-+		AES_GCM_KEY_AESNI_SIZE, 400);
-+
-+/* aes_gcm_algs_aesni_avx */
-+DEFINE_GCM_ALGS(aesni_avx, FLAG_AVX,
-+		"generic-gcm-aesni-avx", "rfc4106-gcm-aesni-avx",
-+		AES_GCM_KEY_AESNI_SIZE, 500);
-+
-+#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
-+/* aes_gcm_algs_vaes_avx10_256 */
-+DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256,
-+		"generic-gcm-vaes-avx10_256", "rfc4106-gcm-vaes-avx10_256",
-+		AES_GCM_KEY_AVX10_SIZE, 700);
-+
-+/* aes_gcm_algs_vaes_avx10_512 */
-+DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512,
-+		"generic-gcm-vaes-avx10_512", "rfc4106-gcm-vaes-avx10_512",
-+		AES_GCM_KEY_AVX10_SIZE, 800);
-+#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
-+
- /*
-  * This is a list of CPU models that are known to suffer from downclocking when
-- * zmm registers (512-bit vectors) are used.  On these CPUs, the AES-XTS
-- * implementation with zmm registers won't be used by default.  An
-- * implementation with ymm registers (256-bit vectors) will be used instead.
-+ * zmm registers (512-bit vectors) are used.  On these CPUs, the AES mode
-+ * implementations with zmm registers won't be used by default.  Implementations
-+ * with ymm registers (256-bit vectors) will be used by default instead.
-  */
- static const struct x86_cpu_id zmm_exclusion_list[] = {
- 	X86_MATCH_VFM(INTEL_SKYLAKE_X,		0),
-@@ -1236,7 +1559,7 @@ static const struct x86_cpu_id zmm_exclusion_list[] = {
- 	{},
- };
- 
--static int __init register_xts_algs(void)
-+static int __init register_avx_algs(void)
- {
- 	int err;
- 
-@@ -1246,6 +1569,11 @@ static int __init register_xts_algs(void)
- 					     &aes_xts_simdalg_aesni_avx);
- 	if (err)
- 		return err;
-+	err = simd_register_aeads_compat(aes_gcm_algs_aesni_avx,
-+					 ARRAY_SIZE(aes_gcm_algs_aesni_avx),
-+					 aes_gcm_simdalgs_aesni_avx);
-+	if (err)
-+		return err;
- #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
- 	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
- 	    !boot_cpu_has(X86_FEATURE_VAES) ||
-@@ -1269,23 +1597,42 @@ static int __init register_xts_algs(void)
- 					     &aes_xts_simdalg_vaes_avx10_256);
- 	if (err)
- 		return err;
-+	err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_256,
-+					 ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
-+					 aes_gcm_simdalgs_vaes_avx10_256);
-+	if (err)
-+		return err;
-+
-+	if (x86_match_cpu(zmm_exclusion_list)) {
-+		int i;
- 
--	if (x86_match_cpu(zmm_exclusion_list))
- 		aes_xts_alg_vaes_avx10_512.base.cra_priority = 1;
-+		for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++)
-+			aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1;
-+	}
- 
- 	err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_512, 1,
- 					     &aes_xts_simdalg_vaes_avx10_512);
- 	if (err)
- 		return err;
-+	err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_512,
-+					 ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512),
-+					 aes_gcm_simdalgs_vaes_avx10_512);
-+	if (err)
-+		return err;
- #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
- 	return 0;
- }
- 
--static void unregister_xts_algs(void)
-+static void unregister_avx_algs(void)
- {
- 	if (aes_xts_simdalg_aesni_avx)
- 		simd_unregister_skciphers(&aes_xts_alg_aesni_avx, 1,
- 					  &aes_xts_simdalg_aesni_avx);
-+	if (aes_gcm_simdalgs_aesni_avx[0])
-+		simd_unregister_aeads(aes_gcm_algs_aesni_avx,
-+				      ARRAY_SIZE(aes_gcm_algs_aesni_avx),
-+				      aes_gcm_simdalgs_aesni_avx);
- #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
- 	if (aes_xts_simdalg_vaes_avx2)
- 		simd_unregister_skciphers(&aes_xts_alg_vaes_avx2, 1,
-@@ -1293,106 +1640,33 @@ static void unregister_xts_algs(void)
- 	if (aes_xts_simdalg_vaes_avx10_256)
- 		simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_256, 1,
- 					  &aes_xts_simdalg_vaes_avx10_256);
-+	if (aes_gcm_simdalgs_vaes_avx10_256[0])
-+		simd_unregister_aeads(aes_gcm_algs_vaes_avx10_256,
-+				      ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
-+				      aes_gcm_simdalgs_vaes_avx10_256);
- 	if (aes_xts_simdalg_vaes_avx10_512)
- 		simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_512, 1,
- 					  &aes_xts_simdalg_vaes_avx10_512);
-+	if (aes_gcm_simdalgs_vaes_avx10_512[0])
-+		simd_unregister_aeads(aes_gcm_algs_vaes_avx10_512,
-+				      ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512),
-+				      aes_gcm_simdalgs_vaes_avx10_512);
- #endif
- }
- #else /* CONFIG_X86_64 */
--static int __init register_xts_algs(void)
-+static struct aead_alg aes_gcm_algs_aesni[0];
-+static struct simd_aead_alg *aes_gcm_simdalgs_aesni[0];
-+
-+static int __init register_avx_algs(void)
- {
- 	return 0;
- }
- 
--static void unregister_xts_algs(void)
-+static void unregister_avx_algs(void)
- {
- }
- #endif /* !CONFIG_X86_64 */
- 
--#ifdef CONFIG_X86_64
--static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
--				  unsigned int key_len)
--{
--	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(aead);
--
--	return aes_set_key_common(&ctx->aes_key_expanded, key, key_len) ?:
--	       aes_gcm_derive_hash_subkey(&ctx->aes_key_expanded,
--					  ctx->hash_subkey);
--}
--
--static int generic_gcmaes_encrypt(struct aead_request *req)
--{
--	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
--	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
--	void *aes_ctx = &(ctx->aes_key_expanded);
--	u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
--	u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
--	__be32 counter = cpu_to_be32(1);
--
--	memcpy(iv, req->iv, 12);
--	*((__be32 *)(iv+12)) = counter;
--
--	return gcmaes_encrypt(req, req->assoclen, ctx->hash_subkey, iv,
--			      aes_ctx);
--}
--
--static int generic_gcmaes_decrypt(struct aead_request *req)
--{
--	__be32 counter = cpu_to_be32(1);
--	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
--	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
--	void *aes_ctx = &(ctx->aes_key_expanded);
--	u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
--	u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
--
--	memcpy(iv, req->iv, 12);
--	*((__be32 *)(iv+12)) = counter;
--
--	return gcmaes_decrypt(req, req->assoclen, ctx->hash_subkey, iv,
--			      aes_ctx);
--}
--
--static struct aead_alg aesni_aeads[] = { {
--	.setkey			= common_rfc4106_set_key,
--	.setauthsize		= common_rfc4106_set_authsize,
--	.encrypt		= helper_rfc4106_encrypt,
--	.decrypt		= helper_rfc4106_decrypt,
--	.ivsize			= GCM_RFC4106_IV_SIZE,
--	.maxauthsize		= 16,
--	.base = {
--		.cra_name		= "__rfc4106(gcm(aes))",
--		.cra_driver_name	= "__rfc4106-gcm-aesni",
--		.cra_priority		= 400,
--		.cra_flags		= CRYPTO_ALG_INTERNAL,
--		.cra_blocksize		= 1,
--		.cra_ctxsize		= sizeof(struct aesni_rfc4106_gcm_ctx),
--		.cra_alignmask		= 0,
--		.cra_module		= THIS_MODULE,
--	},
--}, {
--	.setkey			= generic_gcmaes_set_key,
--	.setauthsize		= generic_gcmaes_set_authsize,
--	.encrypt		= generic_gcmaes_encrypt,
--	.decrypt		= generic_gcmaes_decrypt,
--	.ivsize			= GCM_AES_IV_SIZE,
--	.maxauthsize		= 16,
--	.base = {
--		.cra_name		= "__gcm(aes)",
--		.cra_driver_name	= "__generic-gcm-aesni",
--		.cra_priority		= 400,
--		.cra_flags		= CRYPTO_ALG_INTERNAL,
--		.cra_blocksize		= 1,
--		.cra_ctxsize		= sizeof(struct generic_gcmaes_ctx),
--		.cra_alignmask		= 0,
--		.cra_module		= THIS_MODULE,
--	},
--} };
--#else
--static struct aead_alg aesni_aeads[0];
--#endif
--
--static struct simd_aead_alg *aesni_simd_aeads[ARRAY_SIZE(aesni_aeads)];
--
- static const struct x86_cpu_id aesni_cpu_id[] = {
- 	X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
- 	{}
-@@ -1406,17 +1680,6 @@ static int __init aesni_init(void)
- 	if (!x86_match_cpu(aesni_cpu_id))
- 		return -ENODEV;
- #ifdef CONFIG_X86_64
--	if (boot_cpu_has(X86_FEATURE_AVX2)) {
--		pr_info("AVX2 version of gcm_enc/dec engaged.\n");
--		static_branch_enable(&gcm_use_avx);
--		static_branch_enable(&gcm_use_avx2);
--	} else
--	if (boot_cpu_has(X86_FEATURE_AVX)) {
--		pr_info("AVX version of gcm_enc/dec engaged.\n");
--		static_branch_enable(&gcm_use_avx);
--	} else {
--		pr_info("SSE version of gcm_enc/dec engaged.\n");
--	}
- 	if (boot_cpu_has(X86_FEATURE_AVX)) {
- 		/* optimize performance of ctr mode encryption transform */
- 		static_call_update(aesni_ctr_enc_tfm, aesni_ctr_enc_avx_tfm);
-@@ -1434,8 +1697,9 @@ static int __init aesni_init(void)
- 	if (err)
- 		goto unregister_cipher;
- 
--	err = simd_register_aeads_compat(aesni_aeads, ARRAY_SIZE(aesni_aeads),
--					 aesni_simd_aeads);
-+	err = simd_register_aeads_compat(aes_gcm_algs_aesni,
-+					 ARRAY_SIZE(aes_gcm_algs_aesni),
-+					 aes_gcm_simdalgs_aesni);
- 	if (err)
- 		goto unregister_skciphers;
- 
-@@ -1447,22 +1711,22 @@ static int __init aesni_init(void)
- 		goto unregister_aeads;
- #endif /* CONFIG_X86_64 */
- 
--	err = register_xts_algs();
-+	err = register_avx_algs();
- 	if (err)
--		goto unregister_xts;
-+		goto unregister_avx;
- 
- 	return 0;
- 
--unregister_xts:
--	unregister_xts_algs();
-+unregister_avx:
-+	unregister_avx_algs();
- #ifdef CONFIG_X86_64
- 	if (aesni_simd_xctr)
- 		simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr);
- unregister_aeads:
- #endif /* CONFIG_X86_64 */
--	simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads),
--				aesni_simd_aeads);
--
-+	simd_unregister_aeads(aes_gcm_algs_aesni,
-+			      ARRAY_SIZE(aes_gcm_algs_aesni),
-+			      aes_gcm_simdalgs_aesni);
- unregister_skciphers:
- 	simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
- 				  aesni_simd_skciphers);
-@@ -1473,8 +1737,9 @@ static int __init aesni_init(void)
- 
- static void __exit aesni_exit(void)
- {
--	simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads),
--			      aesni_simd_aeads);
-+	simd_unregister_aeads(aes_gcm_algs_aesni,
-+			      ARRAY_SIZE(aes_gcm_algs_aesni),
-+			      aes_gcm_simdalgs_aesni);
- 	simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
- 				  aesni_simd_skciphers);
- 	crypto_unregister_alg(&aesni_cipher_alg);
-@@ -1482,7 +1747,7 @@ static void __exit aesni_exit(void)
- 	if (boot_cpu_has(X86_FEATURE_AVX))
- 		simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr);
- #endif /* CONFIG_X86_64 */
--	unregister_xts_algs();
-+	unregister_avx_algs();
- }
- 
- late_initcall(aesni_init);
--- 
-2.46.0.rc1
-
-From 3a6187f4ef69fa4f0bf82ee5138e23bd83b85691 Mon Sep 17 00:00:00 2001
-From: Peter Jung <admin@ptr1337.dev>
-Date: Mon, 15 Jul 2024 13:25:57 +0200
-Subject: [PATCH 06/11] fixes
-
-Signed-off-by: Peter Jung <admin@ptr1337.dev>
----
- arch/Kconfig | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/arch/Kconfig b/arch/Kconfig
-index 975dd22a2dbd..de69b8f5b5be 100644
---- a/arch/Kconfig
-+++ b/arch/Kconfig
-@@ -1050,7 +1050,7 @@ config ARCH_MMAP_RND_BITS
- 	int "Number of bits to use for ASLR of mmap base address" if EXPERT
- 	range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX
- 	default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT
--	default ARCH_MMAP_RND_BITS_MIN
-+	default ARCH_MMAP_RND_BITS_MAX
- 	depends on HAVE_ARCH_MMAP_RND_BITS
- 	help
- 	  This value can be used to select the number of bits to use to
-@@ -1084,7 +1084,7 @@ config ARCH_MMAP_RND_COMPAT_BITS
- 	int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT
- 	range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX
- 	default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT
--	default ARCH_MMAP_RND_COMPAT_BITS_MIN
-+	default ARCH_MMAP_RND_COMPAT_BITS_MAX
- 	depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS
- 	help
- 	  This value can be used to select the number of bits to use to
--- 
-2.46.0.rc1
-
-From 33ec19c577f867fff299c3b0ed6d84f14cdc23ad Mon Sep 17 00:00:00 2001
-From: Peter Jung <admin@ptr1337.dev>
-Date: Mon, 15 Jul 2024 13:26:09 +0200
-Subject: [PATCH 07/11] ksm
-
-Signed-off-by: Peter Jung <admin@ptr1337.dev>
----
- arch/alpha/kernel/syscalls/syscall.tbl      |   3 +
- arch/arm/tools/syscall.tbl                  |   3 +
- arch/arm64/include/asm/unistd.h             |   2 +-
- arch/arm64/include/asm/unistd32.h           |   6 +
- arch/m68k/kernel/syscalls/syscall.tbl       |   3 +
- arch/microblaze/kernel/syscalls/syscall.tbl |   3 +
- arch/mips/kernel/syscalls/syscall_n32.tbl   |   3 +
- arch/mips/kernel/syscalls/syscall_n64.tbl   |   3 +
- arch/mips/kernel/syscalls/syscall_o32.tbl   |   3 +
- arch/parisc/kernel/syscalls/syscall.tbl     |   3 +
- arch/powerpc/kernel/syscalls/syscall.tbl    |   3 +
- arch/s390/kernel/syscalls/syscall.tbl       |   3 +
- arch/sh/kernel/syscalls/syscall.tbl         |   3 +
- arch/sparc/kernel/syscalls/syscall.tbl      |   3 +
- arch/x86/entry/syscalls/syscall_32.tbl      |   3 +
- arch/x86/entry/syscalls/syscall_64.tbl      |   3 +
- arch/xtensa/kernel/syscalls/syscall.tbl     |   3 +
- include/linux/syscalls.h                    |   3 +
- include/uapi/asm-generic/unistd.h           |  11 +-
- kernel/sys.c                                | 147 ++++++++++++++++++++
- kernel/sys_ni.c                             |   3 +
- 21 files changed, 215 insertions(+), 2 deletions(-)
-
-diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
-index 74720667fe09..e6a11f3c0a2e 100644
---- a/arch/alpha/kernel/syscalls/syscall.tbl
-+++ b/arch/alpha/kernel/syscalls/syscall.tbl
-@@ -502,3 +502,6 @@
- 570	common	lsm_set_self_attr		sys_lsm_set_self_attr
- 571	common	lsm_list_modules		sys_lsm_list_modules
- 572	common  mseal				sys_mseal
-+573	common	process_ksm_enable		sys_process_ksm_enable
-+574	common	process_ksm_disable		sys_process_ksm_disable
-+575	common	process_ksm_status		sys_process_ksm_status
-diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
-index 2ed7d229c8f9..3f59e9c5c1ff 100644
---- a/arch/arm/tools/syscall.tbl
-+++ b/arch/arm/tools/syscall.tbl
-@@ -476,3 +476,6 @@
- 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
- 461	common	lsm_list_modules		sys_lsm_list_modules
- 462	common	mseal				sys_mseal
-+463	common	process_ksm_enable		sys_process_ksm_enable
-+464	common	process_ksm_disable		sys_process_ksm_disable
-+465	common	process_ksm_status		sys_process_ksm_status
-diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
-index 1346579f802f..f3a77719eb05 100644
---- a/arch/arm64/include/asm/unistd.h
-+++ b/arch/arm64/include/asm/unistd.h
-@@ -39,7 +39,7 @@
- #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE + 5)
- #define __ARM_NR_COMPAT_END		(__ARM_NR_COMPAT_BASE + 0x800)
- 
--#define __NR_compat_syscalls		463
-+#define __NR_compat_syscalls		466
- #endif
- 
- #define __ARCH_WANT_SYS_CLONE
-diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
-index 1386e8e751f2..ccdc523fa4bd 100644
---- a/arch/arm64/include/asm/unistd32.h
-+++ b/arch/arm64/include/asm/unistd32.h
-@@ -931,6 +931,12 @@ __SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr)
- __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
- #define __NR_mseal 462
- __SYSCALL(__NR_mseal, sys_mseal)
-+#define __NR_process_ksm_enable 463
-+__SYSCALL(__NR_process_ksm_enable, sys_process_ksm_enable)
-+#define __NR_process_ksm_disable 464
-+__SYSCALL(__NR_process_ksm_disable, sys_process_ksm_disable)
-+#define __NR_process_ksm_status 465
-+__SYSCALL(__NR_process_ksm_status, sys_process_ksm_status)
- 
- /*
-  * Please add new compat syscalls above this comment and update
-diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
-index 22a3cbd4c602..12d2c7594bf0 100644
---- a/arch/m68k/kernel/syscalls/syscall.tbl
-+++ b/arch/m68k/kernel/syscalls/syscall.tbl
-@@ -462,3 +462,6 @@
- 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
- 461	common	lsm_list_modules		sys_lsm_list_modules
- 462	common	mseal				sys_mseal
-+463	common	process_ksm_enable		sys_process_ksm_enable
-+464	common	process_ksm_disable		sys_process_ksm_disable
-+465	common	process_ksm_status		sys_process_ksm_status
-diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
-index 2b81a6bd78b2..e2a93c856eed 100644
---- a/arch/microblaze/kernel/syscalls/syscall.tbl
-+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
-@@ -468,3 +468,6 @@
- 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
- 461	common	lsm_list_modules		sys_lsm_list_modules
- 462	common	mseal				sys_mseal
-+463	common	process_ksm_enable		sys_process_ksm_enable
-+464	common	process_ksm_disable		sys_process_ksm_disable
-+465	common	process_ksm_status		sys_process_ksm_status
-diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
-index 953f5b7dc723..b921fbf56fa6 100644
---- a/arch/mips/kernel/syscalls/syscall_n32.tbl
-+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
-@@ -401,3 +401,6 @@
- 460	n32	lsm_set_self_attr		sys_lsm_set_self_attr
- 461	n32	lsm_list_modules		sys_lsm_list_modules
- 462	n32	mseal				sys_mseal
-+463	n32	process_ksm_enable		sys_process_ksm_enable
-+464	n32	process_ksm_disable		sys_process_ksm_disable
-+465	n32	process_ksm_status		sys_process_ksm_status
-diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
-index 1464c6be6eb3..8d7f9ddd66f4 100644
---- a/arch/mips/kernel/syscalls/syscall_n64.tbl
-+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
-@@ -377,3 +377,6 @@
- 460	n64	lsm_set_self_attr		sys_lsm_set_self_attr
- 461	n64	lsm_list_modules		sys_lsm_list_modules
- 462	n64	mseal				sys_mseal
-+463	n64	process_ksm_enable		sys_process_ksm_enable
-+464	n64	process_ksm_disable		sys_process_ksm_disable
-+465	n64	process_ksm_status		sys_process_ksm_status
-diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
-index 2439a2491cff..9d6142739954 100644
---- a/arch/mips/kernel/syscalls/syscall_o32.tbl
-+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
-@@ -450,3 +450,6 @@
- 460	o32	lsm_set_self_attr		sys_lsm_set_self_attr
- 461	o32	lsm_list_modules		sys_lsm_list_modules
- 462	o32	mseal				sys_mseal
-+463	o32	process_ksm_enable		sys_process_ksm_enable
-+464	o32	process_ksm_disable		sys_process_ksm_disable
-+465	o32	process_ksm_status		sys_process_ksm_status
-diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
-index 66dc406b12e4..9d46476fd908 100644
---- a/arch/parisc/kernel/syscalls/syscall.tbl
-+++ b/arch/parisc/kernel/syscalls/syscall.tbl
-@@ -461,3 +461,6 @@
- 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
- 461	common	lsm_list_modules		sys_lsm_list_modules
- 462	common	mseal				sys_mseal
-+463	common	process_ksm_enable		sys_process_ksm_enable
-+464	common	process_ksm_disable		sys_process_ksm_disable
-+465	common	process_ksm_status		sys_process_ksm_status
-diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
-index ebae8415dfbb..16f71bc2f6f0 100644
---- a/arch/powerpc/kernel/syscalls/syscall.tbl
-+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
-@@ -553,3 +553,6 @@
- 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
- 461	common	lsm_list_modules		sys_lsm_list_modules
- 462	common	mseal				sys_mseal
-+463	common	process_ksm_enable		sys_process_ksm_enable
-+464	common	process_ksm_disable		sys_process_ksm_disable
-+465	common	process_ksm_status		sys_process_ksm_status
-diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
-index 01071182763e..7394bad8178e 100644
---- a/arch/s390/kernel/syscalls/syscall.tbl
-+++ b/arch/s390/kernel/syscalls/syscall.tbl
-@@ -465,3 +465,6 @@
- 460  common	lsm_set_self_attr	sys_lsm_set_self_attr		sys_lsm_set_self_attr
- 461  common	lsm_list_modules	sys_lsm_list_modules		sys_lsm_list_modules
- 462  common	mseal			sys_mseal			sys_mseal
-+463  common	process_ksm_enable	sys_process_ksm_enable		sys_process_ksm_enable
-+464  common	process_ksm_disable	sys_process_ksm_disable		sys_process_ksm_disable
-+465  common	process_ksm_status	sys_process_ksm_status		sys_process_ksm_status
-diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
-index c55fd7696d40..b9fc31221b87 100644
---- a/arch/sh/kernel/syscalls/syscall.tbl
-+++ b/arch/sh/kernel/syscalls/syscall.tbl
-@@ -466,3 +466,6 @@
- 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
- 461	common	lsm_list_modules		sys_lsm_list_modules
- 462	common	mseal				sys_mseal
-+463	common	process_ksm_enable		sys_process_ksm_enable
-+464	common	process_ksm_disable		sys_process_ksm_disable
-+465	common	process_ksm_status		sys_process_ksm_status
-diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
-index cfdfb3707c16..0d79fd772854 100644
---- a/arch/sparc/kernel/syscalls/syscall.tbl
-+++ b/arch/sparc/kernel/syscalls/syscall.tbl
-@@ -508,3 +508,6 @@
- 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
- 461	common	lsm_list_modules		sys_lsm_list_modules
- 462	common	mseal 				sys_mseal
-+463	common	process_ksm_enable		sys_process_ksm_enable
-+464	common	process_ksm_disable		sys_process_ksm_disable
-+465	common	process_ksm_status		sys_process_ksm_status
-diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
-index d6ebcab1d8b2..ae5d147f05f2 100644
---- a/arch/x86/entry/syscalls/syscall_32.tbl
-+++ b/arch/x86/entry/syscalls/syscall_32.tbl
-@@ -467,3 +467,6 @@
- 460	i386	lsm_set_self_attr	sys_lsm_set_self_attr
- 461	i386	lsm_list_modules	sys_lsm_list_modules
- 462	i386	mseal 			sys_mseal
-+463	i386	process_ksm_enable		sys_process_ksm_enable
-+464	i386	process_ksm_disable		sys_process_ksm_disable
-+465	i386	process_ksm_status		sys_process_ksm_status
-diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
-index a396f6e6ab5b..472c23b39a70 100644
---- a/arch/x86/entry/syscalls/syscall_64.tbl
-+++ b/arch/x86/entry/syscalls/syscall_64.tbl
-@@ -384,6 +384,9 @@
- 460	common	lsm_set_self_attr	sys_lsm_set_self_attr
- 461	common	lsm_list_modules	sys_lsm_list_modules
- 462 	common  mseal			sys_mseal
-+463	common	process_ksm_enable	sys_process_ksm_enable
-+464	common	process_ksm_disable	sys_process_ksm_disable
-+465	common	process_ksm_status	sys_process_ksm_status
- 
- #
- # Due to a historical design error, certain syscalls are numbered differently
-diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
-index 67083fc1b2f5..c1aecee4ad9b 100644
---- a/arch/xtensa/kernel/syscalls/syscall.tbl
-+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
-@@ -433,3 +433,6 @@
- 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
- 461	common	lsm_list_modules		sys_lsm_list_modules
- 462	common	mseal 				sys_mseal
-+463	common	process_ksm_enable		sys_process_ksm_enable
-+464	common	process_ksm_disable		sys_process_ksm_disable
-+465	common	process_ksm_status		sys_process_ksm_status
-diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
-index fff820c3e93e..ab7d77ddc112 100644
---- a/include/linux/syscalls.h
-+++ b/include/linux/syscalls.h
-@@ -818,6 +818,9 @@ asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior);
- asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec,
- 			size_t vlen, int behavior, unsigned int flags);
- asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags);
-+asmlinkage long sys_process_ksm_enable(int pidfd, unsigned int flags);
-+asmlinkage long sys_process_ksm_disable(int pidfd, unsigned int flags);
-+asmlinkage long sys_process_ksm_status(int pidfd, unsigned int flags);
- asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
- 			unsigned long prot, unsigned long pgoff,
- 			unsigned long flags);
-diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
-index d4cc26932ff4..d191548f6326 100644
---- a/include/uapi/asm-generic/unistd.h
-+++ b/include/uapi/asm-generic/unistd.h
-@@ -845,8 +845,17 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
- #define __NR_mseal 462
- __SYSCALL(__NR_mseal, sys_mseal)
- 
-+#define __NR_process_ksm_enable 463
-+__SYSCALL(__NR_process_ksm_enable, sys_process_ksm_enable)
-+
-+#define __NR_process_ksm_disable 464
-+__SYSCALL(__NR_process_ksm_disable, sys_process_ksm_disable)
-+
-+#define __NR_process_ksm_status 465
-+__SYSCALL(__NR_process_ksm_status, sys_process_ksm_status)
-+
- #undef __NR_syscalls
--#define __NR_syscalls 463
-+#define __NR_syscalls 466
- 
- /*
-  * 32 bit systems traditionally used different
-diff --git a/kernel/sys.c b/kernel/sys.c
-index 3a2df1bd9f64..86c6dd9d8c84 100644
---- a/kernel/sys.c
-+++ b/kernel/sys.c
-@@ -2789,6 +2789,153 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
- 	return error;
- }
- 
-+#ifdef CONFIG_KSM
-+enum pkc_action {
-+	PKSM_ENABLE = 0,
-+	PKSM_DISABLE,
-+	PKSM_STATUS,
-+};
-+
-+static long do_process_ksm_control(int pidfd, enum pkc_action action)
-+{
-+	long ret;
-+	struct pid *pid;
-+	struct task_struct *task;
-+	struct mm_struct *mm;
-+	unsigned int f_flags;
-+
-+	pid = pidfd_get_pid(pidfd, &f_flags);
-+	if (IS_ERR(pid)) {
-+		ret = PTR_ERR(pid);
-+		goto out;
-+	}
-+
-+	task = get_pid_task(pid, PIDTYPE_PID);
-+	if (!task) {
-+		ret = -ESRCH;
-+		goto put_pid;
-+	}
-+
-+	/* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
-+	mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
-+	if (IS_ERR_OR_NULL(mm)) {
-+		ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
-+		goto release_task;
-+	}
-+
-+	/* Require CAP_SYS_NICE for influencing process performance. */
-+	if (!capable(CAP_SYS_NICE)) {
-+		ret = -EPERM;
-+		goto release_mm;
-+	}
-+
-+	if (mmap_write_lock_killable(mm)) {
-+		ret = -EINTR;
-+		goto release_mm;
-+	}
-+
-+	switch (action) {
-+		case PKSM_ENABLE:
-+			ret = ksm_enable_merge_any(mm);
-+			break;
-+		case PKSM_DISABLE:
-+			ret = ksm_disable_merge_any(mm);
-+			break;
-+		case PKSM_STATUS:
-+			ret = !!test_bit(MMF_VM_MERGE_ANY, &mm->flags);
-+			break;
-+	}
-+
-+	mmap_write_unlock(mm);
-+
-+release_mm:
-+	mmput(mm);
-+release_task:
-+	put_task_struct(task);
-+put_pid:
-+	put_pid(pid);
-+out:
-+	return ret;
-+}
-+#endif /* CONFIG_KSM */
-+
-+SYSCALL_DEFINE2(process_ksm_enable, int, pidfd, unsigned int, flags)
-+{
-+#ifdef CONFIG_KSM
-+	if (flags != 0)
-+		return -EINVAL;
-+
-+	return do_process_ksm_control(pidfd, PKSM_ENABLE);
-+#else /* CONFIG_KSM */
-+	return -ENOSYS;
-+#endif /* CONFIG_KSM */
-+}
-+
-+SYSCALL_DEFINE2(process_ksm_disable, int, pidfd, unsigned int, flags)
-+{
-+#ifdef CONFIG_KSM
-+	if (flags != 0)
-+		return -EINVAL;
-+
-+	return do_process_ksm_control(pidfd, PKSM_DISABLE);
-+#else /* CONFIG_KSM */
-+	return -ENOSYS;
-+#endif /* CONFIG_KSM */
-+}
-+
-+SYSCALL_DEFINE2(process_ksm_status, int, pidfd, unsigned int, flags)
-+{
-+#ifdef CONFIG_KSM
-+	if (flags != 0)
-+		return -EINVAL;
-+
-+	return do_process_ksm_control(pidfd, PKSM_STATUS);
-+#else /* CONFIG_KSM */
-+	return -ENOSYS;
-+#endif /* CONFIG_KSM */
-+}
-+
-+#ifdef CONFIG_KSM
-+static ssize_t process_ksm_enable_show(struct kobject *kobj,
-+		struct kobj_attribute *attr, char *buf)
-+{
-+	return sprintf(buf, "%u\n", __NR_process_ksm_enable);
-+}
-+static struct kobj_attribute process_ksm_enable_attr = __ATTR_RO(process_ksm_enable);
-+
-+static ssize_t process_ksm_disable_show(struct kobject *kobj,
-+		struct kobj_attribute *attr, char *buf)
-+{
-+	return sprintf(buf, "%u\n", __NR_process_ksm_disable);
-+}
-+static struct kobj_attribute process_ksm_disable_attr = __ATTR_RO(process_ksm_disable);
-+
-+static ssize_t process_ksm_status_show(struct kobject *kobj,
-+		struct kobj_attribute *attr, char *buf)
-+{
-+	return sprintf(buf, "%u\n", __NR_process_ksm_status);
-+}
-+static struct kobj_attribute process_ksm_status_attr = __ATTR_RO(process_ksm_status);
-+
-+static struct attribute *process_ksm_sysfs_attrs[] = {
-+	&process_ksm_enable_attr.attr,
-+	&process_ksm_disable_attr.attr,
-+	&process_ksm_status_attr.attr,
-+	NULL,
-+};
-+
-+static const struct attribute_group process_ksm_sysfs_attr_group = {
-+	.attrs = process_ksm_sysfs_attrs,
-+	.name = "process_ksm",
-+};
-+
-+static int __init process_ksm_sysfs_init(void)
-+{
-+	return sysfs_create_group(kernel_kobj, &process_ksm_sysfs_attr_group);
-+}
-+subsys_initcall(process_ksm_sysfs_init);
-+#endif /* CONFIG_KSM */
-+
- SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
- 		struct getcpu_cache __user *, unused)
- {
-diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
-index b696b85ac63e..cf7f3d841b1e 100644
---- a/kernel/sys_ni.c
-+++ b/kernel/sys_ni.c
-@@ -188,6 +188,9 @@ COND_SYSCALL(mincore);
- COND_SYSCALL(madvise);
- COND_SYSCALL(process_madvise);
- COND_SYSCALL(process_mrelease);
-+COND_SYSCALL(process_ksm_enable);
-+COND_SYSCALL(process_ksm_disable);
-+COND_SYSCALL(process_ksm_status);
- COND_SYSCALL(remap_file_pages);
- COND_SYSCALL(mbind);
- COND_SYSCALL(get_mempolicy);
--- 
-2.46.0.rc1
-
-From d0a6d18c3ce077b9b944a383d001bc4a8b907006 Mon Sep 17 00:00:00 2001
-From: Peter Jung <admin@ptr1337.dev>
-Date: Mon, 15 Jul 2024 13:26:39 +0200
-Subject: [PATCH 08/11] ntsync
-
-Signed-off-by: Peter Jung <admin@ptr1337.dev>
----
- Documentation/userspace-api/index.rst         |    1 +
- Documentation/userspace-api/ntsync.rst        |  398 +++++
- MAINTAINERS                                   |    9 +
- drivers/misc/Kconfig                          |    1 -
- drivers/misc/ntsync.c                         |  989 +++++++++++-
- include/uapi/linux/ntsync.h                   |   39 +
- tools/testing/selftests/Makefile              |    1 +
- .../selftests/drivers/ntsync/.gitignore       |    1 +
- .../testing/selftests/drivers/ntsync/Makefile |    7 +
- tools/testing/selftests/drivers/ntsync/config |    1 +
- .../testing/selftests/drivers/ntsync/ntsync.c | 1407 +++++++++++++++++
- 11 files changed, 2850 insertions(+), 4 deletions(-)
- create mode 100644 Documentation/userspace-api/ntsync.rst
- create mode 100644 tools/testing/selftests/drivers/ntsync/.gitignore
- create mode 100644 tools/testing/selftests/drivers/ntsync/Makefile
- create mode 100644 tools/testing/selftests/drivers/ntsync/config
- create mode 100644 tools/testing/selftests/drivers/ntsync/ntsync.c
-
-diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
-index 8a251d71fa6e..02bea81fb4bf 100644
---- a/Documentation/userspace-api/index.rst
-+++ b/Documentation/userspace-api/index.rst
-@@ -64,6 +64,7 @@ Everything else
-    vduse
-    futex2
-    perf_ring_buffer
-+   ntsync
- 
- .. only::  subproject and html
- 
-diff --git a/Documentation/userspace-api/ntsync.rst b/Documentation/userspace-api/ntsync.rst
-new file mode 100644
-index 000000000000..767844637a7d
---- /dev/null
-+++ b/Documentation/userspace-api/ntsync.rst
-@@ -0,0 +1,398 @@
-+===================================
-+NT synchronization primitive driver
-+===================================
-+
-+This page documents the user-space API for the ntsync driver.
-+
-+ntsync is a support driver for emulation of NT synchronization
-+primitives by user-space NT emulators. It exists because implementation
-+in user-space, using existing tools, cannot match Windows performance
-+while offering accurate semantics. It is implemented entirely in
-+software, and does not drive any hardware device.
-+
-+This interface is meant as a compatibility tool only, and should not
-+be used for general synchronization. Instead use generic, versatile
-+interfaces such as futex(2) and poll(2).
-+
-+Synchronization primitives
-+==========================
-+
-+The ntsync driver exposes three types of synchronization primitives:
-+semaphores, mutexes, and events.
-+
-+A semaphore holds a single volatile 32-bit counter, and a static 32-bit
-+integer denoting the maximum value. It is considered signaled (that is,
-+can be acquired without contention, or will wake up a waiting thread)
-+when the counter is nonzero. The counter is decremented by one when a
-+wait is satisfied. Both the initial and maximum count are established
-+when the semaphore is created.
-+
-+A mutex holds a volatile 32-bit recursion count, and a volatile 32-bit
-+identifier denoting its owner. A mutex is considered signaled when its
-+owner is zero (indicating that it is not owned). The recursion count is
-+incremented when a wait is satisfied, and ownership is set to the given
-+identifier.
-+
-+A mutex also holds an internal flag denoting whether its previous owner
-+has died; such a mutex is said to be abandoned. Owner death is not
-+tracked automatically based on thread death, but rather must be
-+communicated using ``NTSYNC_IOC_MUTEX_KILL``. An abandoned mutex is
-+inherently considered unowned.
-+
-+Except for the "unowned" semantics of zero, the actual value of the
-+owner identifier is not interpreted by the ntsync driver at all. The
-+intended use is to store a thread identifier; however, the ntsync
-+driver does not actually validate that a calling thread provides
-+consistent or unique identifiers.
-+
-+An event is similar to a semaphore with a maximum count of one. It holds
-+a volatile boolean state denoting whether it is signaled or not. There
-+are two types of events, auto-reset and manual-reset. An auto-reset
-+event is designaled when a wait is satisfied; a manual-reset event is
-+not. The event type is specified when the event is created.
-+
-+Unless specified otherwise, all operations on an object are atomic and
-+totally ordered with respect to other operations on the same object.
-+
-+Objects are represented by files. When all file descriptors to an
-+object are closed, that object is deleted.
-+
-+Char device
-+===========
-+
-+The ntsync driver creates a single char device /dev/ntsync. Each file
-+description opened on the device represents a unique instance intended
-+to back an individual NT virtual machine. Objects created by one ntsync
-+instance may only be used with other objects created by the same
-+instance.
-+
-+ioctl reference
-+===============
-+
-+All operations on the device are done through ioctls. There are four
-+structures used in ioctl calls::
-+
-+   struct ntsync_sem_args {
-+   	__u32 sem;
-+   	__u32 count;
-+   	__u32 max;
-+   };
-+
-+   struct ntsync_mutex_args {
-+   	__u32 mutex;
-+   	__u32 owner;
-+   	__u32 count;
-+   };
-+
-+   struct ntsync_event_args {
-+   	__u32 event;
-+   	__u32 signaled;
-+   	__u32 manual;
-+   };
-+
-+   struct ntsync_wait_args {
-+   	__u64 timeout;
-+   	__u64 objs;
-+   	__u32 count;
-+   	__u32 owner;
-+   	__u32 index;
-+   	__u32 alert;
-+   	__u32 flags;
-+   	__u32 pad;
-+   };
-+
-+Depending on the ioctl, members of the structure may be used as input,
-+output, or not at all. All ioctls return 0 on success.
-+
-+The ioctls on the device file are as follows:
-+
-+.. c:macro:: NTSYNC_IOC_CREATE_SEM
-+
-+  Create a semaphore object. Takes a pointer to struct
-+  :c:type:`ntsync_sem_args`, which is used as follows:
-+
-+  .. list-table::
-+
-+     * - ``sem``
-+       - On output, contains a file descriptor to the created semaphore.
-+     * - ``count``
-+       - Initial count of the semaphore.
-+     * - ``max``
-+       - Maximum count of the semaphore.
-+
-+  Fails with ``EINVAL`` if ``count`` is greater than ``max``.
-+
-+.. c:macro:: NTSYNC_IOC_CREATE_MUTEX
-+
-+  Create a mutex object. Takes a pointer to struct
-+  :c:type:`ntsync_mutex_args`, which is used as follows:
-+
-+  .. list-table::
-+
-+     * - ``mutex``
-+       - On output, contains a file descriptor to the created mutex.
-+     * - ``count``
-+       - Initial recursion count of the mutex.
-+     * - ``owner``
-+       - Initial owner of the mutex.
-+
-+  If ``owner`` is nonzero and ``count`` is zero, or if ``owner`` is
-+  zero and ``count`` is nonzero, the function fails with ``EINVAL``.
-+
-+.. c:macro:: NTSYNC_IOC_CREATE_EVENT
-+
-+  Create an event object. Takes a pointer to struct
-+  :c:type:`ntsync_event_args`, which is used as follows:
-+
-+  .. list-table::
-+
-+     * - ``event``
-+       - On output, contains a file descriptor to the created event.
-+     * - ``signaled``
-+       - If nonzero, the event is initially signaled, otherwise
-+         nonsignaled.
-+     * - ``manual``
-+       - If nonzero, the event is a manual-reset event, otherwise
-+         auto-reset.
-+
-+The ioctls on the individual objects are as follows:
-+
-+.. c:macro:: NTSYNC_IOC_SEM_POST
-+
-+  Post to a semaphore object. Takes a pointer to a 32-bit integer,
-+  which on input holds the count to be added to the semaphore, and on
-+  output contains its previous count.
-+
-+  If adding to the semaphore's current count would raise the latter
-+  past the semaphore's maximum count, the ioctl fails with
-+  ``EOVERFLOW`` and the semaphore is not affected. If raising the
-+  semaphore's count causes it to become signaled, eligible threads
-+  waiting on this semaphore will be woken and the semaphore's count
-+  decremented appropriately.
-+
-+.. c:macro:: NTSYNC_IOC_MUTEX_UNLOCK
-+
-+  Release a mutex object. Takes a pointer to struct
-+  :c:type:`ntsync_mutex_args`, which is used as follows:
-+
-+  .. list-table::
-+
-+     * - ``mutex``
-+       - Ignored.
-+     * - ``owner``
-+       - Specifies the owner trying to release this mutex.
-+     * - ``count``
-+       - On output, contains the previous recursion count.
-+
-+  If ``owner`` is zero, the ioctl fails with ``EINVAL``. If ``owner``
-+  is not the current owner of the mutex, the ioctl fails with
-+  ``EPERM``.
-+
-+  The mutex's count will be decremented by one. If decrementing the
-+  mutex's count causes it to become zero, the mutex is marked as
-+  unowned and signaled, and eligible threads waiting on it will be
-+  woken as appropriate.
-+
-+.. c:macro:: NTSYNC_IOC_SET_EVENT
-+
-+  Signal an event object. Takes a pointer to a 32-bit integer, which on
-+  output contains the previous state of the event.
-+
-+  Eligible threads will be woken, and auto-reset events will be
-+  designaled appropriately.
-+
-+.. c:macro:: NTSYNC_IOC_RESET_EVENT
-+
-+  Designal an event object. Takes a pointer to a 32-bit integer, which
-+  on output contains the previous state of the event.
-+
-+.. c:macro:: NTSYNC_IOC_PULSE_EVENT
-+
-+  Wake threads waiting on an event object while leaving it in an
-+  unsignaled state. Takes a pointer to a 32-bit integer, which on
-+  output contains the previous state of the event.
-+
-+  A pulse operation can be thought of as a set followed by a reset,
-+  performed as a single atomic operation. If two threads are waiting on
-+  an auto-reset event which is pulsed, only one will be woken. If two
-+  threads are waiting a manual-reset event which is pulsed, both will
-+  be woken. However, in both cases, the event will be unsignaled
-+  afterwards, and a simultaneous read operation will always report the
-+  event as unsignaled.
-+
-+.. c:macro:: NTSYNC_IOC_READ_SEM
-+
-+  Read the current state of a semaphore object. Takes a pointer to
-+  struct :c:type:`ntsync_sem_args`, which is used as follows:
-+
-+  .. list-table::
-+
-+     * - ``sem``
-+       - Ignored.
-+     * - ``count``
-+       - On output, contains the current count of the semaphore.
-+     * - ``max``
-+       - On output, contains the maximum count of the semaphore.
-+
-+.. c:macro:: NTSYNC_IOC_READ_MUTEX
-+
-+  Read the current state of a mutex object. Takes a pointer to struct
-+  :c:type:`ntsync_mutex_args`, which is used as follows:
-+
-+  .. list-table::
-+
-+     * - ``mutex``
-+       - Ignored.
-+     * - ``owner``
-+       - On output, contains the current owner of the mutex, or zero
-+         if the mutex is not currently owned.
-+     * - ``count``
-+       - On output, contains the current recursion count of the mutex.
-+
-+  If the mutex is marked as abandoned, the function fails with
-+  ``EOWNERDEAD``. In this case, ``count`` and ``owner`` are set to
-+  zero.
-+
-+.. c:macro:: NTSYNC_IOC_READ_EVENT
-+
-+  Read the current state of an event object. Takes a pointer to struct
-+  :c:type:`ntsync_event_args`, which is used as follows:
-+
-+  .. list-table::
-+
-+     * - ``event``
-+       - Ignored.
-+     * - ``signaled``
-+       - On output, contains the current state of the event.
-+     * - ``manual``
-+       - On output, contains 1 if the event is a manual-reset event,
-+         and 0 otherwise.
-+
-+.. c:macro:: NTSYNC_IOC_KILL_OWNER
-+
-+  Mark a mutex as unowned and abandoned if it is owned by the given
-+  owner. Takes an input-only pointer to a 32-bit integer denoting the
-+  owner. If the owner is zero, the ioctl fails with ``EINVAL``. If the
-+  owner does not own the mutex, the function fails with ``EPERM``.
-+
-+  Eligible threads waiting on the mutex will be woken as appropriate
-+  (and such waits will fail with ``EOWNERDEAD``, as described below).
-+
-+.. c:macro:: NTSYNC_IOC_WAIT_ANY
-+
-+  Poll on any of a list of objects, atomically acquiring at most one.
-+  Takes a pointer to struct :c:type:`ntsync_wait_args`, which is
-+  used as follows:
-+
-+  .. list-table::
-+
-+     * - ``timeout``
-+       - Absolute timeout in nanoseconds. If ``NTSYNC_WAIT_REALTIME``
-+         is set, the timeout is measured against the REALTIME clock;
-+         otherwise it is measured against the MONOTONIC clock. If the
-+         timeout is equal to or earlier than the current time, the
-+         function returns immediately without sleeping. If ``timeout``
-+         is U64_MAX, the function will sleep until an object is
-+         signaled, and will not fail with ``ETIMEDOUT``.
-+     * - ``objs``
-+       - Pointer to an array of ``count`` file descriptors
-+         (specified as an integer so that the structure has the same
-+         size regardless of architecture). If any object is
-+         invalid, the function fails with ``EINVAL``.
-+     * - ``count``
-+       - Number of objects specified in the ``objs`` array.
-+         If greater than ``NTSYNC_MAX_WAIT_COUNT``, the function fails
-+         with ``EINVAL``.
-+     * - ``owner``
-+       - Mutex owner identifier. If any object in ``objs`` is a mutex,
-+         the ioctl will attempt to acquire that mutex on behalf of
-+         ``owner``. If ``owner`` is zero, the ioctl fails with
-+         ``EINVAL``.
-+     * - ``index``
-+       - On success, contains the index (into ``objs``) of the object
-+         which was signaled. If ``alert`` was signaled instead,
-+         this contains ``count``.
-+     * - ``alert``
-+       - Optional event object file descriptor. If nonzero, this
-+         specifies an "alert" event object which, if signaled, will
-+         terminate the wait. If nonzero, the identifier must point to a
-+         valid event.
-+     * - ``flags``
-+       - Zero or more flags. Currently the only flag is
-+         ``NTSYNC_WAIT_REALTIME``, which causes the timeout to be
-+         measured against the REALTIME clock instead of MONOTONIC.
-+     * - ``pad``
-+       - Unused, must be set to zero.
-+
-+  This function attempts to acquire one of the given objects. If unable
-+  to do so, it sleeps until an object becomes signaled, subsequently
-+  acquiring it, or the timeout expires. In the latter case the ioctl
-+  fails with ``ETIMEDOUT``. The function only acquires one object, even
-+  if multiple objects are signaled.
-+
-+  A semaphore is considered to be signaled if its count is nonzero, and
-+  is acquired by decrementing its count by one. A mutex is considered
-+  to be signaled if it is unowned or if its owner matches the ``owner``
-+  argument, and is acquired by incrementing its recursion count by one
-+  and setting its owner to the ``owner`` argument. An auto-reset event
-+  is acquired by designaling it; a manual-reset event is not affected
-+  by acquisition.
-+
-+  Acquisition is atomic and totally ordered with respect to other
-+  operations on the same object. If two wait operations (with different
-+  ``owner`` identifiers) are queued on the same mutex, only one is
-+  signaled. If two wait operations are queued on the same semaphore,
-+  and a value of one is posted to it, only one is signaled.
-+
-+  If an abandoned mutex is acquired, the ioctl fails with
-+  ``EOWNERDEAD``. Although this is a failure return, the function may
-+  otherwise be considered successful. The mutex is marked as owned by
-+  the given owner (with a recursion count of 1) and as no longer
-+  abandoned, and ``index`` is still set to the index of the mutex.
-+
-+  The ``alert`` argument is an "extra" event which can terminate the
-+  wait, independently of all other objects.
-+
-+  It is valid to pass the same object more than once, including by
-+  passing the same event in the ``objs`` array and in ``alert``. If a
-+  wakeup occurs due to that object being signaled, ``index`` is set to
-+  the lowest index corresponding to that object.
-+
-+  The function may fail with ``EINTR`` if a signal is received.
-+
-+.. c:macro:: NTSYNC_IOC_WAIT_ALL
-+
-+  Poll on a list of objects, atomically acquiring all of them. Takes a
-+  pointer to struct :c:type:`ntsync_wait_args`, which is used
-+  identically to ``NTSYNC_IOC_WAIT_ANY``, except that ``index`` is
-+  always filled with zero on success if not woken via alert.
-+
-+  This function attempts to simultaneously acquire all of the given
-+  objects. If unable to do so, it sleeps until all objects become
-+  simultaneously signaled, subsequently acquiring them, or the timeout
-+  expires. In the latter case the ioctl fails with ``ETIMEDOUT`` and no
-+  objects are modified.
-+
-+  Objects may become signaled and subsequently designaled (through
-+  acquisition by other threads) while this thread is sleeping. Only
-+  once all objects are simultaneously signaled does the ioctl acquire
-+  them and return. The entire acquisition is atomic and totally ordered
-+  with respect to other operations on any of the given objects.
-+
-+  If an abandoned mutex is acquired, the ioctl fails with
-+  ``EOWNERDEAD``. Similarly to ``NTSYNC_IOC_WAIT_ANY``, all objects are
-+  nevertheless marked as acquired. Note that if multiple mutex objects
-+  are specified, there is no way to know which were marked as
-+  abandoned.
-+
-+  As with "any" waits, the ``alert`` argument is an "extra" event which
-+  can terminate the wait. Critically, however, an "all" wait will
-+  succeed if all members in ``objs`` are signaled, *or* if ``alert`` is
-+  signaled. In the latter case ``index`` will be set to ``count``. As
-+  with "any" waits, if both conditions are filled, the former takes
-+  priority, and objects in ``objs`` will be acquired.
-+
-+  Unlike ``NTSYNC_IOC_WAIT_ANY``, it is not valid to pass the same
-+  object more than once, nor is it valid to pass the same object in
-+  ``objs`` and in ``alert``. If this is attempted, the function fails
-+  with ``EINVAL``.
-diff --git a/MAINTAINERS b/MAINTAINERS
-index 958e935449e5..b25b2a731512 100644
---- a/MAINTAINERS
-+++ b/MAINTAINERS
-@@ -15976,6 +15976,15 @@ T:	git https://github.com/Paragon-Software-Group/linux-ntfs3.git
- F:	Documentation/filesystems/ntfs3.rst
- F:	fs/ntfs3/
- 
-+NTSYNC SYNCHRONIZATION PRIMITIVE DRIVER
-+M:	Elizabeth Figura <zfigura@codeweavers.com>
-+L:	wine-devel@winehq.org
-+S:	Supported
-+F:	Documentation/userspace-api/ntsync.rst
-+F:	drivers/misc/ntsync.c
-+F:	include/uapi/linux/ntsync.h
-+F:	tools/testing/selftests/drivers/ntsync/
-+
- NUBUS SUBSYSTEM
- M:	Finn Thain <fthain@linux-m68k.org>
- L:	linux-m68k@lists.linux-m68k.org
-diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
-index faf983680040..2907b5c23368 100644
---- a/drivers/misc/Kconfig
-+++ b/drivers/misc/Kconfig
-@@ -507,7 +507,6 @@ config OPEN_DICE
- 
- config NTSYNC
- 	tristate "NT synchronization primitive emulation"
--	depends on BROKEN
- 	help
- 	  This module provides kernel support for emulation of Windows NT
- 	  synchronization primitives. It is not a hardware driver.
-diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
-index 3c2f743c58b0..87a24798a5c7 100644
---- a/drivers/misc/ntsync.c
-+++ b/drivers/misc/ntsync.c
-@@ -6,11 +6,17 @@
-  */
- 
- #include <linux/anon_inodes.h>
-+#include <linux/atomic.h>
- #include <linux/file.h>
- #include <linux/fs.h>
-+#include <linux/hrtimer.h>
-+#include <linux/ktime.h>
- #include <linux/miscdevice.h>
- #include <linux/module.h>
-+#include <linux/mutex.h>
- #include <linux/overflow.h>
-+#include <linux/sched.h>
-+#include <linux/sched/signal.h>
- #include <linux/slab.h>
- #include <linux/spinlock.h>
- #include <uapi/linux/ntsync.h>
-@@ -19,6 +25,8 @@
- 
- enum ntsync_type {
- 	NTSYNC_TYPE_SEM,
-+	NTSYNC_TYPE_MUTEX,
-+	NTSYNC_TYPE_EVENT,
- };
- 
- /*
-@@ -30,10 +38,13 @@ enum ntsync_type {
-  *
-  * Both rely on struct file for reference counting. Individual
-  * ntsync_obj objects take a reference to the device when created.
-+ * Wait operations take a reference to each object being waited on for
-+ * the duration of the wait.
-  */
- 
- struct ntsync_obj {
- 	spinlock_t lock;
-+	int dev_locked;
- 
- 	enum ntsync_type type;
- 
-@@ -46,13 +57,335 @@ struct ntsync_obj {
- 			__u32 count;
- 			__u32 max;
- 		} sem;
-+		struct {
-+			__u32 count;
-+			pid_t owner;
-+			bool ownerdead;
-+		} mutex;
-+		struct {
-+			bool manual;
-+			bool signaled;
-+		} event;
- 	} u;
-+
-+	/*
-+	 * any_waiters is protected by the object lock, but all_waiters is
-+	 * protected by the device wait_all_lock.
-+	 */
-+	struct list_head any_waiters;
-+	struct list_head all_waiters;
-+
-+	/*
-+	 * Hint describing how many tasks are queued on this object in a
-+	 * wait-all operation.
-+	 *
-+	 * Any time we do a wake, we may need to wake "all" waiters as well as
-+	 * "any" waiters. In order to atomically wake "all" waiters, we must
-+	 * lock all of the objects, and that means grabbing the wait_all_lock
-+	 * below (and, due to lock ordering rules, before locking this object).
-+	 * However, wait-all is a rare operation, and grabbing the wait-all
-+	 * lock for every wake would create unnecessary contention.
-+	 * Therefore we first check whether all_hint is zero, and, if it is,
-+	 * we skip trying to wake "all" waiters.
-+	 *
-+	 * Since wait requests must originate from user-space threads, we're
-+	 * limited here by PID_MAX_LIMIT, so there's no risk of overflow.
-+	 */
-+	atomic_t all_hint;
-+};
-+
-+struct ntsync_q_entry {
-+	struct list_head node;
-+	struct ntsync_q *q;
-+	struct ntsync_obj *obj;
-+	__u32 index;
-+};
-+
-+struct ntsync_q {
-+	struct task_struct *task;
-+	__u32 owner;
-+
-+	/*
-+	 * Protected via atomic_try_cmpxchg(). Only the thread that wins the
-+	 * compare-and-swap may actually change object states and wake this
-+	 * task.
-+	 */
-+	atomic_t signaled;
-+
-+	bool all;
-+	bool ownerdead;
-+	__u32 count;
-+	struct ntsync_q_entry entries[];
- };
- 
- struct ntsync_device {
-+	/*
-+	 * Wait-all operations must atomically grab all objects, and be totally
-+	 * ordered with respect to each other and wait-any operations.
-+	 * If one thread is trying to acquire several objects, another thread
-+	 * cannot touch the object at the same time.
-+	 *
-+	 * This device-wide lock is used to serialize wait-for-all
-+	 * operations, and operations on an object that is involved in a
-+	 * wait-for-all.
-+	 */
-+	struct mutex wait_all_lock;
-+
- 	struct file *file;
- };
- 
-+/*
-+ * Single objects are locked using obj->lock.
-+ *
-+ * Multiple objects are 'locked' while holding dev->wait_all_lock.
-+ * In this case however, individual objects are not locked by holding
-+ * obj->lock, but by setting obj->dev_locked.
-+ *
-+ * This means that in order to lock a single object, the sequence is slightly
-+ * more complicated than usual. Specifically it needs to check obj->dev_locked
-+ * after acquiring obj->lock, if set, it needs to drop the lock and acquire
-+ * dev->wait_all_lock in order to serialize against the multi-object operation.
-+ */
-+
-+static void dev_lock_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
-+{
-+	lockdep_assert_held(&dev->wait_all_lock);
-+	lockdep_assert(obj->dev == dev);
-+	spin_lock(&obj->lock);
-+	/*
-+	 * By setting obj->dev_locked inside obj->lock, it is ensured that
-+	 * anyone holding obj->lock must see the value.
-+	 */
-+	obj->dev_locked = 1;
-+	spin_unlock(&obj->lock);
-+}
-+
-+static void dev_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
-+{
-+	lockdep_assert_held(&dev->wait_all_lock);
-+	lockdep_assert(obj->dev == dev);
-+	spin_lock(&obj->lock);
-+	obj->dev_locked = 0;
-+	spin_unlock(&obj->lock);
-+}
-+
-+static void obj_lock(struct ntsync_obj *obj)
-+{
-+	struct ntsync_device *dev = obj->dev;
-+
-+	for (;;) {
-+		spin_lock(&obj->lock);
-+		if (likely(!obj->dev_locked))
-+			break;
-+
-+		spin_unlock(&obj->lock);
-+		mutex_lock(&dev->wait_all_lock);
-+		spin_lock(&obj->lock);
-+		/*
-+		 * obj->dev_locked should be set and released under the same
-+		 * wait_all_lock section, since we now own this lock, it should
-+		 * be clear.
-+		 */
-+		lockdep_assert(!obj->dev_locked);
-+		spin_unlock(&obj->lock);
-+		mutex_unlock(&dev->wait_all_lock);
-+	}
-+}
-+
-+static void obj_unlock(struct ntsync_obj *obj)
-+{
-+	spin_unlock(&obj->lock);
-+}
-+
-+static bool ntsync_lock_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
-+{
-+	bool all;
-+
-+	obj_lock(obj);
-+	all = atomic_read(&obj->all_hint);
-+	if (unlikely(all)) {
-+		obj_unlock(obj);
-+		mutex_lock(&dev->wait_all_lock);
-+		dev_lock_obj(dev, obj);
-+	}
-+
-+	return all;
-+}
-+
-+static void ntsync_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj, bool all)
-+{
-+	if (all) {
-+		dev_unlock_obj(dev, obj);
-+		mutex_unlock(&dev->wait_all_lock);
-+	} else {
-+		obj_unlock(obj);
-+	}
-+}
-+
-+#define ntsync_assert_held(obj) \
-+	lockdep_assert((lockdep_is_held(&(obj)->lock) != LOCK_STATE_NOT_HELD) || \
-+		       ((lockdep_is_held(&(obj)->dev->wait_all_lock) != LOCK_STATE_NOT_HELD) && \
-+			(obj)->dev_locked))
-+
-+static bool is_signaled(struct ntsync_obj *obj, __u32 owner)
-+{
-+	ntsync_assert_held(obj);
-+
-+	switch (obj->type) {
-+	case NTSYNC_TYPE_SEM:
-+		return !!obj->u.sem.count;
-+	case NTSYNC_TYPE_MUTEX:
-+		if (obj->u.mutex.owner && obj->u.mutex.owner != owner)
-+			return false;
-+		return obj->u.mutex.count < UINT_MAX;
-+	case NTSYNC_TYPE_EVENT:
-+		return obj->u.event.signaled;
-+	}
-+
-+	WARN(1, "bad object type %#x\n", obj->type);
-+	return false;
-+}
-+
-+/*
-+ * "locked_obj" is an optional pointer to an object which is already locked and
-+ * should not be locked again. This is necessary so that changing an object's
-+ * state and waking it can be a single atomic operation.
-+ */
-+static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q,
-+			 struct ntsync_obj *locked_obj)
-+{
-+	__u32 count = q->count;
-+	bool can_wake = true;
-+	int signaled = -1;
-+	__u32 i;
-+
-+	lockdep_assert_held(&dev->wait_all_lock);
-+	if (locked_obj)
-+		lockdep_assert(locked_obj->dev_locked);
-+
-+	for (i = 0; i < count; i++) {
-+		if (q->entries[i].obj != locked_obj)
-+			dev_lock_obj(dev, q->entries[i].obj);
-+	}
-+
-+	for (i = 0; i < count; i++) {
-+		if (!is_signaled(q->entries[i].obj, q->owner)) {
-+			can_wake = false;
-+			break;
-+		}
-+	}
-+
-+	if (can_wake && atomic_try_cmpxchg(&q->signaled, &signaled, 0)) {
-+		for (i = 0; i < count; i++) {
-+			struct ntsync_obj *obj = q->entries[i].obj;
-+
-+			switch (obj->type) {
-+			case NTSYNC_TYPE_SEM:
-+				obj->u.sem.count--;
-+				break;
-+			case NTSYNC_TYPE_MUTEX:
-+				if (obj->u.mutex.ownerdead)
-+					q->ownerdead = true;
-+				obj->u.mutex.ownerdead = false;
-+				obj->u.mutex.count++;
-+				obj->u.mutex.owner = q->owner;
-+				break;
-+			case NTSYNC_TYPE_EVENT:
-+				if (!obj->u.event.manual)
-+					obj->u.event.signaled = false;
-+				break;
-+			}
-+		}
-+		wake_up_process(q->task);
-+	}
-+
-+	for (i = 0; i < count; i++) {
-+		if (q->entries[i].obj != locked_obj)
-+			dev_unlock_obj(dev, q->entries[i].obj);
-+	}
-+}
-+
-+static void try_wake_all_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
-+{
-+	struct ntsync_q_entry *entry;
-+
-+	lockdep_assert_held(&dev->wait_all_lock);
-+	lockdep_assert(obj->dev_locked);
-+
-+	list_for_each_entry(entry, &obj->all_waiters, node)
-+		try_wake_all(dev, entry->q, obj);
-+}
-+
-+static void try_wake_any_sem(struct ntsync_obj *sem)
-+{
-+	struct ntsync_q_entry *entry;
-+
-+	ntsync_assert_held(sem);
-+	lockdep_assert(sem->type == NTSYNC_TYPE_SEM);
-+
-+	list_for_each_entry(entry, &sem->any_waiters, node) {
-+		struct ntsync_q *q = entry->q;
-+		int signaled = -1;
-+
-+		if (!sem->u.sem.count)
-+			break;
-+
-+		if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) {
-+			sem->u.sem.count--;
-+			wake_up_process(q->task);
-+		}
-+	}
-+}
-+
-+static void try_wake_any_mutex(struct ntsync_obj *mutex)
-+{
-+	struct ntsync_q_entry *entry;
-+
-+	ntsync_assert_held(mutex);
-+	lockdep_assert(mutex->type == NTSYNC_TYPE_MUTEX);
-+
-+	list_for_each_entry(entry, &mutex->any_waiters, node) {
-+		struct ntsync_q *q = entry->q;
-+		int signaled = -1;
-+
-+		if (mutex->u.mutex.count == UINT_MAX)
-+			break;
-+		if (mutex->u.mutex.owner && mutex->u.mutex.owner != q->owner)
-+			continue;
-+
-+		if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) {
-+			if (mutex->u.mutex.ownerdead)
-+				q->ownerdead = true;
-+			mutex->u.mutex.ownerdead = false;
-+			mutex->u.mutex.count++;
-+			mutex->u.mutex.owner = q->owner;
-+			wake_up_process(q->task);
-+		}
-+	}
-+}
-+
-+static void try_wake_any_event(struct ntsync_obj *event)
-+{
-+	struct ntsync_q_entry *entry;
-+
-+	ntsync_assert_held(event);
-+	lockdep_assert(event->type == NTSYNC_TYPE_EVENT);
-+
-+	list_for_each_entry(entry, &event->any_waiters, node) {
-+		struct ntsync_q *q = entry->q;
-+		int signaled = -1;
-+
-+		if (!event->u.event.signaled)
-+			break;
-+
-+		if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) {
-+			if (!event->u.event.manual)
-+				event->u.event.signaled = false;
-+			wake_up_process(q->task);
-+		}
-+	}
-+}
-+
- /*
-  * Actually change the semaphore state, returning -EOVERFLOW if it is made
-  * invalid.
-@@ -61,7 +394,7 @@ static int post_sem_state(struct ntsync_obj *sem, __u32 count)
- {
- 	__u32 sum;
- 
--	lockdep_assert_held(&sem->lock);
-+	ntsync_assert_held(sem);
- 
- 	if (check_add_overflow(sem->u.sem.count, count, &sum) ||
- 	    sum > sem->u.sem.max)
-@@ -73,9 +406,11 @@ static int post_sem_state(struct ntsync_obj *sem, __u32 count)
- 
- static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp)
- {
-+	struct ntsync_device *dev = sem->dev;
- 	__u32 __user *user_args = argp;
- 	__u32 prev_count;
- 	__u32 args;
-+	bool all;
- 	int ret;
- 
- 	if (copy_from_user(&args, argp, sizeof(args)))
-@@ -84,12 +419,17 @@ static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp)
- 	if (sem->type != NTSYNC_TYPE_SEM)
- 		return -EINVAL;
- 
--	spin_lock(&sem->lock);
-+	all = ntsync_lock_obj(dev, sem);
- 
- 	prev_count = sem->u.sem.count;
- 	ret = post_sem_state(sem, args);
-+	if (!ret) {
-+		if (all)
-+			try_wake_all_obj(dev, sem);
-+		try_wake_any_sem(sem);
-+	}
- 
--	spin_unlock(&sem->lock);
-+	ntsync_unlock_obj(dev, sem, all);
- 
- 	if (!ret && put_user(prev_count, user_args))
- 		ret = -EFAULT;
-@@ -97,6 +437,226 @@ static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp)
- 	return ret;
- }
- 
-+/*
-+ * Actually change the mutex state, returning -EPERM if not the owner.
-+ */
-+static int unlock_mutex_state(struct ntsync_obj *mutex,
-+			      const struct ntsync_mutex_args *args)
-+{
-+	ntsync_assert_held(mutex);
-+
-+	if (mutex->u.mutex.owner != args->owner)
-+		return -EPERM;
-+
-+	if (!--mutex->u.mutex.count)
-+		mutex->u.mutex.owner = 0;
-+	return 0;
-+}
-+
-+static int ntsync_mutex_unlock(struct ntsync_obj *mutex, void __user *argp)
-+{
-+	struct ntsync_mutex_args __user *user_args = argp;
-+	struct ntsync_device *dev = mutex->dev;
-+	struct ntsync_mutex_args args;
-+	__u32 prev_count;
-+	bool all;
-+	int ret;
-+
-+	if (copy_from_user(&args, argp, sizeof(args)))
-+		return -EFAULT;
-+	if (!args.owner)
-+		return -EINVAL;
-+
-+	if (mutex->type != NTSYNC_TYPE_MUTEX)
-+		return -EINVAL;
-+
-+	all = ntsync_lock_obj(dev, mutex);
-+
-+	prev_count = mutex->u.mutex.count;
-+	ret = unlock_mutex_state(mutex, &args);
-+	if (!ret) {
-+		if (all)
-+			try_wake_all_obj(dev, mutex);
-+		try_wake_any_mutex(mutex);
-+	}
-+
-+	ntsync_unlock_obj(dev, mutex, all);
-+
-+	if (!ret && put_user(prev_count, &user_args->count))
-+		ret = -EFAULT;
-+
-+	return ret;
-+}
-+
-+/*
-+ * Actually change the mutex state to mark its owner as dead,
-+ * returning -EPERM if not the owner.
-+ */
-+static int kill_mutex_state(struct ntsync_obj *mutex, __u32 owner)
-+{
-+	ntsync_assert_held(mutex);
-+
-+	if (mutex->u.mutex.owner != owner)
-+		return -EPERM;
-+
-+	mutex->u.mutex.ownerdead = true;
-+	mutex->u.mutex.owner = 0;
-+	mutex->u.mutex.count = 0;
-+	return 0;
-+}
-+
-+static int ntsync_mutex_kill(struct ntsync_obj *mutex, void __user *argp)
-+{
-+	struct ntsync_device *dev = mutex->dev;
-+	__u32 owner;
-+	bool all;
-+	int ret;
-+
-+	if (get_user(owner, (__u32 __user *)argp))
-+		return -EFAULT;
-+	if (!owner)
-+		return -EINVAL;
-+
-+	if (mutex->type != NTSYNC_TYPE_MUTEX)
-+		return -EINVAL;
-+
-+	all = ntsync_lock_obj(dev, mutex);
-+
-+	ret = kill_mutex_state(mutex, owner);
-+	if (!ret) {
-+		if (all)
-+			try_wake_all_obj(dev, mutex);
-+		try_wake_any_mutex(mutex);
-+	}
-+
-+	ntsync_unlock_obj(dev, mutex, all);
-+
-+	return ret;
-+}
-+
-+static int ntsync_event_set(struct ntsync_obj *event, void __user *argp, bool pulse)
-+{
-+	struct ntsync_device *dev = event->dev;
-+	__u32 prev_state;
-+	bool all;
-+
-+	if (event->type != NTSYNC_TYPE_EVENT)
-+		return -EINVAL;
-+
-+	all = ntsync_lock_obj(dev, event);
-+
-+	prev_state = event->u.event.signaled;
-+	event->u.event.signaled = true;
-+	if (all)
-+		try_wake_all_obj(dev, event);
-+	try_wake_any_event(event);
-+	if (pulse)
-+		event->u.event.signaled = false;
-+
-+	ntsync_unlock_obj(dev, event, all);
-+
-+	if (put_user(prev_state, (__u32 __user *)argp))
-+		return -EFAULT;
-+
-+	return 0;
-+}
-+
-+static int ntsync_event_reset(struct ntsync_obj *event, void __user *argp)
-+{
-+	struct ntsync_device *dev = event->dev;
-+	__u32 prev_state;
-+	bool all;
-+
-+	if (event->type != NTSYNC_TYPE_EVENT)
-+		return -EINVAL;
-+
-+	all = ntsync_lock_obj(dev, event);
-+
-+	prev_state = event->u.event.signaled;
-+	event->u.event.signaled = false;
-+
-+	ntsync_unlock_obj(dev, event, all);
-+
-+	if (put_user(prev_state, (__u32 __user *)argp))
-+		return -EFAULT;
-+
-+	return 0;
-+}
-+
-+static int ntsync_sem_read(struct ntsync_obj *sem, void __user *argp)
-+{
-+	struct ntsync_sem_args __user *user_args = argp;
-+	struct ntsync_device *dev = sem->dev;
-+	struct ntsync_sem_args args;
-+	bool all;
-+
-+	if (sem->type != NTSYNC_TYPE_SEM)
-+		return -EINVAL;
-+
-+	args.sem = 0;
-+
-+	all = ntsync_lock_obj(dev, sem);
-+
-+	args.count = sem->u.sem.count;
-+	args.max = sem->u.sem.max;
-+
-+	ntsync_unlock_obj(dev, sem, all);
-+
-+	if (copy_to_user(user_args, &args, sizeof(args)))
-+		return -EFAULT;
-+	return 0;
-+}
-+
-+static int ntsync_mutex_read(struct ntsync_obj *mutex, void __user *argp)
-+{
-+	struct ntsync_mutex_args __user *user_args = argp;
-+	struct ntsync_device *dev = mutex->dev;
-+	struct ntsync_mutex_args args;
-+	bool all;
-+	int ret;
-+
-+	if (mutex->type != NTSYNC_TYPE_MUTEX)
-+		return -EINVAL;
-+
-+	args.mutex = 0;
-+
-+	all = ntsync_lock_obj(dev, mutex);
-+
-+	args.count = mutex->u.mutex.count;
-+	args.owner = mutex->u.mutex.owner;
-+	ret = mutex->u.mutex.ownerdead ? -EOWNERDEAD : 0;
-+
-+	ntsync_unlock_obj(dev, mutex, all);
-+
-+	if (copy_to_user(user_args, &args, sizeof(args)))
-+		return -EFAULT;
-+	return ret;
-+}
-+
-+static int ntsync_event_read(struct ntsync_obj *event, void __user *argp)
-+{
-+	struct ntsync_event_args __user *user_args = argp;
-+	struct ntsync_device *dev = event->dev;
-+	struct ntsync_event_args args;
-+	bool all;
-+
-+	if (event->type != NTSYNC_TYPE_EVENT)
-+		return -EINVAL;
-+
-+	args.event = 0;
-+
-+	all = ntsync_lock_obj(dev, event);
-+
-+	args.manual = event->u.event.manual;
-+	args.signaled = event->u.event.signaled;
-+
-+	ntsync_unlock_obj(dev, event, all);
-+
-+	if (copy_to_user(user_args, &args, sizeof(args)))
-+		return -EFAULT;
-+	return 0;
-+}
-+
- static int ntsync_obj_release(struct inode *inode, struct file *file)
- {
- 	struct ntsync_obj *obj = file->private_data;
-@@ -116,6 +676,22 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd,
- 	switch (cmd) {
- 	case NTSYNC_IOC_SEM_POST:
- 		return ntsync_sem_post(obj, argp);
-+	case NTSYNC_IOC_SEM_READ:
-+		return ntsync_sem_read(obj, argp);
-+	case NTSYNC_IOC_MUTEX_UNLOCK:
-+		return ntsync_mutex_unlock(obj, argp);
-+	case NTSYNC_IOC_MUTEX_KILL:
-+		return ntsync_mutex_kill(obj, argp);
-+	case NTSYNC_IOC_MUTEX_READ:
-+		return ntsync_mutex_read(obj, argp);
-+	case NTSYNC_IOC_EVENT_SET:
-+		return ntsync_event_set(obj, argp, false);
-+	case NTSYNC_IOC_EVENT_RESET:
-+		return ntsync_event_reset(obj, argp);
-+	case NTSYNC_IOC_EVENT_PULSE:
-+		return ntsync_event_set(obj, argp, true);
-+	case NTSYNC_IOC_EVENT_READ:
-+		return ntsync_event_read(obj, argp);
- 	default:
- 		return -ENOIOCTLCMD;
- 	}
-@@ -141,6 +717,9 @@ static struct ntsync_obj *ntsync_alloc_obj(struct ntsync_device *dev,
- 	obj->dev = dev;
- 	get_file(dev->file);
- 	spin_lock_init(&obj->lock);
-+	INIT_LIST_HEAD(&obj->any_waiters);
-+	INIT_LIST_HEAD(&obj->all_waiters);
-+	atomic_set(&obj->all_hint, 0);
- 
- 	return obj;
- }
-@@ -191,6 +770,400 @@ static int ntsync_create_sem(struct ntsync_device *dev, void __user *argp)
- 	return put_user(fd, &user_args->sem);
- }
- 
-+static int ntsync_create_mutex(struct ntsync_device *dev, void __user *argp)
-+{
-+	struct ntsync_mutex_args __user *user_args = argp;
-+	struct ntsync_mutex_args args;
-+	struct ntsync_obj *mutex;
-+	int fd;
-+
-+	if (copy_from_user(&args, argp, sizeof(args)))
-+		return -EFAULT;
-+
-+	if (!args.owner != !args.count)
-+		return -EINVAL;
-+
-+	mutex = ntsync_alloc_obj(dev, NTSYNC_TYPE_MUTEX);
-+	if (!mutex)
-+		return -ENOMEM;
-+	mutex->u.mutex.count = args.count;
-+	mutex->u.mutex.owner = args.owner;
-+	fd = ntsync_obj_get_fd(mutex);
-+	if (fd < 0) {
-+		kfree(mutex);
-+		return fd;
-+	}
-+
-+	return put_user(fd, &user_args->mutex);
-+}
-+
-+static int ntsync_create_event(struct ntsync_device *dev, void __user *argp)
-+{
-+	struct ntsync_event_args __user *user_args = argp;
-+	struct ntsync_event_args args;
-+	struct ntsync_obj *event;
-+	int fd;
-+
-+	if (copy_from_user(&args, argp, sizeof(args)))
-+		return -EFAULT;
-+
-+	event = ntsync_alloc_obj(dev, NTSYNC_TYPE_EVENT);
-+	if (!event)
-+		return -ENOMEM;
-+	event->u.event.manual = args.manual;
-+	event->u.event.signaled = args.signaled;
-+	fd = ntsync_obj_get_fd(event);
-+	if (fd < 0) {
-+		kfree(event);
-+		return fd;
-+	}
-+
-+	return put_user(fd, &user_args->event);
-+}
-+
-+static struct ntsync_obj *get_obj(struct ntsync_device *dev, int fd)
-+{
-+	struct file *file = fget(fd);
-+	struct ntsync_obj *obj;
-+
-+	if (!file)
-+		return NULL;
-+
-+	if (file->f_op != &ntsync_obj_fops) {
-+		fput(file);
-+		return NULL;
-+	}
-+
-+	obj = file->private_data;
-+	if (obj->dev != dev) {
-+		fput(file);
-+		return NULL;
-+	}
-+
-+	return obj;
-+}
-+
-+static void put_obj(struct ntsync_obj *obj)
-+{
-+	fput(obj->file);
-+}
-+
-+static int ntsync_schedule(const struct ntsync_q *q, const struct ntsync_wait_args *args)
-+{
-+	ktime_t timeout = ns_to_ktime(args->timeout);
-+	clockid_t clock = CLOCK_MONOTONIC;
-+	ktime_t *timeout_ptr;
-+	int ret = 0;
-+
-+	timeout_ptr = (args->timeout == U64_MAX ? NULL : &timeout);
-+
-+	if (args->flags & NTSYNC_WAIT_REALTIME)
-+		clock = CLOCK_REALTIME;
-+
-+	do {
-+		if (signal_pending(current)) {
-+			ret = -ERESTARTSYS;
-+			break;
-+		}
-+
-+		set_current_state(TASK_INTERRUPTIBLE);
-+		if (atomic_read(&q->signaled) != -1) {
-+			ret = 0;
-+			break;
-+		}
-+		ret = schedule_hrtimeout_range_clock(timeout_ptr, 0, HRTIMER_MODE_ABS, clock);
-+	} while (ret < 0);
-+	__set_current_state(TASK_RUNNING);
-+
-+	return ret;
-+}
-+
-+/*
-+ * Allocate and initialize the ntsync_q structure, but do not queue us yet.
-+ */
-+static int setup_wait(struct ntsync_device *dev,
-+		      const struct ntsync_wait_args *args, bool all,
-+		      struct ntsync_q **ret_q)
-+{
-+	int fds[NTSYNC_MAX_WAIT_COUNT + 1];
-+	const __u32 count = args->count;
-+	struct ntsync_q *q;
-+	__u32 total_count;
-+	__u32 i, j;
-+
-+	if (args->pad || (args->flags & ~NTSYNC_WAIT_REALTIME))
-+		return -EINVAL;
-+
-+	if (args->count > NTSYNC_MAX_WAIT_COUNT)
-+		return -EINVAL;
-+
-+	total_count = count;
-+	if (args->alert)
-+		total_count++;
-+
-+	if (copy_from_user(fds, u64_to_user_ptr(args->objs),
-+			   array_size(count, sizeof(*fds))))
-+		return -EFAULT;
-+	if (args->alert)
-+		fds[count] = args->alert;
-+
-+	q = kmalloc(struct_size(q, entries, total_count), GFP_KERNEL);
-+	if (!q)
-+		return -ENOMEM;
-+	q->task = current;
-+	q->owner = args->owner;
-+	atomic_set(&q->signaled, -1);
-+	q->all = all;
-+	q->ownerdead = false;
-+	q->count = count;
-+
-+	for (i = 0; i < total_count; i++) {
-+		struct ntsync_q_entry *entry = &q->entries[i];
-+		struct ntsync_obj *obj = get_obj(dev, fds[i]);
-+
-+		if (!obj)
-+			goto err;
-+
-+		if (all) {
-+			/* Check that the objects are all distinct. */
-+			for (j = 0; j < i; j++) {
-+				if (obj == q->entries[j].obj) {
-+					put_obj(obj);
-+					goto err;
-+				}
-+			}
-+		}
-+
-+		entry->obj = obj;
-+		entry->q = q;
-+		entry->index = i;
-+	}
-+
-+	*ret_q = q;
-+	return 0;
-+
-+err:
-+	for (j = 0; j < i; j++)
-+		put_obj(q->entries[j].obj);
-+	kfree(q);
-+	return -EINVAL;
-+}
-+
-+static void try_wake_any_obj(struct ntsync_obj *obj)
-+{
-+	switch (obj->type) {
-+	case NTSYNC_TYPE_SEM:
-+		try_wake_any_sem(obj);
-+		break;
-+	case NTSYNC_TYPE_MUTEX:
-+		try_wake_any_mutex(obj);
-+		break;
-+	case NTSYNC_TYPE_EVENT:
-+		try_wake_any_event(obj);
-+		break;
-+	}
-+}
-+
-+static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
-+{
-+	struct ntsync_wait_args args;
-+	__u32 i, total_count;
-+	struct ntsync_q *q;
-+	int signaled;
-+	bool all;
-+	int ret;
-+
-+	if (copy_from_user(&args, argp, sizeof(args)))
-+		return -EFAULT;
-+
-+	ret = setup_wait(dev, &args, false, &q);
-+	if (ret < 0)
-+		return ret;
-+
-+	total_count = args.count;
-+	if (args.alert)
-+		total_count++;
-+
-+	/* queue ourselves */
-+
-+	for (i = 0; i < total_count; i++) {
-+		struct ntsync_q_entry *entry = &q->entries[i];
-+		struct ntsync_obj *obj = entry->obj;
-+
-+		all = ntsync_lock_obj(dev, obj);
-+		list_add_tail(&entry->node, &obj->any_waiters);
-+		ntsync_unlock_obj(dev, obj, all);
-+	}
-+
-+	/*
-+	 * Check if we are already signaled.
-+	 *
-+	 * Note that the API requires that normal objects are checked before
-+	 * the alert event. Hence we queue the alert event last, and check
-+	 * objects in order.
-+	 */
-+
-+	for (i = 0; i < total_count; i++) {
-+		struct ntsync_obj *obj = q->entries[i].obj;
-+
-+		if (atomic_read(&q->signaled) != -1)
-+			break;
-+
-+		all = ntsync_lock_obj(dev, obj);
-+		try_wake_any_obj(obj);
-+		ntsync_unlock_obj(dev, obj, all);
-+	}
-+
-+	/* sleep */
-+
-+	ret = ntsync_schedule(q, &args);
-+
-+	/* and finally, unqueue */
-+
-+	for (i = 0; i < total_count; i++) {
-+		struct ntsync_q_entry *entry = &q->entries[i];
-+		struct ntsync_obj *obj = entry->obj;
-+
-+		all = ntsync_lock_obj(dev, obj);
-+		list_del(&entry->node);
-+		ntsync_unlock_obj(dev, obj, all);
-+
-+		put_obj(obj);
-+	}
-+
-+	signaled = atomic_read(&q->signaled);
-+	if (signaled != -1) {
-+		struct ntsync_wait_args __user *user_args = argp;
-+
-+		/* even if we caught a signal, we need to communicate success */
-+		ret = q->ownerdead ? -EOWNERDEAD : 0;
-+
-+		if (put_user(signaled, &user_args->index))
-+			ret = -EFAULT;
-+	} else if (!ret) {
-+		ret = -ETIMEDOUT;
-+	}
-+
-+	kfree(q);
-+	return ret;
-+}
-+
-+static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp)
-+{
-+	struct ntsync_wait_args args;
-+	struct ntsync_q *q;
-+	int signaled;
-+	__u32 i;
-+	int ret;
-+
-+	if (copy_from_user(&args, argp, sizeof(args)))
-+		return -EFAULT;
-+
-+	ret = setup_wait(dev, &args, true, &q);
-+	if (ret < 0)
-+		return ret;
-+
-+	/* queue ourselves */
-+
-+	mutex_lock(&dev->wait_all_lock);
-+
-+	for (i = 0; i < args.count; i++) {
-+		struct ntsync_q_entry *entry = &q->entries[i];
-+		struct ntsync_obj *obj = entry->obj;
-+
-+		atomic_inc(&obj->all_hint);
-+
-+		/*
-+		 * obj->all_waiters is protected by dev->wait_all_lock rather
-+		 * than obj->lock, so there is no need to acquire obj->lock
-+		 * here.
-+		 */
-+		list_add_tail(&entry->node, &obj->all_waiters);
-+	}
-+	if (args.alert) {
-+		struct ntsync_q_entry *entry = &q->entries[args.count];
-+		struct ntsync_obj *obj = entry->obj;
-+
-+		dev_lock_obj(dev, obj);
-+		list_add_tail(&entry->node, &obj->any_waiters);
-+		dev_unlock_obj(dev, obj);
-+	}
-+
-+	/* check if we are already signaled */
-+
-+	try_wake_all(dev, q, NULL);
-+
-+	mutex_unlock(&dev->wait_all_lock);
-+
-+	/*
-+	 * Check if the alert event is signaled, making sure to do so only
-+	 * after checking if the other objects are signaled.
-+	 */
-+
-+	if (args.alert) {
-+		struct ntsync_obj *obj = q->entries[args.count].obj;
-+
-+		if (atomic_read(&q->signaled) == -1) {
-+			bool all = ntsync_lock_obj(dev, obj);
-+			try_wake_any_obj(obj);
-+			ntsync_unlock_obj(dev, obj, all);
-+		}
-+	}
-+
-+	/* sleep */
-+
-+	ret = ntsync_schedule(q, &args);
-+
-+	/* and finally, unqueue */
-+
-+	mutex_lock(&dev->wait_all_lock);
-+
-+	for (i = 0; i < args.count; i++) {
-+		struct ntsync_q_entry *entry = &q->entries[i];
-+		struct ntsync_obj *obj = entry->obj;
-+
-+		/*
-+		 * obj->all_waiters is protected by dev->wait_all_lock rather
-+		 * than obj->lock, so there is no need to acquire it here.
-+		 */
-+		list_del(&entry->node);
-+
-+		atomic_dec(&obj->all_hint);
-+
-+		put_obj(obj);
-+	}
-+
-+	mutex_unlock(&dev->wait_all_lock);
-+
-+	if (args.alert) {
-+		struct ntsync_q_entry *entry = &q->entries[args.count];
-+		struct ntsync_obj *obj = entry->obj;
-+		bool all;
-+
-+		all = ntsync_lock_obj(dev, obj);
-+		list_del(&entry->node);
-+		ntsync_unlock_obj(dev, obj, all);
-+
-+		put_obj(obj);
-+	}
-+
-+	signaled = atomic_read(&q->signaled);
-+	if (signaled != -1) {
-+		struct ntsync_wait_args __user *user_args = argp;
-+
-+		/* even if we caught a signal, we need to communicate success */
-+		ret = q->ownerdead ? -EOWNERDEAD : 0;
-+
-+		if (put_user(signaled, &user_args->index))
-+			ret = -EFAULT;
-+	} else if (!ret) {
-+		ret = -ETIMEDOUT;
-+	}
-+
-+	kfree(q);
-+	return ret;
-+}
-+
- static int ntsync_char_open(struct inode *inode, struct file *file)
- {
- 	struct ntsync_device *dev;
-@@ -199,6 +1172,8 @@ static int ntsync_char_open(struct inode *inode, struct file *file)
- 	if (!dev)
- 		return -ENOMEM;
- 
-+	mutex_init(&dev->wait_all_lock);
-+
- 	file->private_data = dev;
- 	dev->file = file;
- 	return nonseekable_open(inode, file);
-@@ -220,8 +1195,16 @@ static long ntsync_char_ioctl(struct file *file, unsigned int cmd,
- 	void __user *argp = (void __user *)parm;
- 
- 	switch (cmd) {
-+	case NTSYNC_IOC_CREATE_EVENT:
-+		return ntsync_create_event(dev, argp);
-+	case NTSYNC_IOC_CREATE_MUTEX:
-+		return ntsync_create_mutex(dev, argp);
- 	case NTSYNC_IOC_CREATE_SEM:
- 		return ntsync_create_sem(dev, argp);
-+	case NTSYNC_IOC_WAIT_ALL:
-+		return ntsync_wait_all(dev, argp);
-+	case NTSYNC_IOC_WAIT_ANY:
-+		return ntsync_wait_any(dev, argp);
- 	default:
- 		return -ENOIOCTLCMD;
- 	}
-diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
-index dcfa38fdc93c..4a8095a3fc34 100644
---- a/include/uapi/linux/ntsync.h
-+++ b/include/uapi/linux/ntsync.h
-@@ -16,8 +16,47 @@ struct ntsync_sem_args {
- 	__u32 max;
- };
- 
-+struct ntsync_mutex_args {
-+	__u32 mutex;
-+	__u32 owner;
-+	__u32 count;
-+};
-+
-+struct ntsync_event_args {
-+	__u32 event;
-+	__u32 manual;
-+	__u32 signaled;
-+};
-+
-+#define NTSYNC_WAIT_REALTIME	0x1
-+
-+struct ntsync_wait_args {
-+	__u64 timeout;
-+	__u64 objs;
-+	__u32 count;
-+	__u32 index;
-+	__u32 flags;
-+	__u32 owner;
-+	__u32 alert;
-+	__u32 pad;
-+};
-+
-+#define NTSYNC_MAX_WAIT_COUNT 64
-+
- #define NTSYNC_IOC_CREATE_SEM		_IOWR('N', 0x80, struct ntsync_sem_args)
-+#define NTSYNC_IOC_WAIT_ANY		_IOWR('N', 0x82, struct ntsync_wait_args)
-+#define NTSYNC_IOC_WAIT_ALL		_IOWR('N', 0x83, struct ntsync_wait_args)
-+#define NTSYNC_IOC_CREATE_MUTEX		_IOWR('N', 0x84, struct ntsync_sem_args)
-+#define NTSYNC_IOC_CREATE_EVENT		_IOWR('N', 0x87, struct ntsync_event_args)
- 
- #define NTSYNC_IOC_SEM_POST		_IOWR('N', 0x81, __u32)
-+#define NTSYNC_IOC_MUTEX_UNLOCK		_IOWR('N', 0x85, struct ntsync_mutex_args)
-+#define NTSYNC_IOC_MUTEX_KILL		_IOW ('N', 0x86, __u32)
-+#define NTSYNC_IOC_EVENT_SET		_IOR ('N', 0x88, __u32)
-+#define NTSYNC_IOC_EVENT_RESET		_IOR ('N', 0x89, __u32)
-+#define NTSYNC_IOC_EVENT_PULSE		_IOR ('N', 0x8a, __u32)
-+#define NTSYNC_IOC_SEM_READ		_IOR ('N', 0x8b, struct ntsync_sem_args)
-+#define NTSYNC_IOC_MUTEX_READ		_IOR ('N', 0x8c, struct ntsync_mutex_args)
-+#define NTSYNC_IOC_EVENT_READ		_IOR ('N', 0x8d, struct ntsync_event_args)
- 
- #endif
-diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
-index 9039f3709aff..d5aeaa8fe3ca 100644
---- a/tools/testing/selftests/Makefile
-+++ b/tools/testing/selftests/Makefile
-@@ -16,6 +16,7 @@ TARGETS += damon
- TARGETS += devices
- TARGETS += dmabuf-heaps
- TARGETS += drivers/dma-buf
-+TARGETS += drivers/ntsync
- TARGETS += drivers/s390x/uvdevice
- TARGETS += drivers/net
- TARGETS += drivers/net/bonding
-diff --git a/tools/testing/selftests/drivers/ntsync/.gitignore b/tools/testing/selftests/drivers/ntsync/.gitignore
-new file mode 100644
-index 000000000000..848573a3d3ea
---- /dev/null
-+++ b/tools/testing/selftests/drivers/ntsync/.gitignore
-@@ -0,0 +1 @@
-+ntsync
-diff --git a/tools/testing/selftests/drivers/ntsync/Makefile b/tools/testing/selftests/drivers/ntsync/Makefile
-new file mode 100644
-index 000000000000..dbf2b055c0b2
---- /dev/null
-+++ b/tools/testing/selftests/drivers/ntsync/Makefile
-@@ -0,0 +1,7 @@
-+# SPDX-LICENSE-IDENTIFIER: GPL-2.0-only
-+TEST_GEN_PROGS := ntsync
-+
-+CFLAGS += $(KHDR_INCLUDES)
-+LDLIBS += -lpthread
-+
-+include ../../lib.mk
-diff --git a/tools/testing/selftests/drivers/ntsync/config b/tools/testing/selftests/drivers/ntsync/config
-new file mode 100644
-index 000000000000..60539c826d06
---- /dev/null
-+++ b/tools/testing/selftests/drivers/ntsync/config
-@@ -0,0 +1 @@
-+CONFIG_WINESYNC=y
-diff --git a/tools/testing/selftests/drivers/ntsync/ntsync.c b/tools/testing/selftests/drivers/ntsync/ntsync.c
-new file mode 100644
-index 000000000000..5fa2c9a0768c
---- /dev/null
-+++ b/tools/testing/selftests/drivers/ntsync/ntsync.c
-@@ -0,0 +1,1407 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Various unit tests for the "ntsync" synchronization primitive driver.
-+ *
-+ * Copyright (C) 2021-2022 Elizabeth Figura <zfigura@codeweavers.com>
-+ */
-+
-+#define _GNU_SOURCE
-+#include <sys/ioctl.h>
-+#include <sys/stat.h>
-+#include <fcntl.h>
-+#include <time.h>
-+#include <pthread.h>
-+#include <linux/ntsync.h>
-+#include "../../kselftest_harness.h"
-+
-+static int read_sem_state(int sem, __u32 *count, __u32 *max)
-+{
-+	struct ntsync_sem_args args;
-+	int ret;
-+
-+	memset(&args, 0xcc, sizeof(args));
-+	ret = ioctl(sem, NTSYNC_IOC_SEM_READ, &args);
-+	*count = args.count;
-+	*max = args.max;
-+	return ret;
-+}
-+
-+#define check_sem_state(sem, count, max) \
-+	({ \
-+		__u32 __count, __max; \
-+		int ret = read_sem_state((sem), &__count, &__max); \
-+		EXPECT_EQ(0, ret); \
-+		EXPECT_EQ((count), __count); \
-+		EXPECT_EQ((max), __max); \
-+	})
-+
-+static int post_sem(int sem, __u32 *count)
-+{
-+	return ioctl(sem, NTSYNC_IOC_SEM_POST, count);
-+}
-+
-+static int read_mutex_state(int mutex, __u32 *count, __u32 *owner)
-+{
-+	struct ntsync_mutex_args args;
-+	int ret;
-+
-+	memset(&args, 0xcc, sizeof(args));
-+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &args);
-+	*count = args.count;
-+	*owner = args.owner;
-+	return ret;
-+}
-+
-+#define check_mutex_state(mutex, count, owner) \
-+	({ \
-+		__u32 __count, __owner; \
-+		int ret = read_mutex_state((mutex), &__count, &__owner); \
-+		EXPECT_EQ(0, ret); \
-+		EXPECT_EQ((count), __count); \
-+		EXPECT_EQ((owner), __owner); \
-+	})
-+
-+static int unlock_mutex(int mutex, __u32 owner, __u32 *count)
-+{
-+	struct ntsync_mutex_args args;
-+	int ret;
-+
-+	args.owner = owner;
-+	args.count = 0xdeadbeef;
-+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_UNLOCK, &args);
-+	*count = args.count;
-+	return ret;
-+}
-+
-+static int read_event_state(int event, __u32 *signaled, __u32 *manual)
-+{
-+	struct ntsync_event_args args;
-+	int ret;
-+
-+	memset(&args, 0xcc, sizeof(args));
-+	ret = ioctl(event, NTSYNC_IOC_EVENT_READ, &args);
-+	*signaled = args.signaled;
-+	*manual = args.manual;
-+	return ret;
-+}
-+
-+#define check_event_state(event, signaled, manual) \
-+	({ \
-+		__u32 __signaled, __manual; \
-+		int ret = read_event_state((event), &__signaled, &__manual); \
-+		EXPECT_EQ(0, ret); \
-+		EXPECT_EQ((signaled), __signaled); \
-+		EXPECT_EQ((manual), __manual); \
-+	})
-+
-+static int wait_objs(int fd, unsigned long request, __u32 count,
-+		     const int *objs, __u32 owner, int alert, __u32 *index)
-+{
-+	struct ntsync_wait_args args = {0};
-+	struct timespec timeout;
-+	int ret;
-+
-+	clock_gettime(CLOCK_MONOTONIC, &timeout);
-+
-+	args.timeout = timeout.tv_sec * 1000000000 + timeout.tv_nsec;
-+	args.count = count;
-+	args.objs = (uintptr_t)objs;
-+	args.owner = owner;
-+	args.index = 0xdeadbeef;
-+	args.alert = alert;
-+	ret = ioctl(fd, request, &args);
-+	*index = args.index;
-+	return ret;
-+}
-+
-+static int wait_any(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index)
-+{
-+	return wait_objs(fd, NTSYNC_IOC_WAIT_ANY, count, objs, owner, 0, index);
-+}
-+
-+static int wait_all(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index)
-+{
-+	return wait_objs(fd, NTSYNC_IOC_WAIT_ALL, count, objs, owner, 0, index);
-+}
-+
-+static int wait_any_alert(int fd, __u32 count, const int *objs,
-+			  __u32 owner, int alert, __u32 *index)
-+{
-+	return wait_objs(fd, NTSYNC_IOC_WAIT_ANY,
-+			 count, objs, owner, alert, index);
-+}
-+
-+static int wait_all_alert(int fd, __u32 count, const int *objs,
-+			  __u32 owner, int alert, __u32 *index)
-+{
-+	return wait_objs(fd, NTSYNC_IOC_WAIT_ALL,
-+			 count, objs, owner, alert, index);
-+}
-+
-+TEST(semaphore_state)
-+{
-+	struct ntsync_sem_args sem_args;
-+	struct timespec timeout;
-+	__u32 count, index;
-+	int fd, ret, sem;
-+
-+	clock_gettime(CLOCK_MONOTONIC, &timeout);
-+
-+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
-+	ASSERT_LE(0, fd);
-+
-+	sem_args.count = 3;
-+	sem_args.max = 2;
-+	sem_args.sem = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EINVAL, errno);
-+
-+	sem_args.count = 2;
-+	sem_args.max = 2;
-+	sem_args.sem = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, sem_args.sem);
-+	sem = sem_args.sem;
-+	check_sem_state(sem, 2, 2);
-+
-+	count = 0;
-+	ret = post_sem(sem, &count);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(2, count);
-+	check_sem_state(sem, 2, 2);
-+
-+	count = 1;
-+	ret = post_sem(sem, &count);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EOVERFLOW, errno);
-+	check_sem_state(sem, 2, 2);
-+
-+	ret = wait_any(fd, 1, &sem, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+	check_sem_state(sem, 1, 2);
-+
-+	ret = wait_any(fd, 1, &sem, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+	check_sem_state(sem, 0, 2);
-+
-+	ret = wait_any(fd, 1, &sem, 123, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(ETIMEDOUT, errno);
-+
-+	count = 3;
-+	ret = post_sem(sem, &count);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EOVERFLOW, errno);
-+	check_sem_state(sem, 0, 2);
-+
-+	count = 2;
-+	ret = post_sem(sem, &count);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, count);
-+	check_sem_state(sem, 2, 2);
-+
-+	ret = wait_any(fd, 1, &sem, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	ret = wait_any(fd, 1, &sem, 123, &index);
-+	EXPECT_EQ(0, ret);
-+
-+	count = 1;
-+	ret = post_sem(sem, &count);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, count);
-+	check_sem_state(sem, 1, 2);
-+
-+	count = ~0u;
-+	ret = post_sem(sem, &count);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EOVERFLOW, errno);
-+	check_sem_state(sem, 1, 2);
-+
-+	close(sem);
-+
-+	close(fd);
-+}
-+
-+TEST(mutex_state)
-+{
-+	struct ntsync_mutex_args mutex_args;
-+	__u32 owner, count, index;
-+	struct timespec timeout;
-+	int fd, ret, mutex;
-+
-+	clock_gettime(CLOCK_MONOTONIC, &timeout);
-+
-+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
-+	ASSERT_LE(0, fd);
-+
-+	mutex_args.owner = 123;
-+	mutex_args.count = 0;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EINVAL, errno);
-+
-+	mutex_args.owner = 0;
-+	mutex_args.count = 2;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EINVAL, errno);
-+
-+	mutex_args.owner = 123;
-+	mutex_args.count = 2;
-+	mutex_args.mutex = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, mutex_args.mutex);
-+	mutex = mutex_args.mutex;
-+	check_mutex_state(mutex, 2, 123);
-+
-+	ret = unlock_mutex(mutex, 0, &count);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EINVAL, errno);
-+
-+	ret = unlock_mutex(mutex, 456, &count);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EPERM, errno);
-+	check_mutex_state(mutex, 2, 123);
-+
-+	ret = unlock_mutex(mutex, 123, &count);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(2, count);
-+	check_mutex_state(mutex, 1, 123);
-+
-+	ret = unlock_mutex(mutex, 123, &count);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(1, count);
-+	check_mutex_state(mutex, 0, 0);
-+
-+	ret = unlock_mutex(mutex, 123, &count);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EPERM, errno);
-+
-+	ret = wait_any(fd, 1, &mutex, 456, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+	check_mutex_state(mutex, 1, 456);
-+
-+	ret = wait_any(fd, 1, &mutex, 456, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+	check_mutex_state(mutex, 2, 456);
-+
-+	ret = unlock_mutex(mutex, 456, &count);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(2, count);
-+	check_mutex_state(mutex, 1, 456);
-+
-+	ret = wait_any(fd, 1, &mutex, 123, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(ETIMEDOUT, errno);
-+
-+	owner = 0;
-+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EINVAL, errno);
-+
-+	owner = 123;
-+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EPERM, errno);
-+	check_mutex_state(mutex, 1, 456);
-+
-+	owner = 456;
-+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
-+	EXPECT_EQ(0, ret);
-+
-+	memset(&mutex_args, 0xcc, sizeof(mutex_args));
-+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EOWNERDEAD, errno);
-+	EXPECT_EQ(0, mutex_args.count);
-+	EXPECT_EQ(0, mutex_args.owner);
-+
-+	memset(&mutex_args, 0xcc, sizeof(mutex_args));
-+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EOWNERDEAD, errno);
-+	EXPECT_EQ(0, mutex_args.count);
-+	EXPECT_EQ(0, mutex_args.owner);
-+
-+	ret = wait_any(fd, 1, &mutex, 123, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EOWNERDEAD, errno);
-+	EXPECT_EQ(0, index);
-+	check_mutex_state(mutex, 1, 123);
-+
-+	owner = 123;
-+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
-+	EXPECT_EQ(0, ret);
-+
-+	memset(&mutex_args, 0xcc, sizeof(mutex_args));
-+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EOWNERDEAD, errno);
-+	EXPECT_EQ(0, mutex_args.count);
-+	EXPECT_EQ(0, mutex_args.owner);
-+
-+	ret = wait_any(fd, 1, &mutex, 123, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EOWNERDEAD, errno);
-+	EXPECT_EQ(0, index);
-+	check_mutex_state(mutex, 1, 123);
-+
-+	close(mutex);
-+
-+	mutex_args.owner = 0;
-+	mutex_args.count = 0;
-+	mutex_args.mutex = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, mutex_args.mutex);
-+	mutex = mutex_args.mutex;
-+	check_mutex_state(mutex, 0, 0);
-+
-+	ret = wait_any(fd, 1, &mutex, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+	check_mutex_state(mutex, 1, 123);
-+
-+	close(mutex);
-+
-+	mutex_args.owner = 123;
-+	mutex_args.count = ~0u;
-+	mutex_args.mutex = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, mutex_args.mutex);
-+	mutex = mutex_args.mutex;
-+	check_mutex_state(mutex, ~0u, 123);
-+
-+	ret = wait_any(fd, 1, &mutex, 123, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(ETIMEDOUT, errno);
-+
-+	close(mutex);
-+
-+	close(fd);
-+}
-+
-+TEST(manual_event_state)
-+{
-+	struct ntsync_event_args event_args;
-+	__u32 index, signaled;
-+	int fd, event, ret;
-+
-+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
-+	ASSERT_LE(0, fd);
-+
-+	event_args.manual = 1;
-+	event_args.signaled = 0;
-+	event_args.event = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, event_args.event);
-+	event = event_args.event;
-+	check_event_state(event, 0, 1);
-+
-+	signaled = 0xdeadbeef;
-+	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, signaled);
-+	check_event_state(event, 1, 1);
-+
-+	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(1, signaled);
-+	check_event_state(event, 1, 1);
-+
-+	ret = wait_any(fd, 1, &event, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+	check_event_state(event, 1, 1);
-+
-+	signaled = 0xdeadbeef;
-+	ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(1, signaled);
-+	check_event_state(event, 0, 1);
-+
-+	ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, signaled);
-+	check_event_state(event, 0, 1);
-+
-+	ret = wait_any(fd, 1, &event, 123, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(ETIMEDOUT, errno);
-+
-+	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, signaled);
-+
-+	ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(1, signaled);
-+	check_event_state(event, 0, 1);
-+
-+	ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, signaled);
-+	check_event_state(event, 0, 1);
-+
-+	close(event);
-+
-+	close(fd);
-+}
-+
-+TEST(auto_event_state)
-+{
-+	struct ntsync_event_args event_args;
-+	__u32 index, signaled;
-+	int fd, event, ret;
-+
-+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
-+	ASSERT_LE(0, fd);
-+
-+	event_args.manual = 0;
-+	event_args.signaled = 1;
-+	event_args.event = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, event_args.event);
-+	event = event_args.event;
-+
-+	check_event_state(event, 1, 0);
-+
-+	signaled = 0xdeadbeef;
-+	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(1, signaled);
-+	check_event_state(event, 1, 0);
-+
-+	ret = wait_any(fd, 1, &event, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+	check_event_state(event, 0, 0);
-+
-+	signaled = 0xdeadbeef;
-+	ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, signaled);
-+	check_event_state(event, 0, 0);
-+
-+	ret = wait_any(fd, 1, &event, 123, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(ETIMEDOUT, errno);
-+
-+	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, signaled);
-+
-+	ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(1, signaled);
-+	check_event_state(event, 0, 0);
-+
-+	ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, signaled);
-+	check_event_state(event, 0, 0);
-+
-+	close(event);
-+
-+	close(fd);
-+}
-+
-+TEST(test_wait_any)
-+{
-+	int objs[NTSYNC_MAX_WAIT_COUNT + 1], fd, ret;
-+	struct ntsync_mutex_args mutex_args = {0};
-+	struct ntsync_sem_args sem_args = {0};
-+	__u32 owner, index, count, i;
-+	struct timespec timeout;
-+
-+	clock_gettime(CLOCK_MONOTONIC, &timeout);
-+
-+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
-+	ASSERT_LE(0, fd);
-+
-+	sem_args.count = 2;
-+	sem_args.max = 3;
-+	sem_args.sem = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, sem_args.sem);
-+
-+	mutex_args.owner = 0;
-+	mutex_args.count = 0;
-+	mutex_args.mutex = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, mutex_args.mutex);
-+
-+	objs[0] = sem_args.sem;
-+	objs[1] = mutex_args.mutex;
-+
-+	ret = wait_any(fd, 2, objs, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+	check_sem_state(sem_args.sem, 1, 3);
-+	check_mutex_state(mutex_args.mutex, 0, 0);
-+
-+	ret = wait_any(fd, 2, objs, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+	check_sem_state(sem_args.sem, 0, 3);
-+	check_mutex_state(mutex_args.mutex, 0, 0);
-+
-+	ret = wait_any(fd, 2, objs, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(1, index);
-+	check_sem_state(sem_args.sem, 0, 3);
-+	check_mutex_state(mutex_args.mutex, 1, 123);
-+
-+	count = 1;
-+	ret = post_sem(sem_args.sem, &count);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, count);
-+
-+	ret = wait_any(fd, 2, objs, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+	check_sem_state(sem_args.sem, 0, 3);
-+	check_mutex_state(mutex_args.mutex, 1, 123);
-+
-+	ret = wait_any(fd, 2, objs, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(1, index);
-+	check_sem_state(sem_args.sem, 0, 3);
-+	check_mutex_state(mutex_args.mutex, 2, 123);
-+
-+	ret = wait_any(fd, 2, objs, 456, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(ETIMEDOUT, errno);
-+
-+	owner = 123;
-+	ret = ioctl(mutex_args.mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_any(fd, 2, objs, 456, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EOWNERDEAD, errno);
-+	EXPECT_EQ(1, index);
-+
-+	ret = wait_any(fd, 2, objs, 456, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(1, index);
-+
-+	/* test waiting on the same object twice */
-+	count = 2;
-+	ret = post_sem(sem_args.sem, &count);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, count);
-+
-+	objs[0] = objs[1] = sem_args.sem;
-+	ret = wait_any(fd, 2, objs, 456, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+	check_sem_state(sem_args.sem, 1, 3);
-+
-+	ret = wait_any(fd, 0, NULL, 456, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(ETIMEDOUT, errno);
-+
-+	for (i = 0; i < NTSYNC_MAX_WAIT_COUNT + 1; ++i)
-+		objs[i] = sem_args.sem;
-+
-+	ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT, objs, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+
-+	ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT + 1, objs, 123, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EINVAL, errno);
-+
-+	ret = wait_any(fd, -1, objs, 123, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EINVAL, errno);
-+
-+	close(sem_args.sem);
-+	close(mutex_args.mutex);
-+
-+	close(fd);
-+}
-+
-+TEST(test_wait_all)
-+{
-+	struct ntsync_event_args event_args = {0};
-+	struct ntsync_mutex_args mutex_args = {0};
-+	struct ntsync_sem_args sem_args = {0};
-+	__u32 owner, index, count;
-+	int objs[2], fd, ret;
-+
-+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
-+	ASSERT_LE(0, fd);
-+
-+	sem_args.count = 2;
-+	sem_args.max = 3;
-+	sem_args.sem = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, sem_args.sem);
-+
-+	mutex_args.owner = 0;
-+	mutex_args.count = 0;
-+	mutex_args.mutex = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, mutex_args.mutex);
-+
-+	event_args.manual = true;
-+	event_args.signaled = true;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
-+	EXPECT_EQ(0, ret);
-+
-+	objs[0] = sem_args.sem;
-+	objs[1] = mutex_args.mutex;
-+
-+	ret = wait_all(fd, 2, objs, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+	check_sem_state(sem_args.sem, 1, 3);
-+	check_mutex_state(mutex_args.mutex, 1, 123);
-+
-+	ret = wait_all(fd, 2, objs, 456, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(ETIMEDOUT, errno);
-+	check_sem_state(sem_args.sem, 1, 3);
-+	check_mutex_state(mutex_args.mutex, 1, 123);
-+
-+	ret = wait_all(fd, 2, objs, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+	check_sem_state(sem_args.sem, 0, 3);
-+	check_mutex_state(mutex_args.mutex, 2, 123);
-+
-+	ret = wait_all(fd, 2, objs, 123, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(ETIMEDOUT, errno);
-+	check_sem_state(sem_args.sem, 0, 3);
-+	check_mutex_state(mutex_args.mutex, 2, 123);
-+
-+	count = 3;
-+	ret = post_sem(sem_args.sem, &count);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, count);
-+
-+	ret = wait_all(fd, 2, objs, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+	check_sem_state(sem_args.sem, 2, 3);
-+	check_mutex_state(mutex_args.mutex, 3, 123);
-+
-+	owner = 123;
-+	ret = ioctl(mutex_args.mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_all(fd, 2, objs, 123, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EOWNERDEAD, errno);
-+	check_sem_state(sem_args.sem, 1, 3);
-+	check_mutex_state(mutex_args.mutex, 1, 123);
-+
-+	objs[0] = sem_args.sem;
-+	objs[1] = event_args.event;
-+	ret = wait_all(fd, 2, objs, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+	check_sem_state(sem_args.sem, 0, 3);
-+	check_event_state(event_args.event, 1, 1);
-+
-+	/* test waiting on the same object twice */
-+	objs[0] = objs[1] = sem_args.sem;
-+	ret = wait_all(fd, 2, objs, 123, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(EINVAL, errno);
-+
-+	close(sem_args.sem);
-+	close(mutex_args.mutex);
-+	close(event_args.event);
-+
-+	close(fd);
-+}
-+
-+struct wake_args {
-+	int fd;
-+	int obj;
-+};
-+
-+struct wait_args {
-+	int fd;
-+	unsigned long request;
-+	struct ntsync_wait_args *args;
-+	int ret;
-+	int err;
-+};
-+
-+static void *wait_thread(void *arg)
-+{
-+	struct wait_args *args = arg;
-+
-+	args->ret = ioctl(args->fd, args->request, args->args);
-+	args->err = errno;
-+	return NULL;
-+}
-+
-+static __u64 get_abs_timeout(unsigned int ms)
-+{
-+	struct timespec timeout;
-+	clock_gettime(CLOCK_MONOTONIC, &timeout);
-+	return (timeout.tv_sec * 1000000000) + timeout.tv_nsec + (ms * 1000000);
-+}
-+
-+static int wait_for_thread(pthread_t thread, unsigned int ms)
-+{
-+	struct timespec timeout;
-+
-+	clock_gettime(CLOCK_REALTIME, &timeout);
-+	timeout.tv_nsec += ms * 1000000;
-+	timeout.tv_sec += (timeout.tv_nsec / 1000000000);
-+	timeout.tv_nsec %= 1000000000;
-+	return pthread_timedjoin_np(thread, NULL, &timeout);
-+}
-+
-+TEST(wake_any)
-+{
-+	struct ntsync_event_args event_args = {0};
-+	struct ntsync_mutex_args mutex_args = {0};
-+	struct ntsync_wait_args wait_args = {0};
-+	struct ntsync_sem_args sem_args = {0};
-+	struct wait_args thread_args;
-+	__u32 count, index, signaled;
-+	int objs[2], fd, ret;
-+	pthread_t thread;
-+
-+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
-+	ASSERT_LE(0, fd);
-+
-+	sem_args.count = 0;
-+	sem_args.max = 3;
-+	sem_args.sem = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, sem_args.sem);
-+
-+	mutex_args.owner = 123;
-+	mutex_args.count = 1;
-+	mutex_args.mutex = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, mutex_args.mutex);
-+
-+	objs[0] = sem_args.sem;
-+	objs[1] = mutex_args.mutex;
-+
-+	/* test waking the semaphore */
-+
-+	wait_args.timeout = get_abs_timeout(1000);
-+	wait_args.objs = (uintptr_t)objs;
-+	wait_args.count = 2;
-+	wait_args.owner = 456;
-+	wait_args.index = 0xdeadbeef;
-+	thread_args.fd = fd;
-+	thread_args.args = &wait_args;
-+	thread_args.request = NTSYNC_IOC_WAIT_ANY;
-+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(ETIMEDOUT, ret);
-+
-+	count = 1;
-+	ret = post_sem(sem_args.sem, &count);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, count);
-+	check_sem_state(sem_args.sem, 0, 3);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, thread_args.ret);
-+	EXPECT_EQ(0, wait_args.index);
-+
-+	/* test waking the mutex */
-+
-+	/* first grab it again for owner 123 */
-+	ret = wait_any(fd, 1, &mutex_args.mutex, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+
-+	wait_args.timeout = get_abs_timeout(1000);
-+	wait_args.owner = 456;
-+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(ETIMEDOUT, ret);
-+
-+	ret = unlock_mutex(mutex_args.mutex, 123, &count);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(2, count);
-+
-+	ret = pthread_tryjoin_np(thread, NULL);
-+	EXPECT_EQ(EBUSY, ret);
-+
-+	ret = unlock_mutex(mutex_args.mutex, 123, &count);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(1, mutex_args.count);
-+	check_mutex_state(mutex_args.mutex, 1, 456);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, thread_args.ret);
-+	EXPECT_EQ(1, wait_args.index);
-+
-+	/* test waking events */
-+
-+	event_args.manual = false;
-+	event_args.signaled = false;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
-+	EXPECT_EQ(0, ret);
-+
-+	objs[1] = event_args.event;
-+	wait_args.timeout = get_abs_timeout(1000);
-+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(ETIMEDOUT, ret);
-+
-+	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_SET, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, signaled);
-+	check_event_state(event_args.event, 0, 0);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, thread_args.ret);
-+	EXPECT_EQ(1, wait_args.index);
-+
-+	wait_args.timeout = get_abs_timeout(1000);
-+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(ETIMEDOUT, ret);
-+
-+	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_PULSE, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, signaled);
-+	check_event_state(event_args.event, 0, 0);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, thread_args.ret);
-+	EXPECT_EQ(1, wait_args.index);
-+
-+	close(event_args.event);
-+
-+	event_args.manual = true;
-+	event_args.signaled = false;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
-+	EXPECT_EQ(0, ret);
-+
-+	objs[1] = event_args.event;
-+	wait_args.timeout = get_abs_timeout(1000);
-+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(ETIMEDOUT, ret);
-+
-+	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_SET, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, signaled);
-+	check_event_state(event_args.event, 1, 1);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, thread_args.ret);
-+	EXPECT_EQ(1, wait_args.index);
-+
-+	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(1, signaled);
-+
-+	wait_args.timeout = get_abs_timeout(1000);
-+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(ETIMEDOUT, ret);
-+
-+	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_PULSE, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, signaled);
-+	check_event_state(event_args.event, 0, 1);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, thread_args.ret);
-+	EXPECT_EQ(1, wait_args.index);
-+
-+	close(event_args.event);
-+
-+	/* delete an object while it's being waited on */
-+
-+	wait_args.timeout = get_abs_timeout(200);
-+	wait_args.owner = 123;
-+	objs[1] = mutex_args.mutex;
-+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(ETIMEDOUT, ret);
-+
-+	close(sem_args.sem);
-+	close(mutex_args.mutex);
-+
-+	ret = wait_for_thread(thread, 200);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(-1, thread_args.ret);
-+	EXPECT_EQ(ETIMEDOUT, thread_args.err);
-+
-+	close(fd);
-+}
-+
-+TEST(wake_all)
-+{
-+	struct ntsync_event_args manual_event_args = {0};
-+	struct ntsync_event_args auto_event_args = {0};
-+	struct ntsync_mutex_args mutex_args = {0};
-+	struct ntsync_wait_args wait_args = {0};
-+	struct ntsync_sem_args sem_args = {0};
-+	struct wait_args thread_args;
-+	__u32 count, index, signaled;
-+	int objs[4], fd, ret;
-+	pthread_t thread;
-+
-+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
-+	ASSERT_LE(0, fd);
-+
-+	sem_args.count = 0;
-+	sem_args.max = 3;
-+	sem_args.sem = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, sem_args.sem);
-+
-+	mutex_args.owner = 123;
-+	mutex_args.count = 1;
-+	mutex_args.mutex = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, mutex_args.mutex);
-+
-+	manual_event_args.manual = true;
-+	manual_event_args.signaled = true;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &manual_event_args);
-+	EXPECT_EQ(0, ret);
-+
-+	auto_event_args.manual = false;
-+	auto_event_args.signaled = true;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &auto_event_args);
-+	EXPECT_EQ(0, ret);
-+
-+	objs[0] = sem_args.sem;
-+	objs[1] = mutex_args.mutex;
-+	objs[2] = manual_event_args.event;
-+	objs[3] = auto_event_args.event;
-+
-+	wait_args.timeout = get_abs_timeout(1000);
-+	wait_args.objs = (uintptr_t)objs;
-+	wait_args.count = 4;
-+	wait_args.owner = 456;
-+	thread_args.fd = fd;
-+	thread_args.args = &wait_args;
-+	thread_args.request = NTSYNC_IOC_WAIT_ALL;
-+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(ETIMEDOUT, ret);
-+
-+	count = 1;
-+	ret = post_sem(sem_args.sem, &count);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, count);
-+
-+	ret = pthread_tryjoin_np(thread, NULL);
-+	EXPECT_EQ(EBUSY, ret);
-+
-+	check_sem_state(sem_args.sem, 1, 3);
-+
-+	ret = wait_any(fd, 1, &sem_args.sem, 123, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+
-+	ret = unlock_mutex(mutex_args.mutex, 123, &count);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(1, count);
-+
-+	ret = pthread_tryjoin_np(thread, NULL);
-+	EXPECT_EQ(EBUSY, ret);
-+
-+	check_mutex_state(mutex_args.mutex, 0, 0);
-+
-+	ret = ioctl(manual_event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(1, signaled);
-+
-+	count = 2;
-+	ret = post_sem(sem_args.sem, &count);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, count);
-+	check_sem_state(sem_args.sem, 2, 3);
-+
-+	ret = ioctl(auto_event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(1, signaled);
-+
-+	ret = ioctl(manual_event_args.event, NTSYNC_IOC_EVENT_SET, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, signaled);
-+
-+	ret = ioctl(auto_event_args.event, NTSYNC_IOC_EVENT_SET, &signaled);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, signaled);
-+
-+	check_sem_state(sem_args.sem, 1, 3);
-+	check_mutex_state(mutex_args.mutex, 1, 456);
-+	check_event_state(manual_event_args.event, 1, 1);
-+	check_event_state(auto_event_args.event, 0, 0);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, thread_args.ret);
-+
-+	/* delete an object while it's being waited on */
-+
-+	wait_args.timeout = get_abs_timeout(200);
-+	wait_args.owner = 123;
-+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(ETIMEDOUT, ret);
-+
-+	close(sem_args.sem);
-+	close(mutex_args.mutex);
-+	close(manual_event_args.event);
-+	close(auto_event_args.event);
-+
-+	ret = wait_for_thread(thread, 200);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(-1, thread_args.ret);
-+	EXPECT_EQ(ETIMEDOUT, thread_args.err);
-+
-+	close(fd);
-+}
-+
-+TEST(alert_any)
-+{
-+	struct ntsync_event_args event_args = {0};
-+	struct ntsync_wait_args wait_args = {0};
-+	struct ntsync_sem_args sem_args = {0};
-+	__u32 index, count, signaled;
-+	struct wait_args thread_args;
-+	int objs[2], fd, ret;
-+	pthread_t thread;
-+
-+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
-+	ASSERT_LE(0, fd);
-+
-+	sem_args.count = 0;
-+	sem_args.max = 2;
-+	sem_args.sem = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, sem_args.sem);
-+	objs[0] = sem_args.sem;
-+
-+	sem_args.count = 1;
-+	sem_args.max = 2;
-+	sem_args.sem = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, sem_args.sem);
-+	objs[1] = sem_args.sem;
-+
-+	event_args.manual = true;
-+	event_args.signaled = true;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_any_alert(fd, 0, NULL, 123, event_args.event, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+
-+	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_any_alert(fd, 0, NULL, 123, event_args.event, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(ETIMEDOUT, errno);
-+
-+	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_SET, &signaled);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(1, index);
-+
-+	ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(2, index);
-+
-+	/* test wakeup via alert */
-+
-+	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled);
-+	EXPECT_EQ(0, ret);
-+
-+	wait_args.timeout = get_abs_timeout(1000);
-+	wait_args.objs = (uintptr_t)objs;
-+	wait_args.count = 2;
-+	wait_args.owner = 123;
-+	wait_args.index = 0xdeadbeef;
-+	wait_args.alert = event_args.event;
-+	thread_args.fd = fd;
-+	thread_args.args = &wait_args;
-+	thread_args.request = NTSYNC_IOC_WAIT_ANY;
-+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(ETIMEDOUT, ret);
-+
-+	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_SET, &signaled);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, thread_args.ret);
-+	EXPECT_EQ(2, wait_args.index);
-+
-+	close(event_args.event);
-+
-+	/* test with an auto-reset event */
-+
-+	event_args.manual = false;
-+	event_args.signaled = true;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
-+	EXPECT_EQ(0, ret);
-+
-+	count = 1;
-+	ret = post_sem(objs[0], &count);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+
-+	ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(2, index);
-+
-+	ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(ETIMEDOUT, errno);
-+
-+	close(event_args.event);
-+
-+	close(objs[0]);
-+	close(objs[1]);
-+
-+	close(fd);
-+}
-+
-+TEST(alert_all)
-+{
-+	struct ntsync_event_args event_args = {0};
-+	struct ntsync_wait_args wait_args = {0};
-+	struct ntsync_sem_args sem_args = {0};
-+	struct wait_args thread_args;
-+	__u32 index, count, signaled;
-+	int objs[2], fd, ret;
-+	pthread_t thread;
-+
-+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
-+	ASSERT_LE(0, fd);
-+
-+	sem_args.count = 2;
-+	sem_args.max = 2;
-+	sem_args.sem = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, sem_args.sem);
-+	objs[0] = sem_args.sem;
-+
-+	sem_args.count = 1;
-+	sem_args.max = 2;
-+	sem_args.sem = 0xdeadbeef;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_NE(0xdeadbeef, sem_args.sem);
-+	objs[1] = sem_args.sem;
-+
-+	event_args.manual = true;
-+	event_args.signaled = true;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+
-+	ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(2, index);
-+
-+	/* test wakeup via alert */
-+
-+	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled);
-+	EXPECT_EQ(0, ret);
-+
-+	wait_args.timeout = get_abs_timeout(1000);
-+	wait_args.objs = (uintptr_t)objs;
-+	wait_args.count = 2;
-+	wait_args.owner = 123;
-+	wait_args.index = 0xdeadbeef;
-+	wait_args.alert = event_args.event;
-+	thread_args.fd = fd;
-+	thread_args.args = &wait_args;
-+	thread_args.request = NTSYNC_IOC_WAIT_ALL;
-+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(ETIMEDOUT, ret);
-+
-+	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_SET, &signaled);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_for_thread(thread, 100);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, thread_args.ret);
-+	EXPECT_EQ(2, wait_args.index);
-+
-+	close(event_args.event);
-+
-+	/* test with an auto-reset event */
-+
-+	event_args.manual = false;
-+	event_args.signaled = true;
-+	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
-+	EXPECT_EQ(0, ret);
-+
-+	count = 2;
-+	ret = post_sem(objs[1], &count);
-+	EXPECT_EQ(0, ret);
-+
-+	ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(0, index);
-+
-+	ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index);
-+	EXPECT_EQ(0, ret);
-+	EXPECT_EQ(2, index);
-+
-+	ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index);
-+	EXPECT_EQ(-1, ret);
-+	EXPECT_EQ(ETIMEDOUT, errno);
-+
-+	close(event_args.event);
-+
-+	close(objs[0]);
-+	close(objs[1]);
-+
-+	close(fd);
-+}
-+
-+#define STRESS_LOOPS 10000
-+#define STRESS_THREADS 4
-+
-+static unsigned int stress_counter;
-+static int stress_device, stress_start_event, stress_mutex;
-+
-+static void *stress_thread(void *arg)
-+{
-+	struct ntsync_wait_args wait_args = {0};
-+	__u32 index, count, i;
-+	int ret;
-+
-+	wait_args.timeout = UINT64_MAX;
-+	wait_args.count = 1;
-+	wait_args.objs = (uintptr_t)&stress_start_event;
-+	wait_args.owner = gettid();
-+	wait_args.index = 0xdeadbeef;
-+
-+	ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args);
-+
-+	wait_args.objs = (uintptr_t)&stress_mutex;
-+
-+	for (i = 0; i < STRESS_LOOPS; ++i) {
-+		ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args);
-+
-+		++stress_counter;
-+
-+		unlock_mutex(stress_mutex, wait_args.owner, &count);
-+	}
-+
-+	return NULL;
-+}
-+
-+TEST(stress_wait)
-+{
-+	struct ntsync_event_args event_args;
-+	struct ntsync_mutex_args mutex_args;
-+	pthread_t threads[STRESS_THREADS];
-+	__u32 signaled, i;
-+	int ret;
-+
-+	stress_device = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
-+	ASSERT_LE(0, stress_device);
-+
-+	mutex_args.owner = 0;
-+	mutex_args.count = 0;
-+	ret = ioctl(stress_device, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
-+	EXPECT_EQ(0, ret);
-+	stress_mutex = mutex_args.mutex;
-+
-+	event_args.manual = 1;
-+	event_args.signaled = 0;
-+	ret = ioctl(stress_device, NTSYNC_IOC_CREATE_EVENT, &event_args);
-+	EXPECT_EQ(0, ret);
-+	stress_start_event = event_args.event;
-+
-+	for (i = 0; i < STRESS_THREADS; ++i)
-+		pthread_create(&threads[i], NULL, stress_thread, NULL);
-+
-+	ret = ioctl(stress_start_event, NTSYNC_IOC_EVENT_SET, &signaled);
-+	EXPECT_EQ(0, ret);
-+
-+	for (i = 0; i < STRESS_THREADS; ++i) {
-+		ret = pthread_join(threads[i], NULL);
-+		EXPECT_EQ(0, ret);
-+	}
-+
-+	EXPECT_EQ(STRESS_LOOPS * STRESS_THREADS, stress_counter);
-+
-+	close(stress_start_event);
-+	close(stress_mutex);
-+	close(stress_device);
-+}
-+
-+TEST_HARNESS_MAIN
--- 
-2.46.0.rc1
-
-From 628f7cedd3a6dbd0c2b09bc027cc62e889ccdd57 Mon Sep 17 00:00:00 2001
-From: Peter Jung <admin@ptr1337.dev>
-Date: Mon, 15 Jul 2024 13:26:57 +0200
-Subject: [PATCH 09/11] perf-per-core
-
-Signed-off-by: Peter Jung <admin@ptr1337.dev>
----
- Documentation/arch/x86/topology.rst   |   4 +
- arch/x86/events/rapl.c                | 418 ++++++++++++++++++--------
- arch/x86/include/asm/processor.h      |   1 +
- arch/x86/include/asm/topology.h       |   1 +
- arch/x86/kernel/cpu/debugfs.c         |   1 +
- arch/x86/kernel/cpu/topology_common.c |   1 +
- 6 files changed, 305 insertions(+), 121 deletions(-)
-
-diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst
-index 7352ab89a55a..c12837e61bda 100644
---- a/Documentation/arch/x86/topology.rst
-+++ b/Documentation/arch/x86/topology.rst
-@@ -135,6 +135,10 @@ Thread-related topology information in the kernel:
-     The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
-     "core_id."
- 
-+  - topology_logical_core_id();
-+
-+    The logical core ID to which a thread belongs.
-+
- 
- 
- System topology examples
-diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
-index 0c5e7a7c43ac..cd808b699ccc 100644
---- a/arch/x86/events/rapl.c
-+++ b/arch/x86/events/rapl.c
-@@ -39,6 +39,10 @@
-  *	  event: rapl_energy_psys
-  *    perf code: 0x5
-  *
-+ *  per_core counter: consumption of a single physical core
-+ *	  event: rapl_energy_per_core (power_per_core PMU)
-+ *    perf code: 0x1
-+ *
-  * We manage those counters as free running (read-only). They may be
-  * use simultaneously by other tools, such as turbostat.
-  *
-@@ -70,18 +74,25 @@ MODULE_LICENSE("GPL");
- /*
-  * RAPL energy status counters
-  */
--enum perf_rapl_events {
-+enum perf_rapl_pkg_events {
- 	PERF_RAPL_PP0 = 0,		/* all cores */
- 	PERF_RAPL_PKG,			/* entire package */
- 	PERF_RAPL_RAM,			/* DRAM */
- 	PERF_RAPL_PP1,			/* gpu */
- 	PERF_RAPL_PSYS,			/* psys */
- 
--	PERF_RAPL_MAX,
--	NR_RAPL_DOMAINS = PERF_RAPL_MAX,
-+	PERF_RAPL_PKG_EVENTS_MAX,
-+	NR_RAPL_PKG_DOMAINS = PERF_RAPL_PKG_EVENTS_MAX,
-+};
-+
-+enum perf_rapl_core_events {
-+	PERF_RAPL_PER_CORE = 0,		/* per-core */
-+
-+	PERF_RAPL_CORE_EVENTS_MAX,
-+	NR_RAPL_CORE_DOMAINS = PERF_RAPL_CORE_EVENTS_MAX,
- };
- 
--static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
-+static const char *const rapl_pkg_domain_names[NR_RAPL_PKG_DOMAINS] __initconst = {
- 	"pp0-core",
- 	"package",
- 	"dram",
-@@ -89,6 +100,10 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
- 	"psys",
- };
- 
-+static const char *const rapl_core_domain_names[NR_RAPL_CORE_DOMAINS] __initconst = {
-+	"per-core",
-+};
-+
- /*
-  * event code: LSB 8 bits, passed in attr->config
-  * any other bit is reserved
-@@ -103,6 +118,10 @@ static struct perf_pmu_events_attr event_attr_##v = {				\
- 	.event_str	= str,							\
- };
- 
-+#define rapl_pmu_is_pkg_scope()				\
-+	(boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||	\
-+	 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
-+
- struct rapl_pmu {
- 	raw_spinlock_t		lock;
- 	int			n_active;
-@@ -115,8 +134,9 @@ struct rapl_pmu {
- 
- struct rapl_pmus {
- 	struct pmu		pmu;
-+	cpumask_t		cpumask;
- 	unsigned int		nr_rapl_pmu;
--	struct rapl_pmu		*pmus[] __counted_by(nr_rapl_pmu);
-+	struct rapl_pmu		*rapl_pmu[] __counted_by(nr_rapl_pmu);
- };
- 
- enum rapl_unit_quirk {
-@@ -126,29 +146,45 @@ enum rapl_unit_quirk {
- };
- 
- struct rapl_model {
--	struct perf_msr *rapl_msrs;
--	unsigned long	events;
-+	struct perf_msr *rapl_pkg_msrs;
-+	struct perf_msr *rapl_core_msrs;
-+	unsigned long	pkg_events;
-+	unsigned long	core_events;
- 	unsigned int	msr_power_unit;
- 	enum rapl_unit_quirk	unit_quirk;
- };
- 
-  /* 1/2^hw_unit Joule */
--static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
--static struct rapl_pmus *rapl_pmus;
--static cpumask_t rapl_cpu_mask;
--static unsigned int rapl_cntr_mask;
-+static int rapl_hw_unit[NR_RAPL_PKG_DOMAINS] __read_mostly;
-+static struct rapl_pmus *rapl_pmus_pkg;
-+static struct rapl_pmus *rapl_pmus_core;
-+static unsigned int rapl_pkg_cntr_mask;
-+static unsigned int rapl_core_cntr_mask;
- static u64 rapl_timer_ms;
--static struct perf_msr *rapl_msrs;
-+static struct rapl_model *rapl_model;
-+
-+static inline unsigned int get_rapl_pmu_idx(int cpu)
-+{
-+	return rapl_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) :
-+					 topology_logical_die_id(cpu);
-+}
-+
-+static inline const struct cpumask *get_rapl_pmu_cpumask(int cpu)
-+{
-+	return rapl_pmu_is_pkg_scope() ? topology_core_cpumask(cpu) :
-+					 topology_die_cpumask(cpu);
-+}
- 
- static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
- {
--	unsigned int rapl_pmu_idx = topology_logical_die_id(cpu);
-+	unsigned int rapl_pmu_idx = get_rapl_pmu_idx(cpu);
- 
- 	/*
- 	 * The unsigned check also catches the '-1' return value for non
- 	 * existent mappings in the topology map.
- 	 */
--	return rapl_pmu_idx < rapl_pmus->nr_rapl_pmu ? rapl_pmus->pmus[rapl_pmu_idx] : NULL;
-+	return rapl_pmu_idx < rapl_pmus_pkg->nr_rapl_pmu ?
-+	       rapl_pmus_pkg->rapl_pmu[rapl_pmu_idx] : NULL;
- }
- 
- static inline u64 rapl_read_counter(struct perf_event *event)
-@@ -160,7 +196,7 @@ static inline u64 rapl_read_counter(struct perf_event *event)
- 
- static inline u64 rapl_scale(u64 v, int cfg)
- {
--	if (cfg > NR_RAPL_DOMAINS) {
-+	if (cfg > NR_RAPL_PKG_DOMAINS) {
- 		pr_warn("Invalid domain %d, failed to scale data\n", cfg);
- 		return v;
- 	}
-@@ -212,34 +248,34 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu)
- 
- static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
- {
--	struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
-+	struct rapl_pmu *rapl_pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
- 	struct perf_event *event;
- 	unsigned long flags;
- 
--	if (!pmu->n_active)
-+	if (!rapl_pmu->n_active)
- 		return HRTIMER_NORESTART;
- 
--	raw_spin_lock_irqsave(&pmu->lock, flags);
-+	raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
- 
--	list_for_each_entry(event, &pmu->active_list, active_entry)
-+	list_for_each_entry(event, &rapl_pmu->active_list, active_entry)
- 		rapl_event_update(event);
- 
--	raw_spin_unlock_irqrestore(&pmu->lock, flags);
-+	raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
- 
--	hrtimer_forward_now(hrtimer, pmu->timer_interval);
-+	hrtimer_forward_now(hrtimer, rapl_pmu->timer_interval);
- 
- 	return HRTIMER_RESTART;
- }
- 
--static void rapl_hrtimer_init(struct rapl_pmu *pmu)
-+static void rapl_hrtimer_init(struct rapl_pmu *rapl_pmu)
- {
--	struct hrtimer *hr = &pmu->hrtimer;
-+	struct hrtimer *hr = &rapl_pmu->hrtimer;
- 
- 	hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- 	hr->function = rapl_hrtimer_handle;
- }
- 
--static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
-+static void __rapl_pmu_event_start(struct rapl_pmu *rapl_pmu,
- 				   struct perf_event *event)
- {
- 	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
-@@ -247,39 +283,39 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
- 
- 	event->hw.state = 0;
- 
--	list_add_tail(&event->active_entry, &pmu->active_list);
-+	list_add_tail(&event->active_entry, &rapl_pmu->active_list);
- 
- 	local64_set(&event->hw.prev_count, rapl_read_counter(event));
- 
--	pmu->n_active++;
--	if (pmu->n_active == 1)
--		rapl_start_hrtimer(pmu);
-+	rapl_pmu->n_active++;
-+	if (rapl_pmu->n_active == 1)
-+		rapl_start_hrtimer(rapl_pmu);
- }
- 
- static void rapl_pmu_event_start(struct perf_event *event, int mode)
- {
--	struct rapl_pmu *pmu = event->pmu_private;
-+	struct rapl_pmu *rapl_pmu = event->pmu_private;
- 	unsigned long flags;
- 
--	raw_spin_lock_irqsave(&pmu->lock, flags);
--	__rapl_pmu_event_start(pmu, event);
--	raw_spin_unlock_irqrestore(&pmu->lock, flags);
-+	raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
-+	__rapl_pmu_event_start(rapl_pmu, event);
-+	raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
- }
- 
- static void rapl_pmu_event_stop(struct perf_event *event, int mode)
- {
--	struct rapl_pmu *pmu = event->pmu_private;
-+	struct rapl_pmu *rapl_pmu = event->pmu_private;
- 	struct hw_perf_event *hwc = &event->hw;
- 	unsigned long flags;
- 
--	raw_spin_lock_irqsave(&pmu->lock, flags);
-+	raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
- 
- 	/* mark event as deactivated and stopped */
- 	if (!(hwc->state & PERF_HES_STOPPED)) {
--		WARN_ON_ONCE(pmu->n_active <= 0);
--		pmu->n_active--;
--		if (pmu->n_active == 0)
--			hrtimer_cancel(&pmu->hrtimer);
-+		WARN_ON_ONCE(rapl_pmu->n_active <= 0);
-+		rapl_pmu->n_active--;
-+		if (rapl_pmu->n_active == 0)
-+			hrtimer_cancel(&rapl_pmu->hrtimer);
- 
- 		list_del(&event->active_entry);
- 
-@@ -297,23 +333,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode)
- 		hwc->state |= PERF_HES_UPTODATE;
- 	}
- 
--	raw_spin_unlock_irqrestore(&pmu->lock, flags);
-+	raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
- }
- 
- static int rapl_pmu_event_add(struct perf_event *event, int mode)
- {
--	struct rapl_pmu *pmu = event->pmu_private;
-+	struct rapl_pmu *rapl_pmu = event->pmu_private;
- 	struct hw_perf_event *hwc = &event->hw;
- 	unsigned long flags;
- 
--	raw_spin_lock_irqsave(&pmu->lock, flags);
-+	raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
- 
- 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
- 
- 	if (mode & PERF_EF_START)
--		__rapl_pmu_event_start(pmu, event);
-+		__rapl_pmu_event_start(rapl_pmu, event);
- 
--	raw_spin_unlock_irqrestore(&pmu->lock, flags);
-+	raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
- 
- 	return 0;
- }
-@@ -327,10 +363,14 @@ static int rapl_pmu_event_init(struct perf_event *event)
- {
- 	u64 cfg = event->attr.config & RAPL_EVENT_MASK;
- 	int bit, ret = 0;
--	struct rapl_pmu *pmu;
-+	struct rapl_pmu *rapl_pmu;
-+	struct rapl_pmus *curr_rapl_pmus;
- 
- 	/* only look at RAPL events */
--	if (event->attr.type != rapl_pmus->pmu.type)
-+	if (event->attr.type == rapl_pmus_pkg->pmu.type ||
-+		(rapl_pmus_core && event->attr.type == rapl_pmus_core->pmu.type))
-+		curr_rapl_pmus = container_of(event->pmu, struct rapl_pmus, pmu);
-+	else
- 		return -ENOENT;
- 
- 	/* check only supported bits are set */
-@@ -340,16 +380,18 @@ static int rapl_pmu_event_init(struct perf_event *event)
- 	if (event->cpu < 0)
- 		return -EINVAL;
- 
--	event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
-+	if (curr_rapl_pmus == rapl_pmus_pkg)
-+		event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
- 
--	if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
-+	if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1)
- 		return -EINVAL;
- 
--	cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1);
-+	cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1);
- 	bit = cfg - 1;
- 
- 	/* check event supported */
--	if (!(rapl_cntr_mask & (1 << bit)))
-+	if (!(rapl_pkg_cntr_mask & (1 << bit)) &&
-+	    !(rapl_core_cntr_mask & (1 << bit)))
- 		return -EINVAL;
- 
- 	/* unsupported modes and filters */
-@@ -357,12 +399,18 @@ static int rapl_pmu_event_init(struct perf_event *event)
- 		return -EINVAL;
- 
- 	/* must be done before validate_group */
--	pmu = cpu_to_rapl_pmu(event->cpu);
--	if (!pmu)
-+	if (curr_rapl_pmus == rapl_pmus_core) {
-+		rapl_pmu = curr_rapl_pmus->rapl_pmu[topology_logical_core_id(event->cpu)];
-+		event->hw.event_base = rapl_model->rapl_core_msrs[bit].msr;
-+	} else {
-+		rapl_pmu = curr_rapl_pmus->rapl_pmu[get_rapl_pmu_idx(event->cpu)];
-+		event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr;
-+	}
-+
-+	if (!rapl_pmu)
- 		return -EINVAL;
--	event->cpu = pmu->cpu;
--	event->pmu_private = pmu;
--	event->hw.event_base = rapl_msrs[bit].msr;
-+	event->cpu = rapl_pmu->cpu;
-+	event->pmu_private = rapl_pmu;
- 	event->hw.config = cfg;
- 	event->hw.idx = bit;
- 
-@@ -377,7 +425,7 @@ static void rapl_pmu_event_read(struct perf_event *event)
- static ssize_t rapl_get_attr_cpumask(struct device *dev,
- 				struct device_attribute *attr, char *buf)
- {
--	return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
-+	return cpumap_print_to_pagebuf(true, buf, &rapl_pmus_pkg->cpumask);
- }
- 
- static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
-@@ -391,17 +439,38 @@ static struct attribute_group rapl_pmu_attr_group = {
- 	.attrs = rapl_pmu_attrs,
- };
- 
-+static ssize_t rapl_get_attr_per_core_cpumask(struct device *dev,
-+					     struct device_attribute *attr, char *buf)
-+{
-+	return cpumap_print_to_pagebuf(true, buf, &rapl_pmus_core->cpumask);
-+}
-+
-+static struct device_attribute dev_attr_per_core_cpumask = __ATTR(cpumask, 0444,
-+								 rapl_get_attr_per_core_cpumask,
-+								 NULL);
-+
-+static struct attribute *rapl_pmu_per_core_attrs[] = {
-+	&dev_attr_per_core_cpumask.attr,
-+	NULL,
-+};
-+
-+static struct attribute_group rapl_pmu_per_core_attr_group = {
-+	.attrs = rapl_pmu_per_core_attrs,
-+};
-+
- RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
- RAPL_EVENT_ATTR_STR(energy-pkg  ,   rapl_pkg, "event=0x02");
- RAPL_EVENT_ATTR_STR(energy-ram  ,   rapl_ram, "event=0x03");
- RAPL_EVENT_ATTR_STR(energy-gpu  ,   rapl_gpu, "event=0x04");
- RAPL_EVENT_ATTR_STR(energy-psys,   rapl_psys, "event=0x05");
-+RAPL_EVENT_ATTR_STR(energy-per-core,   rapl_per_core, "event=0x01");
- 
- RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
- RAPL_EVENT_ATTR_STR(energy-pkg.unit  ,   rapl_pkg_unit, "Joules");
- RAPL_EVENT_ATTR_STR(energy-ram.unit  ,   rapl_ram_unit, "Joules");
- RAPL_EVENT_ATTR_STR(energy-gpu.unit  ,   rapl_gpu_unit, "Joules");
- RAPL_EVENT_ATTR_STR(energy-psys.unit,   rapl_psys_unit, "Joules");
-+RAPL_EVENT_ATTR_STR(energy-per-core.unit,   rapl_per_core_unit, "Joules");
- 
- /*
-  * we compute in 0.23 nJ increments regardless of MSR
-@@ -411,6 +480,7 @@ RAPL_EVENT_ATTR_STR(energy-pkg.scale,     rapl_pkg_scale, "2.3283064365386962890
- RAPL_EVENT_ATTR_STR(energy-ram.scale,     rapl_ram_scale, "2.3283064365386962890625e-10");
- RAPL_EVENT_ATTR_STR(energy-gpu.scale,     rapl_gpu_scale, "2.3283064365386962890625e-10");
- RAPL_EVENT_ATTR_STR(energy-psys.scale,   rapl_psys_scale, "2.3283064365386962890625e-10");
-+RAPL_EVENT_ATTR_STR(energy-per-core.scale,   rapl_per_core_scale, "2.3283064365386962890625e-10");
- 
- /*
-  * There are no default events, but we need to create
-@@ -444,6 +514,13 @@ static const struct attribute_group *rapl_attr_groups[] = {
- 	NULL,
- };
- 
-+static const struct attribute_group *rapl_per_core_attr_groups[] = {
-+	&rapl_pmu_per_core_attr_group,
-+	&rapl_pmu_format_group,
-+	&rapl_pmu_events_group,
-+	NULL,
-+};
-+
- static struct attribute *rapl_events_cores[] = {
- 	EVENT_PTR(rapl_cores),
- 	EVENT_PTR(rapl_cores_unit),
-@@ -504,6 +581,18 @@ static struct attribute_group rapl_events_psys_group = {
- 	.attrs = rapl_events_psys,
- };
- 
-+static struct attribute *rapl_events_per_core[] = {
-+	EVENT_PTR(rapl_per_core),
-+	EVENT_PTR(rapl_per_core_unit),
-+	EVENT_PTR(rapl_per_core_scale),
-+	NULL,
-+};
-+
-+static struct attribute_group rapl_events_per_core_group = {
-+	.name  = "events",
-+	.attrs = rapl_events_per_core,
-+};
-+
- static bool test_msr(int idx, void *data)
- {
- 	return test_bit(idx, (unsigned long *) data);
-@@ -529,11 +618,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = {
- };
- 
- /*
-- * Force to PERF_RAPL_MAX size due to:
-- * - perf_msr_probe(PERF_RAPL_MAX)
-+ * Force to PERF_RAPL_PKG_EVENTS_MAX size due to:
-+ * - perf_msr_probe(PERF_RAPL_PKG_EVENTS_MAX)
-  * - want to use same event codes across both architectures
-  */
--static struct perf_msr amd_rapl_msrs[] = {
-+static struct perf_msr amd_rapl_pkg_msrs[] = {
- 	[PERF_RAPL_PP0]  = { 0, &rapl_events_cores_group, NULL, false, 0 },
- 	[PERF_RAPL_PKG]  = { MSR_AMD_PKG_ENERGY_STATUS,  &rapl_events_pkg_group,   test_msr, false, RAPL_MSR_MASK },
- 	[PERF_RAPL_RAM]  = { 0, &rapl_events_ram_group,   NULL, false, 0 },
-@@ -541,72 +630,104 @@ static struct perf_msr amd_rapl_msrs[] = {
- 	[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group,  NULL, false, 0 },
- };
- 
--static int rapl_cpu_offline(unsigned int cpu)
-+static struct perf_msr amd_rapl_core_msrs[] = {
-+	[PERF_RAPL_PER_CORE] = { MSR_AMD_CORE_ENERGY_STATUS, &rapl_events_per_core_group,
-+				 test_msr, false, RAPL_MSR_MASK },
-+};
-+
-+static int __rapl_cpu_offline(struct rapl_pmus *rapl_pmus, unsigned int rapl_pmu_idx,
-+			      const struct cpumask *event_cpumask, unsigned int cpu)
- {
--	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
-+	struct rapl_pmu *rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx];
- 	int target;
- 
- 	/* Check if exiting cpu is used for collecting rapl events */
--	if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
-+	if (!cpumask_test_and_clear_cpu(cpu, &rapl_pmus->cpumask))
- 		return 0;
- 
--	pmu->cpu = -1;
-+	rapl_pmu->cpu = -1;
- 	/* Find a new cpu to collect rapl events */
--	target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
-+	target = cpumask_any_but(event_cpumask, cpu);
- 
- 	/* Migrate rapl events to the new target */
- 	if (target < nr_cpu_ids) {
--		cpumask_set_cpu(target, &rapl_cpu_mask);
--		pmu->cpu = target;
--		perf_pmu_migrate_context(pmu->pmu, cpu, target);
-+		cpumask_set_cpu(target, &rapl_pmus->cpumask);
-+		rapl_pmu->cpu = target;
-+		perf_pmu_migrate_context(rapl_pmu->pmu, cpu, target);
- 	}
- 	return 0;
- }
- 
--static int rapl_cpu_online(unsigned int cpu)
-+static int rapl_cpu_offline(unsigned int cpu)
-+{
-+	int ret =  __rapl_cpu_offline(rapl_pmus_pkg, get_rapl_pmu_idx(cpu),
-+				  get_rapl_pmu_cpumask(cpu), cpu);
-+
-+	if (ret == 0 && rapl_model->core_events)
-+		ret = __rapl_cpu_offline(rapl_pmus_core, topology_logical_core_id(cpu),
-+				   topology_sibling_cpumask(cpu), cpu);
-+
-+	return ret;
-+}
-+
-+static int __rapl_cpu_online(struct rapl_pmus *rapl_pmus, unsigned int rapl_pmu_idx,
-+			     const struct cpumask *event_cpumask, unsigned int cpu)
- {
--	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
-+	struct rapl_pmu *rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx];
- 	int target;
- 
--	if (!pmu) {
--		pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
--		if (!pmu)
-+	if (!rapl_pmu) {
-+		rapl_pmu = kzalloc_node(sizeof(*rapl_pmu), GFP_KERNEL, cpu_to_node(cpu));
-+		if (!rapl_pmu)
- 			return -ENOMEM;
- 
--		raw_spin_lock_init(&pmu->lock);
--		INIT_LIST_HEAD(&pmu->active_list);
--		pmu->pmu = &rapl_pmus->pmu;
--		pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
--		rapl_hrtimer_init(pmu);
-+		raw_spin_lock_init(&rapl_pmu->lock);
-+		INIT_LIST_HEAD(&rapl_pmu->active_list);
-+		rapl_pmu->pmu = &rapl_pmus->pmu;
-+		rapl_pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
-+		rapl_hrtimer_init(rapl_pmu);
- 
--		rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
-+		rapl_pmus->rapl_pmu[rapl_pmu_idx] = rapl_pmu;
- 	}
- 
- 	/*
- 	 * Check if there is an online cpu in the package which collects rapl
- 	 * events already.
- 	 */
--	target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu));
-+	target = cpumask_any_and(&rapl_pmus->cpumask, event_cpumask);
- 	if (target < nr_cpu_ids)
- 		return 0;
- 
--	cpumask_set_cpu(cpu, &rapl_cpu_mask);
--	pmu->cpu = cpu;
-+	cpumask_set_cpu(cpu, &rapl_pmus->cpumask);
-+	rapl_pmu->cpu = cpu;
- 	return 0;
- }
- 
--static int rapl_check_hw_unit(struct rapl_model *rm)
-+static int rapl_cpu_online(unsigned int cpu)
-+{
-+	int ret =  __rapl_cpu_online(rapl_pmus_pkg, get_rapl_pmu_idx(cpu),
-+				 get_rapl_pmu_cpumask(cpu), cpu);
-+
-+	if (ret == 0 && rapl_model->core_events)
-+		ret = __rapl_cpu_online(rapl_pmus_core, topology_logical_core_id(cpu),
-+				   topology_sibling_cpumask(cpu), cpu);
-+
-+	return ret;
-+}
-+
-+
-+static int rapl_check_hw_unit(void)
- {
- 	u64 msr_rapl_power_unit_bits;
- 	int i;
- 
- 	/* protect rdmsrl() to handle virtualization */
--	if (rdmsrl_safe(rm->msr_power_unit, &msr_rapl_power_unit_bits))
-+	if (rdmsrl_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits))
- 		return -1;
--	for (i = 0; i < NR_RAPL_DOMAINS; i++)
-+	for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++)
- 		rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
- 
--	switch (rm->unit_quirk) {
-+	switch (rapl_model->unit_quirk) {
- 	/*
- 	 * DRAM domain on HSW server and KNL has fixed energy unit which can be
- 	 * different than the unit from power unit MSR. See
-@@ -645,22 +766,29 @@ static void __init rapl_advertise(void)
- 	int i;
- 
- 	pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
--		hweight32(rapl_cntr_mask), rapl_timer_ms);
-+		hweight32(rapl_pkg_cntr_mask) + hweight32(rapl_core_cntr_mask), rapl_timer_ms);
- 
--	for (i = 0; i < NR_RAPL_DOMAINS; i++) {
--		if (rapl_cntr_mask & (1 << i)) {
-+	for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) {
-+		if (rapl_pkg_cntr_mask & (1 << i)) {
- 			pr_info("hw unit of domain %s 2^-%d Joules\n",
--				rapl_domain_names[i], rapl_hw_unit[i]);
-+				rapl_pkg_domain_names[i], rapl_hw_unit[i]);
-+		}
-+	}
-+
-+	for (i = 0; i < NR_RAPL_CORE_DOMAINS; i++) {
-+		if (rapl_core_cntr_mask & (1 << i)) {
-+			pr_info("hw unit of domain %s 2^-%d Joules\n",
-+				rapl_core_domain_names[i], rapl_hw_unit[i]);
- 		}
- 	}
- }
- 
--static void cleanup_rapl_pmus(void)
-+static void cleanup_rapl_pmus(struct rapl_pmus *rapl_pmus)
- {
- 	int i;
- 
- 	for (i = 0; i < rapl_pmus->nr_rapl_pmu; i++)
--		kfree(rapl_pmus->pmus[i]);
-+		kfree(rapl_pmus->rapl_pmu[i]);
- 	kfree(rapl_pmus);
- }
- 
-@@ -673,11 +801,17 @@ static const struct attribute_group *rapl_attr_update[] = {
- 	NULL,
- };
- 
--static int __init init_rapl_pmus(void)
-+static const struct attribute_group *rapl_per_core_attr_update[] = {
-+	&rapl_events_per_core_group,
-+};
-+
-+static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int nr_rapl_pmu,
-+				 const struct attribute_group **rapl_attr_groups,
-+				 const struct attribute_group **rapl_attr_update)
- {
--	int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
-+	struct rapl_pmus *rapl_pmus;
- 
--	rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL);
-+	rapl_pmus = kzalloc(struct_size(rapl_pmus, rapl_pmu, nr_rapl_pmu), GFP_KERNEL);
- 	if (!rapl_pmus)
- 		return -ENOMEM;
- 
-@@ -693,75 +827,80 @@ static int __init init_rapl_pmus(void)
- 	rapl_pmus->pmu.read		= rapl_pmu_event_read;
- 	rapl_pmus->pmu.module		= THIS_MODULE;
- 	rapl_pmus->pmu.capabilities	= PERF_PMU_CAP_NO_EXCLUDE;
-+
-+	*rapl_pmus_ptr = rapl_pmus;
-+
- 	return 0;
- }
- 
- static struct rapl_model model_snb = {
--	.events		= BIT(PERF_RAPL_PP0) |
-+	.pkg_events	= BIT(PERF_RAPL_PP0) |
- 			  BIT(PERF_RAPL_PKG) |
- 			  BIT(PERF_RAPL_PP1),
- 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
--	.rapl_msrs      = intel_rapl_msrs,
-+	.rapl_pkg_msrs	= intel_rapl_msrs,
- };
- 
- static struct rapl_model model_snbep = {
--	.events		= BIT(PERF_RAPL_PP0) |
-+	.pkg_events	= BIT(PERF_RAPL_PP0) |
- 			  BIT(PERF_RAPL_PKG) |
- 			  BIT(PERF_RAPL_RAM),
- 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
--	.rapl_msrs      = intel_rapl_msrs,
-+	.rapl_pkg_msrs	= intel_rapl_msrs,
- };
- 
- static struct rapl_model model_hsw = {
--	.events		= BIT(PERF_RAPL_PP0) |
-+	.pkg_events	= BIT(PERF_RAPL_PP0) |
- 			  BIT(PERF_RAPL_PKG) |
- 			  BIT(PERF_RAPL_RAM) |
- 			  BIT(PERF_RAPL_PP1),
- 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
--	.rapl_msrs      = intel_rapl_msrs,
-+	.rapl_pkg_msrs	= intel_rapl_msrs,
- };
- 
- static struct rapl_model model_hsx = {
--	.events		= BIT(PERF_RAPL_PP0) |
-+	.pkg_events	= BIT(PERF_RAPL_PP0) |
- 			  BIT(PERF_RAPL_PKG) |
- 			  BIT(PERF_RAPL_RAM),
- 	.unit_quirk	= RAPL_UNIT_QUIRK_INTEL_HSW,
- 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
--	.rapl_msrs      = intel_rapl_msrs,
-+	.rapl_pkg_msrs	= intel_rapl_msrs,
- };
- 
- static struct rapl_model model_knl = {
--	.events		= BIT(PERF_RAPL_PKG) |
-+	.pkg_events	= BIT(PERF_RAPL_PKG) |
- 			  BIT(PERF_RAPL_RAM),
- 	.unit_quirk	= RAPL_UNIT_QUIRK_INTEL_HSW,
- 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
--	.rapl_msrs      = intel_rapl_msrs,
-+	.rapl_pkg_msrs	= intel_rapl_msrs,
- };
- 
- static struct rapl_model model_skl = {
--	.events		= BIT(PERF_RAPL_PP0) |
-+	.pkg_events	= BIT(PERF_RAPL_PP0) |
- 			  BIT(PERF_RAPL_PKG) |
- 			  BIT(PERF_RAPL_RAM) |
- 			  BIT(PERF_RAPL_PP1) |
- 			  BIT(PERF_RAPL_PSYS),
- 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
--	.rapl_msrs      = intel_rapl_msrs,
-+	.rapl_pkg_msrs	= intel_rapl_msrs,
- };
- 
- static struct rapl_model model_spr = {
--	.events		= BIT(PERF_RAPL_PP0) |
-+	.pkg_events	= BIT(PERF_RAPL_PP0) |
- 			  BIT(PERF_RAPL_PKG) |
- 			  BIT(PERF_RAPL_RAM) |
- 			  BIT(PERF_RAPL_PSYS),
- 	.unit_quirk	= RAPL_UNIT_QUIRK_INTEL_SPR,
- 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
--	.rapl_msrs      = intel_rapl_spr_msrs,
-+	.rapl_pkg_msrs	= intel_rapl_spr_msrs,
- };
- 
- static struct rapl_model model_amd_hygon = {
--	.events		= BIT(PERF_RAPL_PKG),
-+	.pkg_events	= BIT(PERF_RAPL_PKG),
-+	.core_events	= BIT(PERF_RAPL_PER_CORE),
- 	.msr_power_unit = MSR_AMD_RAPL_POWER_UNIT,
--	.rapl_msrs      = amd_rapl_msrs,
-+	.rapl_pkg_msrs	= amd_rapl_pkg_msrs,
-+	.rapl_core_msrs	= amd_rapl_core_msrs,
- };
- 
- static const struct x86_cpu_id rapl_model_match[] __initconst = {
-@@ -817,28 +956,47 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
- static int __init rapl_pmu_init(void)
- {
- 	const struct x86_cpu_id *id;
--	struct rapl_model *rm;
- 	int ret;
-+	int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
-+	int nr_cores = topology_max_packages() * topology_num_cores_per_package();
-+
-+	if (rapl_pmu_is_pkg_scope())
-+		nr_rapl_pmu = topology_max_packages();
- 
- 	id = x86_match_cpu(rapl_model_match);
- 	if (!id)
- 		return -ENODEV;
- 
--	rm = (struct rapl_model *) id->driver_data;
--
--	rapl_msrs = rm->rapl_msrs;
-+	rapl_model = (struct rapl_model *) id->driver_data;
- 
--	rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX,
--					false, (void *) &rm->events);
-+	rapl_pkg_cntr_mask = perf_msr_probe(rapl_model->rapl_pkg_msrs, PERF_RAPL_PKG_EVENTS_MAX,
-+					false, (void *) &rapl_model->pkg_events);
- 
--	ret = rapl_check_hw_unit(rm);
-+	ret = rapl_check_hw_unit();
- 	if (ret)
- 		return ret;
- 
--	ret = init_rapl_pmus();
-+	ret = init_rapl_pmus(&rapl_pmus_pkg, nr_rapl_pmu, rapl_attr_groups, rapl_attr_update);
- 	if (ret)
- 		return ret;
- 
-+	if (rapl_model->core_events) {
-+		rapl_core_cntr_mask = perf_msr_probe(rapl_model->rapl_core_msrs,
-+						     PERF_RAPL_CORE_EVENTS_MAX, false,
-+						     (void *) &rapl_model->core_events);
-+
-+		ret = init_rapl_pmus(&rapl_pmus_core, nr_cores,
-+				     rapl_per_core_attr_groups, rapl_per_core_attr_update);
-+		if (ret) {
-+			/*
-+			 * If initialization of per_core PMU fails, reset per_core
-+			 * flag, and continue with power PMU initialization.
-+			 */
-+			pr_warn("Per-core PMU initialization failed (%d)\n", ret);
-+			rapl_model->core_events = 0UL;
-+		}
-+	}
-+
- 	/*
- 	 * Install callbacks. Core will call them for each online cpu.
- 	 */
-@@ -848,10 +1006,24 @@ static int __init rapl_pmu_init(void)
- 	if (ret)
- 		goto out;
- 
--	ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
-+	ret = perf_pmu_register(&rapl_pmus_pkg->pmu, "power", -1);
- 	if (ret)
- 		goto out1;
- 
-+	if (rapl_model->core_events) {
-+		ret = perf_pmu_register(&rapl_pmus_core->pmu, "power_per_core", -1);
-+		if (ret) {
-+			/*
-+			 * If registration of per_core PMU fails, cleanup per_core PMU
-+			 * variables, reset the per_core flag and keep the
-+			 * power PMU untouched.
-+			 */
-+			pr_warn("Per-core PMU registration failed (%d)\n", ret);
-+			cleanup_rapl_pmus(rapl_pmus_core);
-+			rapl_model->core_events = 0UL;
-+		}
-+	}
-+
- 	rapl_advertise();
- 	return 0;
- 
-@@ -859,7 +1031,7 @@ static int __init rapl_pmu_init(void)
- 	cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
- out:
- 	pr_warn("Initialization failed (%d), disabled\n", ret);
--	cleanup_rapl_pmus();
-+	cleanup_rapl_pmus(rapl_pmus_pkg);
- 	return ret;
- }
- module_init(rapl_pmu_init);
-@@ -867,7 +1039,11 @@ module_init(rapl_pmu_init);
- static void __exit intel_rapl_exit(void)
- {
- 	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
--	perf_pmu_unregister(&rapl_pmus->pmu);
--	cleanup_rapl_pmus();
-+	perf_pmu_unregister(&rapl_pmus_pkg->pmu);
-+	cleanup_rapl_pmus(rapl_pmus_pkg);
-+	if (rapl_model->core_events) {
-+		perf_pmu_unregister(&rapl_pmus_core->pmu);
-+		cleanup_rapl_pmus(rapl_pmus_core);
-+	}
- }
- module_exit(intel_rapl_exit);
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index cb4f6c513c48..1ffe4260bef6 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -98,6 +98,7 @@ struct cpuinfo_topology {
- 	// Logical ID mappings
- 	u32			logical_pkg_id;
- 	u32			logical_die_id;
-+	u32			logical_core_id;
- 
- 	// AMD Node ID and Nodes per Package info
- 	u32			amd_node_id;
-diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
-index abe3a8f22cbd..2a6dbf965d92 100644
---- a/arch/x86/include/asm/topology.h
-+++ b/arch/x86/include/asm/topology.h
-@@ -137,6 +137,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu);
- #define topology_logical_package_id(cpu)	(cpu_data(cpu).topo.logical_pkg_id)
- #define topology_physical_package_id(cpu)	(cpu_data(cpu).topo.pkg_id)
- #define topology_logical_die_id(cpu)		(cpu_data(cpu).topo.logical_die_id)
-+#define topology_logical_core_id(cpu)		(cpu_data(cpu).topo.logical_core_id)
- #define topology_die_id(cpu)			(cpu_data(cpu).topo.die_id)
- #define topology_core_id(cpu)			(cpu_data(cpu).topo.core_id)
- #define topology_ppin(cpu)			(cpu_data(cpu).ppin)
-diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c
-index 3baf3e435834..b1eb6d7828db 100644
---- a/arch/x86/kernel/cpu/debugfs.c
-+++ b/arch/x86/kernel/cpu/debugfs.c
-@@ -24,6 +24,7 @@ static int cpu_debug_show(struct seq_file *m, void *p)
- 	seq_printf(m, "core_id:             %u\n", c->topo.core_id);
- 	seq_printf(m, "logical_pkg_id:      %u\n", c->topo.logical_pkg_id);
- 	seq_printf(m, "logical_die_id:      %u\n", c->topo.logical_die_id);
-+	seq_printf(m, "logical_core_id:     %u\n", c->topo.logical_core_id);
- 	seq_printf(m, "llc_id:              %u\n", c->topo.llc_id);
- 	seq_printf(m, "l2c_id:              %u\n", c->topo.l2c_id);
- 	seq_printf(m, "amd_node_id:         %u\n", c->topo.amd_node_id);
-diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c
-index 9a6069e7133c..23722aa21e2f 100644
---- a/arch/x86/kernel/cpu/topology_common.c
-+++ b/arch/x86/kernel/cpu/topology_common.c
-@@ -151,6 +151,7 @@ static void topo_set_ids(struct topo_scan *tscan, bool early)
- 	if (!early) {
- 		c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN);
- 		c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN);
-+		c->topo.logical_core_id = topology_get_logical_id(apicid, TOPO_CORE_DOMAIN);
- 	}
- 
- 	/* Package relative core ID */
--- 
-2.46.0.rc1
-
-From c9314e79325672ebbcf4955ec4b995fd52f07e4c Mon Sep 17 00:00:00 2001
-From: Peter Jung <admin@ptr1337.dev>
-Date: Mon, 15 Jul 2024 13:27:08 +0200
-Subject: [PATCH 10/11] t2
-
-Signed-off-by: Peter Jung <admin@ptr1337.dev>
----
- .../ABI/testing/sysfs-driver-hid-appletb-kbd  |   13 +
- .../admin-guide/kernel-parameters.txt         |    2 +
- Documentation/core-api/printk-formats.rst     |   32 +
- Documentation/leds/well-known-leds.txt        |    8 +
- MAINTAINERS                                   |   18 +
- drivers/acpi/video_detect.c                   |   16 +
- .../firmware/efi/libstub/efi-stub-helper.c    |    3 +
- drivers/firmware/efi/libstub/efistub.h        |   14 +
- drivers/firmware/efi/libstub/x86-stub.c       |   27 +
- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |    3 +
- drivers/gpu/drm/drm_format_helper.c           |   54 +
- drivers/gpu/drm/i915/display/intel_ddi.c      |    4 +
- drivers/gpu/drm/i915/display/intel_fbdev.c    |    6 +-
- drivers/gpu/drm/i915/display/intel_quirks.c   |   15 +
- drivers/gpu/drm/i915/display/intel_quirks.h   |    1 +
- .../gpu/drm/tests/drm_format_helper_test.c    |   81 ++
- drivers/gpu/drm/tiny/Kconfig                  |   12 +
- drivers/gpu/drm/tiny/Makefile                 |    1 +
- drivers/gpu/drm/tiny/appletbdrm.c             |  624 +++++++++
- drivers/gpu/vga/vga_switcheroo.c              |    7 +-
- drivers/hid/Kconfig                           |   35 +
- drivers/hid/Makefile                          |    3 +
- drivers/hid/hid-apple-magic-backlight.c       |  120 ++
- drivers/hid/hid-appletb-bl.c                  |  193 +++
- drivers/hid/hid-appletb-kbd.c                 |  289 +++++
- drivers/hid/hid-core.c                        |   25 +
- drivers/hid/hid-google-hammer.c               |   27 +-
- drivers/hid/hid-multitouch.c                  |   60 +-
- drivers/hid/hid-quirks.c                      |    8 +-
- drivers/hwmon/applesmc.c                      | 1138 ++++++++++++-----
- drivers/input/mouse/bcm5974.c                 |  138 ++
- drivers/pci/vgaarb.c                          |    1 +
- drivers/platform/x86/apple-gmux.c             |   18 +
- drivers/staging/Kconfig                       |    2 +
- drivers/staging/Makefile                      |    1 +
- drivers/staging/apple-bce/Kconfig             |   18 +
- drivers/staging/apple-bce/Makefile            |   28 +
- drivers/staging/apple-bce/apple_bce.c         |  444 +++++++
- drivers/staging/apple-bce/apple_bce.h         |   38 +
- drivers/staging/apple-bce/audio/audio.c       |  711 ++++++++++
- drivers/staging/apple-bce/audio/audio.h       |  125 ++
- drivers/staging/apple-bce/audio/description.h |   42 +
- drivers/staging/apple-bce/audio/pcm.c         |  308 +++++
- drivers/staging/apple-bce/audio/pcm.h         |   16 +
- drivers/staging/apple-bce/audio/protocol.c    |  347 +++++
- drivers/staging/apple-bce/audio/protocol.h    |  147 +++
- .../staging/apple-bce/audio/protocol_bce.c    |  226 ++++
- .../staging/apple-bce/audio/protocol_bce.h    |   72 ++
- drivers/staging/apple-bce/mailbox.c           |  151 +++
- drivers/staging/apple-bce/mailbox.h           |   53 +
- drivers/staging/apple-bce/queue.c             |  390 ++++++
- drivers/staging/apple-bce/queue.h             |  177 +++
- drivers/staging/apple-bce/queue_dma.c         |  220 ++++
- drivers/staging/apple-bce/queue_dma.h         |   50 +
- drivers/staging/apple-bce/vhci/command.h      |  204 +++
- drivers/staging/apple-bce/vhci/queue.c        |  268 ++++
- drivers/staging/apple-bce/vhci/queue.h        |   76 ++
- drivers/staging/apple-bce/vhci/transfer.c     |  661 ++++++++++
- drivers/staging/apple-bce/vhci/transfer.h     |   73 ++
- drivers/staging/apple-bce/vhci/vhci.c         |  759 +++++++++++
- drivers/staging/apple-bce/vhci/vhci.h         |   52 +
- drivers/usb/core/driver.c                     |   14 +
- drivers/usb/storage/uas.c                     |    5 +-
- include/drm/drm_format_helper.h               |    3 +
- include/linux/efi.h                           |    1 +
- include/linux/hid.h                           |    2 +
- include/linux/usb.h                           |    3 +
- lib/test_printf.c                             |   20 +-
- lib/vsprintf.c                                |   36 +-
- scripts/checkpatch.pl                         |    2 +-
- 70 files changed, 8377 insertions(+), 364 deletions(-)
- create mode 100644 Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
- create mode 100644 drivers/gpu/drm/tiny/appletbdrm.c
- create mode 100644 drivers/hid/hid-apple-magic-backlight.c
- create mode 100644 drivers/hid/hid-appletb-bl.c
- create mode 100644 drivers/hid/hid-appletb-kbd.c
- create mode 100644 drivers/staging/apple-bce/Kconfig
- create mode 100644 drivers/staging/apple-bce/Makefile
- create mode 100644 drivers/staging/apple-bce/apple_bce.c
- create mode 100644 drivers/staging/apple-bce/apple_bce.h
- create mode 100644 drivers/staging/apple-bce/audio/audio.c
- create mode 100644 drivers/staging/apple-bce/audio/audio.h
- create mode 100644 drivers/staging/apple-bce/audio/description.h
- create mode 100644 drivers/staging/apple-bce/audio/pcm.c
- create mode 100644 drivers/staging/apple-bce/audio/pcm.h
- create mode 100644 drivers/staging/apple-bce/audio/protocol.c
- create mode 100644 drivers/staging/apple-bce/audio/protocol.h
- create mode 100644 drivers/staging/apple-bce/audio/protocol_bce.c
- create mode 100644 drivers/staging/apple-bce/audio/protocol_bce.h
- create mode 100644 drivers/staging/apple-bce/mailbox.c
- create mode 100644 drivers/staging/apple-bce/mailbox.h
- create mode 100644 drivers/staging/apple-bce/queue.c
- create mode 100644 drivers/staging/apple-bce/queue.h
- create mode 100644 drivers/staging/apple-bce/queue_dma.c
- create mode 100644 drivers/staging/apple-bce/queue_dma.h
- create mode 100644 drivers/staging/apple-bce/vhci/command.h
- create mode 100644 drivers/staging/apple-bce/vhci/queue.c
- create mode 100644 drivers/staging/apple-bce/vhci/queue.h
- create mode 100644 drivers/staging/apple-bce/vhci/transfer.c
- create mode 100644 drivers/staging/apple-bce/vhci/transfer.h
- create mode 100644 drivers/staging/apple-bce/vhci/vhci.c
- create mode 100644 drivers/staging/apple-bce/vhci/vhci.h
-
-diff --git a/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
-new file mode 100644
-index 000000000000..2a19584d091e
---- /dev/null
-+++ b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
-@@ -0,0 +1,13 @@
-+What:		/sys/bus/hid/drivers/hid-appletb-kbd/<dev>/mode
-+Date:		September, 2023
-+KernelVersion:	6.5
-+Contact:	linux-input@vger.kernel.org
-+Description:
-+		The set of keys displayed on the Touch Bar.
-+		Valid values are:
-+		== =================
-+		0  Escape key only
-+		1  Function keys
-+		2  Media/brightness keys
-+		3  None
-+		== =================
-diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
-index 07ac4c81a7dd..c083c476013f 100644
---- a/Documentation/admin-guide/kernel-parameters.txt
-+++ b/Documentation/admin-guide/kernel-parameters.txt
-@@ -415,6 +415,8 @@
- 			      useful so that a dump capture kernel won't be
- 			      shot down by NMI
- 
-+	apple_set_os	[KNL] Report that macOS is being booted to the firmware
-+
- 	autoconf=	[IPV6]
- 			See Documentation/networking/ipv6.rst.
- 
-diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
-index 4451ef501936..c726a846f752 100644
---- a/Documentation/core-api/printk-formats.rst
-+++ b/Documentation/core-api/printk-formats.rst
-@@ -632,6 +632,38 @@ Examples::
- 	%p4cc	Y10  little-endian (0x20303159)
- 	%p4cc	NV12 big-endian (0xb231564e)
- 
-+Generic FourCC code
-+-------------------
-+
-+::
-+	%p4c[hnbl]	gP00 (0x67503030)
-+
-+Print a generic FourCC code, as both ASCII characters and its numerical
-+value as hexadecimal.
-+
-+The additional ``h``, ``r``, ``b``, and ``l`` specifiers are used to specify
-+host, reversed, big or little endian order data respectively. Host endian
-+order means the data is interpreted as a 32-bit integer and the most
-+significant byte is printed first; that is, the character code as printed
-+matches the byte order stored in memory on big-endian systems, and is reversed
-+on little-endian systems.
-+
-+Passed by reference.
-+
-+Examples for a little-endian machine, given &(u32)0x67503030::
-+
-+	%p4ch	gP00 (0x67503030)
-+	%p4cl	gP00 (0x67503030)
-+	%p4cb	00Pg (0x30305067)
-+	%p4cr	00Pg (0x30305067)
-+
-+Examples for a big-endian machine, given &(u32)0x67503030::
-+
-+	%p4ch	gP00 (0x67503030)
-+	%p4cl	00Pg (0x30305067)
-+	%p4cb	gP00 (0x67503030)
-+	%p4cr	00Pg (0x30305067)
-+
- Rust
- ----
- 
-diff --git a/Documentation/leds/well-known-leds.txt b/Documentation/leds/well-known-leds.txt
-index 67b44704801f..34e472b363d7 100644
---- a/Documentation/leds/well-known-leds.txt
-+++ b/Documentation/leds/well-known-leds.txt
-@@ -44,6 +44,14 @@ Legacy: "lp5523:kb{1,2,3,4,5,6}" (Nokia N900)
- 
- Frontlight/backlight of main keyboard.
- 
-+Good: ":*:kbd_backlight"
-+Good: "input*:*:kbd_backlight"
-+Legacy: "*:*:kbd_backlight"
-+
-+Many drivers have the vendor or product name as the first field of the led name,
-+this makes names inconsistent and is redundant as that information is already in
-+sysfs.
-+
- Legacy: "button-backlight" (Motorola Droid 4)
- 
- Some phones have touch buttons below screen; it is different from main
-diff --git a/MAINTAINERS b/MAINTAINERS
-index b25b2a731512..94540127a563 100644
---- a/MAINTAINERS
-+++ b/MAINTAINERS
-@@ -6728,6 +6728,12 @@ S:	Supported
- T:	git https://gitlab.freedesktop.org/drm/misc/kernel.git
- F:	drivers/gpu/drm/sun4i/sun8i*
- 
-+DRM DRIVER FOR APPLE TOUCH BARS
-+M:	Kerem Karabay <kekrby@gmail.com>
-+L:	dri-devel@lists.freedesktop.org
-+S:	Maintained
-+F:	drivers/gpu/drm/tiny/appletbdrm.c
-+
- DRM DRIVER FOR ARM PL111 CLCD
- S:	Orphan
- T:	git https://gitlab.freedesktop.org/drm/misc/kernel.git
-@@ -9733,6 +9739,18 @@ F:	include/linux/pm.h
- F:	include/linux/suspend.h
- F:	kernel/power/
- 
-+HID APPLE TOUCH BAR DRIVERS
-+M:	Kerem Karabay <kekrby@gmail.com>
-+L:	linux-input@vger.kernel.org
-+S:	Maintained
-+F:	drivers/hid/hid-appletb-*
-+
-+HID APPLE MAGIC BACKLIGHT DRIVER
-+M:	Orlando Chamberlain <orlandoch.dev@gmail.com>
-+L:	linux-input@vger.kernel.org
-+S:	Maintained
-+F:	drivers/hid/apple-magic-backlight.c
-+
- HID CORE LAYER
- M:	Jiri Kosina <jikos@kernel.org>
- M:	Benjamin Tissoires <bentiss@kernel.org>
-diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
-index 2cc3821b2b16..c11cbe5b6eaa 100644
---- a/drivers/acpi/video_detect.c
-+++ b/drivers/acpi/video_detect.c
-@@ -539,6 +539,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
- 		DMI_MATCH(DMI_PRODUCT_NAME, "iMac12,2"),
- 		},
- 	},
-+	{
-+	 .callback = video_detect_force_native,
-+	 /* Apple MacBook Air 9,1 */
-+	 .matches = {
-+		DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
-+		DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir9,1"),
-+		},
-+	},
- 	{
- 	 /* https://bugzilla.redhat.com/show_bug.cgi?id=1217249 */
- 	 .callback = video_detect_force_native,
-@@ -548,6 +556,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
- 		DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro12,1"),
- 		},
- 	},
-+	{
-+	 .callback = video_detect_force_native,
-+	 /* Apple MacBook Pro 16,2 */
-+	 .matches = {
-+		DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
-+		DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro16,2"),
-+		},
-+	},
- 	{
- 	 .callback = video_detect_force_native,
- 	 /* Dell Inspiron N4010 */
-diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
-index de659f6a815f..f00a419a29be 100644
---- a/drivers/firmware/efi/libstub/efi-stub-helper.c
-+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
-@@ -20,6 +20,7 @@
- bool efi_nochunk;
- bool efi_nokaslr = !IS_ENABLED(CONFIG_RANDOMIZE_BASE);
- bool efi_novamap;
-+bool efi_apple_set_os;
- 
- static bool efi_noinitrd;
- static bool efi_nosoftreserve;
-@@ -76,6 +77,8 @@ efi_status_t efi_parse_options(char const *cmdline)
- 			efi_loglevel = CONSOLE_LOGLEVEL_QUIET;
- 		} else if (!strcmp(param, "noinitrd")) {
- 			efi_noinitrd = true;
-+		} else if (!strcmp(param, "apple_set_os")) {
-+			efi_apple_set_os = true;
- 		} else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
- 			efi_no5lvl = true;
- 		} else if (IS_ENABLED(CONFIG_ARCH_HAS_MEM_ENCRYPT) &&
-diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
-index 27abb4ce0291..89750d043ed8 100644
---- a/drivers/firmware/efi/libstub/efistub.h
-+++ b/drivers/firmware/efi/libstub/efistub.h
-@@ -39,6 +39,7 @@ extern bool efi_nokaslr;
- extern int efi_loglevel;
- extern int efi_mem_encrypt;
- extern bool efi_novamap;
-+extern bool efi_apple_set_os;
- extern const efi_system_table_t *efi_system_table;
- 
- typedef union efi_dxe_services_table efi_dxe_services_table_t;
-@@ -825,6 +826,19 @@ union apple_properties_protocol {
- 	} mixed_mode;
- };
- 
-+typedef struct apple_set_os_protocol apple_set_os_protocol_t;
-+
-+struct apple_set_os_protocol {
-+	u64 version;
-+	efi_status_t (__efiapi *set_os_version) (const char *);
-+	efi_status_t (__efiapi *set_os_vendor) (const char *);
-+	struct {
-+		u32 version;
-+		u32 set_os_version;
-+		u32 set_os_vendor;
-+	} mixed_mode;
-+};
-+
- typedef u32 efi_tcg2_event_log_format;
- 
- #define INITRD_EVENT_TAG_ID 0x8F3B22ECU
-diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
-index 1983fd3bf392..49a89a844df7 100644
---- a/drivers/firmware/efi/libstub/x86-stub.c
-+++ b/drivers/firmware/efi/libstub/x86-stub.c
-@@ -225,6 +225,30 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
- 	}
- }
- 
-+static void apple_set_os(void)
-+{
-+	efi_guid_t guid = APPLE_SET_OS_PROTOCOL_GUID;
-+	apple_set_os_protocol_t *set_os;
-+	efi_status_t status;
-+
-+	status = efi_bs_call(locate_protocol, &guid, NULL, (void **)&set_os);
-+	if (status != EFI_SUCCESS)
-+		return;
-+
-+	if (efi_table_attr(set_os, version) >= 2) {
-+		status = efi_fn_call(set_os, set_os_vendor, "Apple Inc.");
-+		if (status != EFI_SUCCESS)
-+			efi_err("Failed to set OS vendor via apple_set_os\n");
-+	}
-+
-+	/* The version being set doesn't seem to matter */
-+	if (efi_table_attr(set_os, version) > 0) {
-+		status = efi_fn_call(set_os, set_os_version, "Mac OS X 10.9");
-+		if (status != EFI_SUCCESS)
-+			efi_err("Failed to set OS version via apple_set_os\n");
-+	}
-+}
-+
- efi_status_t efi_adjust_memory_range_protection(unsigned long start,
- 						unsigned long size)
- {
-@@ -338,6 +362,9 @@ static void setup_quirks(struct boot_params *boot_params)
- 	if (IS_ENABLED(CONFIG_APPLE_PROPERTIES) &&
- 	    !memcmp(efistub_fw_vendor(), apple, sizeof(apple)))
- 		retrieve_apple_device_properties(boot_params);
-+
-+	if (efi_apple_set_os)
-+		apple_set_os();
- }
- 
- /*
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
-index bb0b636d0d75..a05ed98da785 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
-@@ -2211,6 +2211,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
- 	int ret, retry = 0, i;
- 	bool supports_atomic = false;
- 
-+	if (vga_switcheroo_client_probe_defer(pdev))
-+		return -EPROBE_DEFER;
-+
- 	/* skip devices which are owned by radeon */
- 	for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) {
- 		if (amdgpu_unsupported_pciidlist[i] == pdev->device)
-diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
-index b1be458ed4dd..28c0e76a1e88 100644
---- a/drivers/gpu/drm/drm_format_helper.c
-+++ b/drivers/gpu/drm/drm_format_helper.c
-@@ -702,6 +702,57 @@ void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pi
- }
- EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb888);
- 
-+static void drm_fb_xrgb8888_to_bgr888_line(void *dbuf, const void *sbuf, unsigned int pixels)
-+{
-+	u8 *dbuf8 = dbuf;
-+	const __le32 *sbuf32 = sbuf;
-+	unsigned int x;
-+	u32 pix;
-+
-+	for (x = 0; x < pixels; x++) {
-+		pix = le32_to_cpu(sbuf32[x]);
-+		/* write red-green-blue to output in little endianness */
-+		*dbuf8++ = (pix & 0x00FF0000) >> 16;
-+		*dbuf8++ = (pix & 0x0000FF00) >> 8;
-+		*dbuf8++ = (pix & 0x000000FF) >> 0;
-+	}
-+}
-+
-+/**
-+ * drm_fb_xrgb8888_to_bgr888 - Convert XRGB8888 to BGR888 clip buffer
-+ * @dst: Array of BGR888 destination buffers
-+ * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines
-+ *             within @dst; can be NULL if scanlines are stored next to each other.
-+ * @src: Array of XRGB8888 source buffers
-+ * @fb: DRM framebuffer
-+ * @clip: Clip rectangle area to copy
-+ * @state: Transform and conversion state
-+ *
-+ * This function copies parts of a framebuffer to display memory and converts the
-+ * color format during the process. Destination and framebuffer formats must match. The
-+ * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at
-+ * least as many entries as there are planes in @fb's format. Each entry stores the
-+ * value for the format's respective color plane at the same index.
-+ *
-+ * This function does not apply clipping on @dst (i.e. the destination is at the
-+ * top-left corner).
-+ *
-+ * Drivers can use this function for BGR888 devices that don't natively
-+ * support XRGB8888.
-+ */
-+void drm_fb_xrgb8888_to_bgr888(struct iosys_map *dst, const unsigned int *dst_pitch,
-+			       const struct iosys_map *src, const struct drm_framebuffer *fb,
-+			       const struct drm_rect *clip, struct drm_format_conv_state *state)
-+{
-+	static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = {
-+		3,
-+	};
-+
-+	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state,
-+		    drm_fb_xrgb8888_to_bgr888_line);
-+}
-+EXPORT_SYMBOL(drm_fb_xrgb8888_to_bgr888);
-+
- static void drm_fb_xrgb8888_to_argb8888_line(void *dbuf, const void *sbuf, unsigned int pixels)
- {
- 	__le32 *dbuf32 = dbuf;
-@@ -1035,6 +1086,9 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d
- 		} else if (dst_format == DRM_FORMAT_RGB888) {
- 			drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, clip, state);
- 			return 0;
-+		} else if (dst_format == DRM_FORMAT_BGR888) {
-+			drm_fb_xrgb8888_to_bgr888(dst, dst_pitch, src, fb, clip, state);
-+			return 0;
- 		} else if (dst_format == DRM_FORMAT_ARGB8888) {
- 			drm_fb_xrgb8888_to_argb8888(dst, dst_pitch, src, fb, clip, state);
- 			return 0;
-diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
-index 6bff169fa8d4..8d80ae00b838 100644
---- a/drivers/gpu/drm/i915/display/intel_ddi.c
-+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
-@@ -4648,6 +4648,7 @@ intel_ddi_init_hdmi_connector(struct intel_digital_port *dig_port)
- 
- static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port)
- {
-+	struct intel_display *display = to_intel_display(dig_port);
- 	struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
- 
- 	if (dig_port->base.port != PORT_A)
-@@ -4656,6 +4657,9 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port)
- 	if (dig_port->saved_port_bits & DDI_A_4_LANES)
- 		return false;
- 
-+	if (intel_has_quirk(display, QUIRK_DDI_A_FORCE_4_LANES))
-+		return true;
-+
- 	/* Broxton/Geminilake: Bspec says that DDI_A_4_LANES is the only
- 	 *                     supported configuration
- 	 */
-diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
-index bda702c2cab8..1647e141ae78 100644
---- a/drivers/gpu/drm/i915/display/intel_fbdev.c
-+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
-@@ -196,10 +196,10 @@ static int intelfb_create(struct drm_fb_helper *helper,
- 		return ret;
- 
- 	if (intel_fb &&
--	    (sizes->fb_width > intel_fb->base.width ||
--	     sizes->fb_height > intel_fb->base.height)) {
-+	    (sizes->fb_width != intel_fb->base.width ||
-+	     sizes->fb_height != intel_fb->base.height)) {
- 		drm_dbg_kms(&dev_priv->drm,
--			    "BIOS fb too small (%dx%d), we require (%dx%d),"
-+			    "BIOS fb not valid (%dx%d), we require (%dx%d),"
- 			    " releasing it\n",
- 			    intel_fb->base.width, intel_fb->base.height,
- 			    sizes->fb_width, sizes->fb_height);
-diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c
-index 14d5fefc9c5b..727639b8f6a6 100644
---- a/drivers/gpu/drm/i915/display/intel_quirks.c
-+++ b/drivers/gpu/drm/i915/display/intel_quirks.c
-@@ -59,6 +59,18 @@ static void quirk_increase_ddi_disabled_time(struct intel_display *display)
- 	drm_info(display->drm, "Applying Increase DDI Disabled quirk\n");
- }
- 
-+/*
-+ * In some cases, the firmware might not set the lane count to 4 (for example,
-+ * when booting in some dual GPU Macs with the dGPU as the default GPU), this
-+ * quirk is used to force it as otherwise it might not be possible to compute a
-+ * valid link configuration.
-+ */
-+static void quirk_ddi_a_force_4_lanes(struct intel_display *display)
-+{
-+	intel_set_quirk(display, QUIRK_DDI_A_FORCE_4_LANES);
-+	drm_info(display->drm, "Applying DDI A Forced 4 Lanes quirk\n");
-+}
-+
- static void quirk_no_pps_backlight_power_hook(struct intel_display *display)
- {
- 	intel_set_quirk(display, QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK);
-@@ -201,6 +213,9 @@ static struct intel_quirk intel_quirks[] = {
- 	{ 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time },
- 	/* HP Notebook - 14-r206nv */
- 	{ 0x0f31, 0x103c, 0x220f, quirk_invert_brightness },
-+
-+	/* Apple MacBookPro15,1 */
-+	{ 0x3e9b, 0x106b, 0x0176, quirk_ddi_a_force_4_lanes },
- };
- 
- void intel_init_quirks(struct intel_display *display)
-diff --git a/drivers/gpu/drm/i915/display/intel_quirks.h b/drivers/gpu/drm/i915/display/intel_quirks.h
-index 151c8f4ae576..46e7feba88f4 100644
---- a/drivers/gpu/drm/i915/display/intel_quirks.h
-+++ b/drivers/gpu/drm/i915/display/intel_quirks.h
-@@ -17,6 +17,7 @@ enum intel_quirk_id {
- 	QUIRK_INVERT_BRIGHTNESS,
- 	QUIRK_LVDS_SSC_DISABLE,
- 	QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK,
-+	QUIRK_DDI_A_FORCE_4_LANES,
- };
- 
- void intel_init_quirks(struct intel_display *display);
-diff --git a/drivers/gpu/drm/tests/drm_format_helper_test.c b/drivers/gpu/drm/tests/drm_format_helper_test.c
-index 08992636ec05..35cd3405d045 100644
---- a/drivers/gpu/drm/tests/drm_format_helper_test.c
-+++ b/drivers/gpu/drm/tests/drm_format_helper_test.c
-@@ -60,6 +60,11 @@ struct convert_to_rgb888_result {
- 	const u8 expected[TEST_BUF_SIZE];
- };
- 
-+struct convert_to_bgr888_result {
-+	unsigned int dst_pitch;
-+	const u8 expected[TEST_BUF_SIZE];
-+};
-+
- struct convert_to_argb8888_result {
- 	unsigned int dst_pitch;
- 	const u32 expected[TEST_BUF_SIZE];
-@@ -107,6 +112,7 @@ struct convert_xrgb8888_case {
- 	struct convert_to_argb1555_result argb1555_result;
- 	struct convert_to_rgba5551_result rgba5551_result;
- 	struct convert_to_rgb888_result rgb888_result;
-+	struct convert_to_bgr888_result bgr888_result;
- 	struct convert_to_argb8888_result argb8888_result;
- 	struct convert_to_xrgb2101010_result xrgb2101010_result;
- 	struct convert_to_argb2101010_result argb2101010_result;
-@@ -151,6 +157,10 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = {
- 			.dst_pitch = TEST_USE_DEFAULT_PITCH,
- 			.expected = { 0x00, 0x00, 0xFF },
- 		},
-+		.bgr888_result = {
-+			.dst_pitch = TEST_USE_DEFAULT_PITCH,
-+			.expected = { 0xFF, 0x00, 0x00 },
-+		},
- 		.argb8888_result = {
- 			.dst_pitch = TEST_USE_DEFAULT_PITCH,
- 			.expected = { 0xFFFF0000 },
-@@ -217,6 +227,10 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = {
- 			.dst_pitch = TEST_USE_DEFAULT_PITCH,
- 			.expected = { 0x00, 0x00, 0xFF },
- 		},
-+		.bgr888_result = {
-+			.dst_pitch = TEST_USE_DEFAULT_PITCH,
-+			.expected = { 0xFF, 0x00, 0x00 },
-+		},
- 		.argb8888_result = {
- 			.dst_pitch = TEST_USE_DEFAULT_PITCH,
- 			.expected = { 0xFFFF0000 },
-@@ -330,6 +344,15 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = {
- 				0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00,
- 			},
- 		},
-+		.bgr888_result = {
-+			.dst_pitch = TEST_USE_DEFAULT_PITCH,
-+			.expected = {
-+				0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00,
-+				0xFF, 0x00, 0x00, 0x00, 0xFF, 0x00,
-+				0x00, 0x00, 0xFF, 0xFF, 0x00, 0xFF,
-+				0xFF, 0xFF, 0x00, 0x00, 0xFF, 0xFF,
-+			},
-+		},
- 		.argb8888_result = {
- 			.dst_pitch = TEST_USE_DEFAULT_PITCH,
- 			.expected = {
-@@ -468,6 +491,17 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = {
- 				0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 			},
- 		},
-+		.bgr888_result = {
-+			.dst_pitch = 15,
-+			.expected = {
-+				0x0E, 0x44, 0x9C, 0x11, 0x4D, 0x05, 0xA8, 0xF3, 0x03,
-+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+				0x6C, 0xF0, 0x73, 0x0E, 0x44, 0x9C, 0x11, 0x4D, 0x05,
-+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+				0xA8, 0x03, 0x03, 0x6C, 0xF0, 0x73, 0x0E, 0x44, 0x9C,
-+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			},
-+		},
- 		.argb8888_result = {
- 			.dst_pitch = 20,
- 			.expected = {
-@@ -914,6 +948,52 @@ static void drm_test_fb_xrgb8888_to_rgb888(struct kunit *test)
- 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
- }
- 
-+static void drm_test_fb_xrgb8888_to_bgr888(struct kunit *test)
-+{
-+	const struct convert_xrgb8888_case *params = test->param_value;
-+	const struct convert_to_bgr888_result *result = &params->bgr888_result;
-+	size_t dst_size;
-+	u8 *buf = NULL;
-+	__le32 *xrgb8888 = NULL;
-+	struct iosys_map dst, src;
-+
-+	struct drm_framebuffer fb = {
-+		.format = drm_format_info(DRM_FORMAT_XRGB8888),
-+		.pitches = { params->pitch, 0, 0 },
-+	};
-+
-+	dst_size = conversion_buf_size(DRM_FORMAT_BGR888, result->dst_pitch,
-+				       &params->clip, 0);
-+	KUNIT_ASSERT_GT(test, dst_size, 0);
-+
-+	buf = kunit_kzalloc(test, dst_size, GFP_KERNEL);
-+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf);
-+	iosys_map_set_vaddr(&dst, buf);
-+
-+	xrgb8888 = cpubuf_to_le32(test, params->xrgb8888, TEST_BUF_SIZE);
-+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xrgb8888);
-+	iosys_map_set_vaddr(&src, xrgb8888);
-+
-+	/*
-+	 * BGR888 expected results are already in little-endian
-+	 * order, so there's no need to convert the test output.
-+	 */
-+	drm_fb_xrgb8888_to_bgr888(&dst, &result->dst_pitch, &src, &fb, &params->clip,
-+				  &fmtcnv_state);
-+	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
-+
-+	buf = dst.vaddr; /* restore original value of buf */
-+	memset(buf, 0, dst_size);
-+
-+	int blit_result = 0;
-+
-+	blit_result = drm_fb_blit(&dst, &result->dst_pitch, DRM_FORMAT_BGR888, &src, &fb, &params->clip,
-+				  &fmtcnv_state);
-+
-+	KUNIT_EXPECT_FALSE(test, blit_result);
-+	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
-+}
-+
- static void drm_test_fb_xrgb8888_to_argb8888(struct kunit *test)
- {
- 	const struct convert_xrgb8888_case *params = test->param_value;
-@@ -1851,6 +1931,7 @@ static struct kunit_case drm_format_helper_test_cases[] = {
- 	KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_argb1555, convert_xrgb8888_gen_params),
- 	KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_rgba5551, convert_xrgb8888_gen_params),
- 	KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_rgb888, convert_xrgb8888_gen_params),
-+	KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_bgr888, convert_xrgb8888_gen_params),
- 	KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_argb8888, convert_xrgb8888_gen_params),
- 	KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_xrgb2101010, convert_xrgb8888_gen_params),
- 	KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_argb2101010, convert_xrgb8888_gen_params),
-diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig
-index f6889f649bc1..559a97bce12c 100644
---- a/drivers/gpu/drm/tiny/Kconfig
-+++ b/drivers/gpu/drm/tiny/Kconfig
-@@ -1,5 +1,17 @@
- # SPDX-License-Identifier: GPL-2.0-only
- 
-+config DRM_APPLETBDRM
-+	tristate "DRM support for Apple Touch Bars"
-+	depends on DRM && USB && MMU
-+	select DRM_KMS_HELPER
-+	select DRM_GEM_SHMEM_HELPER
-+	help
-+	  Say Y here if you want support for the display of Touch Bars on x86
-+	  MacBook Pros.
-+
-+	  To compile this driver as a module, choose M here: the
-+	  module will be called appletbdrm.
-+
- config DRM_ARCPGU
- 	tristate "ARC PGU"
- 	depends on DRM && OF
-diff --git a/drivers/gpu/drm/tiny/Makefile b/drivers/gpu/drm/tiny/Makefile
-index 76dde89a044b..9a1b412e764a 100644
---- a/drivers/gpu/drm/tiny/Makefile
-+++ b/drivers/gpu/drm/tiny/Makefile
-@@ -1,5 +1,6 @@
- # SPDX-License-Identifier: GPL-2.0-only
- 
-+obj-$(CONFIG_DRM_APPLETBDRM)		+= appletbdrm.o
- obj-$(CONFIG_DRM_ARCPGU)		+= arcpgu.o
- obj-$(CONFIG_DRM_BOCHS)			+= bochs.o
- obj-$(CONFIG_DRM_CIRRUS_QEMU)		+= cirrus.o
-diff --git a/drivers/gpu/drm/tiny/appletbdrm.c b/drivers/gpu/drm/tiny/appletbdrm.c
-new file mode 100644
-index 000000000000..b9440ce0064e
---- /dev/null
-+++ b/drivers/gpu/drm/tiny/appletbdrm.c
-@@ -0,0 +1,624 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Apple Touch Bar DRM Driver
-+ *
-+ * Copyright (c) 2023 Kerem Karabay <kekrby@gmail.com>
-+ */
-+
-+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-+
-+#include <asm/unaligned.h>
-+
-+#include <linux/usb.h>
-+#include <linux/module.h>
-+
-+#include <drm/drm_drv.h>
-+#include <drm/drm_fourcc.h>
-+#include <drm/drm_probe_helper.h>
-+#include <drm/drm_atomic_helper.h>
-+#include <drm/drm_damage_helper.h>
-+#include <drm/drm_format_helper.h>
-+#include <drm/drm_gem_shmem_helper.h>
-+#include <drm/drm_gem_atomic_helper.h>
-+#include <drm/drm_simple_kms_helper.h>
-+#include <drm/drm_gem_framebuffer_helper.h>
-+
-+#define _APPLETBDRM_FOURCC(s)		(((s)[0] << 24) | ((s)[1] << 16) | ((s)[2] << 8) | (s)[3])
-+#define APPLETBDRM_FOURCC(s)		_APPLETBDRM_FOURCC(#s)
-+
-+#define APPLETBDRM_PIXEL_FORMAT		APPLETBDRM_FOURCC(RGBA) /* The actual format is BGR888 */
-+#define APPLETBDRM_BITS_PER_PIXEL	24
-+
-+#define APPLETBDRM_MSG_CLEAR_DISPLAY	APPLETBDRM_FOURCC(CLRD)
-+#define APPLETBDRM_MSG_GET_INFORMATION	APPLETBDRM_FOURCC(GINF)
-+#define APPLETBDRM_MSG_UPDATE_COMPLETE	APPLETBDRM_FOURCC(UDCL)
-+#define APPLETBDRM_MSG_SIGNAL_READINESS	APPLETBDRM_FOURCC(REDY)
-+
-+#define APPLETBDRM_BULK_MSG_TIMEOUT	1000
-+
-+#define drm_to_adev(_drm)		container_of(_drm, struct appletbdrm_device, drm)
-+#define adev_to_udev(adev)		interface_to_usbdev(to_usb_interface(adev->dev))
-+
-+struct appletbdrm_device {
-+	struct device *dev;
-+
-+	u8 in_ep;
-+	u8 out_ep;
-+
-+	u32 width;
-+	u32 height;
-+
-+	struct drm_device drm;
-+	struct drm_display_mode mode;
-+	struct drm_connector connector;
-+	struct drm_simple_display_pipe pipe;
-+
-+	bool readiness_signal_received;
-+};
-+
-+struct appletbdrm_request_header {
-+	__le16 unk_00;
-+	__le16 unk_02;
-+	__le32 unk_04;
-+	__le32 unk_08;
-+	__le32 size;
-+} __packed;
-+
-+struct appletbdrm_response_header {
-+	u8 unk_00[16];
-+	u32 msg;
-+} __packed;
-+
-+struct appletbdrm_simple_request {
-+	struct appletbdrm_request_header header;
-+	u32 msg;
-+	u8 unk_14[8];
-+	__le32 size;
-+} __packed;
-+
-+struct appletbdrm_information {
-+	struct appletbdrm_response_header header;
-+	u8 unk_14[12];
-+	__le32 width;
-+	__le32 height;
-+	u8 bits_per_pixel;
-+	__le32 bytes_per_row;
-+	__le32 orientation;
-+	__le32 bitmap_info;
-+	u32 pixel_format;
-+	__le32 width_inches;	/* floating point */
-+	__le32 height_inches;	/* floating point */
-+} __packed;
-+
-+struct appletbdrm_frame {
-+	__le16 begin_x;
-+	__le16 begin_y;
-+	__le16 width;
-+	__le16 height;
-+	__le32 buf_size;
-+	u8 buf[];
-+} __packed;
-+
-+struct appletbdrm_fb_request_footer {
-+	u8 unk_00[12];
-+	__le32 unk_0c;
-+	u8 unk_10[12];
-+	__le32 unk_1c;
-+	__le64 timestamp;
-+	u8 unk_28[12];
-+	__le32 unk_34;
-+	u8 unk_38[20];
-+	__le32 unk_4c;
-+} __packed;
-+
-+struct appletbdrm_fb_request {
-+	struct appletbdrm_request_header header;
-+	__le16 unk_10;
-+	u8 msg_id;
-+	u8 unk_13[29];
-+	/*
-+	 * Contents of `data`:
-+	 * - struct appletbdrm_frame frames[];
-+	 * - struct appletbdrm_fb_request_footer footer;
-+	 * - padding to make the total size a multiple of 16
-+	 */
-+	u8 data[];
-+} __packed;
-+
-+struct appletbdrm_fb_request_response {
-+	struct appletbdrm_response_header header;
-+	u8 unk_14[12];
-+	__le64 timestamp;
-+} __packed;
-+
-+static int appletbdrm_send_request(struct appletbdrm_device *adev,
-+				   struct appletbdrm_request_header *request, size_t size)
-+{
-+	struct usb_device *udev = adev_to_udev(adev);
-+	struct drm_device *drm = &adev->drm;
-+	int ret, actual_size;
-+
-+	ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, adev->out_ep),
-+			   request, size, &actual_size, APPLETBDRM_BULK_MSG_TIMEOUT);
-+	if (ret) {
-+		drm_err(drm, "Failed to send message (%pe)\n", ERR_PTR(ret));
-+		return ret;
-+	}
-+
-+	if (actual_size != size) {
-+		drm_err(drm, "Actual size (%d) doesn't match expected size (%lu)\n",
-+			actual_size, size);
-+		return -EIO;
-+	}
-+
-+	return ret;
-+}
-+
-+static int appletbdrm_read_response(struct appletbdrm_device *adev,
-+				    struct appletbdrm_response_header *response,
-+				    size_t size, u32 expected_response)
-+{
-+	struct usb_device *udev = adev_to_udev(adev);
-+	struct drm_device *drm = &adev->drm;
-+	int ret, actual_size;
-+
-+retry:
-+	ret = usb_bulk_msg(udev, usb_rcvbulkpipe(udev, adev->in_ep),
-+			   response, size, &actual_size, APPLETBDRM_BULK_MSG_TIMEOUT);
-+	if (ret) {
-+		drm_err(drm, "Failed to read response (%pe)\n", ERR_PTR(ret));
-+		return ret;
-+	}
-+
-+	/*
-+	 * The device responds to the first request sent in a particular
-+	 * timeframe after the USB device configuration is set with a readiness
-+	 * signal, in which case the response should be read again
-+	 */
-+	if (response->msg == APPLETBDRM_MSG_SIGNAL_READINESS) {
-+		if (!adev->readiness_signal_received) {
-+			adev->readiness_signal_received = true;
-+			goto retry;
-+		}
-+
-+		drm_err(drm, "Encountered unexpected readiness signal\n");
-+		return -EIO;
-+	}
-+
-+	if (actual_size != size) {
-+		drm_err(drm, "Actual size (%d) doesn't match expected size (%lu)\n",
-+			actual_size, size);
-+		return -EIO;
-+	}
-+
-+	if (response->msg != expected_response) {
-+		drm_err(drm, "Unexpected response from device (expected %p4ch found %p4ch)\n",
-+			&expected_response, &response->msg);
-+		return -EIO;
-+	}
-+
-+	return 0;
-+}
-+
-+static int appletbdrm_send_msg(struct appletbdrm_device *adev, u32 msg)
-+{
-+	struct appletbdrm_simple_request *request;
-+	int ret;
-+
-+	request = kzalloc(sizeof(*request), GFP_KERNEL);
-+	if (!request)
-+		return -ENOMEM;
-+
-+	request->header.unk_00 = cpu_to_le16(2);
-+	request->header.unk_02 = cpu_to_le16(0x1512);
-+	request->header.size = cpu_to_le32(sizeof(*request) - sizeof(request->header));
-+	request->msg = msg;
-+	request->size = request->header.size;
-+
-+	ret = appletbdrm_send_request(adev, &request->header, sizeof(*request));
-+
-+	kfree(request);
-+
-+	return ret;
-+}
-+
-+static int appletbdrm_clear_display(struct appletbdrm_device *adev)
-+{
-+	return appletbdrm_send_msg(adev, APPLETBDRM_MSG_CLEAR_DISPLAY);
-+}
-+
-+static int appletbdrm_signal_readiness(struct appletbdrm_device *adev)
-+{
-+	return appletbdrm_send_msg(adev, APPLETBDRM_MSG_SIGNAL_READINESS);
-+}
-+
-+static int appletbdrm_get_information(struct appletbdrm_device *adev)
-+{
-+	struct appletbdrm_information *info;
-+	struct drm_device *drm = &adev->drm;
-+	u8 bits_per_pixel;
-+	u32 pixel_format;
-+	int ret;
-+
-+	info = kzalloc(sizeof(*info), GFP_KERNEL);
-+	if (!info)
-+		return -ENOMEM;
-+
-+	ret = appletbdrm_send_msg(adev, APPLETBDRM_MSG_GET_INFORMATION);
-+	if (ret)
-+		return ret;
-+
-+	ret = appletbdrm_read_response(adev, &info->header, sizeof(*info),
-+				       APPLETBDRM_MSG_GET_INFORMATION);
-+	if (ret)
-+		goto free_info;
-+
-+	bits_per_pixel = info->bits_per_pixel;
-+	pixel_format = get_unaligned(&info->pixel_format);
-+
-+	adev->width = get_unaligned_le32(&info->width);
-+	adev->height = get_unaligned_le32(&info->height);
-+
-+	if (bits_per_pixel != APPLETBDRM_BITS_PER_PIXEL) {
-+		drm_err(drm, "Encountered unexpected bits per pixel value (%d)\n", bits_per_pixel);
-+		ret = -EINVAL;
-+		goto free_info;
-+	}
-+
-+	if (pixel_format != APPLETBDRM_PIXEL_FORMAT) {
-+		drm_err(drm, "Encountered unknown pixel format (%p4ch)\n", &pixel_format);
-+		ret = -EINVAL;
-+		goto free_info;
-+	}
-+
-+free_info:
-+	kfree(info);
-+
-+	return ret;
-+}
-+
-+static u32 rect_size(struct drm_rect *rect)
-+{
-+	return drm_rect_width(rect) * drm_rect_height(rect) * (APPLETBDRM_BITS_PER_PIXEL / 8);
-+}
-+
-+static int appletbdrm_flush_damage(struct appletbdrm_device *adev,
-+				   struct drm_plane_state *old_state,
-+				   struct drm_plane_state *state)
-+{
-+	struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(state);
-+	struct appletbdrm_fb_request_response *response;
-+	struct appletbdrm_fb_request_footer *footer;
-+	struct drm_atomic_helper_damage_iter iter;
-+	struct drm_framebuffer *fb = state->fb;
-+	struct appletbdrm_fb_request *request;
-+	struct drm_device *drm = &adev->drm;
-+	struct appletbdrm_frame *frame;
-+	u64 timestamp = ktime_get_ns();
-+	struct drm_rect damage;
-+	size_t frames_size = 0;
-+	size_t request_size;
-+	int ret;
-+
-+	drm_atomic_helper_damage_iter_init(&iter, old_state, state);
-+	drm_atomic_for_each_plane_damage(&iter, &damage) {
-+		frames_size += struct_size(frame, buf, rect_size(&damage));
-+	}
-+
-+	if (!frames_size)
-+		return 0;
-+
-+	request_size = ALIGN(sizeof(*request) + frames_size + sizeof(*footer), 16);
-+
-+	request = kzalloc(request_size, GFP_KERNEL);
-+	if (!request)
-+		return -ENOMEM;
-+
-+	response = kzalloc(sizeof(*response), GFP_KERNEL);
-+	if (!response) {
-+		ret = -ENOMEM;
-+		goto free_request;
-+	}
-+
-+	ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE);
-+	if (ret) {
-+		drm_err(drm, "Failed to start CPU framebuffer access (%pe)\n", ERR_PTR(ret));
-+		goto free_response;
-+	}
-+
-+	request->header.unk_00 = cpu_to_le16(2);
-+	request->header.unk_02 = cpu_to_le16(0x12);
-+	request->header.unk_04 = cpu_to_le32(9);
-+	request->header.size = cpu_to_le32(request_size - sizeof(request->header));
-+	request->unk_10 = cpu_to_le16(1);
-+	request->msg_id = timestamp & 0xff;
-+
-+	frame = (struct appletbdrm_frame *)request->data;
-+
-+	drm_atomic_helper_damage_iter_init(&iter, old_state, state);
-+	drm_atomic_for_each_plane_damage(&iter, &damage) {
-+		struct iosys_map dst = IOSYS_MAP_INIT_VADDR(frame->buf);
-+		u32 buf_size = rect_size(&damage);
-+
-+		/*
-+		 * The coordinates need to be translated to the coordinate
-+		 * system the device expects, see the comment in
-+		 * appletbdrm_setup_mode_config
-+		 */
-+		frame->begin_x = cpu_to_le16(damage.y1);
-+		frame->begin_y = cpu_to_le16(adev->height - damage.x2);
-+		frame->width = cpu_to_le16(drm_rect_height(&damage));
-+		frame->height = cpu_to_le16(drm_rect_width(&damage));
-+		frame->buf_size = cpu_to_le32(buf_size);
-+
-+		ret = drm_fb_blit(&dst, NULL, DRM_FORMAT_BGR888,
-+				  &shadow_plane_state->data[0], fb, &damage, &shadow_plane_state->fmtcnv_state);
-+		if (ret) {
-+			drm_err(drm, "Failed to copy damage clip (%pe)\n", ERR_PTR(ret));
-+			goto end_fb_cpu_access;
-+		}
-+
-+		frame = (void *)frame + struct_size(frame, buf, buf_size);
-+	}
-+
-+	footer = (struct appletbdrm_fb_request_footer *)&request->data[frames_size];
-+
-+	footer->unk_0c = cpu_to_le32(0xfffe);
-+	footer->unk_1c = cpu_to_le32(0x80001);
-+	footer->unk_34 = cpu_to_le32(0x80002);
-+	footer->unk_4c = cpu_to_le32(0xffff);
-+	footer->timestamp = cpu_to_le64(timestamp);
-+
-+	ret = appletbdrm_send_request(adev, &request->header, request_size);
-+	if (ret)
-+		goto end_fb_cpu_access;
-+
-+	ret = appletbdrm_read_response(adev, &response->header, sizeof(*response),
-+				       APPLETBDRM_MSG_UPDATE_COMPLETE);
-+	if (ret)
-+		goto end_fb_cpu_access;
-+
-+	if (response->timestamp != footer->timestamp) {
-+		drm_err(drm, "Response timestamp (%llu) doesn't match request timestamp (%llu)\n",
-+			le64_to_cpu(response->timestamp), timestamp);
-+		goto end_fb_cpu_access;
-+	}
-+
-+end_fb_cpu_access:
-+	drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
-+free_response:
-+	kfree(response);
-+free_request:
-+	kfree(request);
-+
-+	return ret;
-+}
-+
-+static int appletbdrm_connector_helper_get_modes(struct drm_connector *connector)
-+{
-+	struct appletbdrm_device *adev = drm_to_adev(connector->dev);
-+
-+	return drm_connector_helper_get_modes_fixed(connector, &adev->mode);
-+}
-+
-+static enum drm_mode_status appletbdrm_pipe_mode_valid(struct drm_simple_display_pipe *pipe,
-+						       const struct drm_display_mode *mode)
-+{
-+	struct drm_crtc *crtc = &pipe->crtc;
-+	struct appletbdrm_device *adev = drm_to_adev(crtc->dev);
-+
-+	return drm_crtc_helper_mode_valid_fixed(crtc, mode, &adev->mode);
-+}
-+
-+static void appletbdrm_pipe_disable(struct drm_simple_display_pipe *pipe)
-+{
-+	struct appletbdrm_device *adev = drm_to_adev(pipe->crtc.dev);
-+	int idx;
-+
-+	if (!drm_dev_enter(&adev->drm, &idx))
-+		return;
-+
-+	appletbdrm_clear_display(adev);
-+
-+	drm_dev_exit(idx);
-+}
-+
-+static void appletbdrm_pipe_update(struct drm_simple_display_pipe *pipe,
-+				   struct drm_plane_state *old_state)
-+{
-+	struct drm_crtc *crtc = &pipe->crtc;
-+	struct appletbdrm_device *adev = drm_to_adev(crtc->dev);
-+	int idx;
-+
-+	if (!crtc->state->active || !drm_dev_enter(&adev->drm, &idx))
-+		return;
-+
-+	appletbdrm_flush_damage(adev, old_state, pipe->plane.state);
-+
-+	drm_dev_exit(idx);
-+}
-+
-+static const u32 appletbdrm_formats[] = {
-+	DRM_FORMAT_BGR888,
-+	DRM_FORMAT_XRGB8888, /* emulated */
-+};
-+
-+static const struct drm_mode_config_funcs appletbdrm_mode_config_funcs = {
-+	.fb_create = drm_gem_fb_create_with_dirty,
-+	.atomic_check = drm_atomic_helper_check,
-+	.atomic_commit = drm_atomic_helper_commit,
-+};
-+
-+static const struct drm_connector_funcs appletbdrm_connector_funcs = {
-+	.reset = drm_atomic_helper_connector_reset,
-+	.destroy = drm_connector_cleanup,
-+	.fill_modes = drm_helper_probe_single_connector_modes,
-+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
-+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
-+};
-+
-+static const struct drm_connector_helper_funcs appletbdrm_connector_helper_funcs = {
-+	.get_modes = appletbdrm_connector_helper_get_modes,
-+};
-+
-+static const struct drm_simple_display_pipe_funcs appletbdrm_pipe_funcs = {
-+	DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS,
-+	.update = appletbdrm_pipe_update,
-+	.disable = appletbdrm_pipe_disable,
-+	.mode_valid = appletbdrm_pipe_mode_valid,
-+};
-+
-+DEFINE_DRM_GEM_FOPS(appletbdrm_drm_fops);
-+
-+static const struct drm_driver appletbdrm_drm_driver = {
-+	DRM_GEM_SHMEM_DRIVER_OPS,
-+	.name			= "appletbdrm",
-+	.desc			= "Apple Touch Bar DRM Driver",
-+	.date			= "20230910",
-+	.major			= 1,
-+	.minor			= 0,
-+	.driver_features	= DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC,
-+	.fops			= &appletbdrm_drm_fops,
-+};
-+
-+static int appletbdrm_setup_mode_config(struct appletbdrm_device *adev)
-+{
-+	struct drm_connector *connector = &adev->connector;
-+	struct drm_device *drm = &adev->drm;
-+	struct device *dev = adev->dev;
-+	int ret;
-+
-+	ret = drmm_mode_config_init(drm);
-+	if (ret)
-+		return dev_err_probe(dev, ret, "Failed to initialize mode configuration\n");
-+
-+	/*
-+	 * The coordinate system used by the device is different from the
-+	 * coordinate system of the framebuffer in that the x and y axes are
-+	 * swapped, and that the y axis is inverted; so what the device reports
-+	 * as the height is actually the width of the framebuffer and vice
-+	 * versa
-+	 */
-+	drm->mode_config.min_width = 0;
-+	drm->mode_config.min_height = 0;
-+	drm->mode_config.max_width = max(adev->height, DRM_SHADOW_PLANE_MAX_WIDTH);
-+	drm->mode_config.max_height = max(adev->width, DRM_SHADOW_PLANE_MAX_HEIGHT);
-+	drm->mode_config.preferred_depth = APPLETBDRM_BITS_PER_PIXEL;
-+	drm->mode_config.funcs = &appletbdrm_mode_config_funcs;
-+
-+	adev->mode = (struct drm_display_mode) {
-+		DRM_MODE_INIT(60, adev->height, adev->width,
-+			      DRM_MODE_RES_MM(adev->height, 218),
-+			      DRM_MODE_RES_MM(adev->width, 218))
-+	};
-+
-+	ret = drm_connector_init(drm, connector,
-+				 &appletbdrm_connector_funcs, DRM_MODE_CONNECTOR_USB);
-+	if (ret)
-+		return dev_err_probe(dev, ret, "Failed to initialize connector\n");
-+
-+	drm_connector_helper_add(connector, &appletbdrm_connector_helper_funcs);
-+
-+	ret = drm_connector_set_panel_orientation(connector,
-+						  DRM_MODE_PANEL_ORIENTATION_RIGHT_UP);
-+	if (ret)
-+		return dev_err_probe(dev, ret, "Failed to set panel orientation\n");
-+
-+	connector->display_info.non_desktop = true;
-+	ret = drm_object_property_set_value(&connector->base,
-+					    drm->mode_config.non_desktop_property, true);
-+	if (ret)
-+		return dev_err_probe(dev, ret, "Failed to set non-desktop property\n");
-+
-+	ret = drm_simple_display_pipe_init(drm, &adev->pipe, &appletbdrm_pipe_funcs,
-+					   appletbdrm_formats, ARRAY_SIZE(appletbdrm_formats),
-+					   NULL, &adev->connector);
-+	if (ret)
-+		return dev_err_probe(dev, ret, "Failed to initialize simple display pipe\n");
-+
-+	drm_plane_enable_fb_damage_clips(&adev->pipe.plane);
-+
-+	drm_mode_config_reset(drm);
-+
-+	ret = drm_dev_register(drm, 0);
-+	if (ret)
-+		return dev_err_probe(dev, ret, "Failed to register DRM device\n");
-+
-+	return 0;
-+}
-+
-+static int appletbdrm_probe(struct usb_interface *intf,
-+			    const struct usb_device_id *id)
-+{
-+	struct usb_endpoint_descriptor *bulk_in, *bulk_out;
-+	struct device *dev = &intf->dev;
-+	struct appletbdrm_device *adev;
-+	int ret;
-+
-+	ret = usb_find_common_endpoints(intf->cur_altsetting, &bulk_in, &bulk_out, NULL, NULL);
-+	if (ret)
-+		return dev_err_probe(dev, ret, "Failed to find bulk endpoints\n");
-+
-+	adev = devm_drm_dev_alloc(dev, &appletbdrm_drm_driver, struct appletbdrm_device, drm);
-+	if (IS_ERR(adev))
-+		return PTR_ERR(adev);
-+
-+	adev->dev = dev;
-+	adev->in_ep = bulk_in->bEndpointAddress;
-+	adev->out_ep = bulk_out->bEndpointAddress;
-+
-+	usb_set_intfdata(intf, adev);
-+
-+	ret = appletbdrm_get_information(adev);
-+	if (ret)
-+		return dev_err_probe(dev, ret, "Failed to get display information\n");
-+
-+	ret = appletbdrm_signal_readiness(adev);
-+	if (ret)
-+		return dev_err_probe(dev, ret, "Failed to signal readiness\n");
-+
-+	ret = appletbdrm_clear_display(adev);
-+	if (ret)
-+		return dev_err_probe(dev, ret, "Failed to clear display\n");
-+
-+	return appletbdrm_setup_mode_config(adev);
-+}
-+
-+static void appletbdrm_disconnect(struct usb_interface *intf)
-+{
-+	struct appletbdrm_device *adev = usb_get_intfdata(intf);
-+	struct drm_device *drm = &adev->drm;
-+
-+	drm_dev_unplug(drm);
-+	drm_atomic_helper_shutdown(drm);
-+}
-+
-+static void appletbdrm_shutdown(struct usb_interface *intf)
-+{
-+	struct appletbdrm_device *adev = usb_get_intfdata(intf);
-+
-+	/*
-+	 * The framebuffer needs to be cleared on shutdown since its content
-+	 * persists across boots
-+	 */
-+	drm_atomic_helper_shutdown(&adev->drm);
-+}
-+
-+static const struct usb_device_id appletbdrm_usb_id_table[] = {
-+	{ USB_DEVICE_INTERFACE_CLASS(0x05ac, 0x8302, USB_CLASS_AUDIO_VIDEO) },
-+	{}
-+};
-+MODULE_DEVICE_TABLE(usb, appletbdrm_usb_id_table);
-+
-+static struct usb_driver appletbdrm_usb_driver = {
-+	.name		= "appletbdrm",
-+	.probe		= appletbdrm_probe,
-+	.disconnect	= appletbdrm_disconnect,
-+	.shutdown	= appletbdrm_shutdown,
-+	.id_table	= appletbdrm_usb_id_table,
-+};
-+module_usb_driver(appletbdrm_usb_driver);
-+
-+MODULE_AUTHOR("Kerem Karabay <kekrby@gmail.com>");
-+MODULE_DESCRIPTION("Apple Touch Bar DRM Driver");
-+MODULE_LICENSE("GPL");
-diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
-index 365e6ddbe90f..cf357cd3389d 100644
---- a/drivers/gpu/vga/vga_switcheroo.c
-+++ b/drivers/gpu/vga/vga_switcheroo.c
-@@ -438,12 +438,7 @@ find_active_client(struct list_head *head)
- bool vga_switcheroo_client_probe_defer(struct pci_dev *pdev)
- {
- 	if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
--		/*
--		 * apple-gmux is needed on pre-retina MacBook Pro
--		 * to probe the panel if pdev is the inactive GPU.
--		 */
--		if (apple_gmux_present() && pdev != vga_default_device() &&
--		    !vgasr_priv.handler_flags)
-+		if (apple_gmux_present() && !vgasr_priv.handler_flags)
- 			return true;
- 	}
- 
-diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
-index 08446c89eff6..f26e36dffe51 100644
---- a/drivers/hid/Kconfig
-+++ b/drivers/hid/Kconfig
-@@ -148,6 +148,40 @@ config HID_APPLEIR
- 
- 	Say Y here if you want support for Apple infrared remote control.
- 
-+config HID_APPLETB_BL
-+	tristate "Apple Touch Bar Backlight"
-+	depends on BACKLIGHT_CLASS_DEVICE
-+	help
-+	  Say Y here if you want support for the backlight of Touch Bars on x86
-+	  MacBook Pros.
-+
-+	  To compile this driver as a module, choose M here: the
-+	  module will be called hid-appletb-bl.
-+
-+config HID_APPLETB_KBD
-+	tristate "Apple Touch Bar Keyboard Mode"
-+	depends on USB_HID
-+	help
-+	  Say Y here if you want support for the keyboard mode (escape,
-+	  function, media and brightness keys) of Touch Bars on x86 MacBook
-+	  Pros.
-+
-+	  To compile this driver as a module, choose M here: the
-+	  module will be called hid-appletb-kbd.
-+
-+config HID_APPLE_MAGIC_BACKLIGHT
-+	tristate "Apple Magic Keyboard Backlight"
-+	depends on USB_HID
-+	depends on LEDS_CLASS
-+	depends on NEW_LEDS
-+	help
-+	Say Y here if you want support for the keyboard backlight on Macs with
-+	the magic keyboard (MacBookPro16,x and MacBookAir9,1). Note that this
-+	driver is not for external magic keyboards.
-+
-+	To compile this driver as a module, choose M here: the
-+	module will be called hid-apple-magic-backlight.
-+
- config HID_ASUS
- 	tristate "Asus"
- 	depends on USB_HID
-@@ -723,6 +757,7 @@ config HID_MULTITOUCH
- 	  Say Y here if you have one of the following devices:
- 	  - 3M PCT touch screens
- 	  - ActionStar dual touch panels
-+	  - Touch Bars on x86 MacBook Pros
- 	  - Atmel panels
- 	  - Cando dual touch panels
- 	  - Chunghwa panels
-diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
-index ce71b53ea6c5..685b7c8416a8 100644
---- a/drivers/hid/Makefile
-+++ b/drivers/hid/Makefile
-@@ -29,6 +29,9 @@ obj-$(CONFIG_HID_ALPS)		+= hid-alps.o
- obj-$(CONFIG_HID_ACRUX)		+= hid-axff.o
- obj-$(CONFIG_HID_APPLE)		+= hid-apple.o
- obj-$(CONFIG_HID_APPLEIR)	+= hid-appleir.o
-+obj-$(CONFIG_HID_APPLETB_BL)	+= hid-appletb-bl.o
-+obj-$(CONFIG_HID_APPLETB_KBD)	+= hid-appletb-kbd.o
-+obj-$(CONFIG_HID_APPLE_MAGIC_BACKLIGHT)	+= hid-apple-magic-backlight.o
- obj-$(CONFIG_HID_CREATIVE_SB0540)	+= hid-creative-sb0540.o
- obj-$(CONFIG_HID_ASUS)		+= hid-asus.o
- obj-$(CONFIG_HID_AUREAL)	+= hid-aureal.o
-diff --git a/drivers/hid/hid-apple-magic-backlight.c b/drivers/hid/hid-apple-magic-backlight.c
-new file mode 100644
-index 000000000000..f0fc02ff3b2d
---- /dev/null
-+++ b/drivers/hid/hid-apple-magic-backlight.c
-@@ -0,0 +1,120 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Apple Magic Keyboard Backlight Driver
-+ *
-+ * For Intel Macs with internal Magic Keyboard (MacBookPro16,1-4 and MacBookAir9,1)
-+ *
-+ * Copyright (c) 2022 Kerem Karabay <kekrby@gmail.com>
-+ * Copyright (c) 2023 Orlando Chamberlain <orlandoch.dev@gmail.com>
-+ */
-+
-+#include <linux/hid.h>
-+#include <linux/leds.h>
-+#include <linux/device.h>
-+#include <linux/errno.h>
-+#include <dt-bindings/leds/common.h>
-+
-+#include "hid-ids.h"
-+
-+#define HID_USAGE_MAGIC_BL	0xff00000f
-+
-+#define APPLE_MAGIC_REPORT_ID_POWER 3
-+#define APPLE_MAGIC_REPORT_ID_BRIGHTNESS 1
-+
-+struct apple_magic_backlight {
-+	struct led_classdev cdev;
-+	struct hid_report *brightness;
-+	struct hid_report *power;
-+};
-+
-+static void apple_magic_backlight_report_set(struct hid_report *rep, s32 value, u8 rate)
-+{
-+	rep->field[0]->value[0] = value;
-+	rep->field[1]->value[0] = 0x5e; /* Mimic Windows */
-+	rep->field[1]->value[0] |= rate << 8;
-+
-+	hid_hw_request(rep->device, rep, HID_REQ_SET_REPORT);
-+}
-+
-+static void apple_magic_backlight_set(struct apple_magic_backlight *backlight,
-+				     int brightness, char rate)
-+{
-+	apple_magic_backlight_report_set(backlight->power, brightness ? 1 : 0, rate);
-+	if (brightness)
-+		apple_magic_backlight_report_set(backlight->brightness, brightness, rate);
-+}
-+
-+static int apple_magic_backlight_led_set(struct led_classdev *led_cdev,
-+					 enum led_brightness brightness)
-+{
-+	struct apple_magic_backlight *backlight = container_of(led_cdev,
-+			struct apple_magic_backlight, cdev);
-+
-+	apple_magic_backlight_set(backlight, brightness, 1);
-+	return 0;
-+}
-+
-+static int apple_magic_backlight_probe(struct hid_device *hdev,
-+				       const struct hid_device_id *id)
-+{
-+	struct apple_magic_backlight *backlight;
-+	int rc;
-+
-+	rc = hid_parse(hdev);
-+	if (rc)
-+		return rc;
-+
-+	/*
-+	 * Ensure this usb endpoint is for the keyboard backlight, not touchbar
-+	 * backlight.
-+	 */
-+	if (hdev->collection[0].usage != HID_USAGE_MAGIC_BL)
-+		return -ENODEV;
-+
-+	backlight = devm_kzalloc(&hdev->dev, sizeof(*backlight), GFP_KERNEL);
-+	if (!backlight)
-+		return -ENOMEM;
-+
-+	rc = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
-+	if (rc)
-+		return rc;
-+
-+	backlight->brightness = hid_register_report(hdev, HID_FEATURE_REPORT,
-+			APPLE_MAGIC_REPORT_ID_BRIGHTNESS, 0);
-+	backlight->power = hid_register_report(hdev, HID_FEATURE_REPORT,
-+			APPLE_MAGIC_REPORT_ID_POWER, 0);
-+
-+	if (!backlight->brightness || !backlight->power) {
-+		rc = -ENODEV;
-+		goto hw_stop;
-+	}
-+
-+	backlight->cdev.name = ":white:" LED_FUNCTION_KBD_BACKLIGHT;
-+	backlight->cdev.max_brightness = backlight->brightness->field[0]->logical_maximum;
-+	backlight->cdev.brightness_set_blocking = apple_magic_backlight_led_set;
-+
-+	apple_magic_backlight_set(backlight, 0, 0);
-+
-+	return devm_led_classdev_register(&hdev->dev, &backlight->cdev);
-+
-+hw_stop:
-+	hid_hw_stop(hdev);
-+	return rc;
-+}
-+
-+static const struct hid_device_id apple_magic_backlight_hid_ids[] = {
-+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) },
-+	{ }
-+};
-+MODULE_DEVICE_TABLE(hid, apple_magic_backlight_hid_ids);
-+
-+static struct hid_driver apple_magic_backlight_hid_driver = {
-+	.name = "hid-apple-magic-backlight",
-+	.id_table = apple_magic_backlight_hid_ids,
-+	.probe = apple_magic_backlight_probe,
-+};
-+module_hid_driver(apple_magic_backlight_hid_driver);
-+
-+MODULE_DESCRIPTION("MacBook Magic Keyboard Backlight");
-+MODULE_AUTHOR("Orlando Chamberlain <orlandoch.dev@gmail.com>");
-+MODULE_LICENSE("GPL");
-diff --git a/drivers/hid/hid-appletb-bl.c b/drivers/hid/hid-appletb-bl.c
-new file mode 100644
-index 000000000000..0c5e4b776851
---- /dev/null
-+++ b/drivers/hid/hid-appletb-bl.c
-@@ -0,0 +1,193 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Apple Touch Bar Backlight Driver
-+ *
-+ * Copyright (c) 2017-2018 Ronald Tschalär
-+ * Copyright (c) 2022-2023 Kerem Karabay <kekrby@gmail.com>
-+ */
-+
-+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-+
-+#include <linux/hid.h>
-+#include <linux/backlight.h>
-+
-+#include "hid-ids.h"
-+
-+#define APPLETB_BL_ON			1
-+#define APPLETB_BL_DIM			3
-+#define APPLETB_BL_OFF			4
-+
-+#define HID_UP_APPLEVENDOR_TB_BL	0xff120000
-+
-+#define HID_VD_APPLE_TB_BRIGHTNESS	0xff120001
-+#define HID_USAGE_AUX1			0xff120020
-+#define HID_USAGE_BRIGHTNESS		0xff120021
-+
-+struct appletb_bl {
-+	struct hid_field *aux1_field, *brightness_field;
-+	struct backlight_device *bdev;
-+
-+	bool full_on;
-+};
-+
-+const u8 appletb_bl_brightness_map[] = {
-+	APPLETB_BL_OFF,
-+	APPLETB_BL_DIM,
-+	APPLETB_BL_ON
-+};
-+
-+static int appletb_bl_set_brightness(struct appletb_bl *bl, u8 brightness)
-+{
-+	struct hid_report *report = bl->brightness_field->report;
-+	struct hid_device *hdev = report->device;
-+	int ret;
-+
-+	ret = hid_set_field(bl->aux1_field, 0, 1);
-+	if (ret) {
-+		hid_err(hdev, "Failed to set auxiliary field (%pe)\n", ERR_PTR(ret));
-+		return ret;
-+	}
-+
-+	ret = hid_set_field(bl->brightness_field, 0, brightness);
-+	if (ret) {
-+		hid_err(hdev, "Failed to set brightness field (%pe)\n", ERR_PTR(ret));
-+		return ret;
-+	}
-+
-+	if (!bl->full_on) {
-+		ret = hid_hw_power(hdev, PM_HINT_FULLON);
-+		if (ret < 0) {
-+			hid_err(hdev, "Device didn't power on (%pe)\n", ERR_PTR(ret));
-+			return ret;
-+		}
-+
-+		bl->full_on = true;
-+	}
-+
-+	hid_hw_request(hdev, report, HID_REQ_SET_REPORT);
-+
-+	if (brightness == APPLETB_BL_OFF) {
-+		hid_hw_power(hdev, PM_HINT_NORMAL);
-+		bl->full_on = false;
-+	}
-+
-+	return 0;
-+}
-+
-+static int appletb_bl_update_status(struct backlight_device *bdev)
-+{
-+	struct appletb_bl *bl = bl_get_data(bdev);
-+	u16 brightness;
-+
-+	if (bdev->props.state & BL_CORE_SUSPENDED)
-+		brightness = 0;
-+	else
-+		brightness = backlight_get_brightness(bdev);
-+
-+	return appletb_bl_set_brightness(bl, appletb_bl_brightness_map[brightness]);
-+}
-+
-+static const struct backlight_ops appletb_bl_backlight_ops = {
-+	.options = BL_CORE_SUSPENDRESUME,
-+	.update_status = appletb_bl_update_status,
-+};
-+
-+static int appletb_bl_probe(struct hid_device *hdev, const struct hid_device_id *id)
-+{
-+	struct hid_field *aux1_field, *brightness_field;
-+	struct backlight_properties bl_props = { 0 };
-+	struct device *dev = &hdev->dev;
-+	struct appletb_bl *bl;
-+	int ret;
-+
-+	ret = hid_parse(hdev);
-+	if (ret)
-+		return dev_err_probe(dev, ret, "HID parse failed\n");
-+
-+	aux1_field = hid_find_field(hdev, HID_FEATURE_REPORT,
-+				    HID_VD_APPLE_TB_BRIGHTNESS, HID_USAGE_AUX1);
-+
-+	brightness_field = hid_find_field(hdev, HID_FEATURE_REPORT,
-+					  HID_VD_APPLE_TB_BRIGHTNESS, HID_USAGE_BRIGHTNESS);
-+
-+	if (!aux1_field || !brightness_field)
-+		return -ENODEV;
-+
-+	if (aux1_field->report != brightness_field->report)
-+		return dev_err_probe(dev, -ENODEV, "Encountered unexpected report structure\n");
-+
-+	bl = devm_kzalloc(dev, sizeof(*bl), GFP_KERNEL);
-+	if (!bl)
-+		return -ENOMEM;
-+
-+	ret = hid_hw_start(hdev, HID_CONNECT_DRIVER);
-+	if (ret)
-+		return dev_err_probe(dev, ret, "HID hardware start failed\n");
-+
-+	ret = hid_hw_open(hdev);
-+	if (ret) {
-+		dev_err_probe(dev, ret, "HID hardware open failed\n");
-+		goto stop_hw;
-+	}
-+
-+	bl->aux1_field = aux1_field;
-+	bl->brightness_field = brightness_field;
-+
-+	ret = appletb_bl_set_brightness(bl, APPLETB_BL_OFF);
-+	if (ret) {
-+		dev_err_probe(dev, ret, "Failed to set touch bar brightness to off\n");
-+		goto close_hw;
-+	}
-+
-+	bl_props.type = BACKLIGHT_RAW;
-+	bl_props.max_brightness = ARRAY_SIZE(appletb_bl_brightness_map) - 1;
-+
-+	bl->bdev = devm_backlight_device_register(dev, "appletb_backlight", dev, bl,
-+						  &appletb_bl_backlight_ops, &bl_props);
-+	if (IS_ERR(bl->bdev)) {
-+		ret = PTR_ERR(bl->bdev);
-+		dev_err_probe(dev, ret, "Failed to register backlight device\n");
-+		goto close_hw;
-+	}
-+
-+	hid_set_drvdata(hdev, bl);
-+
-+	return 0;
-+
-+close_hw:
-+	hid_hw_close(hdev);
-+stop_hw:
-+	hid_hw_stop(hdev);
-+
-+	return ret;
-+}
-+
-+static void appletb_bl_remove(struct hid_device *hdev)
-+{
-+	struct appletb_bl *bl = hid_get_drvdata(hdev);
-+
-+	appletb_bl_set_brightness(bl, APPLETB_BL_OFF);
-+
-+	hid_hw_close(hdev);
-+	hid_hw_stop(hdev);
-+}
-+
-+static const struct hid_device_id appletb_bl_hid_ids[] = {
-+	/* MacBook Pro's 2018, 2019, with T2 chip: iBridge DFR Brightness */
-+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) },
-+	{ }
-+};
-+MODULE_DEVICE_TABLE(hid, appletb_bl_hid_ids);
-+
-+static struct hid_driver appletb_bl_hid_driver = {
-+	.name = "hid-appletb-bl",
-+	.id_table = appletb_bl_hid_ids,
-+	.probe = appletb_bl_probe,
-+	.remove = appletb_bl_remove,
-+};
-+module_hid_driver(appletb_bl_hid_driver);
-+
-+MODULE_AUTHOR("Ronald Tschalär");
-+MODULE_AUTHOR("Kerem Karabay <kekrby@gmail.com>");
-+MODULE_DESCRIPTION("MacBookPro Touch Bar Backlight Driver");
-+MODULE_LICENSE("GPL");
-diff --git a/drivers/hid/hid-appletb-kbd.c b/drivers/hid/hid-appletb-kbd.c
-new file mode 100644
-index 000000000000..bc004c40805f
---- /dev/null
-+++ b/drivers/hid/hid-appletb-kbd.c
-@@ -0,0 +1,289 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Apple Touch Bar Keyboard Mode Driver
-+ *
-+ * Copyright (c) 2017-2018 Ronald Tschalär
-+ * Copyright (c) 2022-2023 Kerem Karabay <kekrby@gmail.com>
-+ */
-+
-+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-+
-+#include <linux/hid.h>
-+#include <linux/usb.h>
-+#include <linux/input.h>
-+#include <linux/sysfs.h>
-+#include <linux/bitops.h>
-+#include <linux/module.h>
-+#include <linux/string.h>
-+#include <linux/input/sparse-keymap.h>
-+
-+#include "hid-ids.h"
-+
-+#define APPLETB_KBD_MODE_ESC	0
-+#define APPLETB_KBD_MODE_FN	1
-+#define APPLETB_KBD_MODE_SPCL	2
-+#define APPLETB_KBD_MODE_OFF	3
-+#define APPLETB_KBD_MODE_MAX	APPLETB_KBD_MODE_OFF
-+
-+#define HID_USAGE_MODE		0x00ff0004
-+
-+struct appletb_kbd {
-+	struct hid_field *mode_field;
-+
-+	u8 saved_mode;
-+	u8 current_mode;
-+};
-+
-+static const struct key_entry appletb_kbd_keymap[] = {
-+	{ KE_KEY, KEY_ESC, { KEY_ESC } },
-+	{ KE_KEY, KEY_F1,  { KEY_BRIGHTNESSDOWN } },
-+	{ KE_KEY, KEY_F2,  { KEY_BRIGHTNESSUP } },
-+	{ KE_KEY, KEY_F3,  { KEY_RESERVED } },
-+	{ KE_KEY, KEY_F4,  { KEY_RESERVED } },
-+	{ KE_KEY, KEY_F5,  { KEY_KBDILLUMDOWN } },
-+	{ KE_KEY, KEY_F6,  { KEY_KBDILLUMUP } },
-+	{ KE_KEY, KEY_F7,  { KEY_PREVIOUSSONG } },
-+	{ KE_KEY, KEY_F8,  { KEY_PLAYPAUSE } },
-+	{ KE_KEY, KEY_F9,  { KEY_NEXTSONG } },
-+	{ KE_KEY, KEY_F10, { KEY_MUTE } },
-+	{ KE_KEY, KEY_F11, { KEY_VOLUMEDOWN } },
-+	{ KE_KEY, KEY_F12, { KEY_VOLUMEUP } },
-+	{ KE_END, 0 }
-+};
-+
-+static int appletb_kbd_set_mode(struct appletb_kbd *kbd, u8 mode)
-+{
-+	struct hid_report *report = kbd->mode_field->report;
-+	struct hid_device *hdev = report->device;
-+	int ret;
-+
-+	ret = hid_hw_power(hdev, PM_HINT_FULLON);
-+	if (ret) {
-+		hid_err(hdev, "Device didn't resume (%pe)\n", ERR_PTR(ret));
-+		return ret;
-+	}
-+
-+	ret = hid_set_field(kbd->mode_field, 0, mode);
-+	if (ret) {
-+		hid_err(hdev, "Failed to set mode field to %u (%pe)\n", mode, ERR_PTR(ret));
-+		goto power_normal;
-+	}
-+
-+	hid_hw_request(hdev, report, HID_REQ_SET_REPORT);
-+
-+	kbd->current_mode = mode;
-+
-+power_normal:
-+	hid_hw_power(hdev, PM_HINT_NORMAL);
-+
-+	return ret;
-+}
-+
-+static ssize_t mode_show(struct device *dev,
-+			 struct device_attribute *attr, char *buf)
-+{
-+	struct appletb_kbd *kbd = dev_get_drvdata(dev);
-+
-+	return sysfs_emit(buf, "%d\n", kbd->current_mode);
-+}
-+
-+static ssize_t mode_store(struct device *dev,
-+			  struct device_attribute *attr,
-+			  const char *buf, size_t size)
-+{
-+	struct appletb_kbd *kbd = dev_get_drvdata(dev);
-+	u8 mode;
-+	int ret;
-+
-+	ret = kstrtou8(buf, 0, &mode);
-+	if (ret)
-+		return ret;
-+
-+	if (mode > APPLETB_KBD_MODE_MAX)
-+		return -EINVAL;
-+
-+	ret = appletb_kbd_set_mode(kbd, mode);
-+
-+	return ret < 0 ? ret : size;
-+}
-+static DEVICE_ATTR_RW(mode);
-+
-+struct attribute *appletb_kbd_attrs[] = {
-+	&dev_attr_mode.attr,
-+	NULL
-+};
-+ATTRIBUTE_GROUPS(appletb_kbd);
-+
-+static int appletb_tb_key_to_slot(unsigned int code)
-+{
-+	switch (code) {
-+	case KEY_ESC:
-+		return 0;
-+	case KEY_F1 ... KEY_F10:
-+		return code - KEY_F1 + 1;
-+	case KEY_F11 ... KEY_F12:
-+		return code - KEY_F11 + 11;
-+
-+	default:
-+		return -EINVAL;
-+	}
-+}
-+
-+static int appletb_kbd_hid_event(struct hid_device *hdev, struct hid_field *field,
-+				      struct hid_usage *usage, __s32 value)
-+{
-+	struct appletb_kbd *kbd = hid_get_drvdata(hdev);
-+	struct key_entry *translation;
-+	struct input_dev *input;
-+	int slot;
-+
-+	if ((usage->hid & HID_USAGE_PAGE) != HID_UP_KEYBOARD || usage->type != EV_KEY)
-+		return 0;
-+
-+	input = field->hidinput->input;
-+
-+	/*
-+	 * Skip non-touch-bar keys.
-+	 *
-+	 * Either the touch bar itself or usbhid generate a slew of key-down
-+	 * events for all the meta keys. None of which we're at all interested
-+	 * in.
-+	 */
-+	slot = appletb_tb_key_to_slot(usage->code);
-+	if (slot < 0)
-+		return 0;
-+
-+	translation = sparse_keymap_entry_from_scancode(input, usage->code);
-+
-+	if (translation && kbd->current_mode == APPLETB_KBD_MODE_SPCL) {
-+		input_event(input, usage->type, translation->keycode, value);
-+
-+		return 1;
-+	}
-+
-+	return kbd->current_mode == APPLETB_KBD_MODE_OFF;
-+}
-+
-+static int appletb_kbd_input_configured(struct hid_device *hdev, struct hid_input *hidinput)
-+{
-+	struct input_dev *input = hidinput->input;
-+
-+	/*
-+	 * Clear various input capabilities that are blindly set by the hid
-+	 * driver (usbkbd.c)
-+	 */
-+	memset(input->evbit, 0, sizeof(input->evbit));
-+	memset(input->keybit, 0, sizeof(input->keybit));
-+	memset(input->ledbit, 0, sizeof(input->ledbit));
-+
-+	__set_bit(EV_REP, input->evbit);
-+
-+	return sparse_keymap_setup(input, appletb_kbd_keymap, NULL);
-+}
-+
-+static int appletb_kbd_probe(struct hid_device *hdev, const struct hid_device_id *id)
-+{
-+	struct appletb_kbd *kbd;
-+	struct device *dev = &hdev->dev;
-+	struct hid_field *mode_field;
-+	int ret;
-+
-+	ret = hid_parse(hdev);
-+	if (ret)
-+		return dev_err_probe(dev, ret, "HID parse failed\n");
-+
-+	mode_field = hid_find_field(hdev, HID_OUTPUT_REPORT,
-+				    HID_GD_KEYBOARD, HID_USAGE_MODE);
-+	if (!mode_field)
-+		return -ENODEV;
-+
-+	kbd = devm_kzalloc(dev, sizeof(*kbd), GFP_KERNEL);
-+	if (!kbd)
-+		return -ENOMEM;
-+
-+	kbd->mode_field = mode_field;
-+
-+	ret = hid_hw_start(hdev, HID_CONNECT_HIDINPUT);
-+	if (ret)
-+		return dev_err_probe(dev, ret, "HID hw start failed\n");
-+
-+	ret = hid_hw_open(hdev);
-+	if (ret) {
-+		dev_err_probe(dev, ret, "HID hw open failed\n");
-+		goto stop_hw;
-+	}
-+
-+	ret = appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF);
-+	if (ret) {
-+		dev_err_probe(dev, ret, "Failed to set touchbar mode\n");
-+		goto close_hw;
-+	}
-+
-+	hid_set_drvdata(hdev, kbd);
-+
-+	return 0;
-+
-+close_hw:
-+	hid_hw_close(hdev);
-+stop_hw:
-+	hid_hw_stop(hdev);
-+	return ret;
-+}
-+
-+static void appletb_kbd_remove(struct hid_device *hdev)
-+{
-+	struct appletb_kbd *kbd = hid_get_drvdata(hdev);
-+
-+	appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF);
-+
-+	hid_hw_close(hdev);
-+	hid_hw_stop(hdev);
-+}
-+
-+#ifdef CONFIG_PM
-+static int appletb_kbd_suspend(struct hid_device *hdev, pm_message_t msg)
-+{
-+	struct appletb_kbd *kbd = hid_get_drvdata(hdev);
-+
-+	kbd->saved_mode = kbd->current_mode;
-+	appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF);
-+
-+	return 0;
-+}
-+
-+static int appletb_kbd_reset_resume(struct hid_device *hdev)
-+{
-+	struct appletb_kbd *kbd = hid_get_drvdata(hdev);
-+
-+	appletb_kbd_set_mode(kbd, kbd->saved_mode);
-+
-+	return 0;
-+}
-+#endif
-+
-+static const struct hid_device_id appletb_kbd_hid_ids[] = {
-+	/* MacBook Pro's 2018, 2019, with T2 chip: iBridge Display */
-+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) },
-+	{ }
-+};
-+MODULE_DEVICE_TABLE(hid, appletb_kbd_hid_ids);
-+
-+static struct hid_driver appletb_kbd_hid_driver = {
-+	.name = "hid-appletb-kbd",
-+	.id_table = appletb_kbd_hid_ids,
-+	.probe = appletb_kbd_probe,
-+	.remove = appletb_kbd_remove,
-+	.event = appletb_kbd_hid_event,
-+	.input_configured = appletb_kbd_input_configured,
-+#ifdef CONFIG_PM
-+	.suspend = appletb_kbd_suspend,
-+	.reset_resume = appletb_kbd_reset_resume,
-+#endif
-+	.driver.dev_groups = appletb_kbd_groups,
-+};
-+module_hid_driver(appletb_kbd_hid_driver);
-+
-+MODULE_AUTHOR("Ronald Tschalär");
-+MODULE_AUTHOR("Kerem Karabay <kekrby@gmail.com>");
-+MODULE_DESCRIPTION("MacBookPro Touch Bar Keyboard Mode Driver");
-+MODULE_LICENSE("GPL");
-diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
-index 74efda212c55..f4379efdbf30 100644
---- a/drivers/hid/hid-core.c
-+++ b/drivers/hid/hid-core.c
-@@ -1912,6 +1912,31 @@ int hid_set_field(struct hid_field *field, unsigned offset, __s32 value)
- }
- EXPORT_SYMBOL_GPL(hid_set_field);
- 
-+struct hid_field *hid_find_field(struct hid_device *hdev, unsigned int report_type,
-+				 unsigned int application, unsigned int usage)
-+{
-+	struct list_head *report_list = &hdev->report_enum[report_type].report_list;
-+	struct hid_report *report;
-+	int i, j;
-+
-+	list_for_each_entry(report, report_list, list) {
-+		if (report->application != application)
-+			continue;
-+
-+		for (i = 0; i < report->maxfield; i++) {
-+			struct hid_field *field = report->field[i];
-+
-+			for (j = 0; j < field->maxusage; j++) {
-+				if (field->usage[j].hid == usage)
-+					return field;
-+			}
-+		}
-+	}
-+
-+	return NULL;
-+}
-+EXPORT_SYMBOL_GPL(hid_find_field);
-+
- static struct hid_report *hid_get_report(struct hid_report_enum *report_enum,
- 		const u8 *data)
- {
-diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c
-index 25331695ae32..3380694ba18c 100644
---- a/drivers/hid/hid-google-hammer.c
-+++ b/drivers/hid/hid-google-hammer.c
-@@ -418,38 +418,15 @@ static int hammer_event(struct hid_device *hid, struct hid_field *field,
- 	return 0;
- }
- 
--static bool hammer_has_usage(struct hid_device *hdev, unsigned int report_type,
--			unsigned application, unsigned usage)
--{
--	struct hid_report_enum *re = &hdev->report_enum[report_type];
--	struct hid_report *report;
--	int i, j;
--
--	list_for_each_entry(report, &re->report_list, list) {
--		if (report->application != application)
--			continue;
--
--		for (i = 0; i < report->maxfield; i++) {
--			struct hid_field *field = report->field[i];
--
--			for (j = 0; j < field->maxusage; j++)
--				if (field->usage[j].hid == usage)
--					return true;
--		}
--	}
--
--	return false;
--}
--
- static bool hammer_has_folded_event(struct hid_device *hdev)
- {
--	return hammer_has_usage(hdev, HID_INPUT_REPORT,
-+	return !!hid_find_field(hdev, HID_INPUT_REPORT,
- 				HID_GD_KEYBOARD, HID_USAGE_KBD_FOLDED);
- }
- 
- static bool hammer_has_backlight_control(struct hid_device *hdev)
- {
--	return hammer_has_usage(hdev, HID_OUTPUT_REPORT,
-+	return !!hid_find_field(hdev, HID_OUTPUT_REPORT,
- 				HID_GD_KEYBOARD, HID_AD_BRIGHTNESS);
- }
- 
-diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
-index 56fc78841f24..0fed955364c3 100644
---- a/drivers/hid/hid-multitouch.c
-+++ b/drivers/hid/hid-multitouch.c
-@@ -72,6 +72,7 @@ MODULE_LICENSE("GPL");
- #define MT_QUIRK_FORCE_MULTI_INPUT	BIT(20)
- #define MT_QUIRK_DISABLE_WAKEUP		BIT(21)
- #define MT_QUIRK_ORIENTATION_INVERT	BIT(22)
-+#define MT_QUIRK_TOUCH_IS_TIPSTATE	BIT(23)
- 
- #define MT_INPUTMODE_TOUCHSCREEN	0x02
- #define MT_INPUTMODE_TOUCHPAD		0x03
-@@ -145,6 +146,7 @@ struct mt_class {
- 	__s32 sn_height;	/* Signal/noise ratio for height events */
- 	__s32 sn_pressure;	/* Signal/noise ratio for pressure events */
- 	__u8 maxcontacts;
-+	bool is_direct;	/* true for touchscreens */
- 	bool is_indirect;	/* true for touchpads */
- 	bool export_all_inputs;	/* do not ignore mouse, keyboards, etc... */
- };
-@@ -212,6 +214,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app);
- #define MT_CLS_GOOGLE				0x0111
- #define MT_CLS_RAZER_BLADE_STEALTH		0x0112
- #define MT_CLS_SMART_TECH			0x0113
-+#define MT_CLS_APPLE_TOUCHBAR			0x0114
- 
- #define MT_DEFAULT_MAXCONTACT	10
- #define MT_MAX_MAXCONTACT	250
-@@ -396,6 +399,13 @@ static const struct mt_class mt_classes[] = {
- 			MT_QUIRK_CONTACT_CNT_ACCURATE |
- 			MT_QUIRK_SEPARATE_APP_REPORT,
- 	},
-+	{ .name = MT_CLS_APPLE_TOUCHBAR,
-+		.quirks = MT_QUIRK_HOVERING |
-+			MT_QUIRK_TOUCH_IS_TIPSTATE |
-+			MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE,
-+		.is_direct = true,
-+		.maxcontacts = 11,
-+	},
- 	{ }
- };
- 
-@@ -489,9 +499,6 @@ static void mt_feature_mapping(struct hid_device *hdev,
- 		if (!td->maxcontacts &&
- 		    field->logical_maximum <= MT_MAX_MAXCONTACT)
- 			td->maxcontacts = field->logical_maximum;
--		if (td->mtclass.maxcontacts)
--			/* check if the maxcontacts is given by the class */
--			td->maxcontacts = td->mtclass.maxcontacts;
- 
- 		break;
- 	case HID_DG_BUTTONTYPE:
-@@ -565,13 +572,13 @@ static struct mt_application *mt_allocate_application(struct mt_device *td,
- 	mt_application->application = application;
- 	INIT_LIST_HEAD(&mt_application->mt_usages);
- 
--	if (application == HID_DG_TOUCHSCREEN)
-+	if (application == HID_DG_TOUCHSCREEN && !td->mtclass.is_indirect)
- 		mt_application->mt_flags |= INPUT_MT_DIRECT;
- 
- 	/*
- 	 * Model touchscreens providing buttons as touchpads.
- 	 */
--	if (application == HID_DG_TOUCHPAD) {
-+	if (application == HID_DG_TOUCHPAD && !td->mtclass.is_direct) {
- 		mt_application->mt_flags |= INPUT_MT_POINTER;
- 		td->inputmode_value = MT_INPUTMODE_TOUCHPAD;
- 	}
-@@ -635,7 +642,9 @@ static struct mt_report_data *mt_allocate_report_data(struct mt_device *td,
- 
- 		if (field->logical == HID_DG_FINGER || td->hdev->group != HID_GROUP_MULTITOUCH_WIN_8) {
- 			for (n = 0; n < field->report_count; n++) {
--				if (field->usage[n].hid == HID_DG_CONTACTID) {
-+				unsigned int hid = field->usage[n].hid;
-+
-+				if (hid == HID_DG_CONTACTID || hid == HID_DG_TRANSDUCER_INDEX) {
- 					rdata->is_mt_collection = true;
- 					break;
- 				}
-@@ -807,6 +816,15 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
- 
- 			MT_STORE_FIELD(confidence_state);
- 			return 1;
-+		case HID_DG_TOUCH:
-+			/*
-+			 * Legacy devices use TIPSWITCH and not TOUCH.
-+			 * Let's just ignore this field unless the quirk is set.
-+			 */
-+			if (!(cls->quirks & MT_QUIRK_TOUCH_IS_TIPSTATE))
-+				return -1;
-+
-+			fallthrough;
- 		case HID_DG_TIPSWITCH:
- 			if (field->application != HID_GD_SYSTEM_MULTIAXIS)
- 				input_set_capability(hi->input,
-@@ -814,6 +832,7 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
- 			MT_STORE_FIELD(tip_state);
- 			return 1;
- 		case HID_DG_CONTACTID:
-+		case HID_DG_TRANSDUCER_INDEX:
- 			MT_STORE_FIELD(contactid);
- 			app->touches_by_report++;
- 			return 1;
-@@ -869,10 +888,6 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
- 		case HID_DG_CONTACTMAX:
- 			/* contact max are global to the report */
- 			return -1;
--		case HID_DG_TOUCH:
--			/* Legacy devices use TIPSWITCH and not TOUCH.
--			 * Let's just ignore this field. */
--			return -1;
- 		}
- 		/* let hid-input decide for the others */
- 		return 0;
-@@ -1300,6 +1315,10 @@ static int mt_touch_input_configured(struct hid_device *hdev,
- 	struct input_dev *input = hi->input;
- 	int ret;
- 
-+	/* check if the maxcontacts is given by the class */
-+	if (cls->maxcontacts)
-+		td->maxcontacts = cls->maxcontacts;
-+
- 	if (!td->maxcontacts)
- 		td->maxcontacts = MT_DEFAULT_MAXCONTACT;
- 
-@@ -1307,6 +1326,9 @@ static int mt_touch_input_configured(struct hid_device *hdev,
- 	if (td->serial_maybe)
- 		mt_post_parse_default_settings(td, app);
- 
-+	if (cls->is_direct)
-+		app->mt_flags |= INPUT_MT_DIRECT;
-+
- 	if (cls->is_indirect)
- 		app->mt_flags |= INPUT_MT_POINTER;
- 
-@@ -1733,6 +1755,15 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
- 		}
- 	}
- 
-+	ret = hid_parse(hdev);
-+	if (ret != 0)
-+		return ret;
-+
-+	if (mtclass->name == MT_CLS_APPLE_TOUCHBAR &&
-+	    !hid_find_field(hdev, HID_INPUT_REPORT,
-+			    HID_DG_TOUCHPAD, HID_DG_TRANSDUCER_INDEX))
-+		return -ENODEV;
-+
- 	td = devm_kzalloc(&hdev->dev, sizeof(struct mt_device), GFP_KERNEL);
- 	if (!td) {
- 		dev_err(&hdev->dev, "cannot allocate multitouch data\n");
-@@ -1780,10 +1811,6 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
- 
- 	timer_setup(&td->release_timer, mt_expired_timeout, 0);
- 
--	ret = hid_parse(hdev);
--	if (ret != 0)
--		return ret;
--
- 	if (mtclass->quirks & MT_QUIRK_FIX_CONST_CONTACT_ID)
- 		mt_fix_const_fields(hdev, HID_DG_CONTACTID);
- 
-@@ -2235,6 +2262,11 @@ static const struct hid_device_id mt_devices[] = {
- 		MT_USB_DEVICE(USB_VENDOR_ID_XIROKU,
- 			USB_DEVICE_ID_XIROKU_CSR2) },
- 
-+	/* Apple Touch Bars */
-+	{ .driver_data = MT_CLS_APPLE_TOUCHBAR,
-+		HID_USB_DEVICE(USB_VENDOR_ID_APPLE,
-+			       USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) },
-+
- 	/* Google MT devices */
- 	{ .driver_data = MT_CLS_GOOGLE,
- 		HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE,
-diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
-index e0bbf0c6345d..7c576d6540fe 100644
---- a/drivers/hid/hid-quirks.c
-+++ b/drivers/hid/hid-quirks.c
-@@ -328,8 +328,6 @@ static const struct hid_device_id hid_have_special_driver[] = {
- 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) },
- 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021) },
- 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021) },
--	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) },
--	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) },
- #endif
- #if IS_ENABLED(CONFIG_HID_APPLEIR)
- 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) },
-@@ -338,6 +336,12 @@ static const struct hid_device_id hid_have_special_driver[] = {
- 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) },
- 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL5) },
- #endif
-+#if IS_ENABLED(CONFIG_HID_APPLETB_BL)
-+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) },
-+#endif
-+#if IS_ENABLED(CONFIG_HID_APPLETB_KBD)
-+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) },
-+#endif
- #if IS_ENABLED(CONFIG_HID_ASUS)
- 	{ HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD) },
- 	{ HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD) },
-diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
-index fc6d6a9053ce..698f44794453 100644
---- a/drivers/hwmon/applesmc.c
-+++ b/drivers/hwmon/applesmc.c
-@@ -6,6 +6,7 @@
-  *
-  * Copyright (C) 2007 Nicolas Boichat <nicolas@boichat.ch>
-  * Copyright (C) 2010 Henrik Rydberg <rydberg@euromail.se>
-+ * Copyright (C) 2019 Paul Pawlowski <paul@mrarm.io>
-  *
-  * Based on hdaps.c driver:
-  * Copyright (C) 2005 Robert Love <rml@novell.com>
-@@ -18,7 +19,7 @@
- #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
- 
- #include <linux/delay.h>
--#include <linux/platform_device.h>
-+#include <linux/acpi.h>
- #include <linux/input.h>
- #include <linux/kernel.h>
- #include <linux/slab.h>
-@@ -35,12 +36,24 @@
- #include <linux/bits.h>
- 
- /* data port used by Apple SMC */
--#define APPLESMC_DATA_PORT	0x300
-+#define APPLESMC_DATA_PORT	0
- /* command/status port used by Apple SMC */
--#define APPLESMC_CMD_PORT	0x304
-+#define APPLESMC_CMD_PORT	4
- 
- #define APPLESMC_NR_PORTS	32 /* 0x300-0x31f */
- 
-+#define APPLESMC_IOMEM_KEY_DATA	0
-+#define APPLESMC_IOMEM_KEY_STATUS	0x4005
-+#define APPLESMC_IOMEM_KEY_NAME	0x78
-+#define APPLESMC_IOMEM_KEY_DATA_LEN	0x7D
-+#define APPLESMC_IOMEM_KEY_SMC_ID	0x7E
-+#define APPLESMC_IOMEM_KEY_CMD		0x7F
-+#define APPLESMC_IOMEM_MIN_SIZE	0x4006
-+
-+#define APPLESMC_IOMEM_KEY_TYPE_CODE		0
-+#define APPLESMC_IOMEM_KEY_TYPE_DATA_LEN	5
-+#define APPLESMC_IOMEM_KEY_TYPE_FLAGS		6
-+
- #define APPLESMC_MAX_DATA_LENGTH 32
- 
- /* Apple SMC status bits */
-@@ -74,6 +87,7 @@
- #define FAN_ID_FMT		"F%dID" /* r-o char[16] */
- 
- #define TEMP_SENSOR_TYPE	"sp78"
-+#define FLOAT_TYPE		"flt "
- 
- /* List of keys used to read/write fan speeds */
- static const char *const fan_speed_fmt[] = {
-@@ -83,6 +97,7 @@ static const char *const fan_speed_fmt[] = {
- 	"F%dSf",		/* safe speed - not all models */
- 	"F%dTg",		/* target speed (manual: rw) */
- };
-+#define FAN_MANUAL_FMT "F%dMd"
- 
- #define INIT_TIMEOUT_MSECS	5000	/* wait up to 5s for device init ... */
- #define INIT_WAIT_MSECS		50	/* ... in 50ms increments */
-@@ -119,7 +134,7 @@ struct applesmc_entry {
- };
- 
- /* Register lookup and registers common to all SMCs */
--static struct applesmc_registers {
-+struct applesmc_registers {
- 	struct mutex mutex;		/* register read/write mutex */
- 	unsigned int key_count;		/* number of SMC registers */
- 	unsigned int fan_count;		/* number of fans */
-@@ -133,26 +148,38 @@ static struct applesmc_registers {
- 	bool init_complete;		/* true when fully initialized */
- 	struct applesmc_entry *cache;	/* cached key entries */
- 	const char **index;		/* temperature key index */
--} smcreg = {
--	.mutex = __MUTEX_INITIALIZER(smcreg.mutex),
- };
- 
--static const int debug;
--static struct platform_device *pdev;
--static s16 rest_x;
--static s16 rest_y;
--static u8 backlight_state[2];
-+struct applesmc_device {
-+	struct acpi_device *dev;
-+	struct device *ldev;
-+	struct applesmc_registers reg;
- 
--static struct device *hwmon_dev;
--static struct input_dev *applesmc_idev;
-+	bool port_base_set, iomem_base_set;
-+	u16 port_base;
-+	u8 *__iomem iomem_base;
-+	u32 iomem_base_addr, iomem_base_size;
- 
--/*
-- * Last index written to key_at_index sysfs file, and value to use for all other
-- * key_at_index_* sysfs files.
-- */
--static unsigned int key_at_index;
-+	s16 rest_x;
-+	s16 rest_y;
-+
-+	u8 backlight_state[2];
-+
-+	struct device *hwmon_dev;
-+	struct input_dev *idev;
-+
-+	/*
-+	 * Last index written to key_at_index sysfs file, and value to use for all other
-+	 * key_at_index_* sysfs files.
-+	 */
-+	unsigned int key_at_index;
- 
--static struct workqueue_struct *applesmc_led_wq;
-+	struct workqueue_struct *backlight_wq;
-+	struct work_struct backlight_work;
-+	struct led_classdev backlight_dev;
-+};
-+
-+static const int debug;
- 
- /*
-  * Wait for specific status bits with a mask on the SMC.
-@@ -162,7 +189,7 @@ static struct workqueue_struct *applesmc_led_wq;
-  * run out past 500ms.
-  */
- 
--static int wait_status(u8 val, u8 mask)
-+static int port_wait_status(struct applesmc_device *smc, u8 val, u8 mask)
- {
- 	u8 status;
- 	int us;
-@@ -170,7 +197,7 @@ static int wait_status(u8 val, u8 mask)
- 
- 	us = APPLESMC_MIN_WAIT;
- 	for (i = 0; i < 24 ; i++) {
--		status = inb(APPLESMC_CMD_PORT);
-+		status = inb(smc->port_base + APPLESMC_CMD_PORT);
- 		if ((status & mask) == val)
- 			return 0;
- 		usleep_range(us, us * 2);
-@@ -180,13 +207,13 @@ static int wait_status(u8 val, u8 mask)
- 	return -EIO;
- }
- 
--/* send_byte - Write to SMC data port. Callers must hold applesmc_lock. */
-+/* port_send_byte - Write to SMC data port. Callers must hold applesmc_lock. */
- 
--static int send_byte(u8 cmd, u16 port)
-+static int port_send_byte(struct applesmc_device *smc, u8 cmd, u16 port)
- {
- 	int status;
- 
--	status = wait_status(0, SMC_STATUS_IB_CLOSED);
-+	status = port_wait_status(smc, 0, SMC_STATUS_IB_CLOSED);
- 	if (status)
- 		return status;
- 	/*
-@@ -195,24 +222,25 @@ static int send_byte(u8 cmd, u16 port)
- 	 * this extra read may not happen if status returns both
- 	 * simultaneously and this would appear to be required.
- 	 */
--	status = wait_status(SMC_STATUS_BUSY, SMC_STATUS_BUSY);
-+	status = port_wait_status(smc, SMC_STATUS_BUSY, SMC_STATUS_BUSY);
- 	if (status)
- 		return status;
- 
--	outb(cmd, port);
-+	outb(cmd, smc->port_base + port);
- 	return 0;
- }
- 
--/* send_command - Write a command to the SMC. Callers must hold applesmc_lock. */
-+/* port_send_command - Write a command to the SMC. Callers must hold applesmc_lock. */
- 
--static int send_command(u8 cmd)
-+static int port_send_command(struct applesmc_device *smc, u8 cmd)
- {
- 	int ret;
- 
--	ret = wait_status(0, SMC_STATUS_IB_CLOSED);
-+	ret = port_wait_status(smc, 0, SMC_STATUS_IB_CLOSED);
- 	if (ret)
- 		return ret;
--	outb(cmd, APPLESMC_CMD_PORT);
-+
-+	outb(cmd, smc->port_base + APPLESMC_CMD_PORT);
- 	return 0;
- }
- 
-@@ -222,110 +250,304 @@ static int send_command(u8 cmd)
-  * If busy is stuck high after the command then the SMC is jammed.
-  */
- 
--static int smc_sane(void)
-+static int port_smc_sane(struct applesmc_device *smc)
- {
- 	int ret;
- 
--	ret = wait_status(0, SMC_STATUS_BUSY);
-+	ret = port_wait_status(smc, 0, SMC_STATUS_BUSY);
- 	if (!ret)
- 		return ret;
--	ret = send_command(APPLESMC_READ_CMD);
-+	ret = port_send_command(smc, APPLESMC_READ_CMD);
- 	if (ret)
- 		return ret;
--	return wait_status(0, SMC_STATUS_BUSY);
-+	return port_wait_status(smc, 0, SMC_STATUS_BUSY);
- }
- 
--static int send_argument(const char *key)
-+static int port_send_argument(struct applesmc_device *smc, const char *key)
- {
- 	int i;
- 
- 	for (i = 0; i < 4; i++)
--		if (send_byte(key[i], APPLESMC_DATA_PORT))
-+		if (port_send_byte(smc, key[i], APPLESMC_DATA_PORT))
- 			return -EIO;
- 	return 0;
- }
- 
--static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len)
-+static int port_read_smc(struct applesmc_device *smc, u8 cmd, const char *key,
-+	u8 *buffer, u8 len)
- {
- 	u8 status, data = 0;
- 	int i;
- 	int ret;
- 
--	ret = smc_sane();
-+	ret = port_smc_sane(smc);
- 	if (ret)
- 		return ret;
- 
--	if (send_command(cmd) || send_argument(key)) {
-+	if (port_send_command(smc, cmd) || port_send_argument(smc, key)) {
- 		pr_warn("%.4s: read arg fail\n", key);
- 		return -EIO;
- 	}
- 
- 	/* This has no effect on newer (2012) SMCs */
--	if (send_byte(len, APPLESMC_DATA_PORT)) {
-+	if (port_send_byte(smc, len, APPLESMC_DATA_PORT)) {
- 		pr_warn("%.4s: read len fail\n", key);
- 		return -EIO;
- 	}
- 
- 	for (i = 0; i < len; i++) {
--		if (wait_status(SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY,
-+		if (port_wait_status(smc,
-+				SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY,
- 				SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY)) {
- 			pr_warn("%.4s: read data[%d] fail\n", key, i);
- 			return -EIO;
- 		}
--		buffer[i] = inb(APPLESMC_DATA_PORT);
-+		buffer[i] = inb(smc->port_base + APPLESMC_DATA_PORT);
- 	}
- 
- 	/* Read the data port until bit0 is cleared */
- 	for (i = 0; i < 16; i++) {
- 		udelay(APPLESMC_MIN_WAIT);
--		status = inb(APPLESMC_CMD_PORT);
-+		status = inb(smc->port_base + APPLESMC_CMD_PORT);
- 		if (!(status & SMC_STATUS_AWAITING_DATA))
- 			break;
--		data = inb(APPLESMC_DATA_PORT);
-+		data = inb(smc->port_base + APPLESMC_DATA_PORT);
- 	}
- 	if (i)
- 		pr_warn("flushed %d bytes, last value is: %d\n", i, data);
- 
--	return wait_status(0, SMC_STATUS_BUSY);
-+	return port_wait_status(smc, 0, SMC_STATUS_BUSY);
- }
- 
--static int write_smc(u8 cmd, const char *key, const u8 *buffer, u8 len)
-+static int port_write_smc(struct applesmc_device *smc, u8 cmd, const char *key,
-+	const u8 *buffer, u8 len)
- {
- 	int i;
- 	int ret;
- 
--	ret = smc_sane();
-+	ret = port_smc_sane(smc);
- 	if (ret)
- 		return ret;
- 
--	if (send_command(cmd) || send_argument(key)) {
-+	if (port_send_command(smc, cmd) || port_send_argument(smc, key)) {
- 		pr_warn("%s: write arg fail\n", key);
- 		return -EIO;
- 	}
- 
--	if (send_byte(len, APPLESMC_DATA_PORT)) {
-+	if (port_send_byte(smc, len, APPLESMC_DATA_PORT)) {
- 		pr_warn("%.4s: write len fail\n", key);
- 		return -EIO;
- 	}
- 
- 	for (i = 0; i < len; i++) {
--		if (send_byte(buffer[i], APPLESMC_DATA_PORT)) {
-+		if (port_send_byte(smc, buffer[i], APPLESMC_DATA_PORT)) {
- 			pr_warn("%s: write data fail\n", key);
- 			return -EIO;
- 		}
- 	}
- 
--	return wait_status(0, SMC_STATUS_BUSY);
-+	return port_wait_status(smc, 0, SMC_STATUS_BUSY);
- }
- 
--static int read_register_count(unsigned int *count)
-+static int port_get_smc_key_info(struct applesmc_device *smc,
-+	const char *key, struct applesmc_entry *info)
- {
--	__be32 be;
- 	int ret;
-+	u8 raw[6];
- 
--	ret = read_smc(APPLESMC_READ_CMD, KEY_COUNT_KEY, (u8 *)&be, 4);
-+	ret = port_read_smc(smc, APPLESMC_GET_KEY_TYPE_CMD, key, raw, 6);
- 	if (ret)
- 		return ret;
-+	info->len = raw[0];
-+	memcpy(info->type, &raw[1], 4);
-+	info->flags = raw[5];
-+	return 0;
-+}
-+
-+
-+/*
-+ * MMIO based communication.
-+ * TODO: Use updated mechanism for cmd timeout/retry
-+ */
-+
-+static void iomem_clear_status(struct applesmc_device *smc)
-+{
-+	if (ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS))
-+		iowrite8(0, smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS);
-+}
-+
-+static int iomem_wait_read(struct applesmc_device *smc)
-+{
-+	u8 status;
-+	int us;
-+	int i;
-+
-+	us = APPLESMC_MIN_WAIT;
-+	for (i = 0; i < 24 ; i++) {
-+		status = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS);
-+		if (status & 0x20)
-+			return 0;
-+		usleep_range(us, us * 2);
-+		if (i > 9)
-+			us <<= 1;
-+	}
-+
-+	dev_warn(smc->ldev, "%s... timeout\n", __func__);
-+	return -EIO;
-+}
-+
-+static int iomem_read_smc(struct applesmc_device *smc, u8 cmd, const char *key,
-+	u8 *buffer, u8 len)
-+{
-+	u8 err, remote_len;
-+	u32 key_int = *((u32 *) key);
-+
-+	iomem_clear_status(smc);
-+	iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME);
-+	iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID);
-+	iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
-+
-+	if (iomem_wait_read(smc))
-+		return -EIO;
-+
-+	err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
-+	if (err != 0) {
-+		dev_warn(smc->ldev, "read_smc_mmio(%x %8x/%.4s) failed: %u\n",
-+				cmd, key_int, key, err);
-+		return -EIO;
-+	}
-+
-+	if (cmd == APPLESMC_READ_CMD) {
-+		remote_len = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_DATA_LEN);
-+		if (remote_len != len) {
-+			dev_warn(smc->ldev,
-+				 "read_smc_mmio(%x %8x/%.4s) failed: buffer length mismatch (remote = %u, requested = %u)\n",
-+				 cmd, key_int, key, remote_len, len);
-+			return -EINVAL;
-+		}
-+	} else {
-+		remote_len = len;
-+	}
-+
-+	memcpy_fromio(buffer, smc->iomem_base + APPLESMC_IOMEM_KEY_DATA,
-+			remote_len);
-+
-+	dev_dbg(smc->ldev, "read_smc_mmio(%x %8x/%.4s): buflen=%u reslen=%u\n",
-+			cmd, key_int, key, len, remote_len);
-+	print_hex_dump_bytes("read_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, remote_len);
-+	return 0;
-+}
-+
-+static int iomem_get_smc_key_type(struct applesmc_device *smc, const char *key,
-+	struct applesmc_entry *e)
-+{
-+	u8 err;
-+	u8 cmd = APPLESMC_GET_KEY_TYPE_CMD;
-+	u32 key_int = *((u32 *) key);
-+
-+	iomem_clear_status(smc);
-+	iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME);
-+	iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID);
-+	iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
-+
-+	if (iomem_wait_read(smc))
-+		return -EIO;
-+
-+	err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
-+	if (err != 0) {
-+		dev_warn(smc->ldev, "get_smc_key_type_mmio(%.4s) failed: %u\n", key, err);
-+		return -EIO;
-+	}
-+
-+	e->len = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_DATA_LEN);
-+	*((uint32_t *) e->type) = ioread32(
-+			smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_CODE);
-+	e->flags = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_FLAGS);
-+
-+	dev_dbg(smc->ldev, "get_smc_key_type_mmio(%.4s): len=%u type=%.4s flags=%x\n",
-+		key, e->len, e->type, e->flags);
-+	return 0;
-+}
-+
-+static int iomem_write_smc(struct applesmc_device *smc, u8 cmd, const char *key,
-+	const u8 *buffer, u8 len)
-+{
-+	u8 err;
-+	u32 key_int = *((u32 *) key);
-+
-+	iomem_clear_status(smc);
-+	iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME);
-+	memcpy_toio(smc->iomem_base + APPLESMC_IOMEM_KEY_DATA, buffer, len);
-+	iowrite32(len, smc->iomem_base + APPLESMC_IOMEM_KEY_DATA_LEN);
-+	iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID);
-+	iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
-+
-+	if (iomem_wait_read(smc))
-+		return -EIO;
-+
-+	err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
-+	if (err != 0) {
-+		dev_warn(smc->ldev, "write_smc_mmio(%x %.4s) failed: %u\n", cmd, key, err);
-+		print_hex_dump_bytes("write_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, len);
-+		return -EIO;
-+	}
-+
-+	dev_dbg(smc->ldev, "write_smc_mmio(%x %.4s): buflen=%u\n", cmd, key, len);
-+	print_hex_dump_bytes("write_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, len);
-+	return 0;
-+}
-+
-+
-+static int read_smc(struct applesmc_device *smc, const char *key,
-+	u8 *buffer, u8 len)
-+{
-+	if (smc->iomem_base_set)
-+		return iomem_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len);
-+	else
-+		return port_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len);
-+}
-+
-+static int write_smc(struct applesmc_device *smc, const char *key,
-+	const u8 *buffer, u8 len)
-+{
-+	if (smc->iomem_base_set)
-+		return iomem_write_smc(smc, APPLESMC_WRITE_CMD, key, buffer, len);
-+	else
-+		return port_write_smc(smc, APPLESMC_WRITE_CMD, key, buffer, len);
-+}
-+
-+static int get_smc_key_by_index(struct applesmc_device *smc,
-+	unsigned int index, char *key)
-+{
-+	__be32 be;
-+
-+	be = cpu_to_be32(index);
-+	if (smc->iomem_base_set)
-+		return iomem_read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD,
-+							  (const char *) &be, (u8 *) key, 4);
-+	else
-+		return port_read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD,
-+							 (const char *) &be, (u8 *) key, 4);
-+}
-+
-+static int get_smc_key_info(struct applesmc_device *smc, const char *key,
-+	struct applesmc_entry *info)
-+{
-+	if (smc->iomem_base_set)
-+		return iomem_get_smc_key_type(smc, key, info);
-+	else
-+		return port_get_smc_key_info(smc, key, info);
-+}
-+
-+static int read_register_count(struct applesmc_device *smc,
-+	unsigned int *count)
-+{
-+	__be32 be;
-+	int ret;
-+
-+	ret = read_smc(smc, KEY_COUNT_KEY, (u8 *)&be, 4);
-+	if (ret < 0)
-+		return ret;
- 
- 	*count = be32_to_cpu(be);
- 	return 0;
-@@ -338,76 +560,73 @@ static int read_register_count(unsigned int *count)
-  * All functions below are concurrency safe - callers should NOT hold lock.
-  */
- 
--static int applesmc_read_entry(const struct applesmc_entry *entry,
--			       u8 *buf, u8 len)
-+static int applesmc_read_entry(struct applesmc_device *smc,
-+	const struct applesmc_entry *entry, u8 *buf, u8 len)
- {
- 	int ret;
- 
- 	if (entry->len != len)
- 		return -EINVAL;
--	mutex_lock(&smcreg.mutex);
--	ret = read_smc(APPLESMC_READ_CMD, entry->key, buf, len);
--	mutex_unlock(&smcreg.mutex);
-+	mutex_lock(&smc->reg.mutex);
-+	ret = read_smc(smc, entry->key, buf, len);
-+	mutex_unlock(&smc->reg.mutex);
- 
- 	return ret;
- }
- 
--static int applesmc_write_entry(const struct applesmc_entry *entry,
--				const u8 *buf, u8 len)
-+static int applesmc_write_entry(struct applesmc_device *smc,
-+	const struct applesmc_entry *entry, const u8 *buf, u8 len)
- {
- 	int ret;
- 
- 	if (entry->len != len)
- 		return -EINVAL;
--	mutex_lock(&smcreg.mutex);
--	ret = write_smc(APPLESMC_WRITE_CMD, entry->key, buf, len);
--	mutex_unlock(&smcreg.mutex);
-+	mutex_lock(&smc->reg.mutex);
-+	ret = write_smc(smc, entry->key, buf, len);
-+	mutex_unlock(&smc->reg.mutex);
- 	return ret;
- }
- 
--static const struct applesmc_entry *applesmc_get_entry_by_index(int index)
-+static const struct applesmc_entry *applesmc_get_entry_by_index(
-+	struct applesmc_device *smc, int index)
- {
--	struct applesmc_entry *cache = &smcreg.cache[index];
--	u8 key[4], info[6];
--	__be32 be;
-+	struct applesmc_entry *cache = &smc->reg.cache[index];
-+	char key[4];
- 	int ret = 0;
- 
- 	if (cache->valid)
- 		return cache;
- 
--	mutex_lock(&smcreg.mutex);
-+	mutex_lock(&smc->reg.mutex);
- 
- 	if (cache->valid)
- 		goto out;
--	be = cpu_to_be32(index);
--	ret = read_smc(APPLESMC_GET_KEY_BY_INDEX_CMD, (u8 *)&be, key, 4);
-+	ret = get_smc_key_by_index(smc, index, key);
- 	if (ret)
- 		goto out;
--	ret = read_smc(APPLESMC_GET_KEY_TYPE_CMD, key, info, 6);
-+	memcpy(cache->key, key, 4);
-+
-+	ret = get_smc_key_info(smc, key, cache);
- 	if (ret)
- 		goto out;
--
--	memcpy(cache->key, key, 4);
--	cache->len = info[0];
--	memcpy(cache->type, &info[1], 4);
--	cache->flags = info[5];
- 	cache->valid = true;
- 
- out:
--	mutex_unlock(&smcreg.mutex);
-+	mutex_unlock(&smc->reg.mutex);
- 	if (ret)
- 		return ERR_PTR(ret);
- 	return cache;
- }
- 
--static int applesmc_get_lower_bound(unsigned int *lo, const char *key)
-+static int applesmc_get_lower_bound(struct applesmc_device *smc,
-+	unsigned int *lo, const char *key)
- {
--	int begin = 0, end = smcreg.key_count;
-+	int begin = 0, end = smc->reg.key_count;
- 	const struct applesmc_entry *entry;
- 
- 	while (begin != end) {
- 		int middle = begin + (end - begin) / 2;
--		entry = applesmc_get_entry_by_index(middle);
-+		entry = applesmc_get_entry_by_index(smc, middle);
- 		if (IS_ERR(entry)) {
- 			*lo = 0;
- 			return PTR_ERR(entry);
-@@ -422,16 +641,17 @@ static int applesmc_get_lower_bound(unsigned int *lo, const char *key)
- 	return 0;
- }
- 
--static int applesmc_get_upper_bound(unsigned int *hi, const char *key)
-+static int applesmc_get_upper_bound(struct applesmc_device *smc,
-+	unsigned int *hi, const char *key)
- {
--	int begin = 0, end = smcreg.key_count;
-+	int begin = 0, end = smc->reg.key_count;
- 	const struct applesmc_entry *entry;
- 
- 	while (begin != end) {
- 		int middle = begin + (end - begin) / 2;
--		entry = applesmc_get_entry_by_index(middle);
-+		entry = applesmc_get_entry_by_index(smc, middle);
- 		if (IS_ERR(entry)) {
--			*hi = smcreg.key_count;
-+			*hi = smc->reg.key_count;
- 			return PTR_ERR(entry);
- 		}
- 		if (strcmp(key, entry->key) < 0)
-@@ -444,50 +664,54 @@ static int applesmc_get_upper_bound(unsigned int *hi, const char *key)
- 	return 0;
- }
- 
--static const struct applesmc_entry *applesmc_get_entry_by_key(const char *key)
-+static const struct applesmc_entry *applesmc_get_entry_by_key(
-+	struct applesmc_device *smc, const char *key)
- {
- 	int begin, end;
- 	int ret;
- 
--	ret = applesmc_get_lower_bound(&begin, key);
-+	ret = applesmc_get_lower_bound(smc, &begin, key);
- 	if (ret)
- 		return ERR_PTR(ret);
--	ret = applesmc_get_upper_bound(&end, key);
-+	ret = applesmc_get_upper_bound(smc, &end, key);
- 	if (ret)
- 		return ERR_PTR(ret);
- 	if (end - begin != 1)
- 		return ERR_PTR(-EINVAL);
- 
--	return applesmc_get_entry_by_index(begin);
-+	return applesmc_get_entry_by_index(smc, begin);
- }
- 
--static int applesmc_read_key(const char *key, u8 *buffer, u8 len)
-+static int applesmc_read_key(struct applesmc_device *smc,
-+	const char *key, u8 *buffer, u8 len)
- {
- 	const struct applesmc_entry *entry;
- 
--	entry = applesmc_get_entry_by_key(key);
-+	entry = applesmc_get_entry_by_key(smc, key);
- 	if (IS_ERR(entry))
- 		return PTR_ERR(entry);
- 
--	return applesmc_read_entry(entry, buffer, len);
-+	return applesmc_read_entry(smc, entry, buffer, len);
- }
- 
--static int applesmc_write_key(const char *key, const u8 *buffer, u8 len)
-+static int applesmc_write_key(struct applesmc_device *smc,
-+	const char *key, const u8 *buffer, u8 len)
- {
- 	const struct applesmc_entry *entry;
- 
--	entry = applesmc_get_entry_by_key(key);
-+	entry = applesmc_get_entry_by_key(smc, key);
- 	if (IS_ERR(entry))
- 		return PTR_ERR(entry);
- 
--	return applesmc_write_entry(entry, buffer, len);
-+	return applesmc_write_entry(smc, entry, buffer, len);
- }
- 
--static int applesmc_has_key(const char *key, bool *value)
-+static int applesmc_has_key(struct applesmc_device *smc,
-+	const char *key, bool *value)
- {
- 	const struct applesmc_entry *entry;
- 
--	entry = applesmc_get_entry_by_key(key);
-+	entry = applesmc_get_entry_by_key(smc, key);
- 	if (IS_ERR(entry) && PTR_ERR(entry) != -EINVAL)
- 		return PTR_ERR(entry);
- 
-@@ -498,12 +722,13 @@ static int applesmc_has_key(const char *key, bool *value)
- /*
-  * applesmc_read_s16 - Read 16-bit signed big endian register
-  */
--static int applesmc_read_s16(const char *key, s16 *value)
-+static int applesmc_read_s16(struct applesmc_device *smc,
-+	const char *key, s16 *value)
- {
- 	u8 buffer[2];
- 	int ret;
- 
--	ret = applesmc_read_key(key, buffer, 2);
-+	ret = applesmc_read_key(smc, key, buffer, 2);
- 	if (ret)
- 		return ret;
- 
-@@ -511,31 +736,68 @@ static int applesmc_read_s16(const char *key, s16 *value)
- 	return 0;
- }
- 
-+/**
-+ * applesmc_float_to_u32 - Retrieve the integral part of a float.
-+ * This is needed because Apple made fans use float values in the T2.
-+ * The fractional point is not significantly useful though, and the integral
-+ * part can be easily extracted.
-+ */
-+static inline u32 applesmc_float_to_u32(u32 d)
-+{
-+	u8 sign = (u8) ((d >> 31) & 1);
-+	s32 exp = (s32) ((d >> 23) & 0xff) - 0x7f;
-+	u32 fr = d & ((1u << 23) - 1);
-+
-+	if (sign || exp < 0)
-+		return 0;
-+
-+	return (u32) ((1u << exp) + (fr >> (23 - exp)));
-+}
-+
-+/**
-+ * applesmc_u32_to_float - Convert an u32 into a float.
-+ * See applesmc_float_to_u32 for a rationale.
-+ */
-+static inline u32 applesmc_u32_to_float(u32 d)
-+{
-+	u32 dc = d, bc = 0, exp;
-+
-+	if (!d)
-+		return 0;
-+
-+	while (dc >>= 1)
-+		++bc;
-+	exp = 0x7f + bc;
-+
-+	return (u32) ((exp << 23) |
-+		((d << (23 - (exp - 0x7f))) & ((1u << 23) - 1)));
-+}
- /*
-  * applesmc_device_init - initialize the accelerometer.  Can sleep.
-  */
--static void applesmc_device_init(void)
-+static void applesmc_device_init(struct applesmc_device *smc)
- {
- 	int total;
- 	u8 buffer[2];
- 
--	if (!smcreg.has_accelerometer)
-+	if (!smc->reg.has_accelerometer)
- 		return;
- 
- 	for (total = INIT_TIMEOUT_MSECS; total > 0; total -= INIT_WAIT_MSECS) {
--		if (!applesmc_read_key(MOTION_SENSOR_KEY, buffer, 2) &&
-+		if (!applesmc_read_key(smc, MOTION_SENSOR_KEY, buffer, 2) &&
- 				(buffer[0] != 0x00 || buffer[1] != 0x00))
- 			return;
- 		buffer[0] = 0xe0;
- 		buffer[1] = 0x00;
--		applesmc_write_key(MOTION_SENSOR_KEY, buffer, 2);
-+		applesmc_write_key(smc, MOTION_SENSOR_KEY, buffer, 2);
- 		msleep(INIT_WAIT_MSECS);
- 	}
- 
- 	pr_warn("failed to init the device\n");
- }
- 
--static int applesmc_init_index(struct applesmc_registers *s)
-+static int applesmc_init_index(struct applesmc_device *smc,
-+	struct applesmc_registers *s)
- {
- 	const struct applesmc_entry *entry;
- 	unsigned int i;
-@@ -548,7 +810,7 @@ static int applesmc_init_index(struct applesmc_registers *s)
- 		return -ENOMEM;
- 
- 	for (i = s->temp_begin; i < s->temp_end; i++) {
--		entry = applesmc_get_entry_by_index(i);
-+		entry = applesmc_get_entry_by_index(smc, i);
- 		if (IS_ERR(entry))
- 			continue;
- 		if (strcmp(entry->type, TEMP_SENSOR_TYPE))
-@@ -562,9 +824,9 @@ static int applesmc_init_index(struct applesmc_registers *s)
- /*
-  * applesmc_init_smcreg_try - Try to initialize register cache. Idempotent.
-  */
--static int applesmc_init_smcreg_try(void)
-+static int applesmc_init_smcreg_try(struct applesmc_device *smc)
- {
--	struct applesmc_registers *s = &smcreg;
-+	struct applesmc_registers *s = &smc->reg;
- 	bool left_light_sensor = false, right_light_sensor = false;
- 	unsigned int count;
- 	u8 tmp[1];
-@@ -573,7 +835,7 @@ static int applesmc_init_smcreg_try(void)
- 	if (s->init_complete)
- 		return 0;
- 
--	ret = read_register_count(&count);
-+	ret = read_register_count(smc, &count);
- 	if (ret)
- 		return ret;
- 
-@@ -590,35 +852,35 @@ static int applesmc_init_smcreg_try(void)
- 	if (!s->cache)
- 		return -ENOMEM;
- 
--	ret = applesmc_read_key(FANS_COUNT, tmp, 1);
-+	ret = applesmc_read_key(smc, FANS_COUNT, tmp, 1);
- 	if (ret)
- 		return ret;
- 	s->fan_count = tmp[0];
- 	if (s->fan_count > 10)
- 		s->fan_count = 10;
- 
--	ret = applesmc_get_lower_bound(&s->temp_begin, "T");
-+	ret = applesmc_get_lower_bound(smc, &s->temp_begin, "T");
- 	if (ret)
- 		return ret;
--	ret = applesmc_get_lower_bound(&s->temp_end, "U");
-+	ret = applesmc_get_lower_bound(smc, &s->temp_end, "U");
- 	if (ret)
- 		return ret;
- 	s->temp_count = s->temp_end - s->temp_begin;
- 
--	ret = applesmc_init_index(s);
-+	ret = applesmc_init_index(smc, s);
- 	if (ret)
- 		return ret;
- 
--	ret = applesmc_has_key(LIGHT_SENSOR_LEFT_KEY, &left_light_sensor);
-+	ret = applesmc_has_key(smc, LIGHT_SENSOR_LEFT_KEY, &left_light_sensor);
- 	if (ret)
- 		return ret;
--	ret = applesmc_has_key(LIGHT_SENSOR_RIGHT_KEY, &right_light_sensor);
-+	ret = applesmc_has_key(smc, LIGHT_SENSOR_RIGHT_KEY, &right_light_sensor);
- 	if (ret)
- 		return ret;
--	ret = applesmc_has_key(MOTION_SENSOR_KEY, &s->has_accelerometer);
-+	ret = applesmc_has_key(smc, MOTION_SENSOR_KEY, &s->has_accelerometer);
- 	if (ret)
- 		return ret;
--	ret = applesmc_has_key(BACKLIGHT_KEY, &s->has_key_backlight);
-+	ret = applesmc_has_key(smc, BACKLIGHT_KEY, &s->has_key_backlight);
- 	if (ret)
- 		return ret;
- 
-@@ -634,13 +896,13 @@ static int applesmc_init_smcreg_try(void)
- 	return 0;
- }
- 
--static void applesmc_destroy_smcreg(void)
-+static void applesmc_destroy_smcreg(struct applesmc_device *smc)
- {
--	kfree(smcreg.index);
--	smcreg.index = NULL;
--	kfree(smcreg.cache);
--	smcreg.cache = NULL;
--	smcreg.init_complete = false;
-+	kfree(smc->reg.index);
-+	smc->reg.index = NULL;
-+	kfree(smc->reg.cache);
-+	smc->reg.cache = NULL;
-+	smc->reg.init_complete = false;
- }
- 
- /*
-@@ -649,12 +911,12 @@ static void applesmc_destroy_smcreg(void)
-  * Retries until initialization is successful, or the operation times out.
-  *
-  */
--static int applesmc_init_smcreg(void)
-+static int applesmc_init_smcreg(struct applesmc_device *smc)
- {
- 	int ms, ret;
- 
- 	for (ms = 0; ms < INIT_TIMEOUT_MSECS; ms += INIT_WAIT_MSECS) {
--		ret = applesmc_init_smcreg_try();
-+		ret = applesmc_init_smcreg_try(smc);
- 		if (!ret) {
- 			if (ms)
- 				pr_info("init_smcreg() took %d ms\n", ms);
-@@ -663,50 +925,223 @@ static int applesmc_init_smcreg(void)
- 		msleep(INIT_WAIT_MSECS);
- 	}
- 
--	applesmc_destroy_smcreg();
-+	applesmc_destroy_smcreg(smc);
- 
- 	return ret;
- }
- 
- /* Device model stuff */
--static int applesmc_probe(struct platform_device *dev)
-+
-+static int applesmc_init_resources(struct applesmc_device *smc);
-+static void applesmc_free_resources(struct applesmc_device *smc);
-+static int applesmc_create_modules(struct applesmc_device *smc);
-+static void applesmc_destroy_modules(struct applesmc_device *smc);
-+
-+static int applesmc_add(struct acpi_device *dev)
- {
-+	struct applesmc_device *smc;
- 	int ret;
- 
--	ret = applesmc_init_smcreg();
-+	smc = kzalloc(sizeof(struct applesmc_device), GFP_KERNEL);
-+	if (!smc)
-+		return -ENOMEM;
-+	smc->dev = dev;
-+	smc->ldev = &dev->dev;
-+	mutex_init(&smc->reg.mutex);
-+
-+	dev_set_drvdata(&dev->dev, smc);
-+
-+	ret = applesmc_init_resources(smc);
- 	if (ret)
--		return ret;
-+		goto out_mem;
-+
-+	ret = applesmc_init_smcreg(smc);
-+	if (ret)
-+		goto out_res;
-+
-+	applesmc_device_init(smc);
-+
-+	ret = applesmc_create_modules(smc);
-+	if (ret)
-+		goto out_reg;
-+
-+	return 0;
-+
-+out_reg:
-+	applesmc_destroy_smcreg(smc);
-+out_res:
-+	applesmc_free_resources(smc);
-+out_mem:
-+	dev_set_drvdata(&dev->dev, NULL);
-+	mutex_destroy(&smc->reg.mutex);
-+	kfree(smc);
-+
-+	return ret;
-+}
-+
-+static void applesmc_remove(struct acpi_device *dev)
-+{
-+	struct applesmc_device *smc = dev_get_drvdata(&dev->dev);
-+
-+	applesmc_destroy_modules(smc);
-+	applesmc_destroy_smcreg(smc);
-+	applesmc_free_resources(smc);
- 
--	applesmc_device_init();
-+	mutex_destroy(&smc->reg.mutex);
-+	kfree(smc);
-+
-+	return;
-+}
-+
-+static acpi_status applesmc_walk_resources(struct acpi_resource *res,
-+	void *data)
-+{
-+	struct applesmc_device *smc = data;
-+
-+	switch (res->type) {
-+	case ACPI_RESOURCE_TYPE_IO:
-+		if (!smc->port_base_set) {
-+			if (res->data.io.address_length < APPLESMC_NR_PORTS)
-+				return AE_ERROR;
-+			smc->port_base = res->data.io.minimum;
-+			smc->port_base_set = true;
-+		}
-+		return AE_OK;
-+
-+	case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
-+		if (!smc->iomem_base_set) {
-+			if (res->data.fixed_memory32.address_length <
-+					APPLESMC_IOMEM_MIN_SIZE) {
-+				dev_warn(smc->ldev, "found iomem but it's too small: %u\n",
-+						 res->data.fixed_memory32.address_length);
-+				return AE_OK;
-+			}
-+			smc->iomem_base_addr = res->data.fixed_memory32.address;
-+			smc->iomem_base_size = res->data.fixed_memory32.address_length;
-+			smc->iomem_base_set = true;
-+		}
-+		return AE_OK;
-+
-+	case ACPI_RESOURCE_TYPE_END_TAG:
-+		if (smc->port_base_set)
-+			return AE_OK;
-+		else
-+			return AE_NOT_FOUND;
-+
-+	default:
-+		return AE_OK;
-+	}
-+}
-+
-+static int applesmc_try_enable_iomem(struct applesmc_device *smc);
-+
-+static int applesmc_init_resources(struct applesmc_device *smc)
-+{
-+	int ret;
-+
-+	ret = acpi_walk_resources(smc->dev->handle, METHOD_NAME__CRS,
-+			applesmc_walk_resources, smc);
-+	if (ACPI_FAILURE(ret))
-+		return -ENXIO;
-+
-+	if (!request_region(smc->port_base, APPLESMC_NR_PORTS, "applesmc"))
-+		return -ENXIO;
-+
-+	if (smc->iomem_base_set) {
-+		if (applesmc_try_enable_iomem(smc))
-+			smc->iomem_base_set = false;
-+	}
-+
-+	return 0;
-+}
-+
-+static int applesmc_try_enable_iomem(struct applesmc_device *smc)
-+{
-+	u8 test_val, ldkn_version;
-+
-+	dev_dbg(smc->ldev, "Trying to enable iomem based communication\n");
-+	smc->iomem_base = ioremap(smc->iomem_base_addr, smc->iomem_base_size);
-+	if (!smc->iomem_base)
-+		goto out;
-+
-+	/* Apple's driver does this check for some reason */
-+	test_val = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS);
-+	if (test_val == 0xff) {
-+		dev_warn(smc->ldev,
-+			 "iomem enable failed: initial status is 0xff (is %x)\n",
-+			 test_val);
-+		goto out_iomem;
-+	}
-+
-+	if (read_smc(smc, "LDKN", &ldkn_version, 1)) {
-+		dev_warn(smc->ldev, "iomem enable failed: ldkn read failed\n");
-+		goto out_iomem;
-+	}
-+
-+	if (ldkn_version < 2) {
-+		dev_warn(smc->ldev,
-+			 "iomem enable failed: ldkn version %u is less than minimum (2)\n",
-+			 ldkn_version);
-+		goto out_iomem;
-+	}
- 
- 	return 0;
-+
-+out_iomem:
-+	iounmap(smc->iomem_base);
-+
-+out:
-+	return -ENXIO;
-+}
-+
-+static void applesmc_free_resources(struct applesmc_device *smc)
-+{
-+	if (smc->iomem_base_set)
-+		iounmap(smc->iomem_base);
-+	release_region(smc->port_base, APPLESMC_NR_PORTS);
- }
- 
- /* Synchronize device with memorized backlight state */
- static int applesmc_pm_resume(struct device *dev)
- {
--	if (smcreg.has_key_backlight)
--		applesmc_write_key(BACKLIGHT_KEY, backlight_state, 2);
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
-+
-+	if (smc->reg.has_key_backlight)
-+		applesmc_write_key(smc, BACKLIGHT_KEY, smc->backlight_state, 2);
-+
- 	return 0;
- }
- 
- /* Reinitialize device on resume from hibernation */
- static int applesmc_pm_restore(struct device *dev)
- {
--	applesmc_device_init();
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
-+
-+	applesmc_device_init(smc);
-+
- 	return applesmc_pm_resume(dev);
- }
- 
-+static const struct acpi_device_id applesmc_ids[] = {
-+	{"APP0001", 0},
-+	{"", 0},
-+};
-+
- static const struct dev_pm_ops applesmc_pm_ops = {
- 	.resume = applesmc_pm_resume,
- 	.restore = applesmc_pm_restore,
- };
- 
--static struct platform_driver applesmc_driver = {
--	.probe = applesmc_probe,
--	.driver	= {
--		.name = "applesmc",
--		.pm = &applesmc_pm_ops,
-+static struct acpi_driver applesmc_driver = {
-+	.name = "applesmc",
-+	.class = "applesmc",
-+	.ids = applesmc_ids,
-+	.ops = {
-+		.add = applesmc_add,
-+		.remove = applesmc_remove
-+	},
-+	.drv = {
-+		.pm = &applesmc_pm_ops
- 	},
- };
- 
-@@ -714,25 +1149,26 @@ static struct platform_driver applesmc_driver = {
-  * applesmc_calibrate - Set our "resting" values.  Callers must
-  * hold applesmc_lock.
-  */
--static void applesmc_calibrate(void)
-+static void applesmc_calibrate(struct applesmc_device *smc)
- {
--	applesmc_read_s16(MOTION_SENSOR_X_KEY, &rest_x);
--	applesmc_read_s16(MOTION_SENSOR_Y_KEY, &rest_y);
--	rest_x = -rest_x;
-+	applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &smc->rest_x);
-+	applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &smc->rest_y);
-+	smc->rest_x = -smc->rest_x;
- }
- 
- static void applesmc_idev_poll(struct input_dev *idev)
- {
-+	struct applesmc_device *smc = dev_get_drvdata(&idev->dev);
- 	s16 x, y;
- 
--	if (applesmc_read_s16(MOTION_SENSOR_X_KEY, &x))
-+	if (applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &x))
- 		return;
--	if (applesmc_read_s16(MOTION_SENSOR_Y_KEY, &y))
-+	if (applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &y))
- 		return;
- 
- 	x = -x;
--	input_report_abs(idev, ABS_X, x - rest_x);
--	input_report_abs(idev, ABS_Y, y - rest_y);
-+	input_report_abs(idev, ABS_X, x - smc->rest_x);
-+	input_report_abs(idev, ABS_Y, y - smc->rest_y);
- 	input_sync(idev);
- }
- 
-@@ -747,16 +1183,17 @@ static ssize_t applesmc_name_show(struct device *dev,
- static ssize_t applesmc_position_show(struct device *dev,
- 				   struct device_attribute *attr, char *buf)
- {
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
- 	int ret;
- 	s16 x, y, z;
- 
--	ret = applesmc_read_s16(MOTION_SENSOR_X_KEY, &x);
-+	ret = applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &x);
- 	if (ret)
- 		goto out;
--	ret = applesmc_read_s16(MOTION_SENSOR_Y_KEY, &y);
-+	ret = applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &y);
- 	if (ret)
- 		goto out;
--	ret = applesmc_read_s16(MOTION_SENSOR_Z_KEY, &z);
-+	ret = applesmc_read_s16(smc, MOTION_SENSOR_Z_KEY, &z);
- 	if (ret)
- 		goto out;
- 
-@@ -770,6 +1207,7 @@ static ssize_t applesmc_position_show(struct device *dev,
- static ssize_t applesmc_light_show(struct device *dev,
- 				struct device_attribute *attr, char *sysfsbuf)
- {
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
- 	const struct applesmc_entry *entry;
- 	static int data_length;
- 	int ret;
-@@ -777,7 +1215,7 @@ static ssize_t applesmc_light_show(struct device *dev,
- 	u8 buffer[10];
- 
- 	if (!data_length) {
--		entry = applesmc_get_entry_by_key(LIGHT_SENSOR_LEFT_KEY);
-+		entry = applesmc_get_entry_by_key(smc, LIGHT_SENSOR_LEFT_KEY);
- 		if (IS_ERR(entry))
- 			return PTR_ERR(entry);
- 		if (entry->len > 10)
-@@ -786,7 +1224,7 @@ static ssize_t applesmc_light_show(struct device *dev,
- 		pr_info("light sensor data length set to %d\n", data_length);
- 	}
- 
--	ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, data_length);
-+	ret = applesmc_read_key(smc, LIGHT_SENSOR_LEFT_KEY, buffer, data_length);
- 	if (ret)
- 		goto out;
- 	/* newer macbooks report a single 10-bit bigendian value */
-@@ -796,7 +1234,7 @@ static ssize_t applesmc_light_show(struct device *dev,
- 	}
- 	left = buffer[2];
- 
--	ret = applesmc_read_key(LIGHT_SENSOR_RIGHT_KEY, buffer, data_length);
-+	ret = applesmc_read_key(smc, LIGHT_SENSOR_RIGHT_KEY, buffer, data_length);
- 	if (ret)
- 		goto out;
- 	right = buffer[2];
-@@ -812,7 +1250,8 @@ static ssize_t applesmc_light_show(struct device *dev,
- static ssize_t applesmc_show_sensor_label(struct device *dev,
- 			struct device_attribute *devattr, char *sysfsbuf)
- {
--	const char *key = smcreg.index[to_index(devattr)];
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
-+	const char *key = smc->reg.index[to_index(devattr)];
- 
- 	return sysfs_emit(sysfsbuf, "%s\n", key);
- }
-@@ -821,12 +1260,13 @@ static ssize_t applesmc_show_sensor_label(struct device *dev,
- static ssize_t applesmc_show_temperature(struct device *dev,
- 			struct device_attribute *devattr, char *sysfsbuf)
- {
--	const char *key = smcreg.index[to_index(devattr)];
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
-+	const char *key = smc->reg.index[to_index(devattr)];
- 	int ret;
- 	s16 value;
- 	int temp;
- 
--	ret = applesmc_read_s16(key, &value);
-+	ret = applesmc_read_s16(smc, key, &value);
- 	if (ret)
- 		return ret;
- 
-@@ -838,6 +1278,8 @@ static ssize_t applesmc_show_temperature(struct device *dev,
- static ssize_t applesmc_show_fan_speed(struct device *dev,
- 				struct device_attribute *attr, char *sysfsbuf)
- {
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
-+	const struct applesmc_entry *entry;
- 	int ret;
- 	unsigned int speed = 0;
- 	char newkey[5];
-@@ -846,11 +1288,21 @@ static ssize_t applesmc_show_fan_speed(struct device *dev,
- 	scnprintf(newkey, sizeof(newkey), fan_speed_fmt[to_option(attr)],
- 		  to_index(attr));
- 
--	ret = applesmc_read_key(newkey, buffer, 2);
-+	entry = applesmc_get_entry_by_key(smc, newkey);
-+	if (IS_ERR(entry))
-+		return PTR_ERR(entry);
-+
-+	if (!strcmp(entry->type, FLOAT_TYPE)) {
-+		ret = applesmc_read_entry(smc, entry, (u8 *) &speed, 4);
-+		speed = applesmc_float_to_u32(speed);
-+	} else {
-+		ret = applesmc_read_entry(smc, entry, buffer, 2);
-+		speed = ((buffer[0] << 8 | buffer[1]) >> 2);
-+	}
-+
- 	if (ret)
- 		return ret;
- 
--	speed = ((buffer[0] << 8 | buffer[1]) >> 2);
- 	return sysfs_emit(sysfsbuf, "%u\n", speed);
- }
- 
-@@ -858,6 +1310,8 @@ static ssize_t applesmc_store_fan_speed(struct device *dev,
- 					struct device_attribute *attr,
- 					const char *sysfsbuf, size_t count)
- {
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
-+	const struct applesmc_entry *entry;
- 	int ret;
- 	unsigned long speed;
- 	char newkey[5];
-@@ -869,9 +1323,18 @@ static ssize_t applesmc_store_fan_speed(struct device *dev,
- 	scnprintf(newkey, sizeof(newkey), fan_speed_fmt[to_option(attr)],
- 		  to_index(attr));
- 
--	buffer[0] = (speed >> 6) & 0xff;
--	buffer[1] = (speed << 2) & 0xff;
--	ret = applesmc_write_key(newkey, buffer, 2);
-+	entry = applesmc_get_entry_by_key(smc, newkey);
-+	if (IS_ERR(entry))
-+		return PTR_ERR(entry);
-+
-+	if (!strcmp(entry->type, FLOAT_TYPE)) {
-+		speed = applesmc_u32_to_float(speed);
-+		ret = applesmc_write_entry(smc, entry, (u8 *) &speed, 4);
-+	} else {
-+		buffer[0] = (speed >> 6) & 0xff;
-+		buffer[1] = (speed << 2) & 0xff;
-+		ret = applesmc_write_key(smc, newkey, buffer, 2);
-+	}
- 
- 	if (ret)
- 		return ret;
-@@ -882,15 +1345,30 @@ static ssize_t applesmc_store_fan_speed(struct device *dev,
- static ssize_t applesmc_show_fan_manual(struct device *dev,
- 			struct device_attribute *attr, char *sysfsbuf)
- {
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
- 	int ret;
- 	u16 manual = 0;
- 	u8 buffer[2];
-+	char newkey[5];
-+	bool has_newkey = false;
-+
-+	scnprintf(newkey, sizeof(newkey), FAN_MANUAL_FMT, to_index(attr));
-+
-+	ret = applesmc_has_key(smc, newkey, &has_newkey);
-+	if (ret)
-+		return ret;
-+
-+	if (has_newkey) {
-+		ret = applesmc_read_key(smc, newkey, buffer, 1);
-+		manual = buffer[0];
-+	} else {
-+		ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2);
-+		manual = ((buffer[0] << 8 | buffer[1]) >> to_index(attr)) & 0x01;
-+	}
- 
--	ret = applesmc_read_key(FANS_MANUAL, buffer, 2);
- 	if (ret)
- 		return ret;
- 
--	manual = ((buffer[0] << 8 | buffer[1]) >> to_index(attr)) & 0x01;
- 	return sysfs_emit(sysfsbuf, "%d\n", manual);
- }
- 
-@@ -898,29 +1376,42 @@ static ssize_t applesmc_store_fan_manual(struct device *dev,
- 					 struct device_attribute *attr,
- 					 const char *sysfsbuf, size_t count)
- {
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
- 	int ret;
- 	u8 buffer[2];
-+	char newkey[5];
-+	bool has_newkey = false;
- 	unsigned long input;
- 	u16 val;
- 
- 	if (kstrtoul(sysfsbuf, 10, &input) < 0)
- 		return -EINVAL;
- 
--	ret = applesmc_read_key(FANS_MANUAL, buffer, 2);
-+	scnprintf(newkey, sizeof(newkey), FAN_MANUAL_FMT, to_index(attr));
-+
-+	ret = applesmc_has_key(smc, newkey, &has_newkey);
- 	if (ret)
--		goto out;
-+		return ret;
- 
--	val = (buffer[0] << 8 | buffer[1]);
-+	if (has_newkey) {
-+		buffer[0] = input & 1;
-+		ret = applesmc_write_key(smc, newkey, buffer, 1);
-+	} else {
-+		ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2);
-+		val = (buffer[0] << 8 | buffer[1]);
-+		if (ret)
-+			goto out;
- 
--	if (input)
--		val = val | (0x01 << to_index(attr));
--	else
--		val = val & ~(0x01 << to_index(attr));
-+		if (input)
-+			val = val | (0x01 << to_index(attr));
-+		else
-+			val = val & ~(0x01 << to_index(attr));
- 
--	buffer[0] = (val >> 8) & 0xFF;
--	buffer[1] = val & 0xFF;
-+		buffer[0] = (val >> 8) & 0xFF;
-+		buffer[1] = val & 0xFF;
- 
--	ret = applesmc_write_key(FANS_MANUAL, buffer, 2);
-+		ret = applesmc_write_key(smc, FANS_MANUAL, buffer, 2);
-+	}
- 
- out:
- 	if (ret)
-@@ -932,13 +1423,14 @@ static ssize_t applesmc_store_fan_manual(struct device *dev,
- static ssize_t applesmc_show_fan_position(struct device *dev,
- 				struct device_attribute *attr, char *sysfsbuf)
- {
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
- 	int ret;
- 	char newkey[5];
- 	u8 buffer[17];
- 
- 	scnprintf(newkey, sizeof(newkey), FAN_ID_FMT, to_index(attr));
- 
--	ret = applesmc_read_key(newkey, buffer, 16);
-+	ret = applesmc_read_key(smc, newkey, buffer, 16);
- 	buffer[16] = 0;
- 
- 	if (ret)
-@@ -950,43 +1442,79 @@ static ssize_t applesmc_show_fan_position(struct device *dev,
- static ssize_t applesmc_calibrate_show(struct device *dev,
- 				struct device_attribute *attr, char *sysfsbuf)
- {
--	return sysfs_emit(sysfsbuf, "(%d,%d)\n", rest_x, rest_y);
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
-+
-+	return sysfs_emit(sysfsbuf, "(%d,%d)\n", smc->rest_x, smc->rest_y);
- }
- 
- static ssize_t applesmc_calibrate_store(struct device *dev,
- 	struct device_attribute *attr, const char *sysfsbuf, size_t count)
- {
--	applesmc_calibrate();
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
-+
-+	applesmc_calibrate(smc);
- 
- 	return count;
- }
- 
- static void applesmc_backlight_set(struct work_struct *work)
- {
--	applesmc_write_key(BACKLIGHT_KEY, backlight_state, 2);
-+	struct applesmc_device *smc = container_of(work, struct applesmc_device, backlight_work);
-+
-+	applesmc_write_key(smc, BACKLIGHT_KEY, smc->backlight_state, 2);
- }
--static DECLARE_WORK(backlight_work, &applesmc_backlight_set);
- 
- static void applesmc_brightness_set(struct led_classdev *led_cdev,
- 						enum led_brightness value)
- {
-+	struct applesmc_device *smc = dev_get_drvdata(led_cdev->dev);
- 	int ret;
- 
--	backlight_state[0] = value;
--	ret = queue_work(applesmc_led_wq, &backlight_work);
-+	smc->backlight_state[0] = value;
-+	ret = queue_work(smc->backlight_wq, &smc->backlight_work);
- 
- 	if (debug && (!ret))
- 		dev_dbg(led_cdev->dev, "work was already on the queue.\n");
- }
- 
-+static ssize_t applesmc_BCLM_store(struct device *dev,
-+		struct device_attribute *attr, char *sysfsbuf, size_t count)
-+{
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
-+	u8 val;
-+
-+	if (kstrtou8(sysfsbuf, 10, &val) < 0)
-+		return -EINVAL;
-+
-+	if (val < 0 || val > 100)
-+		return -EINVAL;
-+
-+	if (applesmc_write_key(smc, "BCLM", &val, 1))
-+		return -ENODEV;
-+	return count;
-+}
-+
-+static ssize_t applesmc_BCLM_show(struct device *dev,
-+		struct device_attribute *attr, char *sysfsbuf)
-+{
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
-+	u8 val;
-+
-+	if (applesmc_read_key(smc, "BCLM", &val, 1))
-+		return -ENODEV;
-+
-+	return sysfs_emit(sysfsbuf, "%d\n", val);
-+}
-+
- static ssize_t applesmc_key_count_show(struct device *dev,
- 				struct device_attribute *attr, char *sysfsbuf)
- {
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
- 	int ret;
- 	u8 buffer[4];
- 	u32 count;
- 
--	ret = applesmc_read_key(KEY_COUNT_KEY, buffer, 4);
-+	ret = applesmc_read_key(smc, KEY_COUNT_KEY, buffer, 4);
- 	if (ret)
- 		return ret;
- 
-@@ -998,13 +1526,14 @@ static ssize_t applesmc_key_count_show(struct device *dev,
- static ssize_t applesmc_key_at_index_read_show(struct device *dev,
- 				struct device_attribute *attr, char *sysfsbuf)
- {
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
- 	const struct applesmc_entry *entry;
- 	int ret;
- 
--	entry = applesmc_get_entry_by_index(key_at_index);
-+	entry = applesmc_get_entry_by_index(smc, smc->key_at_index);
- 	if (IS_ERR(entry))
- 		return PTR_ERR(entry);
--	ret = applesmc_read_entry(entry, sysfsbuf, entry->len);
-+	ret = applesmc_read_entry(smc, entry, sysfsbuf, entry->len);
- 	if (ret)
- 		return ret;
- 
-@@ -1014,9 +1543,10 @@ static ssize_t applesmc_key_at_index_read_show(struct device *dev,
- static ssize_t applesmc_key_at_index_data_length_show(struct device *dev,
- 				struct device_attribute *attr, char *sysfsbuf)
- {
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
- 	const struct applesmc_entry *entry;
- 
--	entry = applesmc_get_entry_by_index(key_at_index);
-+	entry = applesmc_get_entry_by_index(smc, smc->key_at_index);
- 	if (IS_ERR(entry))
- 		return PTR_ERR(entry);
- 
-@@ -1026,9 +1556,10 @@ static ssize_t applesmc_key_at_index_data_length_show(struct device *dev,
- static ssize_t applesmc_key_at_index_type_show(struct device *dev,
- 				struct device_attribute *attr, char *sysfsbuf)
- {
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
- 	const struct applesmc_entry *entry;
- 
--	entry = applesmc_get_entry_by_index(key_at_index);
-+	entry = applesmc_get_entry_by_index(smc, smc->key_at_index);
- 	if (IS_ERR(entry))
- 		return PTR_ERR(entry);
- 
-@@ -1038,9 +1569,10 @@ static ssize_t applesmc_key_at_index_type_show(struct device *dev,
- static ssize_t applesmc_key_at_index_name_show(struct device *dev,
- 				struct device_attribute *attr, char *sysfsbuf)
- {
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
- 	const struct applesmc_entry *entry;
- 
--	entry = applesmc_get_entry_by_index(key_at_index);
-+	entry = applesmc_get_entry_by_index(smc, smc->key_at_index);
- 	if (IS_ERR(entry))
- 		return PTR_ERR(entry);
- 
-@@ -1050,28 +1582,25 @@ static ssize_t applesmc_key_at_index_name_show(struct device *dev,
- static ssize_t applesmc_key_at_index_show(struct device *dev,
- 				struct device_attribute *attr, char *sysfsbuf)
- {
--	return sysfs_emit(sysfsbuf, "%d\n", key_at_index);
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
-+
-+	return sysfs_emit(sysfsbuf, "%d\n", smc->key_at_index);
- }
- 
- static ssize_t applesmc_key_at_index_store(struct device *dev,
- 	struct device_attribute *attr, const char *sysfsbuf, size_t count)
- {
-+	struct applesmc_device *smc = dev_get_drvdata(dev);
- 	unsigned long newkey;
- 
- 	if (kstrtoul(sysfsbuf, 10, &newkey) < 0
--	    || newkey >= smcreg.key_count)
-+	    || newkey >= smc->reg.key_count)
- 		return -EINVAL;
- 
--	key_at_index = newkey;
-+	smc->key_at_index = newkey;
- 	return count;
- }
- 
--static struct led_classdev applesmc_backlight = {
--	.name			= "smc::kbd_backlight",
--	.default_trigger	= "nand-disk",
--	.brightness_set		= applesmc_brightness_set,
--};
--
- static struct applesmc_node_group info_group[] = {
- 	{ "name", applesmc_name_show },
- 	{ "key_count", applesmc_key_count_show },
-@@ -1111,19 +1640,25 @@ static struct applesmc_node_group temp_group[] = {
- 	{ }
- };
- 
-+static struct applesmc_node_group BCLM_group[] = {
-+	{ "battery_charge_limit", applesmc_BCLM_show, applesmc_BCLM_store },
-+	{ }
-+};
-+
- /* Module stuff */
- 
- /*
-  * applesmc_destroy_nodes - remove files and free associated memory
-  */
--static void applesmc_destroy_nodes(struct applesmc_node_group *groups)
-+static void applesmc_destroy_nodes(struct applesmc_device *smc,
-+	struct applesmc_node_group *groups)
- {
- 	struct applesmc_node_group *grp;
- 	struct applesmc_dev_attr *node;
- 
- 	for (grp = groups; grp->nodes; grp++) {
- 		for (node = grp->nodes; node->sda.dev_attr.attr.name; node++)
--			sysfs_remove_file(&pdev->dev.kobj,
-+			sysfs_remove_file(&smc->dev->dev.kobj,
- 					  &node->sda.dev_attr.attr);
- 		kfree(grp->nodes);
- 		grp->nodes = NULL;
-@@ -1133,7 +1668,8 @@ static void applesmc_destroy_nodes(struct applesmc_node_group *groups)
- /*
-  * applesmc_create_nodes - create a two-dimensional group of sysfs files
-  */
--static int applesmc_create_nodes(struct applesmc_node_group *groups, int num)
-+static int applesmc_create_nodes(struct applesmc_device *smc,
-+	struct applesmc_node_group *groups, int num)
- {
- 	struct applesmc_node_group *grp;
- 	struct applesmc_dev_attr *node;
-@@ -1157,7 +1693,7 @@ static int applesmc_create_nodes(struct applesmc_node_group *groups, int num)
- 			sysfs_attr_init(attr);
- 			attr->name = node->name;
- 			attr->mode = 0444 | (grp->store ? 0200 : 0);
--			ret = sysfs_create_file(&pdev->dev.kobj, attr);
-+			ret = sysfs_create_file(&smc->dev->dev.kobj, attr);
- 			if (ret) {
- 				attr->name = NULL;
- 				goto out;
-@@ -1167,57 +1703,56 @@ static int applesmc_create_nodes(struct applesmc_node_group *groups, int num)
- 
- 	return 0;
- out:
--	applesmc_destroy_nodes(groups);
-+	applesmc_destroy_nodes(smc, groups);
- 	return ret;
- }
- 
- /* Create accelerometer resources */
--static int applesmc_create_accelerometer(void)
-+static int applesmc_create_accelerometer(struct applesmc_device *smc)
- {
- 	int ret;
--
--	if (!smcreg.has_accelerometer)
-+	if (!smc->reg.has_accelerometer)
- 		return 0;
- 
--	ret = applesmc_create_nodes(accelerometer_group, 1);
-+	ret = applesmc_create_nodes(smc, accelerometer_group, 1);
- 	if (ret)
- 		goto out;
- 
--	applesmc_idev = input_allocate_device();
--	if (!applesmc_idev) {
-+	smc->idev = input_allocate_device();
-+	if (!smc->idev) {
- 		ret = -ENOMEM;
- 		goto out_sysfs;
- 	}
- 
- 	/* initial calibrate for the input device */
--	applesmc_calibrate();
-+	applesmc_calibrate(smc);
- 
- 	/* initialize the input device */
--	applesmc_idev->name = "applesmc";
--	applesmc_idev->id.bustype = BUS_HOST;
--	applesmc_idev->dev.parent = &pdev->dev;
--	input_set_abs_params(applesmc_idev, ABS_X,
-+	smc->idev->name = "applesmc";
-+	smc->idev->id.bustype = BUS_HOST;
-+	smc->idev->dev.parent = &smc->dev->dev;
-+	input_set_abs_params(smc->idev, ABS_X,
- 			-256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT);
--	input_set_abs_params(applesmc_idev, ABS_Y,
-+	input_set_abs_params(smc->idev, ABS_Y,
- 			-256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT);
- 
--	ret = input_setup_polling(applesmc_idev, applesmc_idev_poll);
-+	ret = input_setup_polling(smc->idev, applesmc_idev_poll);
- 	if (ret)
- 		goto out_idev;
- 
--	input_set_poll_interval(applesmc_idev, APPLESMC_POLL_INTERVAL);
-+	input_set_poll_interval(smc->idev, APPLESMC_POLL_INTERVAL);
- 
--	ret = input_register_device(applesmc_idev);
-+	ret = input_register_device(smc->idev);
- 	if (ret)
- 		goto out_idev;
- 
- 	return 0;
- 
- out_idev:
--	input_free_device(applesmc_idev);
-+	input_free_device(smc->idev);
- 
- out_sysfs:
--	applesmc_destroy_nodes(accelerometer_group);
-+	applesmc_destroy_nodes(smc, accelerometer_group);
- 
- out:
- 	pr_warn("driver init failed (ret=%d)!\n", ret);
-@@ -1225,44 +1760,55 @@ static int applesmc_create_accelerometer(void)
- }
- 
- /* Release all resources used by the accelerometer */
--static void applesmc_release_accelerometer(void)
-+static void applesmc_release_accelerometer(struct applesmc_device *smc)
- {
--	if (!smcreg.has_accelerometer)
-+	if (!smc->reg.has_accelerometer)
- 		return;
--	input_unregister_device(applesmc_idev);
--	applesmc_destroy_nodes(accelerometer_group);
-+	input_unregister_device(smc->idev);
-+	applesmc_destroy_nodes(smc, accelerometer_group);
- }
- 
--static int applesmc_create_light_sensor(void)
-+static int applesmc_create_light_sensor(struct applesmc_device *smc)
- {
--	if (!smcreg.num_light_sensors)
-+	if (!smc->reg.num_light_sensors)
- 		return 0;
--	return applesmc_create_nodes(light_sensor_group, 1);
-+	return applesmc_create_nodes(smc, light_sensor_group, 1);
- }
- 
--static void applesmc_release_light_sensor(void)
-+static void applesmc_release_light_sensor(struct applesmc_device *smc)
- {
--	if (!smcreg.num_light_sensors)
-+	if (!smc->reg.num_light_sensors)
- 		return;
--	applesmc_destroy_nodes(light_sensor_group);
-+	applesmc_destroy_nodes(smc, light_sensor_group);
- }
- 
--static int applesmc_create_key_backlight(void)
-+static int applesmc_create_key_backlight(struct applesmc_device *smc)
- {
--	if (!smcreg.has_key_backlight)
-+	int ret;
-+
-+	if (!smc->reg.has_key_backlight)
- 		return 0;
--	applesmc_led_wq = create_singlethread_workqueue("applesmc-led");
--	if (!applesmc_led_wq)
-+	smc->backlight_wq = create_singlethread_workqueue("applesmc-led");
-+	if (!smc->backlight_wq)
- 		return -ENOMEM;
--	return led_classdev_register(&pdev->dev, &applesmc_backlight);
-+
-+	INIT_WORK(&smc->backlight_work, applesmc_backlight_set);
-+	smc->backlight_dev.name = "smc::kbd_backlight";
-+	smc->backlight_dev.default_trigger = "nand-disk";
-+	smc->backlight_dev.brightness_set = applesmc_brightness_set;
-+	ret = led_classdev_register(&smc->dev->dev, &smc->backlight_dev);
-+	if (ret)
-+		destroy_workqueue(smc->backlight_wq);
-+
-+	return ret;
- }
- 
--static void applesmc_release_key_backlight(void)
-+static void applesmc_release_key_backlight(struct applesmc_device *smc)
- {
--	if (!smcreg.has_key_backlight)
-+	if (!smc->reg.has_key_backlight)
- 		return;
--	led_classdev_unregister(&applesmc_backlight);
--	destroy_workqueue(applesmc_led_wq);
-+	led_classdev_unregister(&smc->backlight_dev);
-+	destroy_workqueue(smc->backlight_wq);
- }
- 
- static int applesmc_dmi_match(const struct dmi_system_id *id)
-@@ -1291,6 +1837,10 @@ static const struct dmi_system_id applesmc_whitelist[] __initconst = {
- 	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
- 	  DMI_MATCH(DMI_PRODUCT_NAME, "Macmini") },
- 	},
-+	{ applesmc_dmi_match, "Apple iMacPro", {
-+	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
-+	  DMI_MATCH(DMI_PRODUCT_NAME, "iMacPro") },
-+	},
- 	{ applesmc_dmi_match, "Apple MacPro", {
- 	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
- 	  DMI_MATCH(DMI_PRODUCT_NAME, "MacPro") },
-@@ -1306,90 +1856,91 @@ static const struct dmi_system_id applesmc_whitelist[] __initconst = {
- 	{ .ident = NULL }
- };
- 
--static int __init applesmc_init(void)
-+static int applesmc_create_modules(struct applesmc_device *smc)
- {
- 	int ret;
- 
--	if (!dmi_check_system(applesmc_whitelist)) {
--		pr_warn("supported laptop not found!\n");
--		ret = -ENODEV;
--		goto out;
--	}
--
--	if (!request_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS,
--								"applesmc")) {
--		ret = -ENXIO;
--		goto out;
--	}
--
--	ret = platform_driver_register(&applesmc_driver);
--	if (ret)
--		goto out_region;
--
--	pdev = platform_device_register_simple("applesmc", APPLESMC_DATA_PORT,
--					       NULL, 0);
--	if (IS_ERR(pdev)) {
--		ret = PTR_ERR(pdev);
--		goto out_driver;
--	}
--
--	/* create register cache */
--	ret = applesmc_init_smcreg();
-+	ret = applesmc_create_nodes(smc, info_group, 1);
- 	if (ret)
--		goto out_device;
--
--	ret = applesmc_create_nodes(info_group, 1);
-+		goto out;
-+	ret = applesmc_create_nodes(smc, BCLM_group, 1);
- 	if (ret)
--		goto out_smcreg;
-+		goto out_info;
- 
--	ret = applesmc_create_nodes(fan_group, smcreg.fan_count);
-+	ret = applesmc_create_nodes(smc, fan_group, smc->reg.fan_count);
- 	if (ret)
--		goto out_info;
-+		goto out_bclm;
- 
--	ret = applesmc_create_nodes(temp_group, smcreg.index_count);
-+	ret = applesmc_create_nodes(smc, temp_group, smc->reg.index_count);
- 	if (ret)
- 		goto out_fans;
- 
--	ret = applesmc_create_accelerometer();
-+	ret = applesmc_create_accelerometer(smc);
- 	if (ret)
- 		goto out_temperature;
- 
--	ret = applesmc_create_light_sensor();
-+	ret = applesmc_create_light_sensor(smc);
- 	if (ret)
- 		goto out_accelerometer;
- 
--	ret = applesmc_create_key_backlight();
-+	ret = applesmc_create_key_backlight(smc);
- 	if (ret)
- 		goto out_light_sysfs;
- 
--	hwmon_dev = hwmon_device_register(&pdev->dev);
--	if (IS_ERR(hwmon_dev)) {
--		ret = PTR_ERR(hwmon_dev);
-+	smc->hwmon_dev = hwmon_device_register(&smc->dev->dev);
-+	if (IS_ERR(smc->hwmon_dev)) {
-+		ret = PTR_ERR(smc->hwmon_dev);
- 		goto out_light_ledclass;
- 	}
- 
- 	return 0;
- 
- out_light_ledclass:
--	applesmc_release_key_backlight();
-+	applesmc_release_key_backlight(smc);
- out_light_sysfs:
--	applesmc_release_light_sensor();
-+	applesmc_release_light_sensor(smc);
- out_accelerometer:
--	applesmc_release_accelerometer();
-+	applesmc_release_accelerometer(smc);
- out_temperature:
--	applesmc_destroy_nodes(temp_group);
-+	applesmc_destroy_nodes(smc, temp_group);
- out_fans:
--	applesmc_destroy_nodes(fan_group);
-+	applesmc_destroy_nodes(smc, fan_group);
-+out_bclm:
-+	applesmc_destroy_nodes(smc, BCLM_group);
- out_info:
--	applesmc_destroy_nodes(info_group);
--out_smcreg:
--	applesmc_destroy_smcreg();
--out_device:
--	platform_device_unregister(pdev);
--out_driver:
--	platform_driver_unregister(&applesmc_driver);
--out_region:
--	release_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS);
-+	applesmc_destroy_nodes(smc, info_group);
-+out:
-+	return ret;
-+}
-+
-+static void applesmc_destroy_modules(struct applesmc_device *smc)
-+{
-+	hwmon_device_unregister(smc->hwmon_dev);
-+	applesmc_release_key_backlight(smc);
-+	applesmc_release_light_sensor(smc);
-+	applesmc_release_accelerometer(smc);
-+	applesmc_destroy_nodes(smc, temp_group);
-+	applesmc_destroy_nodes(smc, fan_group);
-+	applesmc_destroy_nodes(smc, BCLM_group);
-+	applesmc_destroy_nodes(smc, info_group);
-+}
-+
-+static int __init applesmc_init(void)
-+{
-+	int ret;
-+
-+	if (!dmi_check_system(applesmc_whitelist)) {
-+		pr_warn("supported laptop not found!\n");
-+		ret = -ENODEV;
-+		goto out;
-+	}
-+
-+	ret = acpi_bus_register_driver(&applesmc_driver);
-+	if (ret)
-+		goto out;
-+
-+	return 0;
-+
- out:
- 	pr_warn("driver init failed (ret=%d)!\n", ret);
- 	return ret;
-@@ -1397,23 +1948,14 @@ static int __init applesmc_init(void)
- 
- static void __exit applesmc_exit(void)
- {
--	hwmon_device_unregister(hwmon_dev);
--	applesmc_release_key_backlight();
--	applesmc_release_light_sensor();
--	applesmc_release_accelerometer();
--	applesmc_destroy_nodes(temp_group);
--	applesmc_destroy_nodes(fan_group);
--	applesmc_destroy_nodes(info_group);
--	applesmc_destroy_smcreg();
--	platform_device_unregister(pdev);
--	platform_driver_unregister(&applesmc_driver);
--	release_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS);
-+	acpi_bus_unregister_driver(&applesmc_driver);
- }
- 
- module_init(applesmc_init);
- module_exit(applesmc_exit);
- 
- MODULE_AUTHOR("Nicolas Boichat");
-+MODULE_AUTHOR("Paul Pawlowski");
- MODULE_DESCRIPTION("Apple SMC");
- MODULE_LICENSE("GPL v2");
- MODULE_DEVICE_TABLE(dmi, applesmc_whitelist);
-diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c
-index ca150618d32f..4e692b272ae9 100644
---- a/drivers/input/mouse/bcm5974.c
-+++ b/drivers/input/mouse/bcm5974.c
-@@ -83,6 +83,24 @@
- #define USB_DEVICE_ID_APPLE_WELLSPRING9_ISO	0x0273
- #define USB_DEVICE_ID_APPLE_WELLSPRING9_JIS	0x0274
- 
-+/* T2-Attached Devices */
-+/* MacbookAir8,1 (2018) */
-+#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K	0x027a
-+/* MacbookPro15,2 (2018) */
-+#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132	0x027b
-+/* MacbookPro15,1 (2018) */
-+#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680	0x027c
-+/* MacbookPro15,4 (2019) */
-+#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213	0x027d
-+/* MacbookPro16,2 (2020) */
-+#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K	0x027e
-+/* MacbookPro16,3 (2020) */
-+#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223	0x027f
-+/* MacbookAir9,1 (2020) */
-+#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K	0x0280
-+/* MacbookPro16,1 (2019)*/
-+#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F	0x0340
-+
- #define BCM5974_DEVICE(prod) {					\
- 	.match_flags = (USB_DEVICE_ID_MATCH_DEVICE |		\
- 			USB_DEVICE_ID_MATCH_INT_CLASS |		\
-@@ -147,6 +165,22 @@ static const struct usb_device_id bcm5974_table[] = {
- 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI),
- 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_ISO),
- 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_JIS),
-+	/* MacbookAir8,1 */
-+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K),
-+	/* MacbookPro15,2 */
-+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132),
-+	/* MacbookPro15,1 */
-+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680),
-+	/* MacbookPro15,4 */
-+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213),
-+	/* MacbookPro16,2 */
-+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K),
-+	/* MacbookPro16,3 */
-+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223),
-+	/* MacbookAir9,1 */
-+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K),
-+	/* MacbookPro16,1 */
-+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F),
- 	/* Terminating entry */
- 	{}
- };
-@@ -483,6 +517,110 @@ static const struct bcm5974_config bcm5974_config_table[] = {
- 		{ SN_COORD, -203, 6803 },
- 		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
- 	},
-+	{
-+		USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K,
-+		0,
-+		0,
-+		HAS_INTEGRATED_BUTTON,
-+		0, sizeof(struct bt_data),
-+		0x83, DATAFORMAT(TYPE4),
-+		{ SN_PRESSURE, 0, 300 },
-+		{ SN_WIDTH, 0, 2048 },
-+		{ SN_COORD, -6243, 6749 },
-+		{ SN_COORD, -170, 7685 },
-+		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
-+	},
-+	{
-+		USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132,
-+		0,
-+		0,
-+		HAS_INTEGRATED_BUTTON,
-+		0, sizeof(struct bt_data),
-+		0x83, DATAFORMAT(TYPE4),
-+		{ SN_PRESSURE, 0, 300 },
-+		{ SN_WIDTH, 0, 2048 },
-+		{ SN_COORD, -6243, 6749 },
-+		{ SN_COORD, -170, 7685 },
-+		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
-+	},
-+	{
-+		USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680,
-+		0,
-+		0,
-+		HAS_INTEGRATED_BUTTON,
-+		0, sizeof(struct bt_data),
-+		0x83, DATAFORMAT(TYPE4),
-+		{ SN_PRESSURE, 0, 300 },
-+		{ SN_WIDTH, 0, 2048 },
-+		{ SN_COORD, -7456, 7976 },
-+		{ SN_COORD, -1768, 7685 },
-+		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
-+	},
-+	{
-+		USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213,
-+		0,
-+		0,
-+		HAS_INTEGRATED_BUTTON,
-+		0, sizeof(struct bt_data),
-+		0x83, DATAFORMAT(TYPE4),
-+		{ SN_PRESSURE, 0, 300 },
-+		{ SN_WIDTH, 0, 2048 },
-+		{ SN_COORD, -6243, 6749 },
-+		{ SN_COORD, -170, 7685 },
-+		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
-+	},
-+	{
-+		USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K,
-+		0,
-+		0,
-+		HAS_INTEGRATED_BUTTON,
-+		0, sizeof(struct bt_data),
-+		0x83, DATAFORMAT(TYPE4),
-+		{ SN_PRESSURE, 0, 300 },
-+		{ SN_WIDTH, 0, 2048 },
-+		{ SN_COORD, -7823, 8329 },
-+		{ SN_COORD, -370, 7925 },
-+		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
-+	},
-+	{
-+		USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223,
-+		0,
-+		0,
-+		HAS_INTEGRATED_BUTTON,
-+		0, sizeof(struct bt_data),
-+		0x83, DATAFORMAT(TYPE4),
-+		{ SN_PRESSURE, 0, 300 },
-+		{ SN_WIDTH, 0, 2048 },
-+		{ SN_COORD, -6243, 6749 },
-+		{ SN_COORD, -170, 7685 },
-+		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
-+	},
-+	{
-+		USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K,
-+		0,
-+		0,
-+		HAS_INTEGRATED_BUTTON,
-+		0, sizeof(struct bt_data),
-+		0x83, DATAFORMAT(TYPE4),
-+		{ SN_PRESSURE, 0, 300 },
-+		{ SN_WIDTH, 0, 2048 },
-+		{ SN_COORD, -6243, 6749 },
-+		{ SN_COORD, -170, 7685 },
-+		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
-+	},
-+	{
-+		USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F,
-+		0,
-+		0,
-+		HAS_INTEGRATED_BUTTON,
-+		0, sizeof(struct bt_data),
-+		0x83, DATAFORMAT(TYPE4),
-+		{ SN_PRESSURE, 0, 300 },
-+		{ SN_WIDTH, 0, 2048 },
-+		{ SN_COORD, -8916, 9918 },
-+		{ SN_COORD, -1934, 9835 },
-+		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
-+	},
- 	{}
- };
- 
-diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
-index 78748e8d2dba..2b2b558cebe6 100644
---- a/drivers/pci/vgaarb.c
-+++ b/drivers/pci/vgaarb.c
-@@ -143,6 +143,7 @@ void vga_set_default_device(struct pci_dev *pdev)
- 	pci_dev_put(vga_default);
- 	vga_default = pci_dev_get(pdev);
- }
-+EXPORT_SYMBOL_GPL(vga_set_default_device);
- 
- /**
-  * vga_remove_vgacon - deactivate VGA console
-diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
-index 1417e230edbd..e69785af8e1d 100644
---- a/drivers/platform/x86/apple-gmux.c
-+++ b/drivers/platform/x86/apple-gmux.c
-@@ -21,6 +21,7 @@
- #include <linux/delay.h>
- #include <linux/pci.h>
- #include <linux/vga_switcheroo.h>
-+#include <linux/vgaarb.h>
- #include <linux/debugfs.h>
- #include <acpi/video.h>
- #include <asm/io.h>
-@@ -107,6 +108,10 @@ struct apple_gmux_config {
- 
- # define MMIO_GMUX_MAX_BRIGHTNESS	0xffff
- 
-+static bool force_igd;
-+module_param(force_igd, bool, 0);
-+MODULE_PARM_DESC(force_idg, "Switch gpu to igd on module load. Make sure that you have apple-set-os set up and the iGPU is in `lspci -s 00:02.0`. (default: false) (bool)");
-+
- static u8 gmux_pio_read8(struct apple_gmux_data *gmux_data, int port)
- {
- 	return inb(gmux_data->iostart + port);
-@@ -945,6 +950,19 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
- 	gmux_enable_interrupts(gmux_data);
- 	gmux_read_switch_state(gmux_data);
- 
-+	if (force_igd) {
-+		struct pci_dev *pdev;
-+
-+		pdev = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(2, 0));
-+		if (pdev) {
-+			pr_info("Switching to IGD");
-+			gmux_switchto(VGA_SWITCHEROO_IGD);
-+			vga_set_default_device(pdev);
-+		} else {
-+			pr_err("force_idg is true, but couldn't find iGPU at 00:02.0! Is apple-set-os working?");
-+		}
-+	}
-+
- 	/*
- 	 * Retina MacBook Pros cannot switch the panel's AUX separately
- 	 * and need eDP pre-calibration. They are distinguishable from
-diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
-index db4a392841b1..580df4ce4f9f 100644
---- a/drivers/staging/Kconfig
-+++ b/drivers/staging/Kconfig
-@@ -66,4 +66,6 @@ source "drivers/staging/fieldbus/Kconfig"
- 
- source "drivers/staging/vme_user/Kconfig"
- 
-+source "drivers/staging/apple-bce/Kconfig"
-+
- endif # STAGING
-diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
-index 5390879b5d1b..528be2d3b546 100644
---- a/drivers/staging/Makefile
-+++ b/drivers/staging/Makefile
-@@ -22,3 +22,4 @@ obj-$(CONFIG_GREYBUS)		+= greybus/
- obj-$(CONFIG_BCM2835_VCHIQ)	+= vc04_services/
- obj-$(CONFIG_XIL_AXIS_FIFO)	+= axis-fifo/
- obj-$(CONFIG_FIELDBUS_DEV)     += fieldbus/
-+obj-$(CONFIG_APPLE_BCE)		+= apple-bce/
-diff --git a/drivers/staging/apple-bce/Kconfig b/drivers/staging/apple-bce/Kconfig
-new file mode 100644
-index 000000000000..fe92bc441e89
---- /dev/null
-+++ b/drivers/staging/apple-bce/Kconfig
-@@ -0,0 +1,18 @@
-+config APPLE_BCE
-+	tristate "Apple BCE driver (VHCI and Audio support)"
-+	default m
-+	depends on X86
-+	select SOUND
-+	select SND
-+	select SND_PCM
-+	select SND_JACK
-+	help
-+	  VHCI and audio support on Apple MacBooks with the T2 Chip.
-+	  This driver is divided in three components:
-+	    - BCE (Buffer Copy Engine): which establishes a basic communication
-+	      channel with the T2 chip. This component is required by the other two:
-+	      - VHCI (Virtual Host Controller Interface): Access to keyboard, mouse
-+	        and other system devices depend on this virtual USB host controller
-+	      - Audio: a driver for the T2 audio interface.
-+	 
-+	  If "M" is selected, the module will be called apple-bce.'
-diff --git a/drivers/staging/apple-bce/Makefile b/drivers/staging/apple-bce/Makefile
-new file mode 100644
-index 000000000000..8cfbd3f64af6
---- /dev/null
-+++ b/drivers/staging/apple-bce/Makefile
-@@ -0,0 +1,28 @@
-+modname := apple-bce
-+obj-$(CONFIG_APPLE_BCE) += $(modname).o
-+
-+apple-bce-objs := apple_bce.o mailbox.o queue.o queue_dma.o vhci/vhci.o vhci/queue.o vhci/transfer.o audio/audio.o audio/protocol.o audio/protocol_bce.o audio/pcm.o
-+
-+MY_CFLAGS += -DWITHOUT_NVME_PATCH
-+#MY_CFLAGS += -g -DDEBUG
-+ccflags-y += ${MY_CFLAGS}
-+CC += ${MY_CFLAGS}
-+
-+KVERSION := $(KERNELRELEASE)
-+ifeq ($(origin KERNELRELEASE), undefined)
-+KVERSION := $(shell uname -r)
-+endif
-+
-+KDIR := /lib/modules/$(KVERSION)/build
-+PWD := $(shell pwd)
-+
-+.PHONY: all
-+
-+all:
-+	$(MAKE) -C $(KDIR) M=$(PWD) modules
-+
-+clean:
-+	$(MAKE) -C $(KDIR) M=$(PWD) clean
-+
-+install:
-+	$(MAKE) -C $(KDIR) M=$(PWD) modules_install
-diff --git a/drivers/staging/apple-bce/apple_bce.c b/drivers/staging/apple-bce/apple_bce.c
-new file mode 100644
-index 000000000000..5e2f2f3b973c
---- /dev/null
-+++ b/drivers/staging/apple-bce/apple_bce.c
-@@ -0,0 +1,444 @@
-+#include "apple_bce.h"
-+#include <linux/module.h>
-+#include <linux/crc32.h>
-+#include "audio/audio.h"
-+#include <linux/version.h>
-+
-+static dev_t bce_chrdev;
-+static struct class *bce_class;
-+
-+struct apple_bce_device *global_bce;
-+
-+static int bce_create_command_queues(struct apple_bce_device *bce);
-+static void bce_free_command_queues(struct apple_bce_device *bce);
-+static irqreturn_t bce_handle_mb_irq(int irq, void *dev);
-+static irqreturn_t bce_handle_dma_irq(int irq, void *dev);
-+static int bce_fw_version_handshake(struct apple_bce_device *bce);
-+static int bce_register_command_queue(struct apple_bce_device *bce, struct bce_queue_memcfg *cfg, int is_sq);
-+
-+static int apple_bce_probe(struct pci_dev *dev, const struct pci_device_id *id)
-+{
-+    struct apple_bce_device *bce = NULL;
-+    int status = 0;
-+    int nvec;
-+
-+    pr_info("apple-bce: capturing our device\n");
-+
-+    if (pci_enable_device(dev))
-+        return -ENODEV;
-+    if (pci_request_regions(dev, "apple-bce")) {
-+        status = -ENODEV;
-+        goto fail;
-+    }
-+    pci_set_master(dev);
-+    nvec = pci_alloc_irq_vectors(dev, 1, 8, PCI_IRQ_MSI);
-+    if (nvec < 5) {
-+        status = -EINVAL;
-+        goto fail;
-+    }
-+
-+    bce = kzalloc(sizeof(struct apple_bce_device), GFP_KERNEL);
-+    if (!bce) {
-+        status = -ENOMEM;
-+        goto fail;
-+    }
-+
-+    bce->pci = dev;
-+    pci_set_drvdata(dev, bce);
-+
-+    bce->devt = bce_chrdev;
-+    bce->dev = device_create(bce_class, &dev->dev, bce->devt, NULL, "apple-bce");
-+    if (IS_ERR_OR_NULL(bce->dev)) {
-+        status = PTR_ERR(bce_class);
-+        goto fail;
-+    }
-+
-+    bce->reg_mem_mb = pci_iomap(dev, 4, 0);
-+    bce->reg_mem_dma = pci_iomap(dev, 2, 0);
-+
-+    if (IS_ERR_OR_NULL(bce->reg_mem_mb) || IS_ERR_OR_NULL(bce->reg_mem_dma)) {
-+        dev_warn(&dev->dev, "apple-bce: Failed to pci_iomap required regions\n");
-+        goto fail;
-+    }
-+
-+    bce_mailbox_init(&bce->mbox, bce->reg_mem_mb);
-+    bce_timestamp_init(&bce->timestamp, bce->reg_mem_mb);
-+
-+    spin_lock_init(&bce->queues_lock);
-+    ida_init(&bce->queue_ida);
-+
-+    if ((status = pci_request_irq(dev, 0, bce_handle_mb_irq, NULL, dev, "bce_mbox")))
-+        goto fail;
-+    if ((status = pci_request_irq(dev, 4, NULL, bce_handle_dma_irq, dev, "bce_dma")))
-+        goto fail_interrupt_0;
-+
-+    if ((status = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(37)))) {
-+        dev_warn(&dev->dev, "dma: Setting mask failed\n");
-+        goto fail_interrupt;
-+    }
-+
-+    /* Gets the function 0's interface. This is needed because Apple only accepts DMA on our function if function 0
-+       is a bus master, so we need to work around this. */
-+    bce->pci0 = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
-+#ifndef WITHOUT_NVME_PATCH
-+    if ((status = pci_enable_device_mem(bce->pci0))) {
-+        dev_warn(&dev->dev, "apple-bce: failed to enable function 0\n");
-+        goto fail_dev0;
-+    }
-+#endif
-+    pci_set_master(bce->pci0);
-+
-+    bce_timestamp_start(&bce->timestamp, true);
-+
-+    if ((status = bce_fw_version_handshake(bce)))
-+        goto fail_ts;
-+    pr_info("apple-bce: handshake done\n");
-+
-+    if ((status = bce_create_command_queues(bce))) {
-+        pr_info("apple-bce: Creating command queues failed\n");
-+        goto fail_ts;
-+    }
-+
-+    global_bce = bce;
-+
-+    bce_vhci_create(bce, &bce->vhci);
-+
-+    return 0;
-+
-+fail_ts:
-+    bce_timestamp_stop(&bce->timestamp);
-+#ifndef WITHOUT_NVME_PATCH
-+    pci_disable_device(bce->pci0);
-+fail_dev0:
-+#endif
-+    pci_dev_put(bce->pci0);
-+fail_interrupt:
-+    pci_free_irq(dev, 4, dev);
-+fail_interrupt_0:
-+    pci_free_irq(dev, 0, dev);
-+fail:
-+    if (bce && bce->dev) {
-+        device_destroy(bce_class, bce->devt);
-+
-+        if (!IS_ERR_OR_NULL(bce->reg_mem_mb))
-+            pci_iounmap(dev, bce->reg_mem_mb);
-+        if (!IS_ERR_OR_NULL(bce->reg_mem_dma))
-+            pci_iounmap(dev, bce->reg_mem_dma);
-+
-+        kfree(bce);
-+    }
-+
-+    pci_free_irq_vectors(dev);
-+    pci_release_regions(dev);
-+    pci_disable_device(dev);
-+
-+    if (!status)
-+        status = -EINVAL;
-+    return status;
-+}
-+
-+static int bce_create_command_queues(struct apple_bce_device *bce)
-+{
-+    int status;
-+    struct bce_queue_memcfg *cfg;
-+
-+    bce->cmd_cq = bce_alloc_cq(bce, 0, 0x20);
-+    bce->cmd_cmdq = bce_alloc_cmdq(bce, 1, 0x20);
-+    if (bce->cmd_cq == NULL || bce->cmd_cmdq == NULL) {
-+        status = -ENOMEM;
-+        goto err;
-+    }
-+    bce->queues[0] = (struct bce_queue *) bce->cmd_cq;
-+    bce->queues[1] = (struct bce_queue *) bce->cmd_cmdq->sq;
-+
-+    cfg = kzalloc(sizeof(struct bce_queue_memcfg), GFP_KERNEL);
-+    if (!cfg) {
-+        status = -ENOMEM;
-+        goto err;
-+    }
-+    bce_get_cq_memcfg(bce->cmd_cq, cfg);
-+    if ((status = bce_register_command_queue(bce, cfg, false)))
-+        goto err;
-+    bce_get_sq_memcfg(bce->cmd_cmdq->sq, bce->cmd_cq, cfg);
-+    if ((status = bce_register_command_queue(bce, cfg, true)))
-+        goto err;
-+    kfree(cfg);
-+
-+    return 0;
-+
-+err:
-+    if (bce->cmd_cq)
-+        bce_free_cq(bce, bce->cmd_cq);
-+    if (bce->cmd_cmdq)
-+        bce_free_cmdq(bce, bce->cmd_cmdq);
-+    return status;
-+}
-+
-+static void bce_free_command_queues(struct apple_bce_device *bce)
-+{
-+    bce_free_cq(bce, bce->cmd_cq);
-+    bce_free_cmdq(bce, bce->cmd_cmdq);
-+    bce->cmd_cq = NULL;
-+    bce->queues[0] = NULL;
-+}
-+
-+static irqreturn_t bce_handle_mb_irq(int irq, void *dev)
-+{
-+    struct apple_bce_device *bce = pci_get_drvdata(dev);
-+    bce_mailbox_handle_interrupt(&bce->mbox);
-+    return IRQ_HANDLED;
-+}
-+
-+static irqreturn_t bce_handle_dma_irq(int irq, void *dev)
-+{
-+    int i;
-+    struct apple_bce_device *bce = pci_get_drvdata(dev);
-+    spin_lock(&bce->queues_lock);
-+    for (i = 0; i < BCE_MAX_QUEUE_COUNT; i++)
-+        if (bce->queues[i] && bce->queues[i]->type == BCE_QUEUE_CQ)
-+            bce_handle_cq_completions(bce, (struct bce_queue_cq *) bce->queues[i]);
-+    spin_unlock(&bce->queues_lock);
-+    return IRQ_HANDLED;
-+}
-+
-+static int bce_fw_version_handshake(struct apple_bce_device *bce)
-+{
-+    u64 result;
-+    int status;
-+
-+    if ((status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_SET_FW_PROTOCOL_VERSION, BC_PROTOCOL_VERSION),
-+            &result)))
-+        return status;
-+    if (BCE_MB_TYPE(result) != BCE_MB_SET_FW_PROTOCOL_VERSION ||
-+        BCE_MB_VALUE(result) != BC_PROTOCOL_VERSION) {
-+        pr_err("apple-bce: FW version handshake failed %x:%llx\n", BCE_MB_TYPE(result), BCE_MB_VALUE(result));
-+        return -EINVAL;
-+    }
-+    return 0;
-+}
-+
-+static int bce_register_command_queue(struct apple_bce_device *bce, struct bce_queue_memcfg *cfg, int is_sq)
-+{
-+    int status;
-+    int cmd_type;
-+    u64 result;
-+    // OS X uses an bidirectional direction, but that's not really needed
-+    dma_addr_t a = dma_map_single(&bce->pci->dev, cfg, sizeof(struct bce_queue_memcfg), DMA_TO_DEVICE);
-+    if (dma_mapping_error(&bce->pci->dev, a))
-+        return -ENOMEM;
-+    cmd_type = is_sq ? BCE_MB_REGISTER_COMMAND_SQ : BCE_MB_REGISTER_COMMAND_CQ;
-+    status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(cmd_type, a), &result);
-+    dma_unmap_single(&bce->pci->dev, a, sizeof(struct bce_queue_memcfg), DMA_TO_DEVICE);
-+    if (status)
-+        return status;
-+    if (BCE_MB_TYPE(result) != BCE_MB_REGISTER_COMMAND_QUEUE_REPLY)
-+        return -EINVAL;
-+    return 0;
-+}
-+
-+static void apple_bce_remove(struct pci_dev *dev)
-+{
-+    struct apple_bce_device *bce = pci_get_drvdata(dev);
-+    bce->is_being_removed = true;
-+
-+    bce_vhci_destroy(&bce->vhci);
-+
-+    bce_timestamp_stop(&bce->timestamp);
-+#ifndef WITHOUT_NVME_PATCH
-+    pci_disable_device(bce->pci0);
-+#endif
-+    pci_dev_put(bce->pci0);
-+    pci_free_irq(dev, 0, dev);
-+    pci_free_irq(dev, 4, dev);
-+    bce_free_command_queues(bce);
-+    pci_iounmap(dev, bce->reg_mem_mb);
-+    pci_iounmap(dev, bce->reg_mem_dma);
-+    device_destroy(bce_class, bce->devt);
-+    pci_free_irq_vectors(dev);
-+    pci_release_regions(dev);
-+    pci_disable_device(dev);
-+    kfree(bce);
-+}
-+
-+static int bce_save_state_and_sleep(struct apple_bce_device *bce)
-+{
-+    int attempt, status = 0;
-+    u64 resp;
-+    dma_addr_t dma_addr;
-+    void *dma_ptr = NULL;
-+    size_t size = max(PAGE_SIZE, 4096UL);
-+
-+    for (attempt = 0; attempt < 5; ++attempt) {
-+        pr_debug("apple-bce: suspend: attempt %i, buffer size %li\n", attempt, size);
-+        dma_ptr = dma_alloc_coherent(&bce->pci->dev, size, &dma_addr, GFP_KERNEL);
-+        if (!dma_ptr) {
-+            pr_err("apple-bce: suspend failed (data alloc failed)\n");
-+            break;
-+        }
-+        BUG_ON((dma_addr % 4096) != 0);
-+        status = bce_mailbox_send(&bce->mbox,
-+                BCE_MB_MSG(BCE_MB_SAVE_STATE_AND_SLEEP, (dma_addr & ~(4096LLU - 1)) | (size / 4096)), &resp);
-+        if (status) {
-+            pr_err("apple-bce: suspend failed (mailbox send)\n");
-+            break;
-+        }
-+        if (BCE_MB_TYPE(resp) == BCE_MB_SAVE_RESTORE_STATE_COMPLETE) {
-+            bce->saved_data_dma_addr = dma_addr;
-+            bce->saved_data_dma_ptr = dma_ptr;
-+            bce->saved_data_dma_size = size;
-+            return 0;
-+        } else if (BCE_MB_TYPE(resp) == BCE_MB_SAVE_STATE_AND_SLEEP_FAILURE) {
-+            dma_free_coherent(&bce->pci->dev, size, dma_ptr, dma_addr);
-+            /* The 0x10ff magic value was extracted from Apple's driver */
-+            size = (BCE_MB_VALUE(resp) + 0x10ff) & ~(4096LLU - 1);
-+            pr_debug("apple-bce: suspend: device requested a larger buffer (%li)\n", size);
-+            continue;
-+        } else {
-+            pr_err("apple-bce: suspend failed (invalid device response)\n");
-+            status = -EINVAL;
-+            break;
-+        }
-+    }
-+    if (dma_ptr)
-+        dma_free_coherent(&bce->pci->dev, size, dma_ptr, dma_addr);
-+    if (!status)
-+        return bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_SLEEP_NO_STATE, 0), &resp);
-+    return status;
-+}
-+
-+static int bce_restore_state_and_wake(struct apple_bce_device *bce)
-+{
-+    int status;
-+    u64 resp;
-+    if (!bce->saved_data_dma_ptr) {
-+        if ((status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_RESTORE_NO_STATE, 0), &resp))) {
-+            pr_err("apple-bce: resume with no state failed (mailbox send)\n");
-+            return status;
-+        }
-+        if (BCE_MB_TYPE(resp) != BCE_MB_RESTORE_NO_STATE) {
-+            pr_err("apple-bce: resume with no state failed (invalid device response)\n");
-+            return -EINVAL;
-+        }
-+        return 0;
-+    }
-+
-+    if ((status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_RESTORE_STATE_AND_WAKE,
-+            (bce->saved_data_dma_addr & ~(4096LLU - 1)) | (bce->saved_data_dma_size / 4096)), &resp))) {
-+        pr_err("apple-bce: resume with state failed (mailbox send)\n");
-+        goto finish_with_state;
-+    }
-+    if (BCE_MB_TYPE(resp) != BCE_MB_SAVE_RESTORE_STATE_COMPLETE) {
-+        pr_err("apple-bce: resume with state failed (invalid device response)\n");
-+        status = -EINVAL;
-+        goto finish_with_state;
-+    }
-+
-+finish_with_state:
-+    dma_free_coherent(&bce->pci->dev, bce->saved_data_dma_size, bce->saved_data_dma_ptr, bce->saved_data_dma_addr);
-+    bce->saved_data_dma_ptr = NULL;
-+    return status;
-+}
-+
-+static int apple_bce_suspend(struct device *dev)
-+{
-+    struct apple_bce_device *bce = pci_get_drvdata(to_pci_dev(dev));
-+    int status;
-+
-+    bce_timestamp_stop(&bce->timestamp);
-+
-+    if ((status = bce_save_state_and_sleep(bce)))
-+        return status;
-+
-+    return 0;
-+}
-+
-+static int apple_bce_resume(struct device *dev)
-+{
-+    struct apple_bce_device *bce = pci_get_drvdata(to_pci_dev(dev));
-+    int status;
-+
-+    pci_set_master(bce->pci);
-+    pci_set_master(bce->pci0);
-+
-+    if ((status = bce_restore_state_and_wake(bce)))
-+        return status;
-+
-+    bce_timestamp_start(&bce->timestamp, false);
-+
-+    return 0;
-+}
-+
-+static struct pci_device_id apple_bce_ids[  ] = {
-+        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x1801) },
-+        { 0, },
-+};
-+
-+struct dev_pm_ops apple_bce_pci_driver_pm = {
-+        .suspend = apple_bce_suspend,
-+        .resume = apple_bce_resume
-+};
-+struct pci_driver apple_bce_pci_driver = {
-+        .name = "apple-bce",
-+        .id_table = apple_bce_ids,
-+        .probe = apple_bce_probe,
-+        .remove = apple_bce_remove,
-+        .driver = {
-+                .pm = &apple_bce_pci_driver_pm
-+        }
-+};
-+
-+
-+static int __init apple_bce_module_init(void)
-+{
-+    int result;
-+    if ((result = alloc_chrdev_region(&bce_chrdev, 0, 1, "apple-bce")))
-+        goto fail_chrdev;
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0)
-+    bce_class = class_create(THIS_MODULE, "apple-bce");
-+#else
-+    bce_class = class_create("apple-bce");
-+#endif
-+    if (IS_ERR(bce_class)) {
-+        result = PTR_ERR(bce_class);
-+        goto fail_class;
-+    }
-+    if ((result = bce_vhci_module_init())) {
-+        pr_err("apple-bce: bce-vhci init failed");
-+        goto fail_class;
-+    }
-+
-+    result = pci_register_driver(&apple_bce_pci_driver);
-+    if (result)
-+        goto fail_drv;
-+
-+    aaudio_module_init();
-+
-+    return 0;
-+
-+fail_drv:
-+    pci_unregister_driver(&apple_bce_pci_driver);
-+fail_class:
-+    class_destroy(bce_class);
-+fail_chrdev:
-+    unregister_chrdev_region(bce_chrdev, 1);
-+    if (!result)
-+        result = -EINVAL;
-+    return result;
-+}
-+static void __exit apple_bce_module_exit(void)
-+{
-+    pci_unregister_driver(&apple_bce_pci_driver);
-+
-+    aaudio_module_exit();
-+    bce_vhci_module_exit();
-+    class_destroy(bce_class);
-+    unregister_chrdev_region(bce_chrdev, 1);
-+}
-+
-+MODULE_LICENSE("GPL");
-+MODULE_AUTHOR("MrARM");
-+MODULE_DESCRIPTION("Apple BCE Driver");
-+MODULE_VERSION("0.01");
-+MODULE_ALIAS("pci:v0000106Bd00001801sv*sd*bc*sc*i*");
-+module_init(apple_bce_module_init);
-+module_exit(apple_bce_module_exit);
-diff --git a/drivers/staging/apple-bce/apple_bce.h b/drivers/staging/apple-bce/apple_bce.h
-new file mode 100644
-index 000000000000..f13ab8d5742e
---- /dev/null
-+++ b/drivers/staging/apple-bce/apple_bce.h
-@@ -0,0 +1,38 @@
-+#pragma once
-+
-+#include <linux/pci.h>
-+#include <linux/spinlock.h>
-+#include "mailbox.h"
-+#include "queue.h"
-+#include "vhci/vhci.h"
-+
-+#define BC_PROTOCOL_VERSION 0x20001
-+#define BCE_MAX_QUEUE_COUNT 0x100
-+
-+#define BCE_QUEUE_USER_MIN 2
-+#define BCE_QUEUE_USER_MAX (BCE_MAX_QUEUE_COUNT - 1)
-+
-+struct apple_bce_device {
-+    struct pci_dev *pci, *pci0;
-+    dev_t devt;
-+    struct device *dev;
-+    void __iomem *reg_mem_mb;
-+    void __iomem *reg_mem_dma;
-+    struct bce_mailbox mbox;
-+    struct bce_timestamp timestamp;
-+    struct bce_queue *queues[BCE_MAX_QUEUE_COUNT];
-+    struct spinlock queues_lock;
-+    struct ida queue_ida;
-+    struct bce_queue_cq *cmd_cq;
-+    struct bce_queue_cmdq *cmd_cmdq;
-+    struct bce_queue_sq *int_sq_list[BCE_MAX_QUEUE_COUNT];
-+    bool is_being_removed;
-+
-+    dma_addr_t saved_data_dma_addr;
-+    void *saved_data_dma_ptr;
-+    size_t saved_data_dma_size;
-+
-+    struct bce_vhci vhci;
-+};
-+
-+extern struct apple_bce_device *global_bce;
-\ No newline at end of file
-diff --git a/drivers/staging/apple-bce/audio/audio.c b/drivers/staging/apple-bce/audio/audio.c
-new file mode 100644
-index 000000000000..bd16ddd16c1d
---- /dev/null
-+++ b/drivers/staging/apple-bce/audio/audio.c
-@@ -0,0 +1,711 @@
-+#include <linux/pci.h>
-+#include <linux/spinlock.h>
-+#include <linux/module.h>
-+#include <linux/random.h>
-+#include <sound/core.h>
-+#include <sound/initval.h>
-+#include <sound/pcm.h>
-+#include <sound/jack.h>
-+#include "audio.h"
-+#include "pcm.h"
-+#include <linux/version.h>
-+
-+static int aaudio_alsa_index = SNDRV_DEFAULT_IDX1;
-+static char *aaudio_alsa_id = SNDRV_DEFAULT_STR1;
-+
-+static dev_t aaudio_chrdev;
-+static struct class *aaudio_class;
-+
-+static int aaudio_init_cmd(struct aaudio_device *a);
-+static int aaudio_init_bs(struct aaudio_device *a);
-+static void aaudio_init_dev(struct aaudio_device *a, aaudio_device_id_t dev_id);
-+static void aaudio_free_dev(struct aaudio_subdevice *sdev);
-+
-+static int aaudio_probe(struct pci_dev *dev, const struct pci_device_id *id)
-+{
-+    struct aaudio_device *aaudio = NULL;
-+    struct aaudio_subdevice *sdev = NULL;
-+    int status = 0;
-+    u32 cfg;
-+
-+    pr_info("aaudio: capturing our device\n");
-+
-+    if (pci_enable_device(dev))
-+        return -ENODEV;
-+    if (pci_request_regions(dev, "aaudio")) {
-+        status = -ENODEV;
-+        goto fail;
-+    }
-+    pci_set_master(dev);
-+
-+    aaudio = kzalloc(sizeof(struct aaudio_device), GFP_KERNEL);
-+    if (!aaudio) {
-+        status = -ENOMEM;
-+        goto fail;
-+    }
-+
-+    aaudio->bce = global_bce;
-+    if (!aaudio->bce) {
-+        dev_warn(&dev->dev, "aaudio: No BCE available\n");
-+        status = -EINVAL;
-+        goto fail;
-+    }
-+
-+    aaudio->pci = dev;
-+    pci_set_drvdata(dev, aaudio);
-+
-+    aaudio->devt = aaudio_chrdev;
-+    aaudio->dev = device_create(aaudio_class, &dev->dev, aaudio->devt, NULL, "aaudio");
-+    if (IS_ERR_OR_NULL(aaudio->dev)) {
-+        status = PTR_ERR(aaudio_class);
-+        goto fail;
-+    }
-+    device_link_add(aaudio->dev, aaudio->bce->dev, DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_CONSUMER);
-+
-+    init_completion(&aaudio->remote_alive);
-+    INIT_LIST_HEAD(&aaudio->subdevice_list);
-+
-+    /* Init: set an unknown flag in the bitset */
-+    if (pci_read_config_dword(dev, 4, &cfg))
-+        dev_warn(&dev->dev, "aaudio: pci_read_config_dword fail\n");
-+    if (pci_write_config_dword(dev, 4, cfg | 6u))
-+        dev_warn(&dev->dev, "aaudio: pci_write_config_dword fail\n");
-+
-+    dev_info(aaudio->dev, "aaudio: bs len = %llx\n", pci_resource_len(dev, 0));
-+    aaudio->reg_mem_bs_dma = pci_resource_start(dev, 0);
-+    aaudio->reg_mem_bs = pci_iomap(dev, 0, 0);
-+    aaudio->reg_mem_cfg = pci_iomap(dev, 4, 0);
-+
-+    aaudio->reg_mem_gpr = (u32 __iomem *) ((u8 __iomem *) aaudio->reg_mem_cfg + 0xC000);
-+
-+    if (IS_ERR_OR_NULL(aaudio->reg_mem_bs) || IS_ERR_OR_NULL(aaudio->reg_mem_cfg)) {
-+        dev_warn(&dev->dev, "aaudio: Failed to pci_iomap required regions\n");
-+        goto fail;
-+    }
-+
-+    if (aaudio_bce_init(aaudio)) {
-+        dev_warn(&dev->dev, "aaudio: Failed to init BCE command transport\n");
-+        goto fail;
-+    }
-+
-+    if (snd_card_new(aaudio->dev, aaudio_alsa_index, aaudio_alsa_id, THIS_MODULE, 0, &aaudio->card)) {
-+        dev_err(&dev->dev, "aaudio: Failed to create ALSA card\n");
-+        goto fail;
-+    }
-+
-+    strcpy(aaudio->card->shortname, "Apple T2 Audio");
-+    strcpy(aaudio->card->longname, "Apple T2 Audio");
-+    strcpy(aaudio->card->mixername, "Apple T2 Audio");
-+    /* Dynamic alsa ids start at 100 */
-+    aaudio->next_alsa_id = 100;
-+
-+    if (aaudio_init_cmd(aaudio)) {
-+        dev_err(&dev->dev, "aaudio: Failed to initialize over BCE\n");
-+        goto fail_snd;
-+    }
-+
-+    if (aaudio_init_bs(aaudio)) {
-+        dev_err(&dev->dev, "aaudio: Failed to initialize BufferStruct\n");
-+        goto fail_snd;
-+    }
-+
-+    if ((status = aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_ON))) {
-+        dev_err(&dev->dev, "Failed to set remote access\n");
-+        return status;
-+    }
-+
-+    if (snd_card_register(aaudio->card)) {
-+        dev_err(&dev->dev, "aaudio: Failed to register ALSA sound device\n");
-+        goto fail_snd;
-+    }
-+
-+    list_for_each_entry(sdev, &aaudio->subdevice_list, list) {
-+        struct aaudio_buffer_struct_device *dev = &aaudio->bs->devices[sdev->buf_id];
-+
-+        if (sdev->out_stream_cnt == 1 && !strcmp(dev->name, "Speaker")) {
-+            struct snd_pcm_hardware *hw = sdev->out_streams[0].alsa_hw_desc;
-+
-+            snprintf(aaudio->card->driver, sizeof(aaudio->card->driver) / sizeof(char), "AppleT2x%d", hw->channels_min);
-+        }
-+    }
-+
-+    return 0;
-+
-+fail_snd:
-+    snd_card_free(aaudio->card);
-+fail:
-+    if (aaudio && aaudio->dev)
-+        device_destroy(aaudio_class, aaudio->devt);
-+    kfree(aaudio);
-+
-+    if (!IS_ERR_OR_NULL(aaudio->reg_mem_bs))
-+        pci_iounmap(dev, aaudio->reg_mem_bs);
-+    if (!IS_ERR_OR_NULL(aaudio->reg_mem_cfg))
-+        pci_iounmap(dev, aaudio->reg_mem_cfg);
-+
-+    pci_release_regions(dev);
-+    pci_disable_device(dev);
-+
-+    if (!status)
-+        status = -EINVAL;
-+    return status;
-+}
-+
-+
-+
-+static void aaudio_remove(struct pci_dev *dev)
-+{
-+    struct aaudio_subdevice *sdev;
-+    struct aaudio_device *aaudio = pci_get_drvdata(dev);
-+
-+    snd_card_free(aaudio->card);
-+    while (!list_empty(&aaudio->subdevice_list)) {
-+        sdev = list_first_entry(&aaudio->subdevice_list, struct aaudio_subdevice, list);
-+        list_del(&sdev->list);
-+        aaudio_free_dev(sdev);
-+    }
-+    pci_iounmap(dev, aaudio->reg_mem_bs);
-+    pci_iounmap(dev, aaudio->reg_mem_cfg);
-+    device_destroy(aaudio_class, aaudio->devt);
-+    pci_free_irq_vectors(dev);
-+    pci_release_regions(dev);
-+    pci_disable_device(dev);
-+    kfree(aaudio);
-+}
-+
-+static int aaudio_suspend(struct device *dev)
-+{
-+    struct aaudio_device *aaudio = pci_get_drvdata(to_pci_dev(dev));
-+
-+    if (aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_OFF))
-+        dev_warn(aaudio->dev, "Failed to reset remote access\n");
-+
-+    pci_disable_device(aaudio->pci);
-+    return 0;
-+}
-+
-+static int aaudio_resume(struct device *dev)
-+{
-+    int status;
-+    struct aaudio_device *aaudio = pci_get_drvdata(to_pci_dev(dev));
-+
-+    if ((status = pci_enable_device(aaudio->pci)))
-+        return status;
-+    pci_set_master(aaudio->pci);
-+
-+    if ((status = aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_ON))) {
-+        dev_err(aaudio->dev, "Failed to set remote access\n");
-+        return status;
-+    }
-+
-+    return 0;
-+}
-+
-+static int aaudio_init_cmd(struct aaudio_device *a)
-+{
-+    int status;
-+    struct aaudio_send_ctx sctx;
-+    struct aaudio_msg buf;
-+    u64 dev_cnt, dev_i;
-+    aaudio_device_id_t *dev_l;
-+
-+    if ((status = aaudio_send(a, &sctx, 500,
-+                              aaudio_msg_write_alive_notification, 1, 3))) {
-+        dev_err(a->dev, "Sending alive notification failed\n");
-+        return status;
-+    }
-+
-+    if (wait_for_completion_timeout(&a->remote_alive, msecs_to_jiffies(500)) == 0) {
-+        dev_err(a->dev, "Timed out waiting for remote\n");
-+        return -ETIMEDOUT;
-+    }
-+    dev_info(a->dev, "Continuing init\n");
-+
-+    buf = aaudio_reply_alloc();
-+    if ((status = aaudio_cmd_get_device_list(a, &buf, &dev_l, &dev_cnt))) {
-+        dev_err(a->dev, "Failed to get device list\n");
-+        aaudio_reply_free(&buf);
-+        return status;
-+    }
-+    for (dev_i = 0; dev_i < dev_cnt; ++dev_i)
-+        aaudio_init_dev(a, dev_l[dev_i]);
-+    aaudio_reply_free(&buf);
-+
-+    return 0;
-+}
-+
-+static void aaudio_init_stream_info(struct aaudio_subdevice *sdev, struct aaudio_stream *strm);
-+static void aaudio_handle_jack_connection_change(struct aaudio_subdevice *sdev);
-+
-+static void aaudio_init_dev(struct aaudio_device *a, aaudio_device_id_t dev_id)
-+{
-+    struct aaudio_subdevice *sdev;
-+    struct aaudio_msg buf = aaudio_reply_alloc();
-+    u64 uid_len, stream_cnt, i;
-+    aaudio_object_id_t *stream_list;
-+    char *uid;
-+
-+    sdev = kzalloc(sizeof(struct aaudio_subdevice), GFP_KERNEL);
-+
-+    if (aaudio_cmd_get_property(a, &buf, dev_id, dev_id, AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_UID, 0),
-+            NULL, 0, (void **) &uid, &uid_len) || uid_len > AAUDIO_DEVICE_MAX_UID_LEN) {
-+        dev_err(a->dev, "Failed to get device uid for device %llx\n", dev_id);
-+        goto fail;
-+    }
-+    dev_info(a->dev, "Remote device %llx %.*s\n", dev_id, (int) uid_len, uid);
-+
-+    sdev->a = a;
-+    INIT_LIST_HEAD(&sdev->list);
-+    sdev->dev_id = dev_id;
-+    sdev->buf_id = AAUDIO_BUFFER_ID_NONE;
-+    strncpy(sdev->uid, uid, uid_len);
-+    sdev->uid[uid_len + 1] = '\0';
-+
-+    if (aaudio_cmd_get_primitive_property(a, dev_id, dev_id,
-+            AAUDIO_PROP(AAUDIO_PROP_SCOPE_INPUT, AAUDIO_PROP_LATENCY, 0), NULL, 0, &sdev->in_latency, sizeof(u32)))
-+        dev_warn(a->dev, "Failed to query device input latency\n");
-+    if (aaudio_cmd_get_primitive_property(a, dev_id, dev_id,
-+            AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, AAUDIO_PROP_LATENCY, 0), NULL, 0, &sdev->out_latency, sizeof(u32)))
-+        dev_warn(a->dev, "Failed to query device output latency\n");
-+
-+    if (aaudio_cmd_get_input_stream_list(a, &buf, dev_id, &stream_list, &stream_cnt)) {
-+        dev_err(a->dev, "Failed to get input stream list for device %llx\n", dev_id);
-+        goto fail;
-+    }
-+    if (stream_cnt > AAUDIO_DEIVCE_MAX_INPUT_STREAMS) {
-+        dev_warn(a->dev, "Device %s input stream count %llu is larger than the supported count of %u\n",
-+                sdev->uid, stream_cnt, AAUDIO_DEIVCE_MAX_INPUT_STREAMS);
-+        stream_cnt = AAUDIO_DEIVCE_MAX_INPUT_STREAMS;
-+    }
-+    sdev->in_stream_cnt = stream_cnt;
-+    for (i = 0; i < stream_cnt; i++) {
-+        sdev->in_streams[i].id = stream_list[i];
-+        sdev->in_streams[i].buffer_cnt = 0;
-+        aaudio_init_stream_info(sdev, &sdev->in_streams[i]);
-+        sdev->in_streams[i].latency += sdev->in_latency;
-+    }
-+
-+    if (aaudio_cmd_get_output_stream_list(a, &buf, dev_id, &stream_list, &stream_cnt)) {
-+        dev_err(a->dev, "Failed to get output stream list for device %llx\n", dev_id);
-+        goto fail;
-+    }
-+    if (stream_cnt > AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS) {
-+        dev_warn(a->dev, "Device %s input stream count %llu is larger than the supported count of %u\n",
-+                 sdev->uid, stream_cnt, AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS);
-+        stream_cnt = AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS;
-+    }
-+    sdev->out_stream_cnt = stream_cnt;
-+    for (i = 0; i < stream_cnt; i++) {
-+        sdev->out_streams[i].id = stream_list[i];
-+        sdev->out_streams[i].buffer_cnt = 0;
-+        aaudio_init_stream_info(sdev, &sdev->out_streams[i]);
-+        sdev->out_streams[i].latency += sdev->in_latency;
-+    }
-+
-+    if (sdev->is_pcm)
-+        aaudio_create_pcm(sdev);
-+    /* Headphone Jack status */
-+    if (!strcmp(sdev->uid, "Codec Output")) {
-+        if (snd_jack_new(a->card, sdev->uid, SND_JACK_HEADPHONE, &sdev->jack, true, false))
-+            dev_warn(a->dev, "Failed to create an attached jack for %s\n", sdev->uid);
-+        aaudio_cmd_property_listener(a, sdev->dev_id, sdev->dev_id,
-+                AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, AAUDIO_PROP_JACK_PLUGGED, 0));
-+        aaudio_handle_jack_connection_change(sdev);
-+    }
-+
-+    aaudio_reply_free(&buf);
-+
-+    list_add_tail(&sdev->list, &a->subdevice_list);
-+    return;
-+
-+fail:
-+    aaudio_reply_free(&buf);
-+    kfree(sdev);
-+}
-+
-+static void aaudio_init_stream_info(struct aaudio_subdevice *sdev, struct aaudio_stream *strm)
-+{
-+    if (aaudio_cmd_get_primitive_property(sdev->a, sdev->dev_id, strm->id,
-+            AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_PHYS_FORMAT, 0), NULL, 0,
-+            &strm->desc, sizeof(strm->desc)))
-+        dev_warn(sdev->a->dev, "Failed to query stream descriptor\n");
-+    if (aaudio_cmd_get_primitive_property(sdev->a, sdev->dev_id, strm->id,
-+            AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_LATENCY, 0), NULL, 0, &strm->latency, sizeof(u32)))
-+        dev_warn(sdev->a->dev, "Failed to query stream latency\n");
-+    if (strm->desc.format_id == AAUDIO_FORMAT_LPCM)
-+        sdev->is_pcm = true;
-+}
-+
-+static void aaudio_free_dev(struct aaudio_subdevice *sdev)
-+{
-+    size_t i;
-+    for (i = 0; i < sdev->in_stream_cnt; i++) {
-+        if (sdev->in_streams[i].alsa_hw_desc)
-+            kfree(sdev->in_streams[i].alsa_hw_desc);
-+        if (sdev->in_streams[i].buffers)
-+            kfree(sdev->in_streams[i].buffers);
-+    }
-+    for (i = 0; i < sdev->out_stream_cnt; i++) {
-+        if (sdev->out_streams[i].alsa_hw_desc)
-+            kfree(sdev->out_streams[i].alsa_hw_desc);
-+        if (sdev->out_streams[i].buffers)
-+            kfree(sdev->out_streams[i].buffers);
-+    }
-+    kfree(sdev);
-+}
-+
-+static struct aaudio_subdevice *aaudio_find_dev_by_dev_id(struct aaudio_device *a, aaudio_device_id_t dev_id)
-+{
-+    struct aaudio_subdevice *sdev;
-+    list_for_each_entry(sdev, &a->subdevice_list, list) {
-+        if (dev_id == sdev->dev_id)
-+            return sdev;
-+    }
-+    return NULL;
-+}
-+
-+static struct aaudio_subdevice *aaudio_find_dev_by_uid(struct aaudio_device *a, const char *uid)
-+{
-+    struct aaudio_subdevice *sdev;
-+    list_for_each_entry(sdev, &a->subdevice_list, list) {
-+        if (!strcmp(uid, sdev->uid))
-+            return sdev;
-+    }
-+    return NULL;
-+}
-+
-+static void aaudio_init_bs_stream(struct aaudio_device *a, struct aaudio_stream *strm,
-+        struct aaudio_buffer_struct_stream *bs_strm);
-+static void aaudio_init_bs_stream_host(struct aaudio_device *a, struct aaudio_stream *strm,
-+        struct aaudio_buffer_struct_stream *bs_strm);
-+
-+static int aaudio_init_bs(struct aaudio_device *a)
-+{
-+    int i, j;
-+    struct aaudio_buffer_struct_device *dev;
-+    struct aaudio_subdevice *sdev;
-+    u32 ver, sig, bs_base;
-+
-+    ver = ioread32(&a->reg_mem_gpr[0]);
-+    if (ver < 3) {
-+        dev_err(a->dev, "aaudio: Bad GPR version (%u)", ver);
-+        return -EINVAL;
-+    }
-+    sig = ioread32(&a->reg_mem_gpr[1]);
-+    if (sig != AAUDIO_SIG) {
-+        dev_err(a->dev, "aaudio: Bad GPR sig (%x)", sig);
-+        return -EINVAL;
-+    }
-+    bs_base = ioread32(&a->reg_mem_gpr[2]);
-+    a->bs = (struct aaudio_buffer_struct *) ((u8 *) a->reg_mem_bs + bs_base);
-+    if (a->bs->signature != AAUDIO_SIG) {
-+        dev_err(a->dev, "aaudio: Bad BufferStruct sig (%x)", a->bs->signature);
-+        return -EINVAL;
-+    }
-+    dev_info(a->dev, "aaudio: BufferStruct ver = %i\n", a->bs->version);
-+    dev_info(a->dev, "aaudio: Num devices = %i\n", a->bs->num_devices);
-+    for (i = 0; i < a->bs->num_devices; i++) {
-+        dev = &a->bs->devices[i];
-+        dev_info(a->dev, "aaudio: Device %i %s\n", i, dev->name);
-+
-+        sdev = aaudio_find_dev_by_uid(a, dev->name);
-+        if (!sdev) {
-+            dev_err(a->dev, "aaudio: Subdevice not found for BufferStruct device %s\n", dev->name);
-+            continue;
-+        }
-+        sdev->buf_id = (u8) i;
-+        dev->num_input_streams = 0;
-+        for (j = 0; j < dev->num_output_streams; j++) {
-+            dev_info(a->dev, "aaudio: Device %i Stream %i: Output; Buffer Count = %i\n", i, j,
-+                     dev->output_streams[j].num_buffers);
-+            if (j < sdev->out_stream_cnt)
-+                aaudio_init_bs_stream(a, &sdev->out_streams[j], &dev->output_streams[j]);
-+        }
-+    }
-+
-+    list_for_each_entry(sdev, &a->subdevice_list, list) {
-+        if (sdev->buf_id != AAUDIO_BUFFER_ID_NONE)
-+            continue;
-+        sdev->buf_id = i;
-+        dev_info(a->dev, "aaudio: Created device %i %s\n", i, sdev->uid);
-+        strcpy(a->bs->devices[i].name, sdev->uid);
-+        a->bs->devices[i].num_input_streams = 0;
-+        a->bs->devices[i].num_output_streams = 0;
-+        a->bs->num_devices = ++i;
-+    }
-+    list_for_each_entry(sdev, &a->subdevice_list, list) {
-+        if (sdev->in_stream_cnt == 1) {
-+            dev_info(a->dev, "aaudio: Device %i Host Stream; Input\n", sdev->buf_id);
-+            aaudio_init_bs_stream_host(a, &sdev->in_streams[0], &a->bs->devices[sdev->buf_id].input_streams[0]);
-+            a->bs->devices[sdev->buf_id].num_input_streams = 1;
-+            wmb();
-+
-+            if (aaudio_cmd_set_input_stream_address_ranges(a, sdev->dev_id)) {
-+                dev_err(a->dev, "aaudio: Failed to set input stream address ranges\n");
-+            }
-+        }
-+    }
-+
-+    return 0;
-+}
-+
-+static void aaudio_init_bs_stream(struct aaudio_device *a, struct aaudio_stream *strm,
-+                                  struct aaudio_buffer_struct_stream *bs_strm)
-+{
-+    size_t i;
-+    strm->buffer_cnt = bs_strm->num_buffers;
-+    if (bs_strm->num_buffers > AAUDIO_DEIVCE_MAX_BUFFER_COUNT) {
-+        dev_warn(a->dev, "BufferStruct buffer count %u exceeds driver limit of %u\n", bs_strm->num_buffers,
-+                AAUDIO_DEIVCE_MAX_BUFFER_COUNT);
-+        strm->buffer_cnt = AAUDIO_DEIVCE_MAX_BUFFER_COUNT;
-+    }
-+    if (!strm->buffer_cnt)
-+        return;
-+    strm->buffers = kmalloc_array(strm->buffer_cnt, sizeof(struct aaudio_dma_buf), GFP_KERNEL);
-+    if (!strm->buffers) {
-+        dev_err(a->dev, "Buffer list allocation failed\n");
-+        return;
-+    }
-+    for (i = 0; i < strm->buffer_cnt; i++) {
-+        strm->buffers[i].dma_addr = a->reg_mem_bs_dma + (dma_addr_t) bs_strm->buffers[i].address;
-+        strm->buffers[i].ptr = a->reg_mem_bs + bs_strm->buffers[i].address;
-+        strm->buffers[i].size = bs_strm->buffers[i].size;
-+    }
-+
-+    if (strm->buffer_cnt == 1) {
-+        strm->alsa_hw_desc = kmalloc(sizeof(struct snd_pcm_hardware), GFP_KERNEL);
-+        if (aaudio_create_hw_info(&strm->desc, strm->alsa_hw_desc, strm->buffers[0].size)) {
-+            kfree(strm->alsa_hw_desc);
-+            strm->alsa_hw_desc = NULL;
-+        }
-+    }
-+}
-+
-+static void aaudio_init_bs_stream_host(struct aaudio_device *a, struct aaudio_stream *strm,
-+        struct aaudio_buffer_struct_stream *bs_strm)
-+{
-+    size_t size;
-+    dma_addr_t dma_addr;
-+    void *dma_ptr;
-+    size = strm->desc.bytes_per_packet * 16640;
-+    dma_ptr = dma_alloc_coherent(&a->pci->dev, size, &dma_addr, GFP_KERNEL);
-+    if (!dma_ptr) {
-+        dev_err(a->dev, "dma_alloc_coherent failed\n");
-+        return;
-+    }
-+    bs_strm->buffers[0].address = dma_addr;
-+    bs_strm->buffers[0].size = size;
-+    bs_strm->num_buffers = 1;
-+
-+    memset(dma_ptr, 0, size);
-+
-+    strm->buffer_cnt = 1;
-+    strm->buffers = kmalloc_array(strm->buffer_cnt, sizeof(struct aaudio_dma_buf), GFP_KERNEL);
-+    if (!strm->buffers) {
-+        dev_err(a->dev, "Buffer list allocation failed\n");
-+        return;
-+    }
-+    strm->buffers[0].dma_addr = dma_addr;
-+    strm->buffers[0].ptr = dma_ptr;
-+    strm->buffers[0].size = size;
-+
-+    strm->alsa_hw_desc = kmalloc(sizeof(struct snd_pcm_hardware), GFP_KERNEL);
-+    if (aaudio_create_hw_info(&strm->desc, strm->alsa_hw_desc, strm->buffers[0].size)) {
-+        kfree(strm->alsa_hw_desc);
-+        strm->alsa_hw_desc = NULL;
-+    }
-+}
-+
-+static void aaudio_handle_prop_change(struct aaudio_device *a, struct aaudio_msg *msg);
-+
-+void aaudio_handle_notification(struct aaudio_device *a, struct aaudio_msg *msg)
-+{
-+    struct aaudio_send_ctx sctx;
-+    struct aaudio_msg_base base;
-+    if (aaudio_msg_read_base(msg, &base))
-+        return;
-+    switch (base.msg) {
-+        case AAUDIO_MSG_NOTIFICATION_BOOT:
-+            dev_info(a->dev, "Received boot notification from remote\n");
-+
-+            /* Resend the alive notify */
-+            if (aaudio_send(a, &sctx, 500,
-+                    aaudio_msg_write_alive_notification, 1, 3)) {
-+                pr_err("Sending alive notification failed\n");
-+            }
-+            break;
-+        case AAUDIO_MSG_NOTIFICATION_ALIVE:
-+            dev_info(a->dev, "Received alive notification from remote\n");
-+            complete_all(&a->remote_alive);
-+            break;
-+        case AAUDIO_MSG_PROPERTY_CHANGED:
-+            aaudio_handle_prop_change(a, msg);
-+            break;
-+        default:
-+            dev_info(a->dev, "Unhandled notification %i", base.msg);
-+            break;
-+    }
-+}
-+
-+struct aaudio_prop_change_work_struct {
-+    struct work_struct ws;
-+    struct aaudio_device *a;
-+    aaudio_device_id_t dev;
-+    aaudio_object_id_t obj;
-+    struct aaudio_prop_addr prop;
-+};
-+
-+static void aaudio_handle_jack_connection_change(struct aaudio_subdevice *sdev)
-+{
-+    u32 plugged;
-+    if (!sdev->jack)
-+        return;
-+    /* NOTE: Apple made the plug status scoped to the input and output streams. This makes no sense for us, so I just
-+     * always pick the OUTPUT status. */
-+    if (aaudio_cmd_get_primitive_property(sdev->a, sdev->dev_id, sdev->dev_id,
-+            AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, AAUDIO_PROP_JACK_PLUGGED, 0), NULL, 0, &plugged, sizeof(plugged))) {
-+        dev_err(sdev->a->dev, "Failed to get jack enable status\n");
-+        return;
-+    }
-+    dev_dbg(sdev->a->dev, "Jack is now %s\n", plugged ? "plugged" : "unplugged");
-+    snd_jack_report(sdev->jack, plugged ? sdev->jack->type : 0);
-+}
-+
-+void aaudio_handle_prop_change_work(struct work_struct *ws)
-+{
-+    struct aaudio_prop_change_work_struct *work = container_of(ws, struct aaudio_prop_change_work_struct, ws);
-+    struct aaudio_subdevice *sdev;
-+
-+    sdev = aaudio_find_dev_by_dev_id(work->a, work->dev);
-+    if (!sdev) {
-+        dev_err(work->a->dev, "Property notification change: device not found\n");
-+        goto done;
-+    }
-+    dev_dbg(work->a->dev, "Property changed for device: %s\n", sdev->uid);
-+
-+    if (work->prop.scope == AAUDIO_PROP_SCOPE_OUTPUT && work->prop.selector == AAUDIO_PROP_JACK_PLUGGED) {
-+        aaudio_handle_jack_connection_change(sdev);
-+    }
-+
-+done:
-+    kfree(work);
-+}
-+
-+void aaudio_handle_prop_change(struct aaudio_device *a, struct aaudio_msg *msg)
-+{
-+    /* NOTE: This is a scheduled work because this callback will generally need to query device information and this
-+     * is not possible when we are in the reply parsing code's context. */
-+    struct aaudio_prop_change_work_struct *work;
-+    work = kmalloc(sizeof(struct aaudio_prop_change_work_struct), GFP_KERNEL);
-+    work->a = a;
-+    INIT_WORK(&work->ws, aaudio_handle_prop_change_work);
-+    aaudio_msg_read_property_changed(msg, &work->dev, &work->obj, &work->prop);
-+    schedule_work(&work->ws);
-+}
-+
-+#define aaudio_send_cmd_response(a, sctx, msg, fn, ...) \
-+    if (aaudio_send_with_tag(a, sctx, ((struct aaudio_msg_header *) msg->data)->tag, 500, fn, ##__VA_ARGS__)) \
-+        pr_err("aaudio: Failed to reply to a command\n");
-+
-+void aaudio_handle_cmd_timestamp(struct aaudio_device *a, struct aaudio_msg *msg)
-+{
-+    ktime_t time_os = ktime_get_boottime();
-+    struct aaudio_send_ctx sctx;
-+    struct aaudio_subdevice *sdev;
-+    u64 devid, timestamp, update_seed;
-+    aaudio_msg_read_update_timestamp(msg, &devid, &timestamp, &update_seed);
-+    dev_dbg(a->dev, "Received timestamp update for dev=%llx ts=%llx seed=%llx\n", devid, timestamp, update_seed);
-+
-+    sdev = aaudio_find_dev_by_dev_id(a, devid);
-+    aaudio_handle_timestamp(sdev, time_os, timestamp);
-+
-+    aaudio_send_cmd_response(a, &sctx, msg,
-+            aaudio_msg_write_update_timestamp_response);
-+}
-+
-+void aaudio_handle_command(struct aaudio_device *a, struct aaudio_msg *msg)
-+{
-+    struct aaudio_msg_base base;
-+    if (aaudio_msg_read_base(msg, &base))
-+        return;
-+    switch (base.msg) {
-+        case AAUDIO_MSG_UPDATE_TIMESTAMP:
-+            aaudio_handle_cmd_timestamp(a, msg);
-+            break;
-+        default:
-+            dev_info(a->dev, "Unhandled device command %i", base.msg);
-+            break;
-+    }
-+}
-+
-+static struct pci_device_id aaudio_ids[  ] = {
-+        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x1803) },
-+        { 0, },
-+};
-+
-+struct dev_pm_ops aaudio_pci_driver_pm = {
-+        .suspend = aaudio_suspend,
-+        .resume = aaudio_resume
-+};
-+struct pci_driver aaudio_pci_driver = {
-+        .name = "aaudio",
-+        .id_table = aaudio_ids,
-+        .probe = aaudio_probe,
-+        .remove = aaudio_remove,
-+        .driver = {
-+                .pm = &aaudio_pci_driver_pm
-+        }
-+};
-+
-+
-+int aaudio_module_init(void)
-+{
-+    int result;
-+    if ((result = alloc_chrdev_region(&aaudio_chrdev, 0, 1, "aaudio")))
-+        goto fail_chrdev;
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0)
-+    aaudio_class = class_create(THIS_MODULE, "aaudio");
-+#else
-+    aaudio_class = class_create("aaudio");
-+#endif
-+    if (IS_ERR(aaudio_class)) {
-+        result = PTR_ERR(aaudio_class);
-+        goto fail_class;
-+    }
-+    
-+    result = pci_register_driver(&aaudio_pci_driver);
-+    if (result)
-+        goto fail_drv;
-+    return 0;
-+
-+fail_drv:
-+    pci_unregister_driver(&aaudio_pci_driver);
-+fail_class:
-+    class_destroy(aaudio_class);
-+fail_chrdev:
-+    unregister_chrdev_region(aaudio_chrdev, 1);
-+    if (!result)
-+        result = -EINVAL;
-+    return result;
-+}
-+
-+void aaudio_module_exit(void)
-+{
-+    pci_unregister_driver(&aaudio_pci_driver);
-+    class_destroy(aaudio_class);
-+    unregister_chrdev_region(aaudio_chrdev, 1);
-+}
-+
-+struct aaudio_alsa_pcm_id_mapping aaudio_alsa_id_mappings[] = {
-+        {"Speaker", 0},
-+        {"Digital Mic", 1},
-+        {"Codec Output", 2},
-+        {"Codec Input", 3},
-+        {"Bridge Loopback", 4},
-+        {}
-+};
-+
-+module_param_named(index, aaudio_alsa_index, int, 0444);
-+MODULE_PARM_DESC(index, "Index value for Apple Internal Audio soundcard.");
-+module_param_named(id, aaudio_alsa_id, charp, 0444);
-+MODULE_PARM_DESC(id, "ID string for Apple Internal Audio soundcard.");
-diff --git a/drivers/staging/apple-bce/audio/audio.h b/drivers/staging/apple-bce/audio/audio.h
-new file mode 100644
-index 000000000000..004bc1e22ea4
---- /dev/null
-+++ b/drivers/staging/apple-bce/audio/audio.h
-@@ -0,0 +1,125 @@
-+#ifndef AAUDIO_H
-+#define AAUDIO_H
-+
-+#include <linux/types.h>
-+#include <sound/pcm.h>
-+#include "../apple_bce.h"
-+#include "protocol_bce.h"
-+#include "description.h"
-+
-+#define AAUDIO_SIG 0x19870423
-+
-+#define AAUDIO_DEVICE_MAX_UID_LEN 128
-+#define AAUDIO_DEIVCE_MAX_INPUT_STREAMS 1
-+#define AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS 1
-+#define AAUDIO_DEIVCE_MAX_BUFFER_COUNT 1
-+
-+#define AAUDIO_BUFFER_ID_NONE 0xffu
-+
-+struct snd_card;
-+struct snd_pcm;
-+struct snd_pcm_hardware;
-+struct snd_jack;
-+
-+struct __attribute__((packed)) __attribute__((aligned(4))) aaudio_buffer_struct_buffer {
-+    size_t address;
-+    size_t size;
-+    size_t pad[4];
-+};
-+struct aaudio_buffer_struct_stream {
-+    u8 num_buffers;
-+    struct aaudio_buffer_struct_buffer buffers[100];
-+    char filler[32];
-+};
-+struct aaudio_buffer_struct_device {
-+    char name[128];
-+    u8 num_input_streams;
-+    u8 num_output_streams;
-+    struct aaudio_buffer_struct_stream input_streams[5];
-+    struct aaudio_buffer_struct_stream output_streams[5];
-+    char filler[128];
-+};
-+struct aaudio_buffer_struct {
-+    u32 version;
-+    u32 signature;
-+    u32 flags;
-+    u8 num_devices;
-+    struct aaudio_buffer_struct_device devices[20];
-+};
-+
-+struct aaudio_device;
-+struct aaudio_dma_buf {
-+    dma_addr_t dma_addr;
-+    void *ptr;
-+    size_t size;
-+};
-+struct aaudio_stream {
-+    aaudio_object_id_t id;
-+    size_t buffer_cnt;
-+    struct aaudio_dma_buf *buffers;
-+
-+    struct aaudio_apple_description desc;
-+    struct snd_pcm_hardware *alsa_hw_desc;
-+    u32 latency;
-+
-+    bool waiting_for_first_ts;
-+
-+    ktime_t remote_timestamp;
-+    snd_pcm_sframes_t frame_min;
-+    int started;
-+};
-+struct aaudio_subdevice {
-+    struct aaudio_device *a;
-+    struct list_head list;
-+    aaudio_device_id_t dev_id;
-+    u32 in_latency, out_latency;
-+    u8 buf_id;
-+    int alsa_id;
-+    char uid[AAUDIO_DEVICE_MAX_UID_LEN + 1];
-+    size_t in_stream_cnt;
-+    struct aaudio_stream in_streams[AAUDIO_DEIVCE_MAX_INPUT_STREAMS];
-+    size_t out_stream_cnt;
-+    struct aaudio_stream out_streams[AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS];
-+    bool is_pcm;
-+    struct snd_pcm *pcm;
-+    struct snd_jack *jack;
-+};
-+struct aaudio_alsa_pcm_id_mapping {
-+    const char *name;
-+    int alsa_id;
-+};
-+
-+struct aaudio_device {
-+    struct pci_dev *pci;
-+    dev_t devt;
-+    struct device *dev;
-+    void __iomem *reg_mem_bs;
-+    dma_addr_t reg_mem_bs_dma;
-+    void __iomem *reg_mem_cfg;
-+
-+    u32 __iomem *reg_mem_gpr;
-+
-+    struct aaudio_buffer_struct *bs;
-+
-+    struct apple_bce_device *bce;
-+    struct aaudio_bce bcem;
-+
-+    struct snd_card *card;
-+
-+    struct list_head subdevice_list;
-+    int next_alsa_id;
-+
-+    struct completion remote_alive;
-+};
-+
-+void aaudio_handle_notification(struct aaudio_device *a, struct aaudio_msg *msg);
-+void aaudio_handle_prop_change_work(struct work_struct *ws);
-+void aaudio_handle_cmd_timestamp(struct aaudio_device *a, struct aaudio_msg *msg);
-+void aaudio_handle_command(struct aaudio_device *a, struct aaudio_msg *msg);
-+
-+int aaudio_module_init(void);
-+void aaudio_module_exit(void);
-+
-+extern struct aaudio_alsa_pcm_id_mapping aaudio_alsa_id_mappings[];
-+
-+#endif //AAUDIO_H
-diff --git a/drivers/staging/apple-bce/audio/description.h b/drivers/staging/apple-bce/audio/description.h
-new file mode 100644
-index 000000000000..dfef3ab68f27
---- /dev/null
-+++ b/drivers/staging/apple-bce/audio/description.h
-@@ -0,0 +1,42 @@
-+#ifndef AAUDIO_DESCRIPTION_H
-+#define AAUDIO_DESCRIPTION_H
-+
-+#include <linux/types.h>
-+
-+struct aaudio_apple_description {
-+    u64 sample_rate_double;
-+    u32 format_id;
-+    u32 format_flags;
-+    u32 bytes_per_packet;
-+    u32 frames_per_packet;
-+    u32 bytes_per_frame;
-+    u32 channels_per_frame;
-+    u32 bits_per_channel;
-+    u32 reserved;
-+};
-+
-+enum {
-+    AAUDIO_FORMAT_LPCM = 0x6c70636d  // 'lpcm'
-+};
-+
-+enum {
-+    AAUDIO_FORMAT_FLAG_FLOAT = 1,
-+    AAUDIO_FORMAT_FLAG_BIG_ENDIAN = 2,
-+    AAUDIO_FORMAT_FLAG_SIGNED = 4,
-+    AAUDIO_FORMAT_FLAG_PACKED = 8,
-+    AAUDIO_FORMAT_FLAG_ALIGNED_HIGH = 16,
-+    AAUDIO_FORMAT_FLAG_NON_INTERLEAVED = 32,
-+    AAUDIO_FORMAT_FLAG_NON_MIXABLE = 64
-+};
-+
-+static inline u64 aaudio_double_to_u64(u64 d)
-+{
-+    u8 sign = (u8) ((d >> 63) & 1);
-+    s32 exp = (s32) ((d >> 52) & 0x7ff) - 1023;
-+    u64 fr = d & ((1LL << 52) - 1);
-+    if (sign || exp < 0)
-+        return 0;
-+    return (u64) ((1LL << exp) + (fr >> (52 - exp)));
-+}
-+
-+#endif //AAUDIO_DESCRIPTION_H
-diff --git a/drivers/staging/apple-bce/audio/pcm.c b/drivers/staging/apple-bce/audio/pcm.c
-new file mode 100644
-index 000000000000..1026e10a9ac5
---- /dev/null
-+++ b/drivers/staging/apple-bce/audio/pcm.c
-@@ -0,0 +1,308 @@
-+#include "pcm.h"
-+#include "audio.h"
-+
-+static u64 aaudio_get_alsa_fmtbit(struct aaudio_apple_description *desc)
-+{
-+    if (desc->format_flags & AAUDIO_FORMAT_FLAG_FLOAT) {
-+        if (desc->bits_per_channel == 32) {
-+            if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN)
-+                return SNDRV_PCM_FMTBIT_FLOAT_BE;
-+            else
-+                return SNDRV_PCM_FMTBIT_FLOAT_LE;
-+        } else if (desc->bits_per_channel == 64) {
-+            if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN)
-+                return SNDRV_PCM_FMTBIT_FLOAT64_BE;
-+            else
-+                return SNDRV_PCM_FMTBIT_FLOAT64_LE;
-+        } else {
-+            pr_err("aaudio: unsupported bits per channel for float format: %u\n", desc->bits_per_channel);
-+            return 0;
-+        }
-+    }
-+#define DEFINE_BPC_OPTION(val, b) \
-+    case val: \
-+        if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN) { \
-+            if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED) \
-+                return SNDRV_PCM_FMTBIT_S ## b ## BE; \
-+            else \
-+                return SNDRV_PCM_FMTBIT_U ## b ## BE; \
-+        } else { \
-+            if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED) \
-+                return SNDRV_PCM_FMTBIT_S ## b ## LE; \
-+            else \
-+                return SNDRV_PCM_FMTBIT_U ## b ## LE; \
-+        }
-+    if (desc->format_flags & AAUDIO_FORMAT_FLAG_PACKED) {
-+        switch (desc->bits_per_channel) {
-+            case 8:
-+            case 16:
-+            case 32:
-+                break;
-+            DEFINE_BPC_OPTION(24, 24_3)
-+            default:
-+                pr_err("aaudio: unsupported bits per channel for packed format: %u\n", desc->bits_per_channel);
-+                return 0;
-+        }
-+    }
-+    if (desc->format_flags & AAUDIO_FORMAT_FLAG_ALIGNED_HIGH) {
-+        switch (desc->bits_per_channel) {
-+            DEFINE_BPC_OPTION(24, 32_)
-+            default:
-+                pr_err("aaudio: unsupported bits per channel for high-aligned format: %u\n", desc->bits_per_channel);
-+                return 0;
-+        }
-+    }
-+    switch (desc->bits_per_channel) {
-+        case 8:
-+            if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED)
-+                return SNDRV_PCM_FMTBIT_S8;
-+            else
-+                return SNDRV_PCM_FMTBIT_U8;
-+        DEFINE_BPC_OPTION(16, 16_)
-+        DEFINE_BPC_OPTION(24, 24_)
-+        DEFINE_BPC_OPTION(32, 32_)
-+        default:
-+            pr_err("aaudio: unsupported bits per channel: %u\n", desc->bits_per_channel);
-+            return 0;
-+    }
-+}
-+int aaudio_create_hw_info(struct aaudio_apple_description *desc, struct snd_pcm_hardware *alsa_hw,
-+        size_t buf_size)
-+{
-+    uint rate;
-+    alsa_hw->info = (SNDRV_PCM_INFO_MMAP |
-+                     SNDRV_PCM_INFO_BLOCK_TRANSFER |
-+                     SNDRV_PCM_INFO_MMAP_VALID |
-+                     SNDRV_PCM_INFO_DOUBLE);
-+    if (desc->format_flags & AAUDIO_FORMAT_FLAG_NON_MIXABLE)
-+        pr_warn("aaudio: unsupported hw flag: NON_MIXABLE\n");
-+    if (!(desc->format_flags & AAUDIO_FORMAT_FLAG_NON_INTERLEAVED))
-+        alsa_hw->info |= SNDRV_PCM_INFO_INTERLEAVED;
-+    alsa_hw->formats = aaudio_get_alsa_fmtbit(desc);
-+    if (!alsa_hw->formats)
-+        return -EINVAL;
-+    rate = (uint) aaudio_double_to_u64(desc->sample_rate_double);
-+    alsa_hw->rates = snd_pcm_rate_to_rate_bit(rate);
-+    alsa_hw->rate_min = rate;
-+    alsa_hw->rate_max = rate;
-+    alsa_hw->channels_min = desc->channels_per_frame;
-+    alsa_hw->channels_max = desc->channels_per_frame;
-+    alsa_hw->buffer_bytes_max = buf_size;
-+    alsa_hw->period_bytes_min = desc->bytes_per_packet;
-+    alsa_hw->period_bytes_max = desc->bytes_per_packet;
-+    alsa_hw->periods_min = (uint) (buf_size / desc->bytes_per_packet);
-+    alsa_hw->periods_max = (uint) (buf_size / desc->bytes_per_packet);
-+    pr_debug("aaudio_create_hw_info: format = %llu, rate = %u/%u. channels = %u, periods = %u, period size = %lu\n",
-+            alsa_hw->formats, alsa_hw->rate_min, alsa_hw->rates, alsa_hw->channels_min, alsa_hw->periods_min,
-+            alsa_hw->period_bytes_min);
-+    return 0;
-+}
-+
-+static struct aaudio_stream *aaudio_pcm_stream(struct snd_pcm_substream *substream)
-+{
-+    struct aaudio_subdevice *sdev = snd_pcm_substream_chip(substream);
-+    if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-+        return &sdev->out_streams[substream->number];
-+    else
-+        return &sdev->in_streams[substream->number];
-+}
-+
-+static int aaudio_pcm_open(struct snd_pcm_substream *substream)
-+{
-+    pr_debug("aaudio_pcm_open\n");
-+    substream->runtime->hw = *aaudio_pcm_stream(substream)->alsa_hw_desc;
-+
-+    return 0;
-+}
-+
-+static int aaudio_pcm_close(struct snd_pcm_substream *substream)
-+{
-+    pr_debug("aaudio_pcm_close\n");
-+    return 0;
-+}
-+
-+static int aaudio_pcm_prepare(struct snd_pcm_substream *substream)
-+{
-+    return 0;
-+}
-+
-+static int aaudio_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params)
-+{
-+    struct aaudio_stream *astream = aaudio_pcm_stream(substream);
-+    pr_debug("aaudio_pcm_hw_params\n");
-+
-+    if (!astream->buffer_cnt || !astream->buffers)
-+        return -EINVAL;
-+
-+    substream->runtime->dma_area = astream->buffers[0].ptr;
-+    substream->runtime->dma_addr = astream->buffers[0].dma_addr;
-+    substream->runtime->dma_bytes = astream->buffers[0].size;
-+    return 0;
-+}
-+
-+static int aaudio_pcm_hw_free(struct snd_pcm_substream *substream)
-+{
-+    pr_debug("aaudio_pcm_hw_free\n");
-+    return 0;
-+}
-+
-+static void aaudio_pcm_start(struct snd_pcm_substream *substream)
-+{
-+    struct aaudio_subdevice *sdev = snd_pcm_substream_chip(substream);
-+    struct aaudio_stream *stream = aaudio_pcm_stream(substream);
-+    void *buf;
-+    size_t s;
-+    ktime_t time_start, time_end;
-+    bool back_buffer;
-+    time_start = ktime_get();
-+
-+    back_buffer = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK);
-+
-+    if (back_buffer) {
-+        s = frames_to_bytes(substream->runtime, substream->runtime->control->appl_ptr);
-+        buf = kmalloc(s, GFP_KERNEL);
-+        memcpy_fromio(buf, substream->runtime->dma_area, s);
-+        time_end = ktime_get();
-+        pr_debug("aaudio: Backed up the buffer in %lluns [%li]\n", ktime_to_ns(time_end - time_start),
-+                substream->runtime->control->appl_ptr);
-+    }
-+
-+    stream->waiting_for_first_ts = true;
-+    stream->frame_min = stream->latency;
-+
-+    aaudio_cmd_start_io(sdev->a, sdev->dev_id);
-+    if (back_buffer)
-+        memcpy_toio(substream->runtime->dma_area, buf, s);
-+
-+    time_end = ktime_get();
-+    pr_debug("aaudio: Started the audio device in %lluns\n", ktime_to_ns(time_end - time_start));
-+}
-+
-+static int aaudio_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
-+{
-+    struct aaudio_subdevice *sdev = snd_pcm_substream_chip(substream);
-+    struct aaudio_stream *stream = aaudio_pcm_stream(substream);
-+    pr_debug("aaudio_pcm_trigger %x\n", cmd);
-+
-+    /* We only supports triggers on the #0 buffer */
-+    if (substream->number != 0)
-+        return 0;
-+    switch (cmd) {
-+        case SNDRV_PCM_TRIGGER_START:
-+            aaudio_pcm_start(substream);
-+            stream->started = 1;
-+            break;
-+        case SNDRV_PCM_TRIGGER_STOP:
-+            aaudio_cmd_stop_io(sdev->a, sdev->dev_id);
-+            stream->started = 0;
-+            break;
-+        default:
-+            return -EINVAL;
-+    }
-+    return 0;
-+}
-+
-+static snd_pcm_uframes_t aaudio_pcm_pointer(struct snd_pcm_substream *substream)
-+{
-+    struct aaudio_stream *stream = aaudio_pcm_stream(substream);
-+    ktime_t time_from_start;
-+    snd_pcm_sframes_t frames;
-+    snd_pcm_sframes_t buffer_time_length;
-+
-+    if (!stream->started || stream->waiting_for_first_ts) {
-+        pr_warn("aaudio_pcm_pointer while not started\n");
-+        return 0;
-+    }
-+
-+    /* Approximate the pointer based on the last received timestamp */
-+    time_from_start = ktime_get_boottime() - stream->remote_timestamp;
-+    buffer_time_length = NSEC_PER_SEC * substream->runtime->buffer_size / substream->runtime->rate;
-+    frames = (ktime_to_ns(time_from_start) % buffer_time_length) * substream->runtime->buffer_size / buffer_time_length;
-+    if (ktime_to_ns(time_from_start) < buffer_time_length) {
-+        if (frames < stream->frame_min)
-+            frames = stream->frame_min;
-+        else
-+            stream->frame_min = 0;
-+    } else {
-+        if (ktime_to_ns(time_from_start) < 2 * buffer_time_length)
-+            stream->frame_min = frames;
-+        else
-+            stream->frame_min = 0; /* Heavy desync */
-+    }
-+    frames -= stream->latency;
-+    if (frames < 0)
-+        frames += ((-frames - 1) / substream->runtime->buffer_size + 1) * substream->runtime->buffer_size;
-+    return (snd_pcm_uframes_t) frames;
-+}
-+
-+static struct snd_pcm_ops aaudio_pcm_ops = {
-+        .open =        aaudio_pcm_open,
-+        .close =       aaudio_pcm_close,
-+        .ioctl =       snd_pcm_lib_ioctl,
-+        .hw_params =   aaudio_pcm_hw_params,
-+        .hw_free =     aaudio_pcm_hw_free,
-+        .prepare =     aaudio_pcm_prepare,
-+        .trigger =     aaudio_pcm_trigger,
-+        .pointer =     aaudio_pcm_pointer,
-+        .mmap    =     snd_pcm_lib_mmap_iomem
-+};
-+
-+int aaudio_create_pcm(struct aaudio_subdevice *sdev)
-+{
-+    struct snd_pcm *pcm;
-+    struct aaudio_alsa_pcm_id_mapping *id_mapping;
-+    int err;
-+
-+    if (!sdev->is_pcm || (sdev->in_stream_cnt == 0 && sdev->out_stream_cnt == 0)) {
-+        return -EINVAL;
-+    }
-+
-+    for (id_mapping = aaudio_alsa_id_mappings; id_mapping->name; id_mapping++) {
-+        if (!strcmp(sdev->uid, id_mapping->name)) {
-+            sdev->alsa_id = id_mapping->alsa_id;
-+            break;
-+        }
-+    }
-+    if (!id_mapping->name)
-+        sdev->alsa_id = sdev->a->next_alsa_id++;
-+    err = snd_pcm_new(sdev->a->card, sdev->uid, sdev->alsa_id,
-+            (int) sdev->out_stream_cnt, (int) sdev->in_stream_cnt, &pcm);
-+    if (err < 0)
-+        return err;
-+    pcm->private_data = sdev;
-+    pcm->nonatomic = 1;
-+    sdev->pcm = pcm;
-+    strcpy(pcm->name, sdev->uid);
-+    snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &aaudio_pcm_ops);
-+    snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &aaudio_pcm_ops);
-+    return 0;
-+}
-+
-+static void aaudio_handle_stream_timestamp(struct snd_pcm_substream *substream, ktime_t timestamp)
-+{
-+    unsigned long flags;
-+    struct aaudio_stream *stream;
-+
-+    stream = aaudio_pcm_stream(substream);
-+    snd_pcm_stream_lock_irqsave(substream, flags);
-+    stream->remote_timestamp = timestamp;
-+    if (stream->waiting_for_first_ts) {
-+        stream->waiting_for_first_ts = false;
-+        snd_pcm_stream_unlock_irqrestore(substream, flags);
-+        return;
-+    }
-+    snd_pcm_stream_unlock_irqrestore(substream, flags);
-+    snd_pcm_period_elapsed(substream);
-+}
-+
-+void aaudio_handle_timestamp(struct aaudio_subdevice *sdev, ktime_t os_timestamp, u64 dev_timestamp)
-+{
-+    struct snd_pcm_substream *substream;
-+
-+    substream = sdev->pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream;
-+    if (substream)
-+        aaudio_handle_stream_timestamp(substream, dev_timestamp);
-+    substream = sdev->pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream;
-+    if (substream)
-+        aaudio_handle_stream_timestamp(substream, os_timestamp);
-+}
-diff --git a/drivers/staging/apple-bce/audio/pcm.h b/drivers/staging/apple-bce/audio/pcm.h
-new file mode 100644
-index 000000000000..ea5f35fbe408
---- /dev/null
-+++ b/drivers/staging/apple-bce/audio/pcm.h
-@@ -0,0 +1,16 @@
-+#ifndef AAUDIO_PCM_H
-+#define AAUDIO_PCM_H
-+
-+#include <linux/types.h>
-+#include <linux/ktime.h>
-+
-+struct aaudio_subdevice;
-+struct aaudio_apple_description;
-+struct snd_pcm_hardware;
-+
-+int aaudio_create_hw_info(struct aaudio_apple_description *desc, struct snd_pcm_hardware *alsa_hw, size_t buf_size);
-+int aaudio_create_pcm(struct aaudio_subdevice *sdev);
-+
-+void aaudio_handle_timestamp(struct aaudio_subdevice *sdev, ktime_t os_timestamp, u64 dev_timestamp);
-+
-+#endif //AAUDIO_PCM_H
-diff --git a/drivers/staging/apple-bce/audio/protocol.c b/drivers/staging/apple-bce/audio/protocol.c
-new file mode 100644
-index 000000000000..2314813aeead
---- /dev/null
-+++ b/drivers/staging/apple-bce/audio/protocol.c
-@@ -0,0 +1,347 @@
-+#include "protocol.h"
-+#include "protocol_bce.h"
-+#include "audio.h"
-+
-+int aaudio_msg_read_base(struct aaudio_msg *msg, struct aaudio_msg_base *base)
-+{
-+    if (msg->size < sizeof(struct aaudio_msg_header) + sizeof(struct aaudio_msg_base) * 2)
-+        return -EINVAL;
-+    *base = *((struct aaudio_msg_base *) ((struct aaudio_msg_header *) msg->data + 1));
-+    return 0;
-+}
-+
-+#define READ_START(type) \
-+    size_t offset = sizeof(struct aaudio_msg_header) + sizeof(struct aaudio_msg_base); (void)offset; \
-+    if (((struct aaudio_msg_base *) ((struct aaudio_msg_header *) msg->data + 1))->msg != type) \
-+        return -EINVAL;
-+#define READ_DEVID_VAR(devid) *devid = ((struct aaudio_msg_header *) msg->data)->device_id
-+#define READ_VAL(type) ({ offset += sizeof(type); *((type *) ((u8 *) msg->data + offset - sizeof(type))); })
-+#define READ_VAR(type, var) *var = READ_VAL(type)
-+
-+int aaudio_msg_read_start_io_response(struct aaudio_msg *msg)
-+{
-+    READ_START(AAUDIO_MSG_START_IO_RESPONSE);
-+    return 0;
-+}
-+
-+int aaudio_msg_read_stop_io_response(struct aaudio_msg *msg)
-+{
-+    READ_START(AAUDIO_MSG_STOP_IO_RESPONSE);
-+    return 0;
-+}
-+
-+int aaudio_msg_read_update_timestamp(struct aaudio_msg *msg, aaudio_device_id_t *devid,
-+        u64 *timestamp, u64 *update_seed)
-+{
-+    READ_START(AAUDIO_MSG_UPDATE_TIMESTAMP);
-+    READ_DEVID_VAR(devid);
-+    READ_VAR(u64, timestamp);
-+    READ_VAR(u64, update_seed);
-+    return 0;
-+}
-+
-+int aaudio_msg_read_get_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj,
-+        struct aaudio_prop_addr *prop, void **data, u64 *data_size)
-+{
-+    READ_START(AAUDIO_MSG_GET_PROPERTY_RESPONSE);
-+    READ_VAR(aaudio_object_id_t, obj);
-+    READ_VAR(u32, &prop->element);
-+    READ_VAR(u32, &prop->scope);
-+    READ_VAR(u32, &prop->selector);
-+    READ_VAR(u64, data_size);
-+    *data = ((u8 *) msg->data + offset);
-+    /* offset += data_size; */
-+    return 0;
-+}
-+
-+int aaudio_msg_read_set_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj)
-+{
-+    READ_START(AAUDIO_MSG_SET_PROPERTY_RESPONSE);
-+    READ_VAR(aaudio_object_id_t, obj);
-+    return 0;
-+}
-+
-+int aaudio_msg_read_property_listener_response(struct aaudio_msg *msg, aaudio_object_id_t *obj,
-+        struct aaudio_prop_addr *prop)
-+{
-+    READ_START(AAUDIO_MSG_PROPERTY_LISTENER_RESPONSE);
-+    READ_VAR(aaudio_object_id_t, obj);
-+    READ_VAR(u32, &prop->element);
-+    READ_VAR(u32, &prop->scope);
-+    READ_VAR(u32, &prop->selector);
-+    return 0;
-+}
-+
-+int aaudio_msg_read_property_changed(struct aaudio_msg *msg, aaudio_device_id_t *devid, aaudio_object_id_t *obj,
-+        struct aaudio_prop_addr *prop)
-+{
-+    READ_START(AAUDIO_MSG_PROPERTY_CHANGED);
-+    READ_DEVID_VAR(devid);
-+    READ_VAR(aaudio_object_id_t, obj);
-+    READ_VAR(u32, &prop->element);
-+    READ_VAR(u32, &prop->scope);
-+    READ_VAR(u32, &prop->selector);
-+    return 0;
-+}
-+
-+int aaudio_msg_read_set_input_stream_address_ranges_response(struct aaudio_msg *msg)
-+{
-+    READ_START(AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES_RESPONSE);
-+    return 0;
-+}
-+
-+int aaudio_msg_read_get_input_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt)
-+{
-+    READ_START(AAUDIO_MSG_GET_INPUT_STREAM_LIST_RESPONSE);
-+    READ_VAR(u64, str_cnt);
-+    *str_l = (aaudio_device_id_t *) ((u8 *) msg->data + offset);
-+    /* offset += str_cnt * sizeof(aaudio_object_id_t); */
-+    return 0;
-+}
-+
-+int aaudio_msg_read_get_output_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt)
-+{
-+    READ_START(AAUDIO_MSG_GET_OUTPUT_STREAM_LIST_RESPONSE);
-+    READ_VAR(u64, str_cnt);
-+    *str_l = (aaudio_device_id_t *) ((u8 *) msg->data + offset);
-+    /* offset += str_cnt * sizeof(aaudio_object_id_t); */
-+    return 0;
-+}
-+
-+int aaudio_msg_read_set_remote_access_response(struct aaudio_msg *msg)
-+{
-+    READ_START(AAUDIO_MSG_SET_REMOTE_ACCESS_RESPONSE);
-+    return 0;
-+}
-+
-+int aaudio_msg_read_get_device_list_response(struct aaudio_msg *msg, aaudio_device_id_t **dev_l, u64 *dev_cnt)
-+{
-+    READ_START(AAUDIO_MSG_GET_DEVICE_LIST_RESPONSE);
-+    READ_VAR(u64, dev_cnt);
-+    *dev_l = (aaudio_device_id_t *) ((u8 *) msg->data + offset);
-+    /* offset += dev_cnt * sizeof(aaudio_device_id_t); */
-+    return 0;
-+}
-+
-+#define WRITE_START_OF_TYPE(typev, devid) \
-+    size_t offset = sizeof(struct aaudio_msg_header); (void) offset; \
-+    ((struct aaudio_msg_header *) msg->data)->type = (typev); \
-+    ((struct aaudio_msg_header *) msg->data)->device_id = (devid);
-+#define WRITE_START_COMMAND(devid) WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_COMMAND, devid)
-+#define WRITE_START_RESPONSE() WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_RESPONSE, 0)
-+#define WRITE_START_NOTIFICATION() WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_NOTIFICATION, 0)
-+#define WRITE_VAL(type, value) { *((type *) ((u8 *) msg->data + offset)) = value; offset += sizeof(value); }
-+#define WRITE_BIN(value, size) { memcpy((u8 *) msg->data + offset, value, size); offset += size; }
-+#define WRITE_BASE(type) WRITE_VAL(u32, type) WRITE_VAL(u32, 0)
-+#define WRITE_END() { msg->size = offset; }
-+
-+void aaudio_msg_write_start_io(struct aaudio_msg *msg, aaudio_device_id_t dev)
-+{
-+    WRITE_START_COMMAND(dev);
-+    WRITE_BASE(AAUDIO_MSG_START_IO);
-+    WRITE_END();
-+}
-+
-+void aaudio_msg_write_stop_io(struct aaudio_msg *msg, aaudio_device_id_t dev)
-+{
-+    WRITE_START_COMMAND(dev);
-+    WRITE_BASE(AAUDIO_MSG_STOP_IO);
-+    WRITE_END();
-+}
-+
-+void aaudio_msg_write_get_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
-+        struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size)
-+{
-+    WRITE_START_COMMAND(dev);
-+    WRITE_BASE(AAUDIO_MSG_GET_PROPERTY);
-+    WRITE_VAL(aaudio_object_id_t, obj);
-+    WRITE_VAL(u32, prop.element);
-+    WRITE_VAL(u32, prop.scope);
-+    WRITE_VAL(u32, prop.selector);
-+    WRITE_VAL(u64, qualifier_size);
-+    WRITE_BIN(qualifier, qualifier_size);
-+    WRITE_END();
-+}
-+
-+void aaudio_msg_write_set_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
-+        struct aaudio_prop_addr prop, void *data, u64 data_size, void *qualifier, u64 qualifier_size)
-+{
-+    WRITE_START_COMMAND(dev);
-+    WRITE_BASE(AAUDIO_MSG_SET_PROPERTY);
-+    WRITE_VAL(aaudio_object_id_t, obj);
-+    WRITE_VAL(u32, prop.element);
-+    WRITE_VAL(u32, prop.scope);
-+    WRITE_VAL(u32, prop.selector);
-+    WRITE_VAL(u64, data_size);
-+    WRITE_BIN(data, data_size);
-+    WRITE_VAL(u64, qualifier_size);
-+    WRITE_BIN(qualifier, qualifier_size);
-+    WRITE_END();
-+}
-+
-+void aaudio_msg_write_property_listener(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
-+        struct aaudio_prop_addr prop)
-+{
-+    WRITE_START_COMMAND(dev);
-+    WRITE_BASE(AAUDIO_MSG_PROPERTY_LISTENER);
-+    WRITE_VAL(aaudio_object_id_t, obj);
-+    WRITE_VAL(u32, prop.element);
-+    WRITE_VAL(u32, prop.scope);
-+    WRITE_VAL(u32, prop.selector);
-+    WRITE_END();
-+}
-+
-+void aaudio_msg_write_set_input_stream_address_ranges(struct aaudio_msg *msg, aaudio_device_id_t devid)
-+{
-+    WRITE_START_COMMAND(devid);
-+    WRITE_BASE(AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES);
-+    WRITE_END();
-+}
-+
-+void aaudio_msg_write_get_input_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid)
-+{
-+    WRITE_START_COMMAND(devid);
-+    WRITE_BASE(AAUDIO_MSG_GET_INPUT_STREAM_LIST);
-+    WRITE_END();
-+}
-+
-+void aaudio_msg_write_get_output_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid)
-+{
-+    WRITE_START_COMMAND(devid);
-+    WRITE_BASE(AAUDIO_MSG_GET_OUTPUT_STREAM_LIST);
-+    WRITE_END();
-+}
-+
-+void aaudio_msg_write_set_remote_access(struct aaudio_msg *msg, u64 mode)
-+{
-+    WRITE_START_COMMAND(0);
-+    WRITE_BASE(AAUDIO_MSG_SET_REMOTE_ACCESS);
-+    WRITE_VAL(u64, mode);
-+    WRITE_END();
-+}
-+
-+void aaudio_msg_write_alive_notification(struct aaudio_msg *msg, u32 proto_ver, u32 msg_ver)
-+{
-+    WRITE_START_NOTIFICATION();
-+    WRITE_BASE(AAUDIO_MSG_NOTIFICATION_ALIVE);
-+    WRITE_VAL(u32, proto_ver);
-+    WRITE_VAL(u32, msg_ver);
-+    WRITE_END();
-+}
-+
-+void aaudio_msg_write_update_timestamp_response(struct aaudio_msg *msg)
-+{
-+    WRITE_START_RESPONSE();
-+    WRITE_BASE(AAUDIO_MSG_UPDATE_TIMESTAMP_RESPONSE);
-+    WRITE_END();
-+}
-+
-+void aaudio_msg_write_get_device_list(struct aaudio_msg *msg)
-+{
-+    WRITE_START_COMMAND(0);
-+    WRITE_BASE(AAUDIO_MSG_GET_DEVICE_LIST);
-+    WRITE_END();
-+}
-+
-+#define CMD_SHARED_VARS_NO_REPLY \
-+    int status = 0; \
-+    struct aaudio_send_ctx sctx;
-+#define CMD_SHARED_VARS \
-+    CMD_SHARED_VARS_NO_REPLY \
-+    struct aaudio_msg reply = aaudio_reply_alloc(); \
-+    struct aaudio_msg *buf = &reply;
-+#define CMD_SEND_REQUEST(fn, ...) \
-+    if ((status = aaudio_send_cmd_sync(a, &sctx, buf, 500, fn, ##__VA_ARGS__))) \
-+        return status;
-+#define CMD_DEF_SHARED_AND_SEND(fn, ...) \
-+    CMD_SHARED_VARS \
-+    CMD_SEND_REQUEST(fn, ##__VA_ARGS__);
-+#define CMD_DEF_SHARED_NO_REPLY_AND_SEND(fn, ...) \
-+    CMD_SHARED_VARS_NO_REPLY \
-+    CMD_SEND_REQUEST(fn, ##__VA_ARGS__);
-+#define CMD_HNDL_REPLY_NO_FREE(fn, ...) \
-+    status = fn(buf, ##__VA_ARGS__); \
-+    return status;
-+#define CMD_HNDL_REPLY_AND_FREE(fn, ...) \
-+    status = fn(buf, ##__VA_ARGS__); \
-+    aaudio_reply_free(&reply); \
-+    return status;
-+
-+int aaudio_cmd_start_io(struct aaudio_device *a, aaudio_device_id_t devid)
-+{
-+    CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_start_io, devid);
-+    CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_start_io_response);
-+}
-+int aaudio_cmd_stop_io(struct aaudio_device *a, aaudio_device_id_t devid)
-+{
-+    CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_stop_io, devid);
-+    CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_stop_io_response);
-+}
-+int aaudio_cmd_get_property(struct aaudio_device *a, struct aaudio_msg *buf,
-+        aaudio_device_id_t devid, aaudio_object_id_t obj,
-+        struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void **data, u64 *data_size)
-+{
-+    CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_property, devid, obj, prop, qualifier, qualifier_size);
-+    CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_property_response, &obj, &prop, data, data_size);
-+}
-+int aaudio_cmd_get_primitive_property(struct aaudio_device *a,
-+        aaudio_device_id_t devid, aaudio_object_id_t obj,
-+        struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size)
-+{
-+    int status;
-+    struct aaudio_msg reply = aaudio_reply_alloc();
-+    void *r_data;
-+    u64 r_data_size;
-+    if ((status = aaudio_cmd_get_property(a, &reply, devid, obj, prop, qualifier, qualifier_size,
-+            &r_data, &r_data_size)))
-+        goto finish;
-+    if (r_data_size != data_size) {
-+        status = -EINVAL;
-+        goto finish;
-+    }
-+    memcpy(data, r_data, data_size);
-+finish:
-+    aaudio_reply_free(&reply);
-+    return status;
-+}
-+int aaudio_cmd_set_property(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj,
-+        struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size)
-+{
-+    CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_property, devid, obj, prop, data, data_size,
-+            qualifier, qualifier_size);
-+    CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_property_response, &obj);
-+}
-+int aaudio_cmd_property_listener(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj,
-+        struct aaudio_prop_addr prop)
-+{
-+    CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_property_listener, devid, obj, prop);
-+    CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_property_listener_response, &obj, &prop);
-+}
-+int aaudio_cmd_set_input_stream_address_ranges(struct aaudio_device *a, aaudio_device_id_t devid)
-+{
-+    CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_input_stream_address_ranges, devid);
-+    CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_input_stream_address_ranges_response);
-+}
-+int aaudio_cmd_get_input_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid,
-+        aaudio_object_id_t **str_l, u64 *str_cnt)
-+{
-+    CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_input_stream_list, devid);
-+    CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_input_stream_list_response, str_l, str_cnt);
-+}
-+int aaudio_cmd_get_output_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid,
-+        aaudio_object_id_t **str_l, u64 *str_cnt)
-+{
-+    CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_output_stream_list, devid);
-+    CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_output_stream_list_response, str_l, str_cnt);
-+}
-+int aaudio_cmd_set_remote_access(struct aaudio_device *a, u64 mode)
-+{
-+    CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_remote_access, mode);
-+    CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_remote_access_response);
-+}
-+int aaudio_cmd_get_device_list(struct aaudio_device *a, struct aaudio_msg *buf,
-+        aaudio_device_id_t **dev_l, u64 *dev_cnt)
-+{
-+    CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_device_list);
-+    CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_device_list_response, dev_l, dev_cnt);
-+}
-\ No newline at end of file
-diff --git a/drivers/staging/apple-bce/audio/protocol.h b/drivers/staging/apple-bce/audio/protocol.h
-new file mode 100644
-index 000000000000..3427486f3f57
---- /dev/null
-+++ b/drivers/staging/apple-bce/audio/protocol.h
-@@ -0,0 +1,147 @@
-+#ifndef AAUDIO_PROTOCOL_H
-+#define AAUDIO_PROTOCOL_H
-+
-+#include <linux/types.h>
-+
-+struct aaudio_device;
-+
-+typedef u64 aaudio_device_id_t;
-+typedef u64 aaudio_object_id_t;
-+
-+struct aaudio_msg {
-+    void *data;
-+    size_t size;
-+};
-+
-+struct __attribute__((packed)) aaudio_msg_header {
-+    char tag[4];
-+    u8 type;
-+    aaudio_device_id_t device_id; // Idk, use zero for commands?
-+};
-+struct __attribute__((packed)) aaudio_msg_base {
-+    u32 msg;
-+    u32 status;
-+};
-+
-+struct aaudio_prop_addr {
-+    u32 scope;
-+    u32 selector;
-+    u32 element;
-+};
-+#define AAUDIO_PROP(scope, sel, el) (struct aaudio_prop_addr) { scope, sel, el }
-+
-+enum {
-+    AAUDIO_MSG_TYPE_COMMAND = 1,
-+    AAUDIO_MSG_TYPE_RESPONSE = 2,
-+    AAUDIO_MSG_TYPE_NOTIFICATION = 3
-+};
-+
-+enum {
-+    AAUDIO_MSG_START_IO = 0,
-+    AAUDIO_MSG_START_IO_RESPONSE = 1,
-+    AAUDIO_MSG_STOP_IO = 2,
-+    AAUDIO_MSG_STOP_IO_RESPONSE = 3,
-+    AAUDIO_MSG_UPDATE_TIMESTAMP = 4,
-+    AAUDIO_MSG_GET_PROPERTY = 7,
-+    AAUDIO_MSG_GET_PROPERTY_RESPONSE = 8,
-+    AAUDIO_MSG_SET_PROPERTY = 9,
-+    AAUDIO_MSG_SET_PROPERTY_RESPONSE = 10,
-+    AAUDIO_MSG_PROPERTY_LISTENER = 11,
-+    AAUDIO_MSG_PROPERTY_LISTENER_RESPONSE = 12,
-+    AAUDIO_MSG_PROPERTY_CHANGED = 13,
-+    AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES = 18,
-+    AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES_RESPONSE = 19,
-+    AAUDIO_MSG_GET_INPUT_STREAM_LIST = 24,
-+    AAUDIO_MSG_GET_INPUT_STREAM_LIST_RESPONSE = 25,
-+    AAUDIO_MSG_GET_OUTPUT_STREAM_LIST = 26,
-+    AAUDIO_MSG_GET_OUTPUT_STREAM_LIST_RESPONSE = 27,
-+    AAUDIO_MSG_SET_REMOTE_ACCESS = 32,
-+    AAUDIO_MSG_SET_REMOTE_ACCESS_RESPONSE = 33,
-+    AAUDIO_MSG_UPDATE_TIMESTAMP_RESPONSE = 34,
-+
-+    AAUDIO_MSG_NOTIFICATION_ALIVE = 100,
-+    AAUDIO_MSG_GET_DEVICE_LIST = 101,
-+    AAUDIO_MSG_GET_DEVICE_LIST_RESPONSE = 102,
-+    AAUDIO_MSG_NOTIFICATION_BOOT = 104
-+};
-+
-+enum {
-+    AAUDIO_REMOTE_ACCESS_OFF = 0,
-+    AAUDIO_REMOTE_ACCESS_ON = 2
-+};
-+
-+enum {
-+    AAUDIO_PROP_SCOPE_GLOBAL = 0x676c6f62, // 'glob'
-+    AAUDIO_PROP_SCOPE_INPUT  = 0x696e7074, // 'inpt'
-+    AAUDIO_PROP_SCOPE_OUTPUT = 0x6f757470  // 'outp'
-+};
-+
-+enum {
-+    AAUDIO_PROP_UID          = 0x75696420, // 'uid '
-+    AAUDIO_PROP_BOOL_VALUE   = 0x6263766c, // 'bcvl'
-+    AAUDIO_PROP_JACK_PLUGGED = 0x6a61636b, // 'jack'
-+    AAUDIO_PROP_SEL_VOLUME   = 0x64656176, // 'deav'
-+    AAUDIO_PROP_LATENCY      = 0x6c746e63, // 'ltnc'
-+    AAUDIO_PROP_PHYS_FORMAT  = 0x70667420  // 'pft '
-+};
-+
-+int aaudio_msg_read_base(struct aaudio_msg *msg, struct aaudio_msg_base *base);
-+
-+int aaudio_msg_read_start_io_response(struct aaudio_msg *msg);
-+int aaudio_msg_read_stop_io_response(struct aaudio_msg *msg);
-+int aaudio_msg_read_update_timestamp(struct aaudio_msg *msg, aaudio_device_id_t *devid,
-+        u64 *timestamp, u64 *update_seed);
-+int aaudio_msg_read_get_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj,
-+        struct aaudio_prop_addr *prop, void **data, u64 *data_size);
-+int aaudio_msg_read_set_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj);
-+int aaudio_msg_read_property_listener_response(struct aaudio_msg *msg,aaudio_object_id_t *obj,
-+        struct aaudio_prop_addr *prop);
-+int aaudio_msg_read_property_changed(struct aaudio_msg *msg, aaudio_device_id_t *devid, aaudio_object_id_t *obj,
-+        struct aaudio_prop_addr *prop);
-+int aaudio_msg_read_set_input_stream_address_ranges_response(struct aaudio_msg *msg);
-+int aaudio_msg_read_get_input_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt);
-+int aaudio_msg_read_get_output_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt);
-+int aaudio_msg_read_set_remote_access_response(struct aaudio_msg *msg);
-+int aaudio_msg_read_get_device_list_response(struct aaudio_msg *msg, aaudio_device_id_t **dev_l, u64 *dev_cnt);
-+
-+void aaudio_msg_write_start_io(struct aaudio_msg *msg, aaudio_device_id_t dev);
-+void aaudio_msg_write_stop_io(struct aaudio_msg *msg, aaudio_device_id_t dev);
-+void aaudio_msg_write_get_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
-+        struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size);
-+void aaudio_msg_write_set_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
-+        struct aaudio_prop_addr prop, void *data, u64 data_size, void *qualifier, u64 qualifier_size);
-+void aaudio_msg_write_property_listener(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
-+        struct aaudio_prop_addr prop);
-+void aaudio_msg_write_set_input_stream_address_ranges(struct aaudio_msg *msg, aaudio_device_id_t devid);
-+void aaudio_msg_write_get_input_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid);
-+void aaudio_msg_write_get_output_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid);
-+void aaudio_msg_write_set_remote_access(struct aaudio_msg *msg, u64 mode);
-+void aaudio_msg_write_alive_notification(struct aaudio_msg *msg, u32 proto_ver, u32 msg_ver);
-+void aaudio_msg_write_update_timestamp_response(struct aaudio_msg *msg);
-+void aaudio_msg_write_get_device_list(struct aaudio_msg *msg);
-+
-+
-+int aaudio_cmd_start_io(struct aaudio_device *a, aaudio_device_id_t devid);
-+int aaudio_cmd_stop_io(struct aaudio_device *a, aaudio_device_id_t devid);
-+int aaudio_cmd_get_property(struct aaudio_device *a, struct aaudio_msg *buf,
-+        aaudio_device_id_t devid, aaudio_object_id_t obj,
-+        struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void **data, u64 *data_size);
-+int aaudio_cmd_get_primitive_property(struct aaudio_device *a,
-+        aaudio_device_id_t devid, aaudio_object_id_t obj,
-+        struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size);
-+int aaudio_cmd_set_property(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj,
-+        struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size);
-+int aaudio_cmd_property_listener(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj,
-+        struct aaudio_prop_addr prop);
-+int aaudio_cmd_set_input_stream_address_ranges(struct aaudio_device *a, aaudio_device_id_t devid);
-+int aaudio_cmd_get_input_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid,
-+        aaudio_object_id_t **str_l, u64 *str_cnt);
-+int aaudio_cmd_get_output_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid,
-+        aaudio_object_id_t **str_l, u64 *str_cnt);
-+int aaudio_cmd_set_remote_access(struct aaudio_device *a, u64 mode);
-+int aaudio_cmd_get_device_list(struct aaudio_device *a, struct aaudio_msg *buf,
-+        aaudio_device_id_t **dev_l, u64 *dev_cnt);
-+
-+
-+
-+#endif //AAUDIO_PROTOCOL_H
-diff --git a/drivers/staging/apple-bce/audio/protocol_bce.c b/drivers/staging/apple-bce/audio/protocol_bce.c
-new file mode 100644
-index 000000000000..28f2dfd44d67
---- /dev/null
-+++ b/drivers/staging/apple-bce/audio/protocol_bce.c
-@@ -0,0 +1,226 @@
-+#include "protocol_bce.h"
-+
-+#include "audio.h"
-+
-+static void aaudio_bce_out_queue_completion(struct bce_queue_sq *sq);
-+static void aaudio_bce_in_queue_completion(struct bce_queue_sq *sq);
-+static int aaudio_bce_queue_init(struct aaudio_device *dev, struct aaudio_bce_queue *q, const char *name, int direction,
-+                                 bce_sq_completion cfn);
-+void aaudio_bce_in_queue_submit_pending(struct aaudio_bce_queue *q, size_t count);
-+
-+int aaudio_bce_init(struct aaudio_device *dev)
-+{
-+    int status;
-+    struct aaudio_bce *bce = &dev->bcem;
-+    bce->cq = bce_create_cq(dev->bce, 0x80);
-+    spin_lock_init(&bce->spinlock);
-+    if (!bce->cq)
-+        return -EINVAL;
-+    if ((status = aaudio_bce_queue_init(dev, &bce->qout, "com.apple.BridgeAudio.IntelToARM", DMA_TO_DEVICE,
-+            aaudio_bce_out_queue_completion))) {
-+        return status;
-+    }
-+    if ((status = aaudio_bce_queue_init(dev, &bce->qin, "com.apple.BridgeAudio.ARMToIntel", DMA_FROM_DEVICE,
-+            aaudio_bce_in_queue_completion))) {
-+        return status;
-+    }
-+    aaudio_bce_in_queue_submit_pending(&bce->qin, bce->qin.el_count);
-+    return 0;
-+}
-+
-+int aaudio_bce_queue_init(struct aaudio_device *dev, struct aaudio_bce_queue *q, const char *name, int direction,
-+        bce_sq_completion cfn)
-+{
-+    q->cq = dev->bcem.cq;
-+    q->el_size = AAUDIO_BCE_QUEUE_ELEMENT_SIZE;
-+    q->el_count = AAUDIO_BCE_QUEUE_ELEMENT_COUNT;
-+    /* NOTE: The Apple impl uses 0x80 as the queue size, however we use 21 (in fact 20) to simplify the impl */
-+    q->sq = bce_create_sq(dev->bce, q->cq, name, (u32) (q->el_count + 1), direction, cfn, dev);
-+    if (!q->sq)
-+        return -EINVAL;
-+
-+    q->data = dma_alloc_coherent(&dev->bce->pci->dev, q->el_size * q->el_count, &q->dma_addr, GFP_KERNEL);
-+    if (!q->data) {
-+        bce_destroy_sq(dev->bce, q->sq);
-+        return -EINVAL;
-+    }
-+    return 0;
-+}
-+
-+static void aaudio_send_create_tag(struct aaudio_bce *b, int *tagn, char tag[4])
-+{
-+    char tag_zero[5];
-+    b->tag_num = (b->tag_num + 1) % AAUDIO_BCE_QUEUE_TAG_COUNT;
-+    *tagn = b->tag_num;
-+    snprintf(tag_zero, 5, "S%03d", b->tag_num);
-+    *((u32 *) tag) = *((u32 *) tag_zero);
-+}
-+
-+int __aaudio_send_prepare(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, char *tag)
-+{
-+    int status;
-+    size_t index;
-+    void *dptr;
-+    struct aaudio_msg_header *header;
-+    if ((status = bce_reserve_submission(b->qout.sq, &ctx->timeout)))
-+        return status;
-+    spin_lock_irqsave(&b->spinlock, ctx->irq_flags);
-+    index = b->qout.data_tail;
-+    dptr = (u8 *) b->qout.data + index * b->qout.el_size;
-+    ctx->msg.data = dptr;
-+    header = dptr;
-+    if (tag)
-+        *((u32 *) header->tag) = *((u32 *) tag);
-+    else
-+        aaudio_send_create_tag(b, &ctx->tag_n, header->tag);
-+    return 0;
-+}
-+
-+void __aaudio_send(struct aaudio_bce *b, struct aaudio_send_ctx *ctx)
-+{
-+    struct bce_qe_submission *s = bce_next_submission(b->qout.sq);
-+#ifdef DEBUG
-+    pr_debug("aaudio: Sending command data\n");
-+    print_hex_dump(KERN_DEBUG, "aaudio:OUT ", DUMP_PREFIX_NONE, 32, 1, ctx->msg.data, ctx->msg.size, true);
-+#endif
-+    bce_set_submission_single(s, b->qout.dma_addr + (dma_addr_t) (ctx->msg.data - b->qout.data), ctx->msg.size);
-+    bce_submit_to_device(b->qout.sq);
-+    b->qout.data_tail = (b->qout.data_tail + 1) % b->qout.el_count;
-+    spin_unlock_irqrestore(&b->spinlock, ctx->irq_flags);
-+}
-+
-+int __aaudio_send_cmd_sync(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, struct aaudio_msg *reply)
-+{
-+    struct aaudio_bce_queue_entry ent;
-+    DECLARE_COMPLETION_ONSTACK(cmpl);
-+    ent.msg = reply;
-+    ent.cmpl = &cmpl;
-+    b->pending_entries[ctx->tag_n] = &ent;
-+    __aaudio_send(b, ctx); /* unlocks the spinlock */
-+    ctx->timeout = wait_for_completion_timeout(&cmpl, ctx->timeout);
-+    if (ctx->timeout == 0) {
-+        /* Remove the pending queue entry; this will be normally handled by the completion route but
-+         * during a timeout it won't */
-+        spin_lock_irqsave(&b->spinlock, ctx->irq_flags);
-+        if (b->pending_entries[ctx->tag_n] == &ent)
-+            b->pending_entries[ctx->tag_n] = NULL;
-+        spin_unlock_irqrestore(&b->spinlock, ctx->irq_flags);
-+        return -ETIMEDOUT;
-+    }
-+    return 0;
-+}
-+
-+static void aaudio_handle_reply(struct aaudio_bce *b, struct aaudio_msg *reply)
-+{
-+    const char *tag;
-+    int tagn;
-+    unsigned long irq_flags;
-+    char tag_zero[5];
-+    struct aaudio_bce_queue_entry *entry;
-+
-+    tag = ((struct aaudio_msg_header *) reply->data)->tag;
-+    if (tag[0] != 'S') {
-+        pr_err("aaudio_handle_reply: Unexpected tag: %.4s\n", tag);
-+        return;
-+    }
-+    *((u32 *) tag_zero) = *((u32 *) tag);
-+    tag_zero[4] = 0;
-+    if (kstrtoint(&tag_zero[1], 10, &tagn)) {
-+        pr_err("aaudio_handle_reply: Tag parse failed: %.4s\n", tag);
-+        return;
-+    }
-+
-+    spin_lock_irqsave(&b->spinlock, irq_flags);
-+    entry = b->pending_entries[tagn];
-+    if (entry) {
-+        if (reply->size < entry->msg->size)
-+            entry->msg->size = reply->size;
-+        memcpy(entry->msg->data, reply->data, entry->msg->size);
-+        complete(entry->cmpl);
-+
-+        b->pending_entries[tagn] = NULL;
-+    } else {
-+        pr_err("aaudio_handle_reply: No queued item found for tag: %.4s\n", tag);
-+    }
-+    spin_unlock_irqrestore(&b->spinlock, irq_flags);
-+}
-+
-+static void aaudio_bce_out_queue_completion(struct bce_queue_sq *sq)
-+{
-+    while (bce_next_completion(sq)) {
-+        //pr_info("aaudio: Send confirmed\n");
-+        bce_notify_submission_complete(sq);
-+    }
-+}
-+
-+static void aaudio_bce_in_queue_handle_msg(struct aaudio_device *a, struct aaudio_msg *msg);
-+
-+static void aaudio_bce_in_queue_completion(struct bce_queue_sq *sq)
-+{
-+    struct aaudio_msg msg;
-+    struct aaudio_device *dev = sq->userdata;
-+    struct aaudio_bce_queue *q = &dev->bcem.qin;
-+    struct bce_sq_completion_data *c;
-+    size_t cnt = 0;
-+
-+    mb();
-+    while ((c = bce_next_completion(sq))) {
-+        msg.data = (u8 *) q->data + q->data_head * q->el_size;
-+        msg.size = c->data_size;
-+#ifdef DEBUG
-+        pr_debug("aaudio: Received command data %llx\n", c->data_size);
-+        print_hex_dump(KERN_DEBUG, "aaudio:IN ", DUMP_PREFIX_NONE, 32, 1, msg.data, min(msg.size, 128UL), true);
-+#endif
-+        aaudio_bce_in_queue_handle_msg(dev, &msg);
-+
-+        q->data_head = (q->data_head + 1) % q->el_count;
-+
-+        bce_notify_submission_complete(sq);
-+        ++cnt;
-+    }
-+    aaudio_bce_in_queue_submit_pending(q, cnt);
-+}
-+
-+static void aaudio_bce_in_queue_handle_msg(struct aaudio_device *a, struct aaudio_msg *msg)
-+{
-+    struct aaudio_msg_header *header = (struct aaudio_msg_header *) msg->data;
-+    if (msg->size < sizeof(struct aaudio_msg_header)) {
-+        pr_err("aaudio: Msg size smaller than header (%lx)", msg->size);
-+        return;
-+    }
-+    if (header->type == AAUDIO_MSG_TYPE_RESPONSE) {
-+        aaudio_handle_reply(&a->bcem, msg);
-+    } else if (header->type == AAUDIO_MSG_TYPE_COMMAND) {
-+        aaudio_handle_command(a, msg);
-+    } else if (header->type == AAUDIO_MSG_TYPE_NOTIFICATION) {
-+        aaudio_handle_notification(a, msg);
-+    }
-+}
-+
-+void aaudio_bce_in_queue_submit_pending(struct aaudio_bce_queue *q, size_t count)
-+{
-+    struct bce_qe_submission *s;
-+    while (count--) {
-+        if (bce_reserve_submission(q->sq, NULL)) {
-+            pr_err("aaudio: Failed to reserve an event queue submission\n");
-+            break;
-+        }
-+        s = bce_next_submission(q->sq);
-+        bce_set_submission_single(s, q->dma_addr + (dma_addr_t) (q->data_tail * q->el_size), q->el_size);
-+        q->data_tail = (q->data_tail + 1) % q->el_count;
-+    }
-+    bce_submit_to_device(q->sq);
-+}
-+
-+struct aaudio_msg aaudio_reply_alloc(void)
-+{
-+    struct aaudio_msg ret;
-+    ret.size = AAUDIO_BCE_QUEUE_ELEMENT_SIZE;
-+    ret.data = kmalloc(ret.size, GFP_KERNEL);
-+    return ret;
-+}
-+
-+void aaudio_reply_free(struct aaudio_msg *reply)
-+{
-+    kfree(reply->data);
-+}
-diff --git a/drivers/staging/apple-bce/audio/protocol_bce.h b/drivers/staging/apple-bce/audio/protocol_bce.h
-new file mode 100644
-index 000000000000..14d26c05ddf9
---- /dev/null
-+++ b/drivers/staging/apple-bce/audio/protocol_bce.h
-@@ -0,0 +1,72 @@
-+#ifndef AAUDIO_PROTOCOL_BCE_H
-+#define AAUDIO_PROTOCOL_BCE_H
-+
-+#include "protocol.h"
-+#include "../queue.h"
-+
-+#define AAUDIO_BCE_QUEUE_ELEMENT_SIZE 0x1000
-+#define AAUDIO_BCE_QUEUE_ELEMENT_COUNT 20
-+
-+#define AAUDIO_BCE_QUEUE_TAG_COUNT 1000
-+
-+struct aaudio_device;
-+
-+struct aaudio_bce_queue_entry {
-+    struct aaudio_msg *msg;
-+    struct completion *cmpl;
-+};
-+struct aaudio_bce_queue {
-+    struct bce_queue_cq *cq;
-+    struct bce_queue_sq *sq;
-+    void *data;
-+    dma_addr_t dma_addr;
-+    size_t data_head, data_tail;
-+    size_t el_size, el_count;
-+};
-+struct aaudio_bce {
-+    struct bce_queue_cq *cq;
-+    struct aaudio_bce_queue qin;
-+    struct aaudio_bce_queue qout;
-+    int tag_num;
-+    struct aaudio_bce_queue_entry *pending_entries[AAUDIO_BCE_QUEUE_TAG_COUNT];
-+    struct spinlock spinlock;
-+};
-+
-+struct aaudio_send_ctx {
-+    int status;
-+    int tag_n;
-+    unsigned long irq_flags;
-+    struct aaudio_msg msg;
-+    unsigned long timeout;
-+};
-+
-+int aaudio_bce_init(struct aaudio_device *dev);
-+int __aaudio_send_prepare(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, char *tag);
-+void __aaudio_send(struct aaudio_bce *b, struct aaudio_send_ctx *ctx);
-+int __aaudio_send_cmd_sync(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, struct aaudio_msg *reply);
-+
-+#define aaudio_send_with_tag(a, ctx, tag, tout, fn, ...) ({ \
-+    (ctx)->timeout = msecs_to_jiffies(tout); \
-+    (ctx)->status = __aaudio_send_prepare(&(a)->bcem, (ctx), (tag)); \
-+    if (!(ctx)->status) { \
-+        fn(&(ctx)->msg, ##__VA_ARGS__); \
-+        __aaudio_send(&(a)->bcem, (ctx)); \
-+    } \
-+    (ctx)->status; \
-+})
-+#define aaudio_send(a, ctx, tout, fn, ...) aaudio_send_with_tag(a, ctx, NULL, tout, fn, ##__VA_ARGS__)
-+
-+#define aaudio_send_cmd_sync(a, ctx, reply, tout, fn, ...) ({ \
-+    (ctx)->timeout = msecs_to_jiffies(tout); \
-+    (ctx)->status = __aaudio_send_prepare(&(a)->bcem, (ctx), NULL); \
-+    if (!(ctx)->status) { \
-+        fn(&(ctx)->msg, ##__VA_ARGS__); \
-+        (ctx)->status = __aaudio_send_cmd_sync(&(a)->bcem, (ctx), (reply)); \
-+    } \
-+    (ctx)->status; \
-+})
-+
-+struct aaudio_msg aaudio_reply_alloc(void);
-+void aaudio_reply_free(struct aaudio_msg *reply);
-+
-+#endif //AAUDIO_PROTOCOL_BCE_H
-diff --git a/drivers/staging/apple-bce/mailbox.c b/drivers/staging/apple-bce/mailbox.c
-new file mode 100644
-index 000000000000..e24bd35215c0
---- /dev/null
-+++ b/drivers/staging/apple-bce/mailbox.c
-@@ -0,0 +1,151 @@
-+#include "mailbox.h"
-+#include <linux/atomic.h>
-+#include "apple_bce.h"
-+
-+#define REG_MBOX_OUT_BASE 0x820
-+#define REG_MBOX_REPLY_COUNTER 0x108
-+#define REG_MBOX_REPLY_BASE 0x810
-+#define REG_TIMESTAMP_BASE 0xC000
-+
-+#define BCE_MBOX_TIMEOUT_MS 200
-+
-+void bce_mailbox_init(struct bce_mailbox *mb, void __iomem *reg_mb)
-+{
-+    mb->reg_mb = reg_mb;
-+    init_completion(&mb->mb_completion);
-+}
-+
-+int bce_mailbox_send(struct bce_mailbox *mb, u64 msg, u64* recv)
-+{
-+    u32 __iomem *regb;
-+
-+    if (atomic_cmpxchg(&mb->mb_status, 0, 1) != 0) {
-+        return -EEXIST; // We don't support two messages at once
-+    }
-+    reinit_completion(&mb->mb_completion);
-+
-+    pr_debug("bce_mailbox_send: %llx\n", msg);
-+    regb = (u32*) ((u8*) mb->reg_mb + REG_MBOX_OUT_BASE);
-+    iowrite32((u32) msg, regb);
-+    iowrite32((u32) (msg >> 32), regb + 1);
-+    iowrite32(0, regb + 2);
-+    iowrite32(0, regb + 3);
-+
-+    wait_for_completion_timeout(&mb->mb_completion, msecs_to_jiffies(BCE_MBOX_TIMEOUT_MS));
-+    if (atomic_read(&mb->mb_status) != 2) { // Didn't get the reply
-+        atomic_set(&mb->mb_status, 0);
-+        return -ETIMEDOUT;
-+    }
-+
-+    *recv = mb->mb_result;
-+    pr_debug("bce_mailbox_send: reply %llx\n", *recv);
-+
-+    atomic_set(&mb->mb_status, 0);
-+    return 0;
-+}
-+
-+static int bce_mailbox_retrive_response(struct bce_mailbox *mb)
-+{
-+    u32 __iomem *regb;
-+    u32 lo, hi;
-+    int count, counter;
-+    u32 res = ioread32((u8*) mb->reg_mb + REG_MBOX_REPLY_COUNTER);
-+    count = (res >> 20) & 0xf;
-+    counter = count;
-+    pr_debug("bce_mailbox_retrive_response count=%i\n", count);
-+    while (counter--) {
-+        regb = (u32*) ((u8*) mb->reg_mb + REG_MBOX_REPLY_BASE);
-+        lo = ioread32(regb);
-+        hi = ioread32(regb + 1);
-+        ioread32(regb + 2);
-+        ioread32(regb + 3);
-+        pr_debug("bce_mailbox_retrive_response %llx\n", ((u64) hi << 32) | lo);
-+        mb->mb_result = ((u64) hi << 32) | lo;
-+    }
-+    return count > 0 ? 0 : -ENODATA;
-+}
-+
-+int bce_mailbox_handle_interrupt(struct bce_mailbox *mb)
-+{
-+    int status = bce_mailbox_retrive_response(mb);
-+    if (!status) {
-+        atomic_set(&mb->mb_status, 2);
-+        complete(&mb->mb_completion);
-+    }
-+    return status;
-+}
-+
-+static void bc_send_timestamp(struct timer_list *tl);
-+
-+void bce_timestamp_init(struct bce_timestamp *ts, void __iomem *reg)
-+{
-+    u32 __iomem *regb;
-+
-+    spin_lock_init(&ts->stop_sl);
-+    ts->stopped = false;
-+
-+    ts->reg = reg;
-+
-+    regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE);
-+
-+    ioread32(regb);
-+    mb();
-+
-+    timer_setup(&ts->timer, bc_send_timestamp, 0);
-+}
-+
-+void bce_timestamp_start(struct bce_timestamp *ts, bool is_initial)
-+{
-+    unsigned long flags;
-+    u32 __iomem *regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE);
-+
-+    if (is_initial) {
-+        iowrite32((u32) -4, regb + 2);
-+        iowrite32((u32) -1, regb);
-+    } else {
-+        iowrite32((u32) -3, regb + 2);
-+        iowrite32((u32) -1, regb);
-+    }
-+
-+    spin_lock_irqsave(&ts->stop_sl, flags);
-+    ts->stopped = false;
-+    spin_unlock_irqrestore(&ts->stop_sl, flags);
-+    mod_timer(&ts->timer, jiffies + msecs_to_jiffies(150));
-+}
-+
-+void bce_timestamp_stop(struct bce_timestamp *ts)
-+{
-+    unsigned long flags;
-+    u32 __iomem *regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE);
-+
-+    spin_lock_irqsave(&ts->stop_sl, flags);
-+    ts->stopped = true;
-+    spin_unlock_irqrestore(&ts->stop_sl, flags);
-+    del_timer_sync(&ts->timer);
-+
-+    iowrite32((u32) -2, regb + 2);
-+    iowrite32((u32) -1, regb);
-+}
-+
-+static void bc_send_timestamp(struct timer_list *tl)
-+{
-+    struct bce_timestamp *ts;
-+    unsigned long flags;
-+    u32 __iomem *regb;
-+    ktime_t bt;
-+
-+    ts = container_of(tl, struct bce_timestamp, timer);
-+    regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE);
-+    local_irq_save(flags);
-+    ioread32(regb + 2);
-+    mb();
-+    bt = ktime_get_boottime();
-+    iowrite32((u32) bt, regb + 2);
-+    iowrite32((u32) (bt >> 32), regb);
-+
-+    spin_lock(&ts->stop_sl);
-+    if (!ts->stopped)
-+        mod_timer(&ts->timer, jiffies + msecs_to_jiffies(150));
-+    spin_unlock(&ts->stop_sl);
-+    local_irq_restore(flags);
-+}
-\ No newline at end of file
-diff --git a/drivers/staging/apple-bce/mailbox.h b/drivers/staging/apple-bce/mailbox.h
-new file mode 100644
-index 000000000000..f3323f95ba51
---- /dev/null
-+++ b/drivers/staging/apple-bce/mailbox.h
-@@ -0,0 +1,53 @@
-+#ifndef BCE_MAILBOX_H
-+#define BCE_MAILBOX_H
-+
-+#include <linux/completion.h>
-+#include <linux/pci.h>
-+#include <linux/timer.h>
-+
-+struct bce_mailbox {
-+    void __iomem *reg_mb;
-+
-+    atomic_t mb_status; // possible statuses: 0 (no msg), 1 (has active msg), 2 (got reply)
-+    struct completion mb_completion;
-+    uint64_t mb_result;
-+};
-+
-+enum bce_message_type {
-+    BCE_MB_REGISTER_COMMAND_SQ = 0x7,            // to-device
-+    BCE_MB_REGISTER_COMMAND_CQ = 0x8,            // to-device
-+    BCE_MB_REGISTER_COMMAND_QUEUE_REPLY = 0xB,   // to-host
-+    BCE_MB_SET_FW_PROTOCOL_VERSION = 0xC,        // both
-+    BCE_MB_SLEEP_NO_STATE = 0x14,                // to-device
-+    BCE_MB_RESTORE_NO_STATE = 0x15,              // to-device
-+    BCE_MB_SAVE_STATE_AND_SLEEP = 0x17,          // to-device
-+    BCE_MB_RESTORE_STATE_AND_WAKE = 0x18,        // to-device
-+    BCE_MB_SAVE_STATE_AND_SLEEP_FAILURE = 0x19,  // from-device
-+    BCE_MB_SAVE_RESTORE_STATE_COMPLETE = 0x1A,   // from-device
-+};
-+
-+#define BCE_MB_MSG(type, value) (((u64) (type) << 58) | ((value) & 0x3FFFFFFFFFFFFFFLL))
-+#define BCE_MB_TYPE(v) ((u32) (v >> 58))
-+#define BCE_MB_VALUE(v) (v & 0x3FFFFFFFFFFFFFFLL)
-+
-+void bce_mailbox_init(struct bce_mailbox *mb, void __iomem *reg_mb);
-+
-+int bce_mailbox_send(struct bce_mailbox *mb, u64 msg, u64* recv);
-+
-+int bce_mailbox_handle_interrupt(struct bce_mailbox *mb);
-+
-+
-+struct bce_timestamp {
-+    void __iomem *reg;
-+    struct timer_list timer;
-+    struct spinlock stop_sl;
-+    bool stopped;
-+};
-+
-+void bce_timestamp_init(struct bce_timestamp *ts, void __iomem *reg);
-+
-+void bce_timestamp_start(struct bce_timestamp *ts, bool is_initial);
-+
-+void bce_timestamp_stop(struct bce_timestamp *ts);
-+
-+#endif //BCEDRIVER_MAILBOX_H
-diff --git a/drivers/staging/apple-bce/queue.c b/drivers/staging/apple-bce/queue.c
-new file mode 100644
-index 000000000000..bc9cd3bc6f0c
---- /dev/null
-+++ b/drivers/staging/apple-bce/queue.c
-@@ -0,0 +1,390 @@
-+#include "queue.h"
-+#include "apple_bce.h"
-+
-+#define REG_DOORBELL_BASE 0x44000
-+
-+struct bce_queue_cq *bce_alloc_cq(struct apple_bce_device *dev, int qid, u32 el_count)
-+{
-+    struct bce_queue_cq *q;
-+    q = kzalloc(sizeof(struct bce_queue_cq), GFP_KERNEL);
-+    q->qid = qid;
-+    q->type = BCE_QUEUE_CQ;
-+    q->el_count = el_count;
-+    q->data = dma_alloc_coherent(&dev->pci->dev, el_count * sizeof(struct bce_qe_completion),
-+            &q->dma_handle, GFP_KERNEL);
-+    if (!q->data) {
-+        pr_err("DMA queue memory alloc failed\n");
-+        kfree(q);
-+        return NULL;
-+    }
-+    return q;
-+}
-+
-+void bce_get_cq_memcfg(struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg)
-+{
-+    cfg->qid = (u16) cq->qid;
-+    cfg->el_count = (u16) cq->el_count;
-+    cfg->vector_or_cq = 0;
-+    cfg->_pad = 0;
-+    cfg->addr = cq->dma_handle;
-+    cfg->length = cq->el_count * sizeof(struct bce_qe_completion);
-+}
-+
-+void bce_free_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq)
-+{
-+    dma_free_coherent(&dev->pci->dev, cq->el_count * sizeof(struct bce_qe_completion), cq->data, cq->dma_handle);
-+    kfree(cq);
-+}
-+
-+static void bce_handle_cq_completion(struct apple_bce_device *dev, struct bce_qe_completion *e, size_t *ce)
-+{
-+    struct bce_queue *target;
-+    struct bce_queue_sq *target_sq;
-+    struct bce_sq_completion_data *cmpl;
-+    if (e->qid >= BCE_MAX_QUEUE_COUNT) {
-+        pr_err("Device sent a response for qid (%u) >= BCE_MAX_QUEUE_COUNT\n", e->qid);
-+        return;
-+    }
-+    target = dev->queues[e->qid];
-+    if (!target || target->type != BCE_QUEUE_SQ) {
-+        pr_err("Device sent a response for qid (%u), which does not exist\n", e->qid);
-+        return;
-+    }
-+    target_sq = (struct bce_queue_sq *) target;
-+    if (target_sq->completion_tail != e->completion_index) {
-+        pr_err("Completion index mismatch; this is likely going to make this driver unusable\n");
-+        return;
-+    }
-+    if (!target_sq->has_pending_completions) {
-+        target_sq->has_pending_completions = true;
-+        dev->int_sq_list[(*ce)++] = target_sq;
-+    }
-+    cmpl = &target_sq->completion_data[e->completion_index];
-+    cmpl->status = e->status;
-+    cmpl->data_size = e->data_size;
-+    cmpl->result = e->result;
-+    wmb();
-+    target_sq->completion_tail = (target_sq->completion_tail + 1) % target_sq->el_count;
-+}
-+
-+void bce_handle_cq_completions(struct apple_bce_device *dev, struct bce_queue_cq *cq)
-+{
-+    size_t ce = 0;
-+    struct bce_qe_completion *e;
-+    struct bce_queue_sq *sq;
-+    e = bce_cq_element(cq, cq->index);
-+    if (!(e->flags & BCE_COMPLETION_FLAG_PENDING))
-+        return;
-+    mb();
-+    while (true) {
-+        e = bce_cq_element(cq, cq->index);
-+        if (!(e->flags & BCE_COMPLETION_FLAG_PENDING))
-+            break;
-+        // pr_info("apple-bce: compl: %i: %i %llx %llx", e->qid, e->status, e->data_size, e->result);
-+        bce_handle_cq_completion(dev, e, &ce);
-+        e->flags = 0;
-+        cq->index = (cq->index + 1) % cq->el_count;
-+    }
-+    mb();
-+    iowrite32(cq->index, (u32 *) ((u8 *) dev->reg_mem_dma +  REG_DOORBELL_BASE) + cq->qid);
-+    while (ce) {
-+        --ce;
-+        sq = dev->int_sq_list[ce];
-+        sq->completion(sq);
-+        sq->has_pending_completions = false;
-+    }
-+}
-+
-+
-+struct bce_queue_sq *bce_alloc_sq(struct apple_bce_device *dev, int qid, u32 el_size, u32 el_count,
-+        bce_sq_completion compl, void *userdata)
-+{
-+    struct bce_queue_sq *q;
-+    q = kzalloc(sizeof(struct bce_queue_sq), GFP_KERNEL);
-+    q->qid = qid;
-+    q->type = BCE_QUEUE_SQ;
-+    q->el_size = el_size;
-+    q->el_count = el_count;
-+    q->data = dma_alloc_coherent(&dev->pci->dev, el_count * el_size,
-+                                 &q->dma_handle, GFP_KERNEL);
-+    q->completion = compl;
-+    q->userdata = userdata;
-+    q->completion_data = kzalloc(sizeof(struct bce_sq_completion_data) * el_count, GFP_KERNEL);
-+    q->reg_mem_dma = dev->reg_mem_dma;
-+    atomic_set(&q->available_commands, el_count - 1);
-+    init_completion(&q->available_command_completion);
-+    atomic_set(&q->available_command_completion_waiting_count, 0);
-+    if (!q->data) {
-+        pr_err("DMA queue memory alloc failed\n");
-+        kfree(q);
-+        return NULL;
-+    }
-+    return q;
-+}
-+
-+void bce_get_sq_memcfg(struct bce_queue_sq *sq, struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg)
-+{
-+    cfg->qid = (u16) sq->qid;
-+    cfg->el_count = (u16) sq->el_count;
-+    cfg->vector_or_cq = (u16) cq->qid;
-+    cfg->_pad = 0;
-+    cfg->addr = sq->dma_handle;
-+    cfg->length = sq->el_count * sq->el_size;
-+}
-+
-+void bce_free_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq)
-+{
-+    dma_free_coherent(&dev->pci->dev, sq->el_count * sq->el_size, sq->data, sq->dma_handle);
-+    kfree(sq);
-+}
-+
-+int bce_reserve_submission(struct bce_queue_sq *sq, unsigned long *timeout)
-+{
-+    while (atomic_dec_if_positive(&sq->available_commands) < 0) {
-+        if (!timeout || !*timeout)
-+            return -EAGAIN;
-+        atomic_inc(&sq->available_command_completion_waiting_count);
-+        *timeout = wait_for_completion_timeout(&sq->available_command_completion, *timeout);
-+        if (!*timeout) {
-+            if (atomic_dec_if_positive(&sq->available_command_completion_waiting_count) < 0)
-+                try_wait_for_completion(&sq->available_command_completion); /* consume the pending completion */
-+        }
-+    }
-+    return 0;
-+}
-+
-+void bce_cancel_submission_reservation(struct bce_queue_sq *sq)
-+{
-+    atomic_inc(&sq->available_commands);
-+}
-+
-+void *bce_next_submission(struct bce_queue_sq *sq)
-+{
-+    void *ret = bce_sq_element(sq, sq->tail);
-+    sq->tail = (sq->tail + 1) % sq->el_count;
-+    return ret;
-+}
-+
-+void bce_submit_to_device(struct bce_queue_sq *sq)
-+{
-+    mb();
-+    iowrite32(sq->tail, (u32 *) ((u8 *) sq->reg_mem_dma +  REG_DOORBELL_BASE) + sq->qid);
-+}
-+
-+void bce_notify_submission_complete(struct bce_queue_sq *sq)
-+{
-+    sq->head = (sq->head + 1) % sq->el_count;
-+    atomic_inc(&sq->available_commands);
-+    if (atomic_dec_if_positive(&sq->available_command_completion_waiting_count) >= 0) {
-+        complete(&sq->available_command_completion);
-+    }
-+}
-+
-+void bce_set_submission_single(struct bce_qe_submission *element, dma_addr_t addr, size_t size)
-+{
-+    element->addr = addr;
-+    element->length = size;
-+    element->segl_addr = element->segl_length = 0;
-+}
-+
-+static void bce_cmdq_completion(struct bce_queue_sq *q);
-+
-+struct bce_queue_cmdq *bce_alloc_cmdq(struct apple_bce_device *dev, int qid, u32 el_count)
-+{
-+    struct bce_queue_cmdq *q;
-+    q = kzalloc(sizeof(struct bce_queue_cmdq), GFP_KERNEL);
-+    q->sq = bce_alloc_sq(dev, qid, BCE_CMD_SIZE, el_count, bce_cmdq_completion, q);
-+    if (!q->sq) {
-+        kfree(q);
-+        return NULL;
-+    }
-+    spin_lock_init(&q->lck);
-+    q->tres = kzalloc(sizeof(struct bce_queue_cmdq_result_el*) * el_count, GFP_KERNEL);
-+    if (!q->tres) {
-+        kfree(q);
-+        return NULL;
-+    }
-+    return q;
-+}
-+
-+void bce_free_cmdq(struct apple_bce_device *dev, struct bce_queue_cmdq *cmdq)
-+{
-+    bce_free_sq(dev, cmdq->sq);
-+    kfree(cmdq->tres);
-+    kfree(cmdq);
-+}
-+
-+void bce_cmdq_completion(struct bce_queue_sq *q)
-+{
-+    struct bce_queue_cmdq_result_el *el;
-+    struct bce_queue_cmdq *cmdq = q->userdata;
-+    struct bce_sq_completion_data *result;
-+
-+    spin_lock(&cmdq->lck);
-+    while ((result = bce_next_completion(q))) {
-+        el = cmdq->tres[cmdq->sq->head];
-+        if (el) {
-+            el->result = result->result;
-+            el->status = result->status;
-+            mb();
-+            complete(&el->cmpl);
-+        } else {
-+            pr_err("apple-bce: Unexpected command queue completion\n");
-+        }
-+        cmdq->tres[cmdq->sq->head] = NULL;
-+        bce_notify_submission_complete(q);
-+    }
-+    spin_unlock(&cmdq->lck);
-+}
-+
-+static __always_inline void *bce_cmd_start(struct bce_queue_cmdq *cmdq, struct bce_queue_cmdq_result_el *res)
-+{
-+    void *ret;
-+    unsigned long timeout;
-+    init_completion(&res->cmpl);
-+    mb();
-+
-+    timeout = msecs_to_jiffies(1000L * 60 * 5); /* wait for up to ~5 minutes */
-+    if (bce_reserve_submission(cmdq->sq, &timeout))
-+        return NULL;
-+
-+    spin_lock(&cmdq->lck);
-+    cmdq->tres[cmdq->sq->tail] = res;
-+    ret = bce_next_submission(cmdq->sq);
-+    return ret;
-+}
-+
-+static __always_inline void bce_cmd_finish(struct bce_queue_cmdq *cmdq, struct bce_queue_cmdq_result_el *res)
-+{
-+    bce_submit_to_device(cmdq->sq);
-+    spin_unlock(&cmdq->lck);
-+
-+    wait_for_completion(&res->cmpl);
-+    mb();
-+}
-+
-+u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg *cfg, const char *name, bool isdirout)
-+{
-+    struct bce_queue_cmdq_result_el res;
-+    struct bce_cmdq_register_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res);
-+    if (!cmd)
-+        return (u32) -1;
-+    cmd->cmd = BCE_CMD_REGISTER_MEMORY_QUEUE;
-+    cmd->flags = (u16) ((name ? 2 : 0) | (isdirout ? 1 : 0));
-+    cmd->qid = cfg->qid;
-+    cmd->el_count = cfg->el_count;
-+    cmd->vector_or_cq = cfg->vector_or_cq;
-+    memset(cmd->name, 0, sizeof(cmd->name));
-+    if (name) {
-+        cmd->name_len = (u16) min(strlen(name), (size_t) sizeof(cmd->name));
-+        memcpy(cmd->name, name, cmd->name_len);
-+    } else {
-+        cmd->name_len = 0;
-+    }
-+    cmd->addr = cfg->addr;
-+    cmd->length = cfg->length;
-+
-+    bce_cmd_finish(cmdq, &res);
-+    return res.status;
-+}
-+
-+u32 bce_cmd_unregister_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid)
-+{
-+    struct bce_queue_cmdq_result_el res;
-+    struct bce_cmdq_simple_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res);
-+    if (!cmd)
-+        return (u32) -1;
-+    cmd->cmd = BCE_CMD_UNREGISTER_MEMORY_QUEUE;
-+    cmd->flags = 0;
-+    cmd->qid = qid;
-+    bce_cmd_finish(cmdq, &res);
-+    return res.status;
-+}
-+
-+u32 bce_cmd_flush_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid)
-+{
-+    struct bce_queue_cmdq_result_el res;
-+    struct bce_cmdq_simple_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res);
-+    if (!cmd)
-+        return (u32) -1;
-+    cmd->cmd = BCE_CMD_FLUSH_MEMORY_QUEUE;
-+    cmd->flags = 0;
-+    cmd->qid = qid;
-+    bce_cmd_finish(cmdq, &res);
-+    return res.status;
-+}
-+
-+
-+struct bce_queue_cq *bce_create_cq(struct apple_bce_device *dev, u32 el_count)
-+{
-+    struct bce_queue_cq *cq;
-+    struct bce_queue_memcfg cfg;
-+    int qid = ida_simple_get(&dev->queue_ida, BCE_QUEUE_USER_MIN, BCE_QUEUE_USER_MAX, GFP_KERNEL);
-+    if (qid < 0)
-+        return NULL;
-+    cq = bce_alloc_cq(dev, qid, el_count);
-+    if (!cq)
-+        return NULL;
-+    bce_get_cq_memcfg(cq, &cfg);
-+    if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, NULL, false) != 0) {
-+        pr_err("apple-bce: CQ registration failed (%i)", qid);
-+        bce_free_cq(dev, cq);
-+        ida_simple_remove(&dev->queue_ida, (uint) qid);
-+        return NULL;
-+    }
-+    dev->queues[qid] = (struct bce_queue *) cq;
-+    return cq;
-+}
-+
-+struct bce_queue_sq *bce_create_sq(struct apple_bce_device *dev, struct bce_queue_cq *cq, const char *name, u32 el_count,
-+        int direction, bce_sq_completion compl, void *userdata)
-+{
-+    struct bce_queue_sq *sq;
-+    struct bce_queue_memcfg cfg;
-+    int qid;
-+    if (cq == NULL)
-+        return NULL; /* cq can not be null */
-+    if (name == NULL)
-+        return NULL; /* name can not be null */
-+    if (direction != DMA_TO_DEVICE && direction != DMA_FROM_DEVICE)
-+        return NULL; /* unsupported direction */
-+    qid = ida_simple_get(&dev->queue_ida, BCE_QUEUE_USER_MIN, BCE_QUEUE_USER_MAX, GFP_KERNEL);
-+    if (qid < 0)
-+        return NULL;
-+    sq = bce_alloc_sq(dev, qid, sizeof(struct bce_qe_submission), el_count, compl, userdata);
-+    if (!sq)
-+        return NULL;
-+    bce_get_sq_memcfg(sq, cq, &cfg);
-+    if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, name, direction != DMA_FROM_DEVICE) != 0) {
-+        pr_err("apple-bce: SQ registration failed (%i)", qid);
-+        bce_free_sq(dev, sq);
-+        ida_simple_remove(&dev->queue_ida, (uint) qid);
-+        return NULL;
-+    }
-+    spin_lock(&dev->queues_lock);
-+    dev->queues[qid] = (struct bce_queue *) sq;
-+    spin_unlock(&dev->queues_lock);
-+    return sq;
-+}
-+
-+void bce_destroy_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq)
-+{
-+    if (!dev->is_being_removed && bce_cmd_unregister_memory_queue(dev->cmd_cmdq, (u16) cq->qid))
-+        pr_err("apple-bce: CQ unregister failed");
-+    spin_lock(&dev->queues_lock);
-+    dev->queues[cq->qid] = NULL;
-+    spin_unlock(&dev->queues_lock);
-+    ida_simple_remove(&dev->queue_ida, (uint) cq->qid);
-+    bce_free_cq(dev, cq);
-+}
-+
-+void bce_destroy_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq)
-+{
-+    if (!dev->is_being_removed && bce_cmd_unregister_memory_queue(dev->cmd_cmdq, (u16) sq->qid))
-+        pr_err("apple-bce: CQ unregister failed");
-+    spin_lock(&dev->queues_lock);
-+    dev->queues[sq->qid] = NULL;
-+    spin_unlock(&dev->queues_lock);
-+    ida_simple_remove(&dev->queue_ida, (uint) sq->qid);
-+    bce_free_sq(dev, sq);
-+}
-\ No newline at end of file
-diff --git a/drivers/staging/apple-bce/queue.h b/drivers/staging/apple-bce/queue.h
-new file mode 100644
-index 000000000000..8368ac5dfca8
---- /dev/null
-+++ b/drivers/staging/apple-bce/queue.h
-@@ -0,0 +1,177 @@
-+#ifndef BCE_QUEUE_H
-+#define BCE_QUEUE_H
-+
-+#include <linux/completion.h>
-+#include <linux/pci.h>
-+
-+#define BCE_CMD_SIZE 0x40
-+
-+struct apple_bce_device;
-+
-+enum bce_queue_type {
-+    BCE_QUEUE_CQ, BCE_QUEUE_SQ
-+};
-+struct bce_queue {
-+    int qid;
-+    int type;
-+};
-+struct bce_queue_cq {
-+    int qid;
-+    int type;
-+    u32 el_count;
-+    dma_addr_t dma_handle;
-+    void *data;
-+
-+    u32 index;
-+};
-+struct bce_queue_sq;
-+typedef void (*bce_sq_completion)(struct bce_queue_sq *q);
-+struct bce_sq_completion_data {
-+    u32 status;
-+    u64 data_size;
-+    u64 result;
-+};
-+struct bce_queue_sq {
-+    int qid;
-+    int type;
-+    u32 el_size;
-+    u32 el_count;
-+    dma_addr_t dma_handle;
-+    void *data;
-+    void *userdata;
-+    void __iomem *reg_mem_dma;
-+
-+    atomic_t available_commands;
-+    struct completion available_command_completion;
-+    atomic_t available_command_completion_waiting_count;
-+    u32 head, tail;
-+
-+    u32 completion_cidx, completion_tail;
-+    struct bce_sq_completion_data *completion_data;
-+    bool has_pending_completions;
-+    bce_sq_completion completion;
-+};
-+
-+struct bce_queue_cmdq_result_el {
-+    struct completion cmpl;
-+    u32 status;
-+    u64 result;
-+};
-+struct bce_queue_cmdq {
-+    struct bce_queue_sq *sq;
-+    struct spinlock lck;
-+    struct bce_queue_cmdq_result_el **tres;
-+};
-+
-+struct bce_queue_memcfg {
-+    u16 qid;
-+    u16 el_count;
-+    u16 vector_or_cq;
-+    u16 _pad;
-+    u64 addr;
-+    u64 length;
-+};
-+
-+enum bce_qe_completion_status {
-+    BCE_COMPLETION_SUCCESS = 0,
-+    BCE_COMPLETION_ERROR = 1,
-+    BCE_COMPLETION_ABORTED = 2,
-+    BCE_COMPLETION_NO_SPACE = 3,
-+    BCE_COMPLETION_OVERRUN = 4
-+};
-+enum bce_qe_completion_flags {
-+    BCE_COMPLETION_FLAG_PENDING = 0x8000
-+};
-+struct bce_qe_completion {
-+    u64 result;
-+    u64 data_size;
-+    u16 qid;
-+    u16 completion_index;
-+    u16 status; // bce_qe_completion_status
-+    u16 flags;  // bce_qe_completion_flags
-+};
-+
-+struct bce_qe_submission {
-+    u64 length;
-+    u64 addr;
-+
-+    u64 segl_addr;
-+    u64 segl_length;
-+};
-+
-+enum bce_cmdq_command {
-+    BCE_CMD_REGISTER_MEMORY_QUEUE = 0x20,
-+    BCE_CMD_UNREGISTER_MEMORY_QUEUE = 0x30,
-+    BCE_CMD_FLUSH_MEMORY_QUEUE = 0x40,
-+    BCE_CMD_SET_MEMORY_QUEUE_PROPERTY = 0x50
-+};
-+struct bce_cmdq_simple_memory_queue_cmd {
-+    u16 cmd; // bce_cmdq_command
-+    u16 flags;
-+    u16 qid;
-+};
-+struct bce_cmdq_register_memory_queue_cmd {
-+    u16 cmd; // bce_cmdq_command
-+    u16 flags;
-+    u16 qid;
-+    u16 _pad;
-+    u16 el_count;
-+    u16 vector_or_cq;
-+    u16 _pad2;
-+    u16 name_len;
-+    char name[0x20];
-+    u64 addr;
-+    u64 length;
-+};
-+
-+static __always_inline void *bce_sq_element(struct bce_queue_sq *q, int i) {
-+    return (void *) ((u8 *) q->data + q->el_size * i);
-+}
-+static __always_inline void *bce_cq_element(struct bce_queue_cq *q, int i) {
-+    return (void *) ((struct bce_qe_completion *) q->data + i);
-+}
-+
-+static __always_inline struct bce_sq_completion_data *bce_next_completion(struct bce_queue_sq *sq) {
-+    struct bce_sq_completion_data *res;
-+    rmb();
-+    if (sq->completion_cidx == sq->completion_tail)
-+        return NULL;
-+    res = &sq->completion_data[sq->completion_cidx];
-+    sq->completion_cidx = (sq->completion_cidx + 1) % sq->el_count;
-+    return res;
-+}
-+
-+struct bce_queue_cq *bce_alloc_cq(struct apple_bce_device *dev, int qid, u32 el_count);
-+void bce_get_cq_memcfg(struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg);
-+void bce_free_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq);
-+void bce_handle_cq_completions(struct apple_bce_device *dev, struct bce_queue_cq *cq);
-+
-+struct bce_queue_sq *bce_alloc_sq(struct apple_bce_device *dev, int qid, u32 el_size, u32 el_count,
-+        bce_sq_completion compl, void *userdata);
-+void bce_get_sq_memcfg(struct bce_queue_sq *sq, struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg);
-+void bce_free_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq);
-+int bce_reserve_submission(struct bce_queue_sq *sq, unsigned long *timeout);
-+void bce_cancel_submission_reservation(struct bce_queue_sq *sq);
-+void *bce_next_submission(struct bce_queue_sq *sq);
-+void bce_submit_to_device(struct bce_queue_sq *sq);
-+void bce_notify_submission_complete(struct bce_queue_sq *sq);
-+
-+void bce_set_submission_single(struct bce_qe_submission *element, dma_addr_t addr, size_t size);
-+
-+struct bce_queue_cmdq *bce_alloc_cmdq(struct apple_bce_device *dev, int qid, u32 el_count);
-+void bce_free_cmdq(struct apple_bce_device *dev, struct bce_queue_cmdq *cmdq);
-+
-+u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg *cfg, const char *name, bool isdirout);
-+u32 bce_cmd_unregister_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid);
-+u32 bce_cmd_flush_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid);
-+
-+
-+/* User API - Creates and registers the queue */
-+
-+struct bce_queue_cq *bce_create_cq(struct apple_bce_device *dev, u32 el_count);
-+struct bce_queue_sq *bce_create_sq(struct apple_bce_device *dev, struct bce_queue_cq *cq, const char *name, u32 el_count,
-+        int direction, bce_sq_completion compl, void *userdata);
-+void bce_destroy_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq);
-+void bce_destroy_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq);
-+
-+#endif //BCEDRIVER_MAILBOX_H
-diff --git a/drivers/staging/apple-bce/queue_dma.c b/drivers/staging/apple-bce/queue_dma.c
-new file mode 100644
-index 000000000000..b236613285c0
---- /dev/null
-+++ b/drivers/staging/apple-bce/queue_dma.c
-@@ -0,0 +1,220 @@
-+#include "queue_dma.h"
-+#include <linux/vmalloc.h>
-+#include <linux/mm.h>
-+#include "queue.h"
-+
-+static int bce_alloc_scatterlist_from_vm(struct sg_table *tbl, void *data, size_t len);
-+static struct bce_segment_list_element_hostinfo *bce_map_segment_list(
-+        struct device *dev, struct scatterlist *pages, int pagen);
-+static void bce_unmap_segement_list(struct device *dev, struct bce_segment_list_element_hostinfo *list);
-+
-+int bce_map_dma_buffer(struct device *dev, struct bce_dma_buffer *buf, struct sg_table scatterlist,
-+        enum dma_data_direction dir)
-+{
-+    int cnt;
-+
-+    buf->direction = dir;
-+    buf->scatterlist = scatterlist;
-+    buf->seglist_hostinfo = NULL;
-+
-+    cnt = dma_map_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir);
-+    if (cnt != buf->scatterlist.nents) {
-+        pr_err("apple-bce: DMA scatter list mapping returned an unexpected count: %i\n", cnt);
-+        dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir);
-+        return -EIO;
-+    }
-+    if (cnt == 1)
-+        return 0;
-+
-+    buf->seglist_hostinfo = bce_map_segment_list(dev, buf->scatterlist.sgl, buf->scatterlist.nents);
-+    if (!buf->seglist_hostinfo) {
-+        pr_err("apple-bce: Creating segment list failed\n");
-+        dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir);
-+        return -EIO;
-+    }
-+    return 0;
-+}
-+
-+int bce_map_dma_buffer_vm(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
-+                          enum dma_data_direction dir)
-+{
-+    int status;
-+    struct sg_table scatterlist;
-+    if ((status = bce_alloc_scatterlist_from_vm(&scatterlist, data, len)))
-+        return status;
-+    if ((status = bce_map_dma_buffer(dev, buf, scatterlist, dir))) {
-+        sg_free_table(&scatterlist);
-+        return status;
-+    }
-+    return 0;
-+}
-+
-+int bce_map_dma_buffer_km(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
-+                          enum dma_data_direction dir)
-+{
-+    /* Kernel memory is continuous which is great for us. */
-+    int status;
-+    struct sg_table scatterlist;
-+    if ((status = sg_alloc_table(&scatterlist, 1, GFP_KERNEL))) {
-+        sg_free_table(&scatterlist);
-+        return status;
-+    }
-+    sg_set_buf(scatterlist.sgl, data, (uint) len);
-+    if ((status = bce_map_dma_buffer(dev, buf, scatterlist, dir))) {
-+        sg_free_table(&scatterlist);
-+        return status;
-+    }
-+    return 0;
-+}
-+
-+void bce_unmap_dma_buffer(struct device *dev, struct bce_dma_buffer *buf)
-+{
-+    dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, buf->direction);
-+    bce_unmap_segement_list(dev, buf->seglist_hostinfo);
-+}
-+
-+
-+static int bce_alloc_scatterlist_from_vm(struct sg_table *tbl, void *data, size_t len)
-+{
-+    int status, i;
-+    struct page **pages;
-+    size_t off, start_page, end_page, page_count;
-+    off        = (size_t) data % PAGE_SIZE;
-+    start_page = (size_t) data  / PAGE_SIZE;
-+    end_page   = ((size_t) data + len - 1) / PAGE_SIZE;
-+    page_count = end_page - start_page + 1;
-+
-+    if (page_count > PAGE_SIZE / sizeof(struct page *))
-+        pages = vmalloc(page_count * sizeof(struct page *));
-+    else
-+        pages = kmalloc(page_count * sizeof(struct page *), GFP_KERNEL);
-+
-+    for (i = 0; i < page_count; i++)
-+        pages[i] = vmalloc_to_page((void *) ((start_page + i) * PAGE_SIZE));
-+
-+    if ((status = sg_alloc_table_from_pages(tbl, pages, page_count, (unsigned int) off, len, GFP_KERNEL))) {
-+        sg_free_table(tbl);
-+    }
-+
-+    if (page_count > PAGE_SIZE / sizeof(struct page *))
-+        vfree(pages);
-+    else
-+        kfree(pages);
-+    return status;
-+}
-+
-+#define BCE_ELEMENTS_PER_PAGE ((PAGE_SIZE - sizeof(struct bce_segment_list_header)) \
-+                               / sizeof(struct bce_segment_list_element))
-+#define BCE_ELEMENTS_PER_ADDITIONAL_PAGE (PAGE_SIZE / sizeof(struct bce_segment_list_element))
-+
-+static struct bce_segment_list_element_hostinfo *bce_map_segment_list(
-+        struct device *dev, struct scatterlist *pages, int pagen)
-+{
-+    size_t ptr, pptr = 0;
-+    struct bce_segment_list_header theader; /* a temp header, to store the initial seg */
-+    struct bce_segment_list_header *header;
-+    struct bce_segment_list_element *el, *el_end;
-+    struct bce_segment_list_element_hostinfo *out, *pout, *out_root;
-+    struct scatterlist *sg;
-+    int i;
-+    header = &theader;
-+    out = out_root = NULL;
-+    el = el_end = NULL;
-+    for_each_sg(pages, sg, pagen, i) {
-+        if (el >= el_end) {
-+            /* allocate a new page, this will be also done for the first element */
-+            ptr = __get_free_page(GFP_KERNEL);
-+            if (pptr && ptr == pptr + PAGE_SIZE) {
-+                out->page_count++;
-+                header->element_count += BCE_ELEMENTS_PER_ADDITIONAL_PAGE;
-+                el_end += BCE_ELEMENTS_PER_ADDITIONAL_PAGE;
-+            } else {
-+                header = (void *) ptr;
-+                header->element_count = BCE_ELEMENTS_PER_PAGE;
-+                header->data_size = 0;
-+                header->next_segl_addr = 0;
-+                header->next_segl_length = 0;
-+                el = (void *) (header + 1);
-+                el_end = el + BCE_ELEMENTS_PER_PAGE;
-+
-+                if (out) {
-+                    out->next = kmalloc(sizeof(struct bce_segment_list_element_hostinfo), GFP_KERNEL);
-+                    out = out->next;
-+                } else {
-+                    out_root = out = kmalloc(sizeof(struct bce_segment_list_element_hostinfo), GFP_KERNEL);
-+                }
-+                out->page_start = (void *) ptr;
-+                out->page_count = 1;
-+                out->dma_start = DMA_MAPPING_ERROR;
-+                out->next = NULL;
-+            }
-+            pptr = ptr;
-+        }
-+        el->addr = sg->dma_address;
-+        el->length = sg->length;
-+        header->data_size += el->length;
-+    }
-+
-+    /* DMA map */
-+    out = out_root;
-+    pout = NULL;
-+    while (out) {
-+        out->dma_start = dma_map_single(dev, out->page_start, out->page_count * PAGE_SIZE, DMA_TO_DEVICE);
-+        if (dma_mapping_error(dev, out->dma_start))
-+            goto error;
-+        if (pout) {
-+            header = pout->page_start;
-+            header->next_segl_addr = out->dma_start;
-+            header->next_segl_length = out->page_count * PAGE_SIZE;
-+        }
-+        pout = out;
-+        out = out->next;
-+    }
-+    return out_root;
-+
-+    error:
-+    bce_unmap_segement_list(dev, out_root);
-+    return NULL;
-+}
-+
-+static void bce_unmap_segement_list(struct device *dev, struct bce_segment_list_element_hostinfo *list)
-+{
-+    struct bce_segment_list_element_hostinfo *next;
-+    while (list) {
-+        if (list->dma_start != DMA_MAPPING_ERROR)
-+            dma_unmap_single(dev, list->dma_start, list->page_count * PAGE_SIZE, DMA_TO_DEVICE);
-+        next = list->next;
-+        kfree(list);
-+        list = next;
-+    }
-+}
-+
-+int bce_set_submission_buf(struct bce_qe_submission *element, struct bce_dma_buffer *buf, size_t offset, size_t length)
-+{
-+    struct bce_segment_list_element_hostinfo *seg;
-+    struct bce_segment_list_header *seg_header;
-+
-+    seg = buf->seglist_hostinfo;
-+    if (!seg) {
-+        element->addr = buf->scatterlist.sgl->dma_address + offset;
-+        element->length = length;
-+        element->segl_addr = 0;
-+        element->segl_length = 0;
-+        return 0;
-+    }
-+
-+    while (seg) {
-+        seg_header = seg->page_start;
-+        if (offset <= seg_header->data_size)
-+            break;
-+        offset -= seg_header->data_size;
-+        seg = seg->next;
-+    }
-+    if (!seg)
-+        return -EINVAL;
-+    element->addr = offset;
-+    element->length = buf->scatterlist.sgl->dma_length;
-+    element->segl_addr = seg->dma_start;
-+    element->segl_length = seg->page_count * PAGE_SIZE;
-+    return 0;
-+}
-\ No newline at end of file
-diff --git a/drivers/staging/apple-bce/queue_dma.h b/drivers/staging/apple-bce/queue_dma.h
-new file mode 100644
-index 000000000000..f8a57e50e7a3
---- /dev/null
-+++ b/drivers/staging/apple-bce/queue_dma.h
-@@ -0,0 +1,50 @@
-+#ifndef BCE_QUEUE_DMA_H
-+#define BCE_QUEUE_DMA_H
-+
-+#include <linux/pci.h>
-+
-+struct bce_qe_submission;
-+
-+struct bce_segment_list_header {
-+    u64 element_count;
-+    u64 data_size;
-+
-+    u64 next_segl_addr;
-+    u64 next_segl_length;
-+};
-+struct bce_segment_list_element {
-+    u64 addr;
-+    u64 length;
-+};
-+
-+struct bce_segment_list_element_hostinfo {
-+    struct bce_segment_list_element_hostinfo *next;
-+    void *page_start;
-+    size_t page_count;
-+    dma_addr_t dma_start;
-+};
-+
-+
-+struct bce_dma_buffer {
-+    enum dma_data_direction direction;
-+    struct sg_table scatterlist;
-+    struct bce_segment_list_element_hostinfo *seglist_hostinfo;
-+};
-+
-+/* NOTE: Takes ownership of the sg_table if it succeeds. Ownership is not transferred on failure. */
-+int bce_map_dma_buffer(struct device *dev, struct bce_dma_buffer *buf, struct sg_table scatterlist,
-+        enum dma_data_direction dir);
-+
-+/* Creates a buffer from virtual memory (vmalloc) */
-+int bce_map_dma_buffer_vm(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
-+        enum dma_data_direction dir);
-+
-+/* Creates a buffer from kernel memory (kmalloc) */
-+int bce_map_dma_buffer_km(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
-+                          enum dma_data_direction dir);
-+
-+void bce_unmap_dma_buffer(struct device *dev, struct bce_dma_buffer *buf);
-+
-+int bce_set_submission_buf(struct bce_qe_submission *element, struct bce_dma_buffer *buf, size_t offset, size_t length);
-+
-+#endif //BCE_QUEUE_DMA_H
-diff --git a/drivers/staging/apple-bce/vhci/command.h b/drivers/staging/apple-bce/vhci/command.h
-new file mode 100644
-index 000000000000..26619e0bccfa
---- /dev/null
-+++ b/drivers/staging/apple-bce/vhci/command.h
-@@ -0,0 +1,204 @@
-+#ifndef BCE_VHCI_COMMAND_H
-+#define BCE_VHCI_COMMAND_H
-+
-+#include "queue.h"
-+#include <linux/jiffies.h>
-+#include <linux/usb.h>
-+
-+#define BCE_VHCI_CMD_TIMEOUT_SHORT msecs_to_jiffies(2000)
-+#define BCE_VHCI_CMD_TIMEOUT_LONG msecs_to_jiffies(30000)
-+
-+#define BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2 2
-+#define BCE_VHCI_BULK_MAX_ACTIVE_URBS (1 << BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2)
-+
-+typedef u8 bce_vhci_port_t;
-+typedef u8 bce_vhci_device_t;
-+
-+enum bce_vhci_command {
-+    BCE_VHCI_CMD_CONTROLLER_ENABLE = 1,
-+    BCE_VHCI_CMD_CONTROLLER_DISABLE = 2,
-+    BCE_VHCI_CMD_CONTROLLER_START = 3,
-+    BCE_VHCI_CMD_CONTROLLER_PAUSE = 4,
-+
-+    BCE_VHCI_CMD_PORT_POWER_ON = 0x10,
-+    BCE_VHCI_CMD_PORT_POWER_OFF = 0x11,
-+    BCE_VHCI_CMD_PORT_RESUME = 0x12,
-+    BCE_VHCI_CMD_PORT_SUSPEND = 0x13,
-+    BCE_VHCI_CMD_PORT_RESET = 0x14,
-+    BCE_VHCI_CMD_PORT_DISABLE = 0x15,
-+    BCE_VHCI_CMD_PORT_STATUS = 0x16,
-+
-+    BCE_VHCI_CMD_DEVICE_CREATE = 0x30,
-+    BCE_VHCI_CMD_DEVICE_DESTROY = 0x31,
-+
-+    BCE_VHCI_CMD_ENDPOINT_CREATE = 0x40,
-+    BCE_VHCI_CMD_ENDPOINT_DESTROY = 0x41,
-+    BCE_VHCI_CMD_ENDPOINT_SET_STATE = 0x42,
-+    BCE_VHCI_CMD_ENDPOINT_RESET = 0x44,
-+
-+    /* Device to host only */
-+    BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE = 0x43,
-+    BCE_VHCI_CMD_TRANSFER_REQUEST = 0x1000,
-+    BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS = 0x1005
-+};
-+
-+enum bce_vhci_endpoint_state {
-+    BCE_VHCI_ENDPOINT_ACTIVE = 0,
-+    BCE_VHCI_ENDPOINT_PAUSED = 1,
-+    BCE_VHCI_ENDPOINT_STALLED = 2
-+};
-+
-+static inline int bce_vhci_cmd_controller_enable(struct bce_vhci_command_queue *q, u8 busNum, u16 *portMask)
-+{
-+    int status;
-+    struct bce_vhci_message cmd, res;
-+    cmd.cmd = BCE_VHCI_CMD_CONTROLLER_ENABLE;
-+    cmd.param1 = 0x7100u | busNum;
-+    status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
-+    if (!status)
-+        *portMask = (u16) res.param2;
-+    return status;
-+}
-+static inline int bce_vhci_cmd_controller_disable(struct bce_vhci_command_queue *q)
-+{
-+    struct bce_vhci_message cmd, res;
-+    cmd.cmd = BCE_VHCI_CMD_CONTROLLER_DISABLE;
-+    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
-+}
-+static inline int bce_vhci_cmd_controller_start(struct bce_vhci_command_queue *q)
-+{
-+    struct bce_vhci_message cmd, res;
-+    cmd.cmd = BCE_VHCI_CMD_CONTROLLER_START;
-+    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
-+}
-+static inline int bce_vhci_cmd_controller_pause(struct bce_vhci_command_queue *q)
-+{
-+    struct bce_vhci_message cmd, res;
-+    cmd.cmd = BCE_VHCI_CMD_CONTROLLER_PAUSE;
-+    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
-+}
-+
-+static inline int bce_vhci_cmd_port_power_on(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
-+{
-+    struct bce_vhci_message cmd, res;
-+    cmd.cmd = BCE_VHCI_CMD_PORT_POWER_ON;
-+    cmd.param1 = port;
-+    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
-+}
-+static inline int bce_vhci_cmd_port_power_off(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
-+{
-+    struct bce_vhci_message cmd, res;
-+    cmd.cmd = BCE_VHCI_CMD_PORT_POWER_OFF;
-+    cmd.param1 = port;
-+    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
-+}
-+static inline int bce_vhci_cmd_port_resume(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
-+{
-+    struct bce_vhci_message cmd, res;
-+    cmd.cmd = BCE_VHCI_CMD_PORT_RESUME;
-+    cmd.param1 = port;
-+    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
-+}
-+static inline int bce_vhci_cmd_port_suspend(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
-+{
-+    struct bce_vhci_message cmd, res;
-+    cmd.cmd = BCE_VHCI_CMD_PORT_SUSPEND;
-+    cmd.param1 = port;
-+    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
-+}
-+static inline int bce_vhci_cmd_port_reset(struct bce_vhci_command_queue *q, bce_vhci_port_t port, u32 timeout)
-+{
-+    struct bce_vhci_message cmd, res;
-+    cmd.cmd = BCE_VHCI_CMD_PORT_RESET;
-+    cmd.param1 = port;
-+    cmd.param2 = timeout;
-+    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
-+}
-+static inline int bce_vhci_cmd_port_disable(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
-+{
-+    struct bce_vhci_message cmd, res;
-+    cmd.cmd = BCE_VHCI_CMD_PORT_DISABLE;
-+    cmd.param1 = port;
-+    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
-+}
-+static inline int bce_vhci_cmd_port_status(struct bce_vhci_command_queue *q, bce_vhci_port_t port,
-+        u32 clearFlags, u32 *resStatus)
-+{
-+    int status;
-+    struct bce_vhci_message cmd, res;
-+    cmd.cmd = BCE_VHCI_CMD_PORT_STATUS;
-+    cmd.param1 = port;
-+    cmd.param2 = clearFlags & 0x560000;
-+    status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
-+    if (status >= 0)
-+        *resStatus = (u32) res.param2;
-+    return status;
-+}
-+
-+static inline int bce_vhci_cmd_device_create(struct bce_vhci_command_queue *q, bce_vhci_port_t port,
-+        bce_vhci_device_t *dev)
-+{
-+    int status;
-+    struct bce_vhci_message cmd, res;
-+    cmd.cmd = BCE_VHCI_CMD_DEVICE_CREATE;
-+    cmd.param1 = port;
-+    status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
-+    if (!status)
-+        *dev = (bce_vhci_device_t) res.param2;
-+    return status;
-+}
-+static inline int bce_vhci_cmd_device_destroy(struct bce_vhci_command_queue *q, bce_vhci_device_t dev)
-+{
-+    struct bce_vhci_message cmd, res;
-+    cmd.cmd = BCE_VHCI_CMD_DEVICE_DESTROY;
-+    cmd.param1 = dev;
-+    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
-+}
-+
-+static inline int bce_vhci_cmd_endpoint_create(struct bce_vhci_command_queue *q, bce_vhci_device_t dev,
-+        struct usb_endpoint_descriptor *desc)
-+{
-+    struct bce_vhci_message cmd, res;
-+    int endpoint_type = usb_endpoint_type(desc);
-+    int maxp = usb_endpoint_maxp(desc);
-+    int maxp_burst = usb_endpoint_maxp_mult(desc) * maxp;
-+    u8 max_active_requests_pow2 = 0;
-+    cmd.cmd = BCE_VHCI_CMD_ENDPOINT_CREATE;
-+    cmd.param1 = dev | ((desc->bEndpointAddress & 0x8Fu) << 8);
-+    if (endpoint_type == USB_ENDPOINT_XFER_BULK)
-+        max_active_requests_pow2 = BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2;
-+    cmd.param2 = endpoint_type | ((max_active_requests_pow2 & 0xf) << 4) | (maxp << 16) | ((u64) maxp_burst << 32);
-+    if (endpoint_type == USB_ENDPOINT_XFER_INT)
-+        cmd.param2 |= (desc->bInterval - 1) << 8;
-+    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
-+}
-+static inline int bce_vhci_cmd_endpoint_destroy(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint)
-+{
-+    struct bce_vhci_message cmd, res;
-+    cmd.cmd = BCE_VHCI_CMD_ENDPOINT_DESTROY;
-+    cmd.param1 = dev | (endpoint << 8);
-+    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
-+}
-+static inline int bce_vhci_cmd_endpoint_set_state(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint,
-+        enum bce_vhci_endpoint_state newState, enum bce_vhci_endpoint_state *retState)
-+{
-+    int status;
-+    struct bce_vhci_message cmd, res;
-+    cmd.cmd = BCE_VHCI_CMD_ENDPOINT_SET_STATE;
-+    cmd.param1 = dev | (endpoint << 8);
-+    cmd.param2 = (u64) newState;
-+    status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
-+    if (status != BCE_VHCI_INTERNAL_ERROR && status != BCE_VHCI_NO_POWER)
-+        *retState = (enum bce_vhci_endpoint_state) res.param2;
-+    return status;
-+}
-+static inline int bce_vhci_cmd_endpoint_reset(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint)
-+{
-+    struct bce_vhci_message cmd, res;
-+    cmd.cmd = BCE_VHCI_CMD_ENDPOINT_RESET;
-+    cmd.param1 = dev | (endpoint << 8);
-+    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
-+}
-+
-+
-+#endif //BCE_VHCI_COMMAND_H
-diff --git a/drivers/staging/apple-bce/vhci/queue.c b/drivers/staging/apple-bce/vhci/queue.c
-new file mode 100644
-index 000000000000..7b0b5027157b
---- /dev/null
-+++ b/drivers/staging/apple-bce/vhci/queue.c
-@@ -0,0 +1,268 @@
-+#include "queue.h"
-+#include "vhci.h"
-+#include "../apple_bce.h"
-+
-+
-+static void bce_vhci_message_queue_completion(struct bce_queue_sq *sq);
-+
-+int bce_vhci_message_queue_create(struct bce_vhci *vhci, struct bce_vhci_message_queue *ret, const char *name)
-+{
-+    int status;
-+    ret->cq = bce_create_cq(vhci->dev, VHCI_EVENT_QUEUE_EL_COUNT);
-+    if (!ret->cq)
-+        return -EINVAL;
-+    ret->sq = bce_create_sq(vhci->dev, ret->cq, name, VHCI_EVENT_QUEUE_EL_COUNT, DMA_TO_DEVICE,
-+                            bce_vhci_message_queue_completion, ret);
-+    if (!ret->sq) {
-+        status = -EINVAL;
-+        goto fail_cq;
-+    }
-+    ret->data = dma_alloc_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT,
-+                                   &ret->dma_addr, GFP_KERNEL);
-+    if (!ret->data) {
-+        status = -EINVAL;
-+        goto fail_sq;
-+    }
-+    return 0;
-+
-+fail_sq:
-+    bce_destroy_sq(vhci->dev, ret->sq);
-+    ret->sq = NULL;
-+fail_cq:
-+    bce_destroy_cq(vhci->dev, ret->cq);
-+    ret->cq = NULL;
-+    return status;
-+}
-+
-+void bce_vhci_message_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_message_queue *q)
-+{
-+    if (!q->cq)
-+        return;
-+    dma_free_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT,
-+                      q->data, q->dma_addr);
-+    bce_destroy_sq(vhci->dev, q->sq);
-+    bce_destroy_cq(vhci->dev, q->cq);
-+}
-+
-+void bce_vhci_message_queue_write(struct bce_vhci_message_queue *q, struct bce_vhci_message *req)
-+{
-+    int sidx;
-+    struct bce_qe_submission *s;
-+    sidx = q->sq->tail;
-+    s = bce_next_submission(q->sq);
-+    pr_debug("bce-vhci: Send message: %x s=%x p1=%x p2=%llx\n", req->cmd, req->status, req->param1, req->param2);
-+    q->data[sidx] = *req;
-+    bce_set_submission_single(s, q->dma_addr + sizeof(struct bce_vhci_message) * sidx,
-+            sizeof(struct bce_vhci_message));
-+    bce_submit_to_device(q->sq);
-+}
-+
-+static void bce_vhci_message_queue_completion(struct bce_queue_sq *sq)
-+{
-+    while (bce_next_completion(sq))
-+        bce_notify_submission_complete(sq);
-+}
-+
-+
-+
-+static void bce_vhci_event_queue_completion(struct bce_queue_sq *sq);
-+
-+int __bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name,
-+                                  bce_sq_completion compl)
-+{
-+    ret->vhci = vhci;
-+
-+    ret->sq = bce_create_sq(vhci->dev, vhci->ev_cq, name, VHCI_EVENT_QUEUE_EL_COUNT, DMA_FROM_DEVICE, compl, ret);
-+    if (!ret->sq)
-+        return -EINVAL;
-+    ret->data = dma_alloc_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT,
-+                                   &ret->dma_addr, GFP_KERNEL);
-+    if (!ret->data) {
-+        bce_destroy_sq(vhci->dev, ret->sq);
-+        ret->sq = NULL;
-+        return -EINVAL;
-+    }
-+
-+    init_completion(&ret->queue_empty_completion);
-+    bce_vhci_event_queue_submit_pending(ret, VHCI_EVENT_PENDING_COUNT);
-+    return 0;
-+}
-+
-+int bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name,
-+        bce_vhci_event_queue_callback cb)
-+{
-+    ret->cb = cb;
-+    return __bce_vhci_event_queue_create(vhci, ret, name, bce_vhci_event_queue_completion);
-+}
-+
-+void bce_vhci_event_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_event_queue *q)
-+{
-+    if (!q->sq)
-+        return;
-+    dma_free_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT,
-+                      q->data, q->dma_addr);
-+    bce_destroy_sq(vhci->dev, q->sq);
-+}
-+
-+static void bce_vhci_event_queue_completion(struct bce_queue_sq *sq)
-+{
-+    struct bce_sq_completion_data *cd;
-+    struct bce_vhci_event_queue *ev = sq->userdata;
-+    struct bce_vhci_message *msg;
-+    size_t cnt = 0;
-+
-+    while ((cd = bce_next_completion(sq))) {
-+        if (cd->status == BCE_COMPLETION_ABORTED) { /* We flushed the queue */
-+            bce_notify_submission_complete(sq);
-+            continue;
-+        }
-+        msg = &ev->data[sq->head];
-+        pr_debug("bce-vhci: Got event: %x s=%x p1=%x p2=%llx\n", msg->cmd, msg->status, msg->param1, msg->param2);
-+        ev->cb(ev, msg);
-+
-+        bce_notify_submission_complete(sq);
-+        ++cnt;
-+    }
-+    bce_vhci_event_queue_submit_pending(ev, cnt);
-+    if (atomic_read(&sq->available_commands) == sq->el_count - 1)
-+        complete(&ev->queue_empty_completion);
-+}
-+
-+void bce_vhci_event_queue_submit_pending(struct bce_vhci_event_queue *q, size_t count)
-+{
-+    int idx;
-+    struct bce_qe_submission *s;
-+    while (count--) {
-+        if (bce_reserve_submission(q->sq, NULL)) {
-+            pr_err("bce-vhci: Failed to reserve an event queue submission\n");
-+            break;
-+        }
-+        idx = q->sq->tail;
-+        s = bce_next_submission(q->sq);
-+        bce_set_submission_single(s,
-+                                  q->dma_addr + idx * sizeof(struct bce_vhci_message), sizeof(struct bce_vhci_message));
-+    }
-+    bce_submit_to_device(q->sq);
-+}
-+
-+void bce_vhci_event_queue_pause(struct bce_vhci_event_queue *q)
-+{
-+    unsigned long timeout;
-+    reinit_completion(&q->queue_empty_completion);
-+    if (bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, q->sq->qid))
-+        pr_warn("bce-vhci: failed to flush event queue\n");
-+    timeout = msecs_to_jiffies(5000);
-+    while (atomic_read(&q->sq->available_commands) != q->sq->el_count - 1) {
-+        timeout = wait_for_completion_timeout(&q->queue_empty_completion, timeout);
-+        if (timeout == 0) {
-+            pr_err("bce-vhci: waiting for queue to be flushed timed out\n");
-+            break;
-+        }
-+    }
-+}
-+
-+void bce_vhci_event_queue_resume(struct bce_vhci_event_queue *q)
-+{
-+    if (atomic_read(&q->sq->available_commands) != q->sq->el_count - 1) {
-+        pr_err("bce-vhci: resume of a queue with pending submissions\n");
-+        return;
-+    }
-+    bce_vhci_event_queue_submit_pending(q, VHCI_EVENT_PENDING_COUNT);
-+}
-+
-+void bce_vhci_command_queue_create(struct bce_vhci_command_queue *ret, struct bce_vhci_message_queue *mq)
-+{
-+    ret->mq = mq;
-+    ret->completion.result = NULL;
-+    init_completion(&ret->completion.completion);
-+    spin_lock_init(&ret->completion_lock);
-+    mutex_init(&ret->mutex);
-+}
-+
-+void bce_vhci_command_queue_destroy(struct bce_vhci_command_queue *cq)
-+{
-+    spin_lock(&cq->completion_lock);
-+    if (cq->completion.result) {
-+        memset(cq->completion.result, 0, sizeof(struct bce_vhci_message));
-+        cq->completion.result->status = BCE_VHCI_ABORT;
-+        complete(&cq->completion.completion);
-+        cq->completion.result = NULL;
-+    }
-+    spin_unlock(&cq->completion_lock);
-+    mutex_lock(&cq->mutex);
-+    mutex_unlock(&cq->mutex);
-+    mutex_destroy(&cq->mutex);
-+}
-+
-+void bce_vhci_command_queue_deliver_completion(struct bce_vhci_command_queue *cq, struct bce_vhci_message *msg)
-+{
-+    struct bce_vhci_command_queue_completion *c = &cq->completion;
-+
-+    spin_lock(&cq->completion_lock);
-+    if (c->result) {
-+        *c->result = *msg;
-+        complete(&c->completion);
-+        c->result = NULL;
-+    }
-+    spin_unlock(&cq->completion_lock);
-+}
-+
-+static int __bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req,
-+        struct bce_vhci_message *res, unsigned long timeout)
-+{
-+    int status;
-+    struct bce_vhci_command_queue_completion *c;
-+    struct bce_vhci_message creq;
-+    c = &cq->completion;
-+
-+    if ((status = bce_reserve_submission(cq->mq->sq, &timeout)))
-+        return status;
-+
-+    spin_lock(&cq->completion_lock);
-+    c->result = res;
-+    reinit_completion(&c->completion);
-+    spin_unlock(&cq->completion_lock);
-+
-+    bce_vhci_message_queue_write(cq->mq, req);
-+
-+    if (!wait_for_completion_timeout(&c->completion, timeout)) {
-+        /* we ran out of time, send cancellation */
-+        pr_debug("bce-vhci: command timed out req=%x\n", req->cmd);
-+        if ((status = bce_reserve_submission(cq->mq->sq, &timeout)))
-+            return status;
-+
-+        creq = *req;
-+        creq.cmd |= 0x4000;
-+        bce_vhci_message_queue_write(cq->mq, &creq);
-+
-+        if (!wait_for_completion_timeout(&c->completion, 1000)) {
-+            pr_err("bce-vhci: Possible desync, cmd cancel timed out\n");
-+
-+            spin_lock(&cq->completion_lock);
-+            c->result = NULL;
-+            spin_unlock(&cq->completion_lock);
-+            return -ETIMEDOUT;
-+        }
-+        if ((res->cmd & ~0x8000) == creq.cmd)
-+            return -ETIMEDOUT;
-+        /* reply for the previous command most likely arrived */
-+    }
-+
-+    if ((res->cmd & ~0x8000) != req->cmd) {
-+        pr_err("bce-vhci: Possible desync, cmd reply mismatch req=%x, res=%x\n", req->cmd, res->cmd);
-+        return -EIO;
-+    }
-+    if (res->status == BCE_VHCI_SUCCESS)
-+        return 0;
-+    return res->status;
-+}
-+
-+int bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req,
-+                                   struct bce_vhci_message *res, unsigned long timeout)
-+{
-+    int status;
-+    mutex_lock(&cq->mutex);
-+    status = __bce_vhci_command_queue_execute(cq, req, res, timeout);
-+    mutex_unlock(&cq->mutex);
-+    return status;
-+}
-diff --git a/drivers/staging/apple-bce/vhci/queue.h b/drivers/staging/apple-bce/vhci/queue.h
-new file mode 100644
-index 000000000000..adb705b6ba1d
---- /dev/null
-+++ b/drivers/staging/apple-bce/vhci/queue.h
-@@ -0,0 +1,76 @@
-+#ifndef BCE_VHCI_QUEUE_H
-+#define BCE_VHCI_QUEUE_H
-+
-+#include <linux/completion.h>
-+#include "../queue.h"
-+
-+#define VHCI_EVENT_QUEUE_EL_COUNT 256
-+#define VHCI_EVENT_PENDING_COUNT 32
-+
-+struct bce_vhci;
-+struct bce_vhci_event_queue;
-+
-+enum bce_vhci_message_status {
-+    BCE_VHCI_SUCCESS = 1,
-+    BCE_VHCI_ERROR = 2,
-+    BCE_VHCI_USB_PIPE_STALL = 3,
-+    BCE_VHCI_ABORT = 4,
-+    BCE_VHCI_BAD_ARGUMENT = 5,
-+    BCE_VHCI_OVERRUN = 6,
-+    BCE_VHCI_INTERNAL_ERROR = 7,
-+    BCE_VHCI_NO_POWER = 8,
-+    BCE_VHCI_UNSUPPORTED = 9
-+};
-+struct bce_vhci_message {
-+    u16 cmd;
-+    u16 status; // bce_vhci_message_status
-+    u32 param1;
-+    u64 param2;
-+};
-+
-+struct bce_vhci_message_queue {
-+    struct bce_queue_cq *cq;
-+    struct bce_queue_sq *sq;
-+    struct bce_vhci_message *data;
-+    dma_addr_t dma_addr;
-+};
-+typedef void (*bce_vhci_event_queue_callback)(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg);
-+struct bce_vhci_event_queue {
-+    struct bce_vhci *vhci;
-+    struct bce_queue_sq *sq;
-+    struct bce_vhci_message *data;
-+    dma_addr_t dma_addr;
-+    bce_vhci_event_queue_callback cb;
-+    struct completion queue_empty_completion;
-+};
-+struct bce_vhci_command_queue_completion {
-+    struct bce_vhci_message *result;
-+    struct completion completion;
-+};
-+struct bce_vhci_command_queue {
-+    struct bce_vhci_message_queue *mq;
-+    struct bce_vhci_command_queue_completion completion;
-+    struct spinlock completion_lock;
-+    struct mutex mutex;
-+};
-+
-+int bce_vhci_message_queue_create(struct bce_vhci *vhci, struct bce_vhci_message_queue *ret, const char *name);
-+void bce_vhci_message_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_message_queue *q);
-+void bce_vhci_message_queue_write(struct bce_vhci_message_queue *q, struct bce_vhci_message *req);
-+
-+int __bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name,
-+        bce_sq_completion compl);
-+int bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name,
-+        bce_vhci_event_queue_callback cb);
-+void bce_vhci_event_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_event_queue *q);
-+void bce_vhci_event_queue_submit_pending(struct bce_vhci_event_queue *q, size_t count);
-+void bce_vhci_event_queue_pause(struct bce_vhci_event_queue *q);
-+void bce_vhci_event_queue_resume(struct bce_vhci_event_queue *q);
-+
-+void bce_vhci_command_queue_create(struct bce_vhci_command_queue *ret, struct bce_vhci_message_queue *mq);
-+void bce_vhci_command_queue_destroy(struct bce_vhci_command_queue *cq);
-+int bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req,
-+        struct bce_vhci_message *res, unsigned long timeout);
-+void bce_vhci_command_queue_deliver_completion(struct bce_vhci_command_queue *cq, struct bce_vhci_message *msg);
-+
-+#endif //BCE_VHCI_QUEUE_H
-diff --git a/drivers/staging/apple-bce/vhci/transfer.c b/drivers/staging/apple-bce/vhci/transfer.c
-new file mode 100644
-index 000000000000..8226363d69c8
---- /dev/null
-+++ b/drivers/staging/apple-bce/vhci/transfer.c
-@@ -0,0 +1,661 @@
-+#include "transfer.h"
-+#include "../queue.h"
-+#include "vhci.h"
-+#include "../apple_bce.h"
-+#include <linux/usb/hcd.h>
-+
-+static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq);
-+static void bce_vhci_transfer_queue_giveback(struct bce_vhci_transfer_queue *q);
-+static void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q);
-+
-+static int bce_vhci_urb_init(struct bce_vhci_urb *vurb);
-+static int bce_vhci_urb_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg);
-+static int bce_vhci_urb_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c);
-+
-+static void bce_vhci_transfer_queue_reset_w(struct work_struct *work);
-+
-+void bce_vhci_create_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q,
-+        struct usb_host_endpoint *endp, bce_vhci_device_t dev_addr, enum dma_data_direction dir)
-+{
-+    char name[0x21];
-+    INIT_LIST_HEAD(&q->evq);
-+    INIT_LIST_HEAD(&q->giveback_urb_list);
-+    spin_lock_init(&q->urb_lock);
-+    mutex_init(&q->pause_lock);
-+    q->vhci = vhci;
-+    q->endp = endp;
-+    q->dev_addr = dev_addr;
-+    q->endp_addr = (u8) (endp->desc.bEndpointAddress & 0x8F);
-+    q->state = BCE_VHCI_ENDPOINT_ACTIVE;
-+    q->active = true;
-+    q->stalled = false;
-+    q->max_active_requests = 1;
-+    if (usb_endpoint_type(&endp->desc) == USB_ENDPOINT_XFER_BULK)
-+        q->max_active_requests = BCE_VHCI_BULK_MAX_ACTIVE_URBS;
-+    q->remaining_active_requests = q->max_active_requests;
-+    q->cq = bce_create_cq(vhci->dev, 0x100);
-+    INIT_WORK(&q->w_reset, bce_vhci_transfer_queue_reset_w);
-+    q->sq_in = NULL;
-+    if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) {
-+        snprintf(name, sizeof(name), "VHC1-%i-%02x", dev_addr, 0x80 | usb_endpoint_num(&endp->desc));
-+        q->sq_in = bce_create_sq(vhci->dev, q->cq, name, 0x100, DMA_FROM_DEVICE,
-+                                 bce_vhci_transfer_queue_completion, q);
-+    }
-+    q->sq_out = NULL;
-+    if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) {
-+        snprintf(name, sizeof(name), "VHC1-%i-%02x", dev_addr, usb_endpoint_num(&endp->desc));
-+        q->sq_out = bce_create_sq(vhci->dev, q->cq, name, 0x100, DMA_TO_DEVICE,
-+                                  bce_vhci_transfer_queue_completion, q);
-+    }
-+}
-+
-+void bce_vhci_destroy_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q)
-+{
-+    bce_vhci_transfer_queue_giveback(q);
-+    bce_vhci_transfer_queue_remove_pending(q);
-+    if (q->sq_in)
-+        bce_destroy_sq(vhci->dev, q->sq_in);
-+    if (q->sq_out)
-+        bce_destroy_sq(vhci->dev, q->sq_out);
-+    bce_destroy_cq(vhci->dev, q->cq);
-+}
-+
-+static inline bool bce_vhci_transfer_queue_can_init_urb(struct bce_vhci_transfer_queue *q)
-+{
-+    return q->remaining_active_requests > 0;
-+}
-+
-+static void bce_vhci_transfer_queue_defer_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg)
-+{
-+    struct bce_vhci_list_message *lm;
-+    lm = kmalloc(sizeof(struct bce_vhci_list_message), GFP_KERNEL);
-+    INIT_LIST_HEAD(&lm->list);
-+    lm->msg = *msg;
-+    list_add_tail(&lm->list, &q->evq);
-+}
-+
-+static void bce_vhci_transfer_queue_giveback(struct bce_vhci_transfer_queue *q)
-+{
-+    unsigned long flags;
-+    struct urb *urb;
-+    spin_lock_irqsave(&q->urb_lock, flags);
-+    while (!list_empty(&q->giveback_urb_list)) {
-+        urb = list_first_entry(&q->giveback_urb_list, struct urb, urb_list);
-+        list_del(&urb->urb_list);
-+
-+        spin_unlock_irqrestore(&q->urb_lock, flags);
-+        usb_hcd_giveback_urb(q->vhci->hcd, urb, urb->status);
-+        spin_lock_irqsave(&q->urb_lock, flags);
-+    }
-+    spin_unlock_irqrestore(&q->urb_lock, flags);
-+}
-+
-+static void bce_vhci_transfer_queue_init_pending_urbs(struct bce_vhci_transfer_queue *q);
-+
-+static void bce_vhci_transfer_queue_deliver_pending(struct bce_vhci_transfer_queue *q)
-+{
-+    struct urb *urb;
-+    struct bce_vhci_list_message *lm;
-+
-+    while (!list_empty(&q->endp->urb_list) && !list_empty(&q->evq)) {
-+        urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list);
-+
-+        lm = list_first_entry(&q->evq, struct bce_vhci_list_message, list);
-+        if (bce_vhci_urb_update(urb->hcpriv, &lm->msg) == -EAGAIN)
-+            break;
-+        list_del(&lm->list);
-+        kfree(lm);
-+    }
-+
-+    /* some of the URBs could have been completed, so initialize more URBs if possible */
-+    bce_vhci_transfer_queue_init_pending_urbs(q);
-+}
-+
-+static void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q)
-+{
-+    unsigned long flags;
-+    struct bce_vhci_list_message *lm;
-+    spin_lock_irqsave(&q->urb_lock, flags);
-+    while (!list_empty(&q->evq)) {
-+        lm = list_first_entry(&q->evq, struct bce_vhci_list_message, list);
-+        list_del(&lm->list);
-+        kfree(lm);
-+    }
-+    spin_unlock_irqrestore(&q->urb_lock, flags);
-+}
-+
-+void bce_vhci_transfer_queue_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg)
-+{
-+    unsigned long flags;
-+    struct bce_vhci_urb *turb;
-+    struct urb *urb;
-+    spin_lock_irqsave(&q->urb_lock, flags);
-+    bce_vhci_transfer_queue_deliver_pending(q);
-+
-+    if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST &&
-+        (!list_empty(&q->evq) || list_empty(&q->endp->urb_list))) {
-+        bce_vhci_transfer_queue_defer_event(q, msg);
-+        goto complete;
-+    }
-+    if (list_empty(&q->endp->urb_list)) {
-+        pr_err("bce-vhci: [%02x] Unexpected transfer queue event\n", q->endp_addr);
-+        goto complete;
-+    }
-+    urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list);
-+    turb = urb->hcpriv;
-+    if (bce_vhci_urb_update(turb, msg) == -EAGAIN) {
-+        bce_vhci_transfer_queue_defer_event(q, msg);
-+    } else {
-+        bce_vhci_transfer_queue_init_pending_urbs(q);
-+    }
-+
-+complete:
-+    spin_unlock_irqrestore(&q->urb_lock, flags);
-+    bce_vhci_transfer_queue_giveback(q);
-+}
-+
-+static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq)
-+{
-+    unsigned long flags;
-+    struct bce_sq_completion_data *c;
-+    struct urb *urb;
-+    struct bce_vhci_transfer_queue *q = sq->userdata;
-+    spin_lock_irqsave(&q->urb_lock, flags);
-+    while ((c = bce_next_completion(sq))) {
-+        if (c->status == BCE_COMPLETION_ABORTED) { /* We flushed the queue */
-+            pr_debug("bce-vhci: [%02x] Got an abort completion\n", q->endp_addr);
-+            bce_notify_submission_complete(sq);
-+            continue;
-+        }
-+        if (list_empty(&q->endp->urb_list)) {
-+            pr_err("bce-vhci: [%02x] Got a completion while no requests are pending\n", q->endp_addr);
-+            continue;
-+        }
-+        pr_debug("bce-vhci: [%02x] Got a transfer queue completion\n", q->endp_addr);
-+        urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list);
-+        bce_vhci_urb_transfer_completion(urb->hcpriv, c);
-+        bce_notify_submission_complete(sq);
-+    }
-+    bce_vhci_transfer_queue_deliver_pending(q);
-+    spin_unlock_irqrestore(&q->urb_lock, flags);
-+    bce_vhci_transfer_queue_giveback(q);
-+}
-+
-+int bce_vhci_transfer_queue_do_pause(struct bce_vhci_transfer_queue *q)
-+{
-+    unsigned long flags;
-+    int status;
-+    u8 endp_addr = (u8) (q->endp->desc.bEndpointAddress & 0x8F);
-+    spin_lock_irqsave(&q->urb_lock, flags);
-+    q->active = false;
-+    spin_unlock_irqrestore(&q->urb_lock, flags);
-+    if (q->sq_out) {
-+        pr_err("bce-vhci: Not implemented: wait for pending output requests\n");
-+    }
-+    bce_vhci_transfer_queue_remove_pending(q);
-+    if ((status = bce_vhci_cmd_endpoint_set_state(
-+            &q->vhci->cq, q->dev_addr, endp_addr, BCE_VHCI_ENDPOINT_PAUSED, &q->state)))
-+        return status;
-+    if (q->state != BCE_VHCI_ENDPOINT_PAUSED)
-+        return -EINVAL;
-+    if (q->sq_in)
-+        bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_in->qid);
-+    if (q->sq_out)
-+        bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_out->qid);
-+    return 0;
-+}
-+
-+static void bce_vhci_urb_resume(struct bce_vhci_urb *urb);
-+
-+int bce_vhci_transfer_queue_do_resume(struct bce_vhci_transfer_queue *q)
-+{
-+    unsigned long flags;
-+    int status;
-+    struct urb *urb, *urbt;
-+    struct bce_vhci_urb *vurb;
-+    u8 endp_addr = (u8) (q->endp->desc.bEndpointAddress & 0x8F);
-+    if ((status = bce_vhci_cmd_endpoint_set_state(
-+            &q->vhci->cq, q->dev_addr, endp_addr, BCE_VHCI_ENDPOINT_ACTIVE, &q->state)))
-+        return status;
-+    if (q->state != BCE_VHCI_ENDPOINT_ACTIVE)
-+        return -EINVAL;
-+    spin_lock_irqsave(&q->urb_lock, flags);
-+    q->active = true;
-+    list_for_each_entry_safe(urb, urbt, &q->endp->urb_list, urb_list) {
-+        vurb = urb->hcpriv;
-+        if (vurb->state == BCE_VHCI_URB_INIT_PENDING) {
-+            if (!bce_vhci_transfer_queue_can_init_urb(q))
-+                break;
-+            bce_vhci_urb_init(vurb);
-+        } else {
-+            bce_vhci_urb_resume(vurb);
-+        }
-+    }
-+    bce_vhci_transfer_queue_deliver_pending(q);
-+    spin_unlock_irqrestore(&q->urb_lock, flags);
-+    return 0;
-+}
-+
-+int bce_vhci_transfer_queue_pause(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src)
-+{
-+    int ret = 0;
-+    mutex_lock(&q->pause_lock);
-+    if ((q->paused_by & src) != src) {
-+        if (!q->paused_by)
-+            ret = bce_vhci_transfer_queue_do_pause(q);
-+        if (!ret)
-+            q->paused_by |= src;
-+    }
-+    mutex_unlock(&q->pause_lock);
-+    return ret;
-+}
-+
-+int bce_vhci_transfer_queue_resume(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src)
-+{
-+    int ret = 0;
-+    mutex_lock(&q->pause_lock);
-+    if (q->paused_by & src) {
-+        if (!(q->paused_by & ~src))
-+            ret = bce_vhci_transfer_queue_do_resume(q);
-+        if (!ret)
-+            q->paused_by &= ~src;
-+    }
-+    mutex_unlock(&q->pause_lock);
-+    return ret;
-+}
-+
-+static void bce_vhci_transfer_queue_reset_w(struct work_struct *work)
-+{
-+    unsigned long flags;
-+    struct bce_vhci_transfer_queue *q = container_of(work, struct bce_vhci_transfer_queue, w_reset);
-+
-+    mutex_lock(&q->pause_lock);
-+    spin_lock_irqsave(&q->urb_lock, flags);
-+    if (!q->stalled) {
-+        spin_unlock_irqrestore(&q->urb_lock, flags);
-+        mutex_unlock(&q->pause_lock);
-+        return;
-+    }
-+    q->active = false;
-+    spin_unlock_irqrestore(&q->urb_lock, flags);
-+    q->paused_by |= BCE_VHCI_PAUSE_INTERNAL_WQ;
-+    bce_vhci_transfer_queue_remove_pending(q);
-+    if (q->sq_in)
-+        bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_in->qid);
-+    if (q->sq_out)
-+        bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_out->qid);
-+    bce_vhci_cmd_endpoint_reset(&q->vhci->cq, q->dev_addr, (u8) (q->endp->desc.bEndpointAddress & 0x8F));
-+    spin_lock_irqsave(&q->urb_lock, flags);
-+    q->stalled = false;
-+    spin_unlock_irqrestore(&q->urb_lock, flags);
-+    mutex_unlock(&q->pause_lock);
-+    bce_vhci_transfer_queue_resume(q, BCE_VHCI_PAUSE_INTERNAL_WQ);
-+}
-+
-+void bce_vhci_transfer_queue_request_reset(struct bce_vhci_transfer_queue *q)
-+{
-+    queue_work(q->vhci->tq_state_wq, &q->w_reset);
-+}
-+
-+static void bce_vhci_transfer_queue_init_pending_urbs(struct bce_vhci_transfer_queue *q)
-+{
-+    struct urb *urb, *urbt;
-+    struct bce_vhci_urb *vurb;
-+    list_for_each_entry_safe(urb, urbt, &q->endp->urb_list, urb_list) {
-+        vurb = urb->hcpriv;
-+        if (!bce_vhci_transfer_queue_can_init_urb(q))
-+            break;
-+        if (vurb->state == BCE_VHCI_URB_INIT_PENDING)
-+            bce_vhci_urb_init(vurb);
-+    }
-+}
-+
-+
-+
-+static int bce_vhci_urb_data_start(struct bce_vhci_urb *urb, unsigned long *timeout);
-+
-+int bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, struct urb *urb)
-+{
-+    unsigned long flags;
-+    int status = 0;
-+    struct bce_vhci_urb *vurb;
-+    vurb = kzalloc(sizeof(struct bce_vhci_urb), GFP_KERNEL);
-+    urb->hcpriv = vurb;
-+
-+    vurb->q = q;
-+    vurb->urb = urb;
-+    vurb->dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-+    vurb->is_control = (usb_endpoint_num(&urb->ep->desc) == 0);
-+
-+    spin_lock_irqsave(&q->urb_lock, flags);
-+    status = usb_hcd_link_urb_to_ep(q->vhci->hcd, urb);
-+    if (status) {
-+        spin_unlock_irqrestore(&q->urb_lock, flags);
-+        urb->hcpriv = NULL;
-+        kfree(vurb);
-+        return status;
-+    }
-+
-+    if (q->active) {
-+        if (bce_vhci_transfer_queue_can_init_urb(vurb->q))
-+            status = bce_vhci_urb_init(vurb);
-+        else
-+            vurb->state = BCE_VHCI_URB_INIT_PENDING;
-+    } else {
-+        if (q->stalled)
-+            bce_vhci_transfer_queue_request_reset(q);
-+        vurb->state = BCE_VHCI_URB_INIT_PENDING;
-+    }
-+    if (status) {
-+        usb_hcd_unlink_urb_from_ep(q->vhci->hcd, urb);
-+        urb->hcpriv = NULL;
-+        kfree(vurb);
-+    } else {
-+        bce_vhci_transfer_queue_deliver_pending(q);
-+    }
-+    spin_unlock_irqrestore(&q->urb_lock, flags);
-+    pr_debug("bce-vhci: [%02x] URB enqueued (dir = %s, size = %i)\n", q->endp_addr,
-+            usb_urb_dir_in(urb) ? "IN" : "OUT", urb->transfer_buffer_length);
-+    return status;
-+}
-+
-+static int bce_vhci_urb_init(struct bce_vhci_urb *vurb)
-+{
-+    int status = 0;
-+
-+    if (vurb->q->remaining_active_requests == 0) {
-+        pr_err("bce-vhci: cannot init request (remaining_active_requests = 0)\n");
-+        return -EINVAL;
-+    }
-+
-+    if (vurb->is_control) {
-+        vurb->state = BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST;
-+    } else {
-+        status = bce_vhci_urb_data_start(vurb, NULL);
-+    }
-+
-+    if (!status) {
-+        --vurb->q->remaining_active_requests;
-+    }
-+    return status;
-+}
-+
-+static void bce_vhci_urb_complete(struct bce_vhci_urb *urb, int status)
-+{
-+    struct bce_vhci_transfer_queue *q = urb->q;
-+    struct bce_vhci *vhci = q->vhci;
-+    struct urb *real_urb = urb->urb;
-+    pr_debug("bce-vhci: [%02x] URB complete %i\n", q->endp_addr, status);
-+    usb_hcd_unlink_urb_from_ep(vhci->hcd, real_urb);
-+    real_urb->hcpriv = NULL;
-+    real_urb->status = status;
-+    if (urb->state != BCE_VHCI_URB_INIT_PENDING)
-+        ++urb->q->remaining_active_requests;
-+    kfree(urb);
-+    list_add_tail(&real_urb->urb_list, &q->giveback_urb_list);
-+}
-+
-+int bce_vhci_urb_request_cancel(struct bce_vhci_transfer_queue *q, struct urb *urb, int status)
-+{
-+    struct bce_vhci_urb *vurb;
-+    unsigned long flags;
-+    int ret;
-+
-+    spin_lock_irqsave(&q->urb_lock, flags);
-+    if ((ret = usb_hcd_check_unlink_urb(q->vhci->hcd, urb, status))) {
-+        spin_unlock_irqrestore(&q->urb_lock, flags);
-+        return ret;
-+    }
-+
-+    vurb = urb->hcpriv;
-+    /* If the URB wasn't posted to the device yet, we can still remove it on the host without pausing the queue. */
-+    if (vurb->state != BCE_VHCI_URB_INIT_PENDING) {
-+        pr_debug("bce-vhci: [%02x] Cancelling URB\n", q->endp_addr);
-+
-+        spin_unlock_irqrestore(&q->urb_lock, flags);
-+        bce_vhci_transfer_queue_pause(q, BCE_VHCI_PAUSE_INTERNAL_WQ);
-+        spin_lock_irqsave(&q->urb_lock, flags);
-+
-+        ++q->remaining_active_requests;
-+    }
-+
-+    usb_hcd_unlink_urb_from_ep(q->vhci->hcd, urb);
-+
-+    spin_unlock_irqrestore(&q->urb_lock, flags);
-+
-+    usb_hcd_giveback_urb(q->vhci->hcd, urb, status);
-+
-+    if (vurb->state != BCE_VHCI_URB_INIT_PENDING)
-+        bce_vhci_transfer_queue_resume(q, BCE_VHCI_PAUSE_INTERNAL_WQ);
-+
-+    kfree(vurb);
-+
-+    return 0;
-+}
-+
-+static int bce_vhci_urb_data_transfer_in(struct bce_vhci_urb *urb, unsigned long *timeout)
-+{
-+    struct bce_vhci_message msg;
-+    struct bce_qe_submission *s;
-+    u32 tr_len;
-+    int reservation1, reservation2 = -EFAULT;
-+
-+    pr_debug("bce-vhci: [%02x] DMA from device %llx %x\n", urb->q->endp_addr,
-+             (u64) urb->urb->transfer_dma, urb->urb->transfer_buffer_length);
-+
-+    /* Reserve both a message and a submission, so we don't run into issues later. */
-+    reservation1 = bce_reserve_submission(urb->q->vhci->msg_asynchronous.sq, timeout);
-+    if (!reservation1)
-+        reservation2 = bce_reserve_submission(urb->q->sq_in, timeout);
-+    if (reservation1 || reservation2) {
-+        pr_err("bce-vhci: Failed to reserve a submission for URB data transfer\n");
-+        if (!reservation1)
-+            bce_cancel_submission_reservation(urb->q->vhci->msg_asynchronous.sq);
-+        return -ENOMEM;
-+    }
-+
-+    urb->send_offset = urb->receive_offset;
-+
-+    tr_len = urb->urb->transfer_buffer_length - urb->send_offset;
-+
-+    spin_lock(&urb->q->vhci->msg_asynchronous_lock);
-+    msg.cmd = BCE_VHCI_CMD_TRANSFER_REQUEST;
-+    msg.status = 0;
-+    msg.param1 = ((urb->urb->ep->desc.bEndpointAddress & 0x8Fu) << 8) | urb->q->dev_addr;
-+    msg.param2 = tr_len;
-+    bce_vhci_message_queue_write(&urb->q->vhci->msg_asynchronous, &msg);
-+    spin_unlock(&urb->q->vhci->msg_asynchronous_lock);
-+
-+    s = bce_next_submission(urb->q->sq_in);
-+    bce_set_submission_single(s, urb->urb->transfer_dma + urb->send_offset, tr_len);
-+    bce_submit_to_device(urb->q->sq_in);
-+
-+    urb->state = BCE_VHCI_URB_WAITING_FOR_COMPLETION;
-+    return 0;
-+}
-+
-+static int bce_vhci_urb_data_start(struct bce_vhci_urb *urb, unsigned long *timeout)
-+{
-+    if (urb->dir == DMA_TO_DEVICE) {
-+        if (urb->urb->transfer_buffer_length > 0)
-+            urb->state = BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST;
-+        else
-+            urb->state = BCE_VHCI_URB_DATA_TRANSFER_COMPLETE;
-+        return 0;
-+    } else {
-+        return bce_vhci_urb_data_transfer_in(urb, timeout);
-+    }
-+}
-+
-+static int bce_vhci_urb_send_out_data(struct bce_vhci_urb *urb, dma_addr_t addr, size_t size)
-+{
-+    struct bce_qe_submission *s;
-+    unsigned long timeout = 0;
-+    if (bce_reserve_submission(urb->q->sq_out, &timeout)) {
-+        pr_err("bce-vhci: Failed to reserve a submission for URB data transfer\n");
-+        return -EPIPE;
-+    }
-+
-+    pr_debug("bce-vhci: [%02x] DMA to device %llx %lx\n", urb->q->endp_addr, (u64) addr, size);
-+
-+    s = bce_next_submission(urb->q->sq_out);
-+    bce_set_submission_single(s, addr, size);
-+    bce_submit_to_device(urb->q->sq_out);
-+    return 0;
-+}
-+
-+static int bce_vhci_urb_data_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg)
-+{
-+    u32 tr_len;
-+    int status;
-+    if (urb->state == BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST) {
-+        if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST) {
-+            tr_len = min(urb->urb->transfer_buffer_length - urb->send_offset, (u32) msg->param2);
-+            if ((status = bce_vhci_urb_send_out_data(urb, urb->urb->transfer_dma + urb->send_offset, tr_len)))
-+                return status;
-+            urb->send_offset += tr_len;
-+            urb->state = BCE_VHCI_URB_WAITING_FOR_COMPLETION;
-+            return 0;
-+        }
-+    }
-+
-+    /* 0x1000 in out queues aren't really unexpected */
-+    if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST && urb->q->sq_out != NULL)
-+        return -EAGAIN;
-+    pr_err("bce-vhci: [%02x] %s URB unexpected message (state = %x, msg: %x %x %x %llx)\n",
-+            urb->q->endp_addr, (urb->is_control ? "Control (data update)" : "Data"), urb->state,
-+            msg->cmd, msg->status, msg->param1, msg->param2);
-+    return -EAGAIN;
-+}
-+
-+static int bce_vhci_urb_data_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c)
-+{
-+    if (urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) {
-+        urb->receive_offset += c->data_size;
-+        if (urb->dir == DMA_FROM_DEVICE || urb->receive_offset >= urb->urb->transfer_buffer_length) {
-+            urb->urb->actual_length = (u32) urb->receive_offset;
-+            urb->state = BCE_VHCI_URB_DATA_TRANSFER_COMPLETE;
-+            if (!urb->is_control) {
-+                bce_vhci_urb_complete(urb, 0);
-+                return -ENOENT;
-+            }
-+        }
-+    } else {
-+        pr_err("bce-vhci: [%02x] Data URB unexpected completion\n", urb->q->endp_addr);
-+    }
-+    return 0;
-+}
-+
-+
-+static int bce_vhci_urb_control_check_status(struct bce_vhci_urb *urb)
-+{
-+    struct bce_vhci_transfer_queue *q = urb->q;
-+    if (urb->received_status == 0)
-+        return 0;
-+    if (urb->state == BCE_VHCI_URB_DATA_TRANSFER_COMPLETE ||
-+        (urb->received_status != BCE_VHCI_SUCCESS && urb->state != BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST &&
-+        urb->state != BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION)) {
-+        urb->state = BCE_VHCI_URB_CONTROL_COMPLETE;
-+        if (urb->received_status != BCE_VHCI_SUCCESS) {
-+            pr_err("bce-vhci: [%02x] URB failed: %x\n", urb->q->endp_addr, urb->received_status);
-+            urb->q->active = false;
-+            urb->q->stalled = true;
-+            bce_vhci_urb_complete(urb, -EPIPE);
-+            if (!list_empty(&q->endp->urb_list))
-+                bce_vhci_transfer_queue_request_reset(q);
-+            return -ENOENT;
-+        }
-+        bce_vhci_urb_complete(urb, 0);
-+        return -ENOENT;
-+    }
-+    return 0;
-+}
-+
-+static int bce_vhci_urb_control_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg)
-+{
-+    int status;
-+    if (msg->cmd == BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS) {
-+        urb->received_status = msg->status;
-+        return bce_vhci_urb_control_check_status(urb);
-+    }
-+
-+    if (urb->state == BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST) {
-+        if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST) {
-+            if (bce_vhci_urb_send_out_data(urb, urb->urb->setup_dma, sizeof(struct usb_ctrlrequest))) {
-+                pr_err("bce-vhci: [%02x] Failed to start URB setup transfer\n", urb->q->endp_addr);
-+                return 0; /* TODO: fail the URB? */
-+            }
-+            urb->state = BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION;
-+            pr_debug("bce-vhci: [%02x] Sent setup %llx\n", urb->q->endp_addr, urb->urb->setup_dma);
-+            return 0;
-+        }
-+    } else if (urb->state == BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST ||
-+               urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) {
-+        if ((status = bce_vhci_urb_data_update(urb, msg)))
-+            return status;
-+        return bce_vhci_urb_control_check_status(urb);
-+    }
-+
-+    /* 0x1000 in out queues aren't really unexpected */
-+    if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST && urb->q->sq_out != NULL)
-+        return -EAGAIN;
-+    pr_err("bce-vhci: [%02x] Control URB unexpected message (state = %x, msg: %x %x %x %llx)\n", urb->q->endp_addr,
-+            urb->state, msg->cmd, msg->status, msg->param1, msg->param2);
-+    return -EAGAIN;
-+}
-+
-+static int bce_vhci_urb_control_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c)
-+{
-+    int status;
-+    unsigned long timeout;
-+
-+    if (urb->state == BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION) {
-+        if (c->data_size != sizeof(struct usb_ctrlrequest))
-+            pr_err("bce-vhci: [%02x] transfer complete data size mistmatch for usb_ctrlrequest (%llx instead of %lx)\n",
-+                   urb->q->endp_addr, c->data_size, sizeof(struct usb_ctrlrequest));
-+
-+        timeout = 1000;
-+        status = bce_vhci_urb_data_start(urb, &timeout);
-+        if (status) {
-+            bce_vhci_urb_complete(urb, status);
-+            return -ENOENT;
-+        }
-+        return 0;
-+    } else if (urb->state == BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST ||
-+               urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) {
-+        if ((status = bce_vhci_urb_data_transfer_completion(urb, c)))
-+            return status;
-+        return bce_vhci_urb_control_check_status(urb);
-+    } else {
-+        pr_err("bce-vhci: [%02x] Control URB unexpected completion (state = %x)\n", urb->q->endp_addr, urb->state);
-+    }
-+    return 0;
-+}
-+
-+static int bce_vhci_urb_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg)
-+{
-+    if (urb->state == BCE_VHCI_URB_INIT_PENDING)
-+        return -EAGAIN;
-+    if (urb->is_control)
-+        return bce_vhci_urb_control_update(urb, msg);
-+    else
-+        return bce_vhci_urb_data_update(urb, msg);
-+}
-+
-+static int bce_vhci_urb_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c)
-+{
-+    if (urb->is_control)
-+        return bce_vhci_urb_control_transfer_completion(urb, c);
-+    else
-+        return bce_vhci_urb_data_transfer_completion(urb, c);
-+}
-+
-+static void bce_vhci_urb_resume(struct bce_vhci_urb *urb)
-+{
-+    int status = 0;
-+    if (urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) {
-+        status = bce_vhci_urb_data_transfer_in(urb, NULL);
-+    }
-+    if (status)
-+        bce_vhci_urb_complete(urb, status);
-+}
-diff --git a/drivers/staging/apple-bce/vhci/transfer.h b/drivers/staging/apple-bce/vhci/transfer.h
-new file mode 100644
-index 000000000000..89ecad6bcf8f
---- /dev/null
-+++ b/drivers/staging/apple-bce/vhci/transfer.h
-@@ -0,0 +1,73 @@
-+#ifndef BCEDRIVER_TRANSFER_H
-+#define BCEDRIVER_TRANSFER_H
-+
-+#include <linux/usb.h>
-+#include "queue.h"
-+#include "command.h"
-+#include "../queue.h"
-+
-+struct bce_vhci_list_message {
-+    struct list_head list;
-+    struct bce_vhci_message msg;
-+};
-+enum bce_vhci_pause_source {
-+    BCE_VHCI_PAUSE_INTERNAL_WQ = 1,
-+    BCE_VHCI_PAUSE_FIRMWARE = 2,
-+    BCE_VHCI_PAUSE_SUSPEND = 4,
-+    BCE_VHCI_PAUSE_SHUTDOWN = 8
-+};
-+struct bce_vhci_transfer_queue {
-+    struct bce_vhci *vhci;
-+    struct usb_host_endpoint *endp;
-+    enum bce_vhci_endpoint_state state;
-+    u32 max_active_requests, remaining_active_requests;
-+    bool active, stalled;
-+    u32 paused_by;
-+    bce_vhci_device_t dev_addr;
-+    u8 endp_addr;
-+    struct bce_queue_cq *cq;
-+    struct bce_queue_sq *sq_in;
-+    struct bce_queue_sq *sq_out;
-+    struct list_head evq;
-+    struct spinlock urb_lock;
-+    struct mutex pause_lock;
-+    struct list_head giveback_urb_list;
-+
-+    struct work_struct w_reset;
-+};
-+enum bce_vhci_urb_state {
-+    BCE_VHCI_URB_INIT_PENDING,
-+
-+    BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST,
-+    BCE_VHCI_URB_WAITING_FOR_COMPLETION,
-+    BCE_VHCI_URB_DATA_TRANSFER_COMPLETE,
-+
-+    BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST,
-+    BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION,
-+    BCE_VHCI_URB_CONTROL_COMPLETE
-+};
-+struct bce_vhci_urb {
-+    struct urb *urb;
-+    struct bce_vhci_transfer_queue *q;
-+    enum dma_data_direction dir;
-+    bool is_control;
-+    enum bce_vhci_urb_state state;
-+    int received_status;
-+    u32 send_offset;
-+    u32 receive_offset;
-+};
-+
-+void bce_vhci_create_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q,
-+        struct usb_host_endpoint *endp, bce_vhci_device_t dev_addr, enum dma_data_direction dir);
-+void bce_vhci_destroy_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q);
-+void bce_vhci_transfer_queue_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg);
-+int bce_vhci_transfer_queue_do_pause(struct bce_vhci_transfer_queue *q);
-+int bce_vhci_transfer_queue_do_resume(struct bce_vhci_transfer_queue *q);
-+int bce_vhci_transfer_queue_pause(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src);
-+int bce_vhci_transfer_queue_resume(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src);
-+void bce_vhci_transfer_queue_request_reset(struct bce_vhci_transfer_queue *q);
-+
-+int bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, struct urb *urb);
-+int bce_vhci_urb_request_cancel(struct bce_vhci_transfer_queue *q, struct urb *urb, int status);
-+
-+#endif //BCEDRIVER_TRANSFER_H
-diff --git a/drivers/staging/apple-bce/vhci/vhci.c b/drivers/staging/apple-bce/vhci/vhci.c
-new file mode 100644
-index 000000000000..eb26f55000d8
---- /dev/null
-+++ b/drivers/staging/apple-bce/vhci/vhci.c
-@@ -0,0 +1,759 @@
-+#include "vhci.h"
-+#include "../apple_bce.h"
-+#include "command.h"
-+#include <linux/usb.h>
-+#include <linux/usb/hcd.h>
-+#include <linux/module.h>
-+#include <linux/version.h>
-+
-+static dev_t bce_vhci_chrdev;
-+static struct class *bce_vhci_class;
-+static const struct hc_driver bce_vhci_driver;
-+static u16 bce_vhci_port_mask = U16_MAX;
-+
-+static int bce_vhci_create_event_queues(struct bce_vhci *vhci);
-+static void bce_vhci_destroy_event_queues(struct bce_vhci *vhci);
-+static int bce_vhci_create_message_queues(struct bce_vhci *vhci);
-+static void bce_vhci_destroy_message_queues(struct bce_vhci *vhci);
-+static void bce_vhci_handle_firmware_events_w(struct work_struct *ws);
-+static void bce_vhci_firmware_event_completion(struct bce_queue_sq *sq);
-+
-+int bce_vhci_create(struct apple_bce_device *dev, struct bce_vhci *vhci)
-+{
-+    int status;
-+
-+    spin_lock_init(&vhci->hcd_spinlock);
-+
-+    vhci->dev = dev;
-+
-+    vhci->vdevt = bce_vhci_chrdev;
-+    vhci->vdev = device_create(bce_vhci_class, dev->dev, vhci->vdevt, NULL, "bce-vhci");
-+    if (IS_ERR_OR_NULL(vhci->vdev)) {
-+        status = PTR_ERR(vhci->vdev);
-+        goto fail_dev;
-+    }
-+
-+    if ((status = bce_vhci_create_message_queues(vhci)))
-+        goto fail_mq;
-+    if ((status = bce_vhci_create_event_queues(vhci)))
-+        goto fail_eq;
-+
-+    vhci->tq_state_wq = alloc_ordered_workqueue("bce-vhci-tq-state", 0);
-+    INIT_WORK(&vhci->w_fw_events, bce_vhci_handle_firmware_events_w);
-+
-+    vhci->hcd = usb_create_hcd(&bce_vhci_driver, vhci->vdev, "bce-vhci");
-+    if (!vhci->hcd) {
-+        status = -ENOMEM;
-+        goto fail_hcd;
-+    }
-+    vhci->hcd->self.sysdev = &dev->pci->dev;
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(5,4,0)
-+    vhci->hcd->self.uses_dma = 1;
-+#endif
-+    *((struct bce_vhci **) vhci->hcd->hcd_priv) = vhci;
-+    vhci->hcd->speed = HCD_USB2;
-+
-+    if ((status = usb_add_hcd(vhci->hcd, 0, 0)))
-+        goto fail_hcd;
-+
-+    return 0;
-+
-+fail_hcd:
-+    bce_vhci_destroy_event_queues(vhci);
-+fail_eq:
-+    bce_vhci_destroy_message_queues(vhci);
-+fail_mq:
-+    device_destroy(bce_vhci_class, vhci->vdevt);
-+fail_dev:
-+    if (!status)
-+        status = -EINVAL;
-+    return status;
-+}
-+
-+void bce_vhci_destroy(struct bce_vhci *vhci)
-+{
-+    usb_remove_hcd(vhci->hcd);
-+    bce_vhci_destroy_event_queues(vhci);
-+    bce_vhci_destroy_message_queues(vhci);
-+    device_destroy(bce_vhci_class, vhci->vdevt);
-+}
-+
-+struct bce_vhci *bce_vhci_from_hcd(struct usb_hcd *hcd)
-+{
-+    return *((struct bce_vhci **) hcd->hcd_priv);
-+}
-+
-+int bce_vhci_start(struct usb_hcd *hcd)
-+{
-+    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
-+    int status;
-+    u16 port_mask = 0;
-+    bce_vhci_port_t port_no = 0;
-+    if ((status = bce_vhci_cmd_controller_enable(&vhci->cq, 1, &port_mask)))
-+        return status;
-+    vhci->port_mask = port_mask;
-+    vhci->port_power_mask = 0;
-+    if ((status = bce_vhci_cmd_controller_start(&vhci->cq)))
-+        return status;
-+    port_mask = vhci->port_mask;
-+    while (port_mask) {
-+        port_no += 1;
-+        port_mask >>= 1;
-+    }
-+    vhci->port_count = port_no;
-+    return 0;
-+}
-+
-+void bce_vhci_stop(struct usb_hcd *hcd)
-+{
-+    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
-+    bce_vhci_cmd_controller_disable(&vhci->cq);
-+}
-+
-+static int bce_vhci_hub_status_data(struct usb_hcd *hcd, char *buf)
-+{
-+    return 0;
-+}
-+
-+static int bce_vhci_reset_device(struct bce_vhci *vhci, int index, u16 timeout);
-+
-+static int bce_vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, char *buf, u16 wLength)
-+{
-+    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
-+    int status;
-+    struct usb_hub_descriptor *hd;
-+    struct usb_hub_status *hs;
-+    struct usb_port_status *ps;
-+    u32 port_status;
-+    // pr_info("bce-vhci: bce_vhci_hub_control %x %i %i [bufl=%i]\n", typeReq, wValue, wIndex, wLength);
-+    if (typeReq == GetHubDescriptor && wLength >= sizeof(struct usb_hub_descriptor)) {
-+        hd = (struct usb_hub_descriptor *) buf;
-+        memset(hd, 0, sizeof(*hd));
-+        hd->bDescLength = sizeof(struct usb_hub_descriptor);
-+        hd->bDescriptorType = USB_DT_HUB;
-+        hd->bNbrPorts = (u8) vhci->port_count;
-+        hd->wHubCharacteristics = HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_INDV_PORT_OCPM;
-+        hd->bPwrOn2PwrGood = 0;
-+        hd->bHubContrCurrent = 0;
-+        return 0;
-+    } else if (typeReq == GetHubStatus && wLength >= sizeof(struct usb_hub_status)) {
-+        hs = (struct usb_hub_status *) buf;
-+        memset(hs, 0, sizeof(*hs));
-+        hs->wHubStatus = 0;
-+        hs->wHubChange = 0;
-+        return 0;
-+    } else if (typeReq == GetPortStatus && wLength >= 4 /* usb 2.0 */) {
-+        ps = (struct usb_port_status *) buf;
-+        ps->wPortStatus = 0;
-+        ps->wPortChange = 0;
-+
-+        if (vhci->port_power_mask & BIT(wIndex))
-+            ps->wPortStatus |= USB_PORT_STAT_POWER;
-+
-+        if (!(bce_vhci_port_mask & BIT(wIndex)))
-+            return 0;
-+
-+        if ((status = bce_vhci_cmd_port_status(&vhci->cq, (u8) wIndex, 0, &port_status)))
-+            return status;
-+
-+        if (port_status & 16)
-+            ps->wPortStatus |= USB_PORT_STAT_ENABLE | USB_PORT_STAT_HIGH_SPEED;
-+        if (port_status & 4)
-+            ps->wPortStatus |= USB_PORT_STAT_CONNECTION;
-+        if (port_status & 2)
-+            ps->wPortStatus |= USB_PORT_STAT_OVERCURRENT;
-+        if (port_status & 8)
-+            ps->wPortStatus |= USB_PORT_STAT_RESET;
-+        if (port_status & 0x60)
-+            ps->wPortStatus |= USB_PORT_STAT_SUSPEND;
-+
-+        if (port_status & 0x40000)
-+            ps->wPortChange |= USB_PORT_STAT_C_CONNECTION;
-+
-+        pr_debug("bce-vhci: Translated status %x to %x:%x\n", port_status, ps->wPortStatus, ps->wPortChange);
-+        return 0;
-+    } else if (typeReq == SetPortFeature) {
-+        if (wValue == USB_PORT_FEAT_POWER) {
-+            status = bce_vhci_cmd_port_power_on(&vhci->cq, (u8) wIndex);
-+            /* As far as I am aware, power status is not part of the port status so store it separately */
-+            if (!status)
-+                vhci->port_power_mask |= BIT(wIndex);
-+            return status;
-+        }
-+        if (wValue == USB_PORT_FEAT_RESET) {
-+            return bce_vhci_reset_device(vhci, wIndex, wValue);
-+        }
-+        if (wValue == USB_PORT_FEAT_SUSPEND) {
-+            /* TODO: Am I supposed to also suspend the endpoints? */
-+            pr_debug("bce-vhci: Suspending port %i\n", wIndex);
-+            return bce_vhci_cmd_port_suspend(&vhci->cq, (u8) wIndex);
-+        }
-+    } else if (typeReq == ClearPortFeature) {
-+        if (wValue == USB_PORT_FEAT_ENABLE)
-+            return bce_vhci_cmd_port_disable(&vhci->cq, (u8) wIndex);
-+        if (wValue == USB_PORT_FEAT_POWER) {
-+            status = bce_vhci_cmd_port_power_off(&vhci->cq, (u8) wIndex);
-+            if (!status)
-+                vhci->port_power_mask &= ~BIT(wIndex);
-+            return status;
-+        }
-+        if (wValue == USB_PORT_FEAT_C_CONNECTION)
-+            return bce_vhci_cmd_port_status(&vhci->cq, (u8) wIndex, 0x40000, &port_status);
-+        if (wValue == USB_PORT_FEAT_C_RESET) { /* I don't think I can transfer it in any way */
-+            return 0;
-+        }
-+        if (wValue == USB_PORT_FEAT_SUSPEND) {
-+            pr_debug("bce-vhci: Resuming port %i\n", wIndex);
-+            return bce_vhci_cmd_port_resume(&vhci->cq, (u8) wIndex);
-+        }
-+    }
-+    pr_err("bce-vhci: bce_vhci_hub_control unhandled request: %x %i %i [bufl=%i]\n", typeReq, wValue, wIndex, wLength);
-+    dump_stack();
-+    return -EIO;
-+}
-+
-+static int bce_vhci_enable_device(struct usb_hcd *hcd, struct usb_device *udev)
-+{
-+    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
-+    struct bce_vhci_device *vdev;
-+    bce_vhci_device_t devid;
-+    pr_info("bce_vhci_enable_device\n");
-+
-+    if (vhci->port_to_device[udev->portnum])
-+        return 0;
-+
-+    /* We need to early address the device */
-+    if (bce_vhci_cmd_device_create(&vhci->cq, udev->portnum, &devid))
-+        return -EIO;
-+
-+    pr_info("bce_vhci_cmd_device_create %i -> %i\n", udev->portnum, devid);
-+
-+    vdev = kzalloc(sizeof(struct bce_vhci_device), GFP_KERNEL);
-+    vhci->port_to_device[udev->portnum] = devid;
-+    vhci->devices[devid] = vdev;
-+
-+    bce_vhci_create_transfer_queue(vhci, &vdev->tq[0], &udev->ep0, devid, DMA_BIDIRECTIONAL);
-+    udev->ep0.hcpriv = &vdev->tq[0];
-+    vdev->tq_mask |= BIT(0);
-+
-+    bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &udev->ep0.desc);
-+    return 0;
-+}
-+
-+static int bce_vhci_address_device(struct usb_hcd *hcd, struct usb_device *udev, unsigned int timeout_ms) //TODO: follow timeout
-+{
-+    /* This is the same as enable_device, but instead in the old scheme */
-+    return bce_vhci_enable_device(hcd, udev);
-+}
-+
-+static void bce_vhci_free_device(struct usb_hcd *hcd, struct usb_device *udev)
-+{
-+    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
-+    int i;
-+    bce_vhci_device_t devid;
-+    struct bce_vhci_device *dev;
-+    pr_info("bce_vhci_free_device %i\n", udev->portnum);
-+    if (!vhci->port_to_device[udev->portnum])
-+        return;
-+    devid = vhci->port_to_device[udev->portnum];
-+    dev = vhci->devices[devid];
-+    for (i = 0; i < 32; i++) {
-+        if (dev->tq_mask & BIT(i)) {
-+            bce_vhci_transfer_queue_pause(&dev->tq[i], BCE_VHCI_PAUSE_SHUTDOWN);
-+            bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) i);
-+            bce_vhci_destroy_transfer_queue(vhci, &dev->tq[i]);
-+        }
-+    }
-+    vhci->devices[devid] = NULL;
-+    vhci->port_to_device[udev->portnum] = 0;
-+    bce_vhci_cmd_device_destroy(&vhci->cq, devid);
-+    kfree(dev);
-+}
-+
-+static int bce_vhci_reset_device(struct bce_vhci *vhci, int index, u16 timeout)
-+{
-+    struct bce_vhci_device *dev = NULL;
-+    bce_vhci_device_t devid;
-+    int i;
-+    int status;
-+    enum dma_data_direction dir;
-+    pr_info("bce_vhci_reset_device %i\n", index);
-+
-+    devid = vhci->port_to_device[index];
-+    if (devid) {
-+        dev = vhci->devices[devid];
-+
-+        for (i = 0; i < 32; i++) {
-+            if (dev->tq_mask & BIT(i)) {
-+                bce_vhci_transfer_queue_pause(&dev->tq[i], BCE_VHCI_PAUSE_SHUTDOWN);
-+                bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) i);
-+                bce_vhci_destroy_transfer_queue(vhci, &dev->tq[i]);
-+            }
-+        }
-+        vhci->devices[devid] = NULL;
-+        vhci->port_to_device[index] = 0;
-+        bce_vhci_cmd_device_destroy(&vhci->cq, devid);
-+    }
-+    status = bce_vhci_cmd_port_reset(&vhci->cq, (u8) index, timeout);
-+
-+    if (dev) {
-+        if ((status = bce_vhci_cmd_device_create(&vhci->cq, index, &devid)))
-+            return status;
-+        vhci->devices[devid] = dev;
-+        vhci->port_to_device[index] = devid;
-+
-+        for (i = 0; i < 32; i++) {
-+            if (dev->tq_mask & BIT(i)) {
-+                dir = usb_endpoint_dir_in(&dev->tq[i].endp->desc) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-+                if (i == 0)
-+                    dir = DMA_BIDIRECTIONAL;
-+                bce_vhci_create_transfer_queue(vhci, &dev->tq[i], dev->tq[i].endp, devid, dir);
-+                bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &dev->tq[i].endp->desc);
-+            }
-+        }
-+    }
-+
-+    return status;
-+}
-+
-+static int bce_vhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev)
-+{
-+    return 0;
-+}
-+
-+static int bce_vhci_get_frame_number(struct usb_hcd *hcd)
-+{
-+    return 0;
-+}
-+
-+static int bce_vhci_bus_suspend(struct usb_hcd *hcd)
-+{
-+    int i, j;
-+    int status;
-+    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
-+    pr_info("bce_vhci: suspend started\n");
-+
-+    pr_info("bce_vhci: suspend endpoints\n");
-+    for (i = 0; i < 16; i++) {
-+        if (!vhci->port_to_device[i])
-+            continue;
-+        for (j = 0; j < 32; j++) {
-+            if (!(vhci->devices[vhci->port_to_device[i]]->tq_mask & BIT(j)))
-+                continue;
-+            bce_vhci_transfer_queue_pause(&vhci->devices[vhci->port_to_device[i]]->tq[j],
-+                    BCE_VHCI_PAUSE_SUSPEND);
-+        }
-+    }
-+
-+    pr_info("bce_vhci: suspend ports\n");
-+    for (i = 0; i < 16; i++) {
-+        if (!vhci->port_to_device[i])
-+            continue;
-+        bce_vhci_cmd_port_suspend(&vhci->cq, i);
-+    }
-+    pr_info("bce_vhci: suspend controller\n");
-+    if ((status = bce_vhci_cmd_controller_pause(&vhci->cq)))
-+        return status;
-+
-+    bce_vhci_event_queue_pause(&vhci->ev_commands);
-+    bce_vhci_event_queue_pause(&vhci->ev_system);
-+    bce_vhci_event_queue_pause(&vhci->ev_isochronous);
-+    bce_vhci_event_queue_pause(&vhci->ev_interrupt);
-+    bce_vhci_event_queue_pause(&vhci->ev_asynchronous);
-+    pr_info("bce_vhci: suspend done\n");
-+    return 0;
-+}
-+
-+static int bce_vhci_bus_resume(struct usb_hcd *hcd)
-+{
-+    int i, j;
-+    int status;
-+    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
-+    pr_info("bce_vhci: resume started\n");
-+
-+    bce_vhci_event_queue_resume(&vhci->ev_system);
-+    bce_vhci_event_queue_resume(&vhci->ev_isochronous);
-+    bce_vhci_event_queue_resume(&vhci->ev_interrupt);
-+    bce_vhci_event_queue_resume(&vhci->ev_asynchronous);
-+    bce_vhci_event_queue_resume(&vhci->ev_commands);
-+
-+    pr_info("bce_vhci: resume controller\n");
-+    if ((status = bce_vhci_cmd_controller_start(&vhci->cq)))
-+        return status;
-+
-+    pr_info("bce_vhci: resume ports\n");
-+    for (i = 0; i < 16; i++) {
-+        if (!vhci->port_to_device[i])
-+            continue;
-+        bce_vhci_cmd_port_resume(&vhci->cq, i);
-+    }
-+    pr_info("bce_vhci: resume endpoints\n");
-+    for (i = 0; i < 16; i++) {
-+        if (!vhci->port_to_device[i])
-+            continue;
-+        for (j = 0; j < 32; j++) {
-+            if (!(vhci->devices[vhci->port_to_device[i]]->tq_mask & BIT(j)))
-+                continue;
-+            bce_vhci_transfer_queue_resume(&vhci->devices[vhci->port_to_device[i]]->tq[j],
-+                    BCE_VHCI_PAUSE_SUSPEND);
-+        }
-+    }
-+
-+    pr_info("bce_vhci: resume done\n");
-+    return 0;
-+}
-+
-+static int bce_vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
-+{
-+    struct bce_vhci_transfer_queue *q = urb->ep->hcpriv;
-+    pr_debug("bce_vhci_urb_enqueue %i:%x\n", q->dev_addr, urb->ep->desc.bEndpointAddress);
-+    if (!q)
-+        return -ENOENT;
-+    return bce_vhci_urb_create(q, urb);
-+}
-+
-+static int bce_vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
-+{
-+    struct bce_vhci_transfer_queue *q = urb->ep->hcpriv;
-+    pr_debug("bce_vhci_urb_dequeue %x\n", urb->ep->desc.bEndpointAddress);
-+    return bce_vhci_urb_request_cancel(q, urb, status);
-+}
-+
-+static void bce_vhci_endpoint_reset(struct usb_hcd *hcd, struct usb_host_endpoint *ep)
-+{
-+    struct bce_vhci_transfer_queue *q = ep->hcpriv;
-+    pr_debug("bce_vhci_endpoint_reset\n");
-+    if (q)
-+        bce_vhci_transfer_queue_request_reset(q);
-+}
-+
-+static u8 bce_vhci_endpoint_index(u8 addr)
-+{
-+    if (addr & 0x80)
-+        return (u8) (0x10 + (addr & 0xf));
-+    return (u8) (addr & 0xf);
-+}
-+
-+static int bce_vhci_add_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *endp)
-+{
-+    u8 endp_index = bce_vhci_endpoint_index(endp->desc.bEndpointAddress);
-+    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
-+    bce_vhci_device_t devid = vhci->port_to_device[udev->portnum];
-+    struct bce_vhci_device *vdev = vhci->devices[devid];
-+    pr_debug("bce_vhci_add_endpoint %x/%x:%x\n", udev->portnum, devid, endp_index);
-+
-+    if (udev->bus->root_hub == udev) /* The USB hub */
-+        return 0;
-+    if (vdev == NULL)
-+        return -ENODEV;
-+    if (vdev->tq_mask & BIT(endp_index)) {
-+        endp->hcpriv = &vdev->tq[endp_index];
-+        return 0;
-+    }
-+
-+    bce_vhci_create_transfer_queue(vhci, &vdev->tq[endp_index], endp, devid,
-+            usb_endpoint_dir_in(&endp->desc) ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
-+    endp->hcpriv = &vdev->tq[endp_index];
-+    vdev->tq_mask |= BIT(endp_index);
-+
-+    bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &endp->desc);
-+    return 0;
-+}
-+
-+static int bce_vhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *endp)
-+{
-+    u8 endp_index = bce_vhci_endpoint_index(endp->desc.bEndpointAddress);
-+    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
-+    bce_vhci_device_t devid = vhci->port_to_device[udev->portnum];
-+    struct bce_vhci_transfer_queue *q = endp->hcpriv;
-+    struct bce_vhci_device *vdev = vhci->devices[devid];
-+    pr_info("bce_vhci_drop_endpoint %x:%x\n", udev->portnum, endp_index);
-+    if (!q) {
-+        if (vdev && vdev->tq_mask & BIT(endp_index)) {
-+            pr_err("something deleted the hcpriv?\n");
-+            q = &vdev->tq[endp_index];
-+        } else {
-+            return 0;
-+        }
-+    }
-+
-+    bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) (endp->desc.bEndpointAddress & 0x8Fu));
-+    vhci->devices[devid]->tq_mask &= ~BIT(endp_index);
-+    bce_vhci_destroy_transfer_queue(vhci, q);
-+    return 0;
-+}
-+
-+static int bce_vhci_create_message_queues(struct bce_vhci *vhci)
-+{
-+    if (bce_vhci_message_queue_create(vhci, &vhci->msg_commands, "VHC1HostCommands") ||
-+        bce_vhci_message_queue_create(vhci, &vhci->msg_system, "VHC1HostSystemEvents") ||
-+        bce_vhci_message_queue_create(vhci, &vhci->msg_isochronous, "VHC1HostIsochronousEvents") ||
-+        bce_vhci_message_queue_create(vhci, &vhci->msg_interrupt, "VHC1HostInterruptEvents") ||
-+        bce_vhci_message_queue_create(vhci, &vhci->msg_asynchronous, "VHC1HostAsynchronousEvents")) {
-+        bce_vhci_destroy_message_queues(vhci);
-+        return -EINVAL;
-+    }
-+    spin_lock_init(&vhci->msg_asynchronous_lock);
-+    bce_vhci_command_queue_create(&vhci->cq, &vhci->msg_commands);
-+    return 0;
-+}
-+
-+static void bce_vhci_destroy_message_queues(struct bce_vhci *vhci)
-+{
-+    bce_vhci_command_queue_destroy(&vhci->cq);
-+    bce_vhci_message_queue_destroy(vhci, &vhci->msg_commands);
-+    bce_vhci_message_queue_destroy(vhci, &vhci->msg_system);
-+    bce_vhci_message_queue_destroy(vhci, &vhci->msg_isochronous);
-+    bce_vhci_message_queue_destroy(vhci, &vhci->msg_interrupt);
-+    bce_vhci_message_queue_destroy(vhci, &vhci->msg_asynchronous);
-+}
-+
-+static void bce_vhci_handle_system_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg);
-+static void bce_vhci_handle_usb_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg);
-+
-+static int bce_vhci_create_event_queues(struct bce_vhci *vhci)
-+{
-+    vhci->ev_cq = bce_create_cq(vhci->dev, 0x100);
-+    if (!vhci->ev_cq)
-+        return -EINVAL;
-+#define CREATE_EVENT_QUEUE(field, name, cb) bce_vhci_event_queue_create(vhci, &vhci->field, name, cb)
-+    if (__bce_vhci_event_queue_create(vhci, &vhci->ev_commands, "VHC1FirmwareCommands",
-+            bce_vhci_firmware_event_completion) ||
-+        CREATE_EVENT_QUEUE(ev_system,       "VHC1FirmwareSystemEvents",       bce_vhci_handle_system_event) ||
-+        CREATE_EVENT_QUEUE(ev_isochronous,  "VHC1FirmwareIsochronousEvents",  bce_vhci_handle_usb_event) ||
-+        CREATE_EVENT_QUEUE(ev_interrupt,    "VHC1FirmwareInterruptEvents",    bce_vhci_handle_usb_event) ||
-+        CREATE_EVENT_QUEUE(ev_asynchronous, "VHC1FirmwareAsynchronousEvents", bce_vhci_handle_usb_event)) {
-+        bce_vhci_destroy_event_queues(vhci);
-+        return -EINVAL;
-+    }
-+#undef CREATE_EVENT_QUEUE
-+    return 0;
-+}
-+
-+static void bce_vhci_destroy_event_queues(struct bce_vhci *vhci)
-+{
-+    bce_vhci_event_queue_destroy(vhci, &vhci->ev_commands);
-+    bce_vhci_event_queue_destroy(vhci, &vhci->ev_system);
-+    bce_vhci_event_queue_destroy(vhci, &vhci->ev_isochronous);
-+    bce_vhci_event_queue_destroy(vhci, &vhci->ev_interrupt);
-+    bce_vhci_event_queue_destroy(vhci, &vhci->ev_asynchronous);
-+    if (vhci->ev_cq)
-+        bce_destroy_cq(vhci->dev, vhci->ev_cq);
-+}
-+
-+static void bce_vhci_send_fw_event_response(struct bce_vhci *vhci, struct bce_vhci_message *req, u16 status)
-+{
-+    unsigned long timeout = 1000;
-+    struct bce_vhci_message r = *req;
-+    r.cmd = (u16) (req->cmd | 0x8000u);
-+    r.status = status;
-+    r.param1 = req->param1;
-+    r.param2 = 0;
-+
-+    if (bce_reserve_submission(vhci->msg_system.sq, &timeout)) {
-+        pr_err("bce-vhci: Cannot reserve submision for FW event reply\n");
-+        return;
-+    }
-+    bce_vhci_message_queue_write(&vhci->msg_system, &r);
-+}
-+
-+static int bce_vhci_handle_firmware_event(struct bce_vhci *vhci, struct bce_vhci_message *msg)
-+{
-+    unsigned long flags;
-+    bce_vhci_device_t devid;
-+    u8 endp;
-+    struct bce_vhci_device *dev;
-+    struct bce_vhci_transfer_queue *tq;
-+    if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE || msg->cmd == BCE_VHCI_CMD_ENDPOINT_SET_STATE) {
-+        devid = (bce_vhci_device_t) (msg->param1 & 0xff);
-+        endp = bce_vhci_endpoint_index((u8) ((msg->param1 >> 8) & 0xff));
-+        dev = vhci->devices[devid];
-+        if (!dev || !(dev->tq_mask & BIT(endp)))
-+            return BCE_VHCI_BAD_ARGUMENT;
-+        tq = &dev->tq[endp];
-+    }
-+
-+    if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE) {
-+        if (msg->param2 == BCE_VHCI_ENDPOINT_ACTIVE) {
-+            bce_vhci_transfer_queue_resume(tq, BCE_VHCI_PAUSE_FIRMWARE);
-+            return BCE_VHCI_SUCCESS;
-+        } else if (msg->param2 == BCE_VHCI_ENDPOINT_PAUSED) {
-+            bce_vhci_transfer_queue_pause(tq, BCE_VHCI_PAUSE_FIRMWARE);
-+            return BCE_VHCI_SUCCESS;
-+        }
-+        return BCE_VHCI_BAD_ARGUMENT;
-+    } else if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_SET_STATE) {
-+        if (msg->param2 == BCE_VHCI_ENDPOINT_STALLED) {
-+            tq->state = msg->param2;
-+            spin_lock_irqsave(&tq->urb_lock, flags);
-+            tq->stalled = true;
-+            spin_unlock_irqrestore(&tq->urb_lock, flags);
-+            return BCE_VHCI_SUCCESS;
-+        }
-+        return BCE_VHCI_BAD_ARGUMENT;
-+    }
-+    pr_warn("bce-vhci: Unhandled firmware event: %x s=%x p1=%x p2=%llx\n",
-+            msg->cmd, msg->status, msg->param1, msg->param2);
-+    return BCE_VHCI_BAD_ARGUMENT;
-+}
-+
-+static void bce_vhci_handle_firmware_events_w(struct work_struct *ws)
-+{
-+    size_t cnt = 0;
-+    int result;
-+    struct bce_vhci *vhci = container_of(ws, struct bce_vhci, w_fw_events);
-+    struct bce_queue_sq *sq = vhci->ev_commands.sq;
-+    struct bce_sq_completion_data *cq;
-+    struct bce_vhci_message *msg, *msg2 = NULL;
-+
-+    while (true) {
-+        if (msg2) {
-+            msg = msg2;
-+            msg2 = NULL;
-+        } else if ((cq = bce_next_completion(sq))) {
-+            if (cq->status == BCE_COMPLETION_ABORTED) {
-+                bce_notify_submission_complete(sq);
-+                continue;
-+            }
-+            msg = &vhci->ev_commands.data[sq->head];
-+        } else {
-+            break;
-+        }
-+
-+        pr_debug("bce-vhci: Got fw event: %x s=%x p1=%x p2=%llx\n", msg->cmd, msg->status, msg->param1, msg->param2);
-+        if ((cq = bce_next_completion(sq))) {
-+            msg2 = &vhci->ev_commands.data[(sq->head + 1) % sq->el_count];
-+            pr_debug("bce-vhci: Got second fw event: %x s=%x p1=%x p2=%llx\n",
-+                    msg->cmd, msg->status, msg->param1, msg->param2);
-+            if (cq->status != BCE_COMPLETION_ABORTED &&
-+                msg2->cmd == (msg->cmd | 0x4000) && msg2->param1 == msg->param1) {
-+                /* Take two elements */
-+                pr_debug("bce-vhci: Cancelled\n");
-+                bce_vhci_send_fw_event_response(vhci, msg, BCE_VHCI_ABORT);
-+
-+                bce_notify_submission_complete(sq);
-+                bce_notify_submission_complete(sq);
-+                msg2 = NULL;
-+                cnt += 2;
-+                continue;
-+            }
-+
-+            pr_warn("bce-vhci: Handle fw event - unexpected cancellation\n");
-+        }
-+
-+        result = bce_vhci_handle_firmware_event(vhci, msg);
-+        bce_vhci_send_fw_event_response(vhci, msg, (u16) result);
-+
-+
-+        bce_notify_submission_complete(sq);
-+        ++cnt;
-+    }
-+    bce_vhci_event_queue_submit_pending(&vhci->ev_commands, cnt);
-+    if (atomic_read(&sq->available_commands) == sq->el_count - 1) {
-+        pr_debug("bce-vhci: complete\n");
-+        complete(&vhci->ev_commands.queue_empty_completion);
-+    }
-+}
-+
-+static void bce_vhci_firmware_event_completion(struct bce_queue_sq *sq)
-+{
-+    struct bce_vhci_event_queue *q = sq->userdata;
-+    queue_work(q->vhci->tq_state_wq, &q->vhci->w_fw_events);
-+}
-+
-+static void bce_vhci_handle_system_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg)
-+{
-+    if (msg->cmd & 0x8000) {
-+        bce_vhci_command_queue_deliver_completion(&q->vhci->cq, msg);
-+    } else {
-+        pr_warn("bce-vhci: Unhandled system event: %x s=%x p1=%x p2=%llx\n",
-+                msg->cmd, msg->status, msg->param1, msg->param2);
-+    }
-+}
-+
-+static void bce_vhci_handle_usb_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg)
-+{
-+    bce_vhci_device_t devid;
-+    u8 endp;
-+    struct bce_vhci_device *dev;
-+    if (msg->cmd & 0x8000) {
-+        bce_vhci_command_queue_deliver_completion(&q->vhci->cq, msg);
-+    } else if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST || msg->cmd == BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS) {
-+        devid = (bce_vhci_device_t) (msg->param1 & 0xff);
-+        endp = bce_vhci_endpoint_index((u8) ((msg->param1 >> 8) & 0xff));
-+        dev = q->vhci->devices[devid];
-+        if (!dev || (dev->tq_mask & BIT(endp)) == 0) {
-+            pr_err("bce-vhci: Didn't find destination for transfer queue event\n");
-+            return;
-+        }
-+        bce_vhci_transfer_queue_event(&dev->tq[endp], msg);
-+    } else {
-+        pr_warn("bce-vhci: Unhandled USB event: %x s=%x p1=%x p2=%llx\n",
-+                msg->cmd, msg->status, msg->param1, msg->param2);
-+    }
-+}
-+
-+
-+
-+static const struct hc_driver bce_vhci_driver = {
-+        .description = "bce-vhci",
-+        .product_desc = "BCE VHCI Host Controller",
-+        .hcd_priv_size = sizeof(struct bce_vhci *),
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(5,4,0)
-+        .flags = HCD_USB2,
-+#else
-+        .flags = HCD_USB2 | HCD_DMA,
-+#endif
-+
-+        .start = bce_vhci_start,
-+        .stop = bce_vhci_stop,
-+        .hub_status_data = bce_vhci_hub_status_data,
-+        .hub_control = bce_vhci_hub_control,
-+        .urb_enqueue = bce_vhci_urb_enqueue,
-+        .urb_dequeue = bce_vhci_urb_dequeue,
-+        .enable_device = bce_vhci_enable_device,
-+        .free_dev = bce_vhci_free_device,
-+        .address_device = bce_vhci_address_device,
-+        .add_endpoint = bce_vhci_add_endpoint,
-+        .drop_endpoint = bce_vhci_drop_endpoint,
-+        .endpoint_reset = bce_vhci_endpoint_reset,
-+        .check_bandwidth = bce_vhci_check_bandwidth,
-+        .get_frame_number = bce_vhci_get_frame_number,
-+        .bus_suspend = bce_vhci_bus_suspend,
-+        .bus_resume = bce_vhci_bus_resume
-+};
-+
-+
-+int __init bce_vhci_module_init(void)
-+{
-+    int result;
-+    if ((result = alloc_chrdev_region(&bce_vhci_chrdev, 0, 1, "bce-vhci")))
-+        goto fail_chrdev;
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0)
-+    bce_vhci_class = class_create(THIS_MODULE, "bce-vhci");
-+#else
-+    bce_vhci_class = class_create("bce-vhci");
-+#endif
-+    if (IS_ERR(bce_vhci_class)) {
-+        result = PTR_ERR(bce_vhci_class);
-+        goto fail_class;
-+    }
-+    return 0;
-+
-+fail_class:
-+    class_destroy(bce_vhci_class);
-+fail_chrdev:
-+    unregister_chrdev_region(bce_vhci_chrdev, 1);
-+    if (!result)
-+        result = -EINVAL;
-+    return result;
-+}
-+void __exit bce_vhci_module_exit(void)
-+{
-+    class_destroy(bce_vhci_class);
-+    unregister_chrdev_region(bce_vhci_chrdev, 1);
-+}
-+
-+module_param_named(vhci_port_mask, bce_vhci_port_mask, ushort, 0444);
-+MODULE_PARM_DESC(vhci_port_mask, "Specifies which VHCI ports are enabled");
-diff --git a/drivers/staging/apple-bce/vhci/vhci.h b/drivers/staging/apple-bce/vhci/vhci.h
-new file mode 100644
-index 000000000000..6c2e22622f4c
---- /dev/null
-+++ b/drivers/staging/apple-bce/vhci/vhci.h
-@@ -0,0 +1,52 @@
-+#ifndef BCE_VHCI_H
-+#define BCE_VHCI_H
-+
-+#include "queue.h"
-+#include "transfer.h"
-+
-+struct usb_hcd;
-+struct bce_queue_cq;
-+
-+struct bce_vhci_device {
-+    struct bce_vhci_transfer_queue tq[32];
-+    u32 tq_mask;
-+};
-+struct bce_vhci {
-+    struct apple_bce_device *dev;
-+    dev_t vdevt;
-+    struct device *vdev;
-+    struct usb_hcd *hcd;
-+    struct spinlock hcd_spinlock;
-+    struct bce_vhci_message_queue msg_commands;
-+    struct bce_vhci_message_queue msg_system;
-+    struct bce_vhci_message_queue msg_isochronous;
-+    struct bce_vhci_message_queue msg_interrupt;
-+    struct bce_vhci_message_queue msg_asynchronous;
-+    struct spinlock msg_asynchronous_lock;
-+    struct bce_vhci_command_queue cq;
-+    struct bce_queue_cq *ev_cq;
-+    struct bce_vhci_event_queue ev_commands;
-+    struct bce_vhci_event_queue ev_system;
-+    struct bce_vhci_event_queue ev_isochronous;
-+    struct bce_vhci_event_queue ev_interrupt;
-+    struct bce_vhci_event_queue ev_asynchronous;
-+    u16 port_mask;
-+    u8 port_count;
-+    u16 port_power_mask;
-+    bce_vhci_device_t port_to_device[16];
-+    struct bce_vhci_device *devices[16];
-+    struct workqueue_struct *tq_state_wq;
-+    struct work_struct w_fw_events;
-+};
-+
-+int __init bce_vhci_module_init(void);
-+void __exit bce_vhci_module_exit(void);
-+
-+int bce_vhci_create(struct apple_bce_device *dev, struct bce_vhci *vhci);
-+void bce_vhci_destroy(struct bce_vhci *vhci);
-+int bce_vhci_start(struct usb_hcd *hcd);
-+void bce_vhci_stop(struct usb_hcd *hcd);
-+
-+struct bce_vhci *bce_vhci_from_hcd(struct usb_hcd *hcd);
-+
-+#endif //BCE_VHCI_H
-diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
-index e02ba15f6e34..b35734d03109 100644
---- a/drivers/usb/core/driver.c
-+++ b/drivers/usb/core/driver.c
-@@ -517,6 +517,19 @@ static int usb_unbind_interface(struct device *dev)
- 	return 0;
- }
- 
-+static void usb_shutdown_interface(struct device *dev)
-+{
-+	struct usb_interface *intf = to_usb_interface(dev);
-+	struct usb_driver *driver;
-+
-+	if (!dev->driver)
-+		return;
-+
-+	driver = to_usb_driver(dev->driver);
-+	if (driver->shutdown)
-+		driver->shutdown(intf);
-+}
-+
- /**
-  * usb_driver_claim_interface - bind a driver to an interface
-  * @driver: the driver to be bound
-@@ -1059,6 +1072,7 @@ int usb_register_driver(struct usb_driver *new_driver, struct module *owner,
- 	new_driver->driver.bus = &usb_bus_type;
- 	new_driver->driver.probe = usb_probe_interface;
- 	new_driver->driver.remove = usb_unbind_interface;
-+	new_driver->driver.shutdown = usb_shutdown_interface;
- 	new_driver->driver.owner = owner;
- 	new_driver->driver.mod_name = mod_name;
- 	new_driver->driver.dev_groups = new_driver->dev_groups;
-diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
-index b610a2de4ae5..0cdbcf82554f 100644
---- a/drivers/usb/storage/uas.c
-+++ b/drivers/usb/storage/uas.c
-@@ -1232,9 +1232,8 @@ static void uas_disconnect(struct usb_interface *intf)
-  * hang on reboot when the device is still in uas mode. Note the reset is
-  * necessary as some devices won't revert to usb-storage mode without it.
-  */
--static void uas_shutdown(struct device *dev)
-+static void uas_shutdown(struct usb_interface *intf)
- {
--	struct usb_interface *intf = to_usb_interface(dev);
- 	struct usb_device *udev = interface_to_usbdev(intf);
- 	struct Scsi_Host *shost = usb_get_intfdata(intf);
- 	struct uas_dev_info *devinfo = (struct uas_dev_info *)shost->hostdata;
-@@ -1257,7 +1256,7 @@ static struct usb_driver uas_driver = {
- 	.suspend = uas_suspend,
- 	.resume = uas_resume,
- 	.reset_resume = uas_reset_resume,
--	.driver.shutdown = uas_shutdown,
-+	.shutdown = uas_shutdown,
- 	.id_table = uas_usb_ids,
- };
- 
-diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
-index 428d81afe215..aa1604d92c1a 100644
---- a/include/drm/drm_format_helper.h
-+++ b/include/drm/drm_format_helper.h
-@@ -96,6 +96,9 @@ void drm_fb_xrgb8888_to_rgba5551(struct iosys_map *dst, const unsigned int *dst_
- void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pitch,
- 			       const struct iosys_map *src, const struct drm_framebuffer *fb,
- 			       const struct drm_rect *clip, struct drm_format_conv_state *state);
-+void drm_fb_xrgb8888_to_bgr888(struct iosys_map *dst, const unsigned int *dst_pitch,
-+			       const struct iosys_map *src, const struct drm_framebuffer *fb,
-+			       const struct drm_rect *clip, struct drm_format_conv_state *state);
- void drm_fb_xrgb8888_to_argb8888(struct iosys_map *dst, const unsigned int *dst_pitch,
- 				 const struct iosys_map *src, const struct drm_framebuffer *fb,
- 				 const struct drm_rect *clip, struct drm_format_conv_state *state);
-diff --git a/include/linux/efi.h b/include/linux/efi.h
-index 418e555459da..e28873eb19ed 100644
---- a/include/linux/efi.h
-+++ b/include/linux/efi.h
-@@ -385,6 +385,7 @@ void efi_native_runtime_setup(void);
- #define EFI_MEMORY_ATTRIBUTES_TABLE_GUID	EFI_GUID(0xdcfa911d, 0x26eb, 0x469f,  0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20)
- #define EFI_CONSOLE_OUT_DEVICE_GUID		EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4,  0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
- #define APPLE_PROPERTIES_PROTOCOL_GUID		EFI_GUID(0x91bd12fe, 0xf6c3, 0x44fb,  0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0)
-+#define APPLE_SET_OS_PROTOCOL_GUID		EFI_GUID(0xc5c5da95, 0x7d5c, 0x45e6,  0xb2, 0xf1, 0x3f, 0xd5, 0x2b, 0xb1, 0x00, 0x77)
- #define EFI_TCG2_PROTOCOL_GUID			EFI_GUID(0x607f766c, 0x7455, 0x42be,  0x93, 0x0b, 0xe4, 0xd7, 0x6d, 0xb2, 0x72, 0x0f)
- #define EFI_TCG2_FINAL_EVENTS_TABLE_GUID	EFI_GUID(0x1e2ed096, 0x30e2, 0x4254,  0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25)
- #define EFI_LOAD_FILE_PROTOCOL_GUID		EFI_GUID(0x56ec3091, 0x954c, 0x11d2,  0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b)
-diff --git a/include/linux/hid.h b/include/linux/hid.h
-index 8e06d89698e6..6cdb5a451453 100644
---- a/include/linux/hid.h
-+++ b/include/linux/hid.h
-@@ -940,6 +940,8 @@ extern void hidinput_report_event(struct hid_device *hid, struct hid_report *rep
- extern int hidinput_connect(struct hid_device *hid, unsigned int force);
- extern void hidinput_disconnect(struct hid_device *);
- 
-+struct hid_field *hid_find_field(struct hid_device *hdev, unsigned int report_type,
-+				 unsigned int application, unsigned int usage);
- int hid_set_field(struct hid_field *, unsigned, __s32);
- int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size,
- 		     int interrupt);
-diff --git a/include/linux/usb.h b/include/linux/usb.h
-index 1913a13833f2..832997a9da0a 100644
---- a/include/linux/usb.h
-+++ b/include/linux/usb.h
-@@ -1171,6 +1171,7 @@ extern ssize_t usb_show_dynids(struct usb_dynids *dynids, char *buf);
-  *	post_reset method is called.
-  * @post_reset: Called by usb_reset_device() after the device
-  *	has been reset
-+ * @shutdown: Called at shut-down time to quiesce the device.
-  * @id_table: USB drivers use ID table to support hotplugging.
-  *	Export this with MODULE_DEVICE_TABLE(usb,...).  This must be set
-  *	or your driver's probe function will never get called.
-@@ -1222,6 +1223,8 @@ struct usb_driver {
- 	int (*pre_reset)(struct usb_interface *intf);
- 	int (*post_reset)(struct usb_interface *intf);
- 
-+	void (*shutdown)(struct usb_interface *intf);
-+
- 	const struct usb_device_id *id_table;
- 	const struct attribute_group **dev_groups;
- 
-diff --git a/lib/test_printf.c b/lib/test_printf.c
-index 69b6a5e177f2..a318bb72a165 100644
---- a/lib/test_printf.c
-+++ b/lib/test_printf.c
-@@ -745,18 +745,26 @@ static void __init fwnode_pointer(void)
- static void __init fourcc_pointer(void)
- {
- 	struct {
-+		char type;
- 		u32 code;
- 		char *str;
- 	} const try[] = {
--		{ 0x3231564e, "NV12 little-endian (0x3231564e)", },
--		{ 0xb231564e, "NV12 big-endian (0xb231564e)", },
--		{ 0x10111213, ".... little-endian (0x10111213)", },
--		{ 0x20303159, "Y10  little-endian (0x20303159)", },
-+		{ 'c', 0x3231564e, "NV12 little-endian (0x3231564e)", },
-+		{ 'c', 0xb231564e, "NV12 big-endian (0xb231564e)", },
-+		{ 'c', 0x10111213, ".... little-endian (0x10111213)", },
-+		{ 'c', 0x20303159, "Y10  little-endian (0x20303159)", },
-+		{ 'h', 0x67503030, "gP00 (0x67503030)", },
-+		{ 'r', 0x30305067, "gP00 (0x67503030)", },
-+		{ 'l', cpu_to_le32(0x67503030), "gP00 (0x67503030)", },
-+		{ 'b', cpu_to_be32(0x67503030), "gP00 (0x67503030)", },
- 	};
- 	unsigned int i;
- 
--	for (i = 0; i < ARRAY_SIZE(try); i++)
--		test(try[i].str, "%p4cc", &try[i].code);
-+	for (i = 0; i < ARRAY_SIZE(try); i++) {
-+		char fmt[] = { '%', 'p', '4', 'c', try[i].type, '\0' };
-+
-+		test(try[i].str, fmt, &try[i].code);
-+	}
- }
- 
- static void __init
-diff --git a/lib/vsprintf.c b/lib/vsprintf.c
-index cdd4e2314bfc..4feaea1815fa 100644
---- a/lib/vsprintf.c
-+++ b/lib/vsprintf.c
-@@ -1760,27 +1760,50 @@ char *fourcc_string(char *buf, char *end, const u32 *fourcc,
- 	char output[sizeof("0123 little-endian (0x01234567)")];
- 	char *p = output;
- 	unsigned int i;
-+	bool pix_fmt = false;
- 	u32 orig, val;
- 
--	if (fmt[1] != 'c' || fmt[2] != 'c')
-+	if (fmt[1] != 'c')
- 		return error_string(buf, end, "(%p4?)", spec);
- 
- 	if (check_pointer(&buf, end, fourcc, spec))
- 		return buf;
- 
- 	orig = get_unaligned(fourcc);
--	val = orig & ~BIT(31);
-+	switch (fmt[2]) {
-+	case 'h':
-+		val = orig;
-+		break;
-+	case 'r':
-+		val = orig = swab32(orig);
-+		break;
-+	case 'l':
-+		val = orig = le32_to_cpu(orig);
-+		break;
-+	case 'b':
-+		val = orig = be32_to_cpu(orig);
-+		break;
-+	case 'c':
-+		/* Pixel formats are printed LSB-first */
-+		val = swab32(orig & ~BIT(31));
-+		pix_fmt = true;
-+		break;
-+	default:
-+		return error_string(buf, end, "(%p4?)", spec);
-+	}
- 
- 	for (i = 0; i < sizeof(u32); i++) {
--		unsigned char c = val >> (i * 8);
-+		unsigned char c = val >> ((3 - i) * 8);
- 
- 		/* Print non-control ASCII characters as-is, dot otherwise */
- 		*p++ = isascii(c) && isprint(c) ? c : '.';
- 	}
- 
--	*p++ = ' ';
--	strcpy(p, orig & BIT(31) ? "big-endian" : "little-endian");
--	p += strlen(p);
-+	if (pix_fmt) {
-+		*p++ = ' ';
-+		strcpy(p, orig & BIT(31) ? "big-endian" : "little-endian");
-+		p += strlen(p);
-+	}
- 
- 	*p++ = ' ';
- 	*p++ = '(';
-@@ -2355,6 +2378,7 @@ char *rust_fmt_argument(char *buf, char *end, void *ptr);
-  *       read the documentation (path below) first.
-  * - 'NF' For a netdev_features_t
-  * - '4cc' V4L2 or DRM FourCC code, with endianness and raw numerical value.
-+ * - '4c[hlbr]' Generic FourCC code.
-  * - 'h[CDN]' For a variable-length buffer, it prints it as a hex string with
-  *            a certain separator (' ' by default):
-  *              C colon
-diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
-index 2b812210b412..4c3a8cc6ef15 100755
---- a/scripts/checkpatch.pl
-+++ b/scripts/checkpatch.pl
-@@ -6909,7 +6909,7 @@ sub process {
- 					    ($extension eq "f" &&
- 					     defined $qualifier && $qualifier !~ /^w/) ||
- 					    ($extension eq "4" &&
--					     defined $qualifier && $qualifier !~ /^cc/)) {
-+					     defined $qualifier && $qualifier !~ /^c[chlbr]/)) {
- 						$bad_specifier = $specifier;
- 						last;
- 					}
--- 
-2.46.0.rc1
-
-From 5e342e16601d0e39334a97b24dcda3a4f086ad1a Mon Sep 17 00:00:00 2001
-From: Peter Jung <admin@ptr1337.dev>
-Date: Mon, 15 Jul 2024 13:27:23 +0200
-Subject: [PATCH 11/11] zstd
-
-Signed-off-by: Peter Jung <admin@ptr1337.dev>
----
- include/linux/zstd.h                          |    2 +-
- include/linux/zstd_errors.h                   |   23 +-
- include/linux/zstd_lib.h                      |  850 +++++--
- lib/zstd/Makefile                             |    2 +-
- lib/zstd/common/allocations.h                 |   56 +
- lib/zstd/common/bits.h                        |  149 ++
- lib/zstd/common/bitstream.h                   |  127 +-
- lib/zstd/common/compiler.h                    |  134 +-
- lib/zstd/common/cpu.h                         |    3 +-
- lib/zstd/common/debug.c                       |    9 +-
- lib/zstd/common/debug.h                       |   34 +-
- lib/zstd/common/entropy_common.c              |   42 +-
- lib/zstd/common/error_private.c               |   12 +-
- lib/zstd/common/error_private.h               |   84 +-
- lib/zstd/common/fse.h                         |   94 +-
- lib/zstd/common/fse_decompress.c              |  130 +-
- lib/zstd/common/huf.h                         |  237 +-
- lib/zstd/common/mem.h                         |    3 +-
- lib/zstd/common/portability_macros.h          |   28 +-
- lib/zstd/common/zstd_common.c                 |   38 +-
- lib/zstd/common/zstd_deps.h                   |   16 +-
- lib/zstd/common/zstd_internal.h               |  109 +-
- lib/zstd/compress/clevels.h                   |    3 +-
- lib/zstd/compress/fse_compress.c              |   74 +-
- lib/zstd/compress/hist.c                      |    3 +-
- lib/zstd/compress/hist.h                      |    3 +-
- lib/zstd/compress/huf_compress.c              |  441 ++--
- lib/zstd/compress/zstd_compress.c             | 2111 ++++++++++++-----
- lib/zstd/compress/zstd_compress_internal.h    |  359 ++-
- lib/zstd/compress/zstd_compress_literals.c    |  155 +-
- lib/zstd/compress/zstd_compress_literals.h    |   25 +-
- lib/zstd/compress/zstd_compress_sequences.c   |    7 +-
- lib/zstd/compress/zstd_compress_sequences.h   |    3 +-
- lib/zstd/compress/zstd_compress_superblock.c  |  376 ++-
- lib/zstd/compress/zstd_compress_superblock.h  |    3 +-
- lib/zstd/compress/zstd_cwksp.h                |  169 +-
- lib/zstd/compress/zstd_double_fast.c          |  143 +-
- lib/zstd/compress/zstd_double_fast.h          |   17 +-
- lib/zstd/compress/zstd_fast.c                 |  596 +++--
- lib/zstd/compress/zstd_fast.h                 |    6 +-
- lib/zstd/compress/zstd_lazy.c                 |  732 +++---
- lib/zstd/compress/zstd_lazy.h                 |  138 +-
- lib/zstd/compress/zstd_ldm.c                  |   21 +-
- lib/zstd/compress/zstd_ldm.h                  |    3 +-
- lib/zstd/compress/zstd_ldm_geartab.h          |    3 +-
- lib/zstd/compress/zstd_opt.c                  |  497 ++--
- lib/zstd/compress/zstd_opt.h                  |   41 +-
- lib/zstd/decompress/huf_decompress.c          |  887 ++++---
- lib/zstd/decompress/zstd_ddict.c              |    9 +-
- lib/zstd/decompress/zstd_ddict.h              |    3 +-
- lib/zstd/decompress/zstd_decompress.c         |  358 ++-
- lib/zstd/decompress/zstd_decompress_block.c   |  708 +++---
- lib/zstd/decompress/zstd_decompress_block.h   |   10 +-
- .../decompress/zstd_decompress_internal.h     |    9 +-
- lib/zstd/decompress_sources.h                 |    2 +-
- lib/zstd/zstd_common_module.c                 |    5 +-
- lib/zstd/zstd_compress_module.c               |    2 +-
- lib/zstd/zstd_decompress_module.c             |    4 +-
- 58 files changed, 6577 insertions(+), 3531 deletions(-)
- create mode 100644 lib/zstd/common/allocations.h
- create mode 100644 lib/zstd/common/bits.h
-
-diff --git a/include/linux/zstd.h b/include/linux/zstd.h
-index 113408eef6ec..f109d49f43f8 100644
---- a/include/linux/zstd.h
-+++ b/include/linux/zstd.h
-@@ -1,6 +1,6 @@
- /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-diff --git a/include/linux/zstd_errors.h b/include/linux/zstd_errors.h
-index 58b6dd45a969..6d5cf55f0bf3 100644
---- a/include/linux/zstd_errors.h
-+++ b/include/linux/zstd_errors.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -17,8 +18,17 @@
- 
- 
- /* =====   ZSTDERRORLIB_API : control library symbols visibility   ===== */
--#define ZSTDERRORLIB_VISIBILITY 
--#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
-+#define ZSTDERRORLIB_VISIBLE 
-+
-+#ifndef ZSTDERRORLIB_HIDDEN
-+#  if (__GNUC__ >= 4) && !defined(__MINGW32__)
-+#    define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden")))
-+#  else
-+#    define ZSTDERRORLIB_HIDDEN
-+#  endif
-+#endif
-+
-+#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE
- 
- /*-*********************************************
-  *  Error codes list
-@@ -43,14 +53,17 @@ typedef enum {
-   ZSTD_error_frameParameter_windowTooLarge = 16,
-   ZSTD_error_corruption_detected = 20,
-   ZSTD_error_checksum_wrong      = 22,
-+  ZSTD_error_literals_headerWrong = 24,
-   ZSTD_error_dictionary_corrupted      = 30,
-   ZSTD_error_dictionary_wrong          = 32,
-   ZSTD_error_dictionaryCreation_failed = 34,
-   ZSTD_error_parameter_unsupported   = 40,
-+  ZSTD_error_parameter_combination_unsupported = 41,
-   ZSTD_error_parameter_outOfBound    = 42,
-   ZSTD_error_tableLog_tooLarge       = 44,
-   ZSTD_error_maxSymbolValue_tooLarge = 46,
-   ZSTD_error_maxSymbolValue_tooSmall = 48,
-+  ZSTD_error_stabilityCondition_notRespected = 50,
-   ZSTD_error_stage_wrong       = 60,
-   ZSTD_error_init_missing      = 62,
-   ZSTD_error_memory_allocation = 64,
-@@ -58,11 +71,15 @@ typedef enum {
-   ZSTD_error_dstSize_tooSmall = 70,
-   ZSTD_error_srcSize_wrong    = 72,
-   ZSTD_error_dstBuffer_null   = 74,
-+  ZSTD_error_noForwardProgress_destFull = 80,
-+  ZSTD_error_noForwardProgress_inputEmpty = 82,
-   /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
-   ZSTD_error_frameIndex_tooLarge = 100,
-   ZSTD_error_seekableIO          = 102,
-   ZSTD_error_dstBuffer_wrong     = 104,
-   ZSTD_error_srcBuffer_wrong     = 105,
-+  ZSTD_error_sequenceProducer_failed = 106,
-+  ZSTD_error_externalSequences_invalid = 107,
-   ZSTD_error_maxCode = 120  /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
- } ZSTD_ErrorCode;
- 
-diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h
-index 79d55465d5c1..6320fedcf8a4 100644
---- a/include/linux/zstd_lib.h
-+++ b/include/linux/zstd_lib.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -11,23 +12,42 @@
- #ifndef ZSTD_H_235446
- #define ZSTD_H_235446
- 
--/* ======   Dependency   ======*/
-+/* ======   Dependencies   ======*/
- #include <linux/limits.h>   /* INT_MAX */
- #include <linux/types.h>   /* size_t */
- 
- 
- /* =====   ZSTDLIB_API : control library symbols visibility   ===== */
--#ifndef ZSTDLIB_VISIBLE
-+#define ZSTDLIB_VISIBLE 
-+
-+#ifndef ZSTDLIB_HIDDEN
- #  if (__GNUC__ >= 4) && !defined(__MINGW32__)
--#    define ZSTDLIB_VISIBLE __attribute__ ((visibility ("default")))
- #    define ZSTDLIB_HIDDEN __attribute__ ((visibility ("hidden")))
- #  else
--#    define ZSTDLIB_VISIBLE
- #    define ZSTDLIB_HIDDEN
- #  endif
- #endif
-+
- #define ZSTDLIB_API ZSTDLIB_VISIBLE
- 
-+/* Deprecation warnings :
-+ * Should these warnings be a problem, it is generally possible to disable them,
-+ * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual.
-+ * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS.
-+ */
-+#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS
-+#  define ZSTD_DEPRECATED(message) /* disable deprecation warnings */
-+#else
-+#  if (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__)
-+#    define ZSTD_DEPRECATED(message) __attribute__((deprecated(message)))
-+#  elif (__GNUC__ >= 3)
-+#    define ZSTD_DEPRECATED(message) __attribute__((deprecated))
-+#  else
-+#    pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler")
-+#    define ZSTD_DEPRECATED(message)
-+#  endif
-+#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */
-+
- 
- /* *****************************************************************************
-   Introduction
-@@ -65,7 +85,7 @@
- /*------   Version   ------*/
- #define ZSTD_VERSION_MAJOR    1
- #define ZSTD_VERSION_MINOR    5
--#define ZSTD_VERSION_RELEASE  2
-+#define ZSTD_VERSION_RELEASE  6
- #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
- 
- /*! ZSTD_versionNumber() :
-@@ -107,7 +127,8 @@ ZSTDLIB_API const char* ZSTD_versionString(void);
- ***************************************/
- /*! ZSTD_compress() :
-  *  Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
-- *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
-+ *  NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have
-+ *        enough space to successfully compress the data.
-  *  @return : compressed size written into `dst` (<= `dstCapacity),
-  *            or an error code if it fails (which can be tested using ZSTD_isError()). */
- ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
-@@ -156,7 +177,9 @@ ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t
-  *  "empty", "unknown" and "error" results to the same return value (0),
-  *  while ZSTD_getFrameContentSize() gives them separate return values.
-  * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
--ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
-+ZSTD_DEPRECATED("Replaced by ZSTD_getFrameContentSize")
-+ZSTDLIB_API
-+unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
- 
- /*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+
-  * `src` should point to the start of a ZSTD frame or skippable frame.
-@@ -168,8 +191,30 @@ ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
- 
- 
- /*======  Helper functions  ======*/
--#define ZSTD_COMPRESSBOUND(srcSize)   ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
--ZSTDLIB_API size_t      ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
-+/* ZSTD_compressBound() :
-+ * maximum compressed size in worst case single-pass scenario.
-+ * When invoking `ZSTD_compress()` or any other one-pass compression function,
-+ * it's recommended to provide @dstCapacity >= ZSTD_compressBound(srcSize)
-+ * as it eliminates one potential failure scenario,
-+ * aka not enough room in dst buffer to write the compressed frame.
-+ * Note : ZSTD_compressBound() itself can fail, if @srcSize > ZSTD_MAX_INPUT_SIZE .
-+ *        In which case, ZSTD_compressBound() will return an error code
-+ *        which can be tested using ZSTD_isError().
-+ *
-+ * ZSTD_COMPRESSBOUND() :
-+ * same as ZSTD_compressBound(), but as a macro.
-+ * It can be used to produce constants, which can be useful for static allocation,
-+ * for example to size a static array on stack.
-+ * Will produce constant value 0 if srcSize too large.
-+ */
-+#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U)
-+#define ZSTD_COMPRESSBOUND(srcSize)   (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
-+ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
-+/* ZSTD_isError() :
-+ * Most ZSTD_* functions returning a size_t value can be tested for error,
-+ * using ZSTD_isError().
-+ * @return 1 if error, 0 otherwise
-+ */
- ZSTDLIB_API unsigned    ZSTD_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
- ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
- ZSTDLIB_API int         ZSTD_minCLevel(void);               /*!< minimum negative compression level allowed, requires v1.4.0+ */
-@@ -183,7 +228,7 @@ ZSTDLIB_API int         ZSTD_defaultCLevel(void);           /*!< default compres
- /*= Compression context
-  *  When compressing many times,
-  *  it is recommended to allocate a context just once,
-- *  and re-use it for each successive compression operation.
-+ *  and reuse it for each successive compression operation.
-  *  This will make workload friendlier for system's memory.
-  *  Note : re-using context is just a speed / resource optimization.
-  *         It doesn't change the compression ratio, which remains identical.
-@@ -196,9 +241,9 @@ ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);  /* accept NULL pointer *
- 
- /*! ZSTD_compressCCtx() :
-  *  Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
-- *  Important : in order to behave similarly to `ZSTD_compress()`,
-- *  this function compresses at requested compression level,
-- *  __ignoring any other parameter__ .
-+ *  Important : in order to mirror `ZSTD_compress()` behavior,
-+ *  this function compresses at the requested compression level,
-+ *  __ignoring any other advanced parameter__ .
-  *  If any advanced parameter was set using the advanced API,
-  *  they will all be reset. Only `compressionLevel` remains.
-  */
-@@ -210,7 +255,7 @@ ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
- /*= Decompression context
-  *  When decompressing many times,
-  *  it is recommended to allocate a context only once,
-- *  and re-use it for each successive compression operation.
-+ *  and reuse it for each successive compression operation.
-  *  This will make workload friendlier for system's memory.
-  *  Use one context per thread for parallel execution. */
- typedef struct ZSTD_DCtx_s ZSTD_DCtx;
-@@ -220,7 +265,7 @@ ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);  /* accept NULL pointer *
- /*! ZSTD_decompressDCtx() :
-  *  Same as ZSTD_decompress(),
-  *  requires an allocated ZSTD_DCtx.
-- *  Compatible with sticky parameters.
-+ *  Compatible with sticky parameters (see below).
-  */
- ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
-                                        void* dst, size_t dstCapacity,
-@@ -236,12 +281,12 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
-  *   using ZSTD_CCtx_set*() functions.
-  *   Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
-  *   "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
-- *   __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
-+ *   __They do not apply to one-shot variants such as ZSTD_compressCCtx()__ .
-  *
-  *   It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
-  *
-  *   This API supersedes all other "advanced" API entry points in the experimental section.
-- *   In the future, we expect to remove from experimental API entry points which are redundant with this API.
-+ *   In the future, we expect to remove API entry points from experimental which are redundant with this API.
-  */
- 
- 
-@@ -324,6 +369,19 @@ typedef enum {
-                               * The higher the value of selected strategy, the more complex it is,
-                               * resulting in stronger and slower compression.
-                               * Special: value 0 means "use default strategy". */
-+
-+    ZSTD_c_targetCBlockSize=130, /* v1.5.6+
-+                                  * Attempts to fit compressed block size into approximatively targetCBlockSize.
-+                                  * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX.
-+                                  * Note that it's not a guarantee, just a convergence target (default:0).
-+                                  * No target when targetCBlockSize == 0.
-+                                  * This is helpful in low bandwidth streaming environments to improve end-to-end latency,
-+                                  * when a client can make use of partial documents (a prominent example being Chrome).
-+                                  * Note: this parameter is stable since v1.5.6.
-+                                  * It was present as an experimental parameter in earlier versions,
-+                                  * but it's not recommended using it with earlier library versions
-+                                  * due to massive performance regressions.
-+                                  */
-     /* LDM mode parameters */
-     ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
-                                      * This parameter is designed to improve compression ratio
-@@ -403,7 +461,6 @@ typedef enum {
-      * ZSTD_c_forceMaxWindow
-      * ZSTD_c_forceAttachDict
-      * ZSTD_c_literalCompressionMode
--     * ZSTD_c_targetCBlockSize
-      * ZSTD_c_srcSizeHint
-      * ZSTD_c_enableDedicatedDictSearch
-      * ZSTD_c_stableInBuffer
-@@ -412,6 +469,9 @@ typedef enum {
-      * ZSTD_c_validateSequences
-      * ZSTD_c_useBlockSplitter
-      * ZSTD_c_useRowMatchFinder
-+     * ZSTD_c_prefetchCDictTables
-+     * ZSTD_c_enableSeqProducerFallback
-+     * ZSTD_c_maxBlockSize
-      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
-      * note : never ever use experimentalParam? names directly;
-      *        also, the enums values themselves are unstable and can still change.
-@@ -421,7 +481,7 @@ typedef enum {
-      ZSTD_c_experimentalParam3=1000,
-      ZSTD_c_experimentalParam4=1001,
-      ZSTD_c_experimentalParam5=1002,
--     ZSTD_c_experimentalParam6=1003,
-+     /* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */
-      ZSTD_c_experimentalParam7=1004,
-      ZSTD_c_experimentalParam8=1005,
-      ZSTD_c_experimentalParam9=1006,
-@@ -430,7 +490,11 @@ typedef enum {
-      ZSTD_c_experimentalParam12=1009,
-      ZSTD_c_experimentalParam13=1010,
-      ZSTD_c_experimentalParam14=1011,
--     ZSTD_c_experimentalParam15=1012
-+     ZSTD_c_experimentalParam15=1012,
-+     ZSTD_c_experimentalParam16=1013,
-+     ZSTD_c_experimentalParam17=1014,
-+     ZSTD_c_experimentalParam18=1015,
-+     ZSTD_c_experimentalParam19=1016
- } ZSTD_cParameter;
- 
- typedef struct {
-@@ -493,7 +557,7 @@ typedef enum {
-  *                  They will be used to compress next frame.
-  *                  Resetting session never fails.
-  *  - The parameters : changes all parameters back to "default".
-- *                  This removes any reference to any dictionary too.
-+ *                  This also removes any reference to any dictionary or external sequence producer.
-  *                  Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
-  *                  otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
-  *  - Both : similar to resetting the session, followed by resetting parameters.
-@@ -502,11 +566,13 @@ ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
- 
- /*! ZSTD_compress2() :
-  *  Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
-+ *  (note that this entry point doesn't even expose a compression level parameter).
-  *  ZSTD_compress2() always starts a new frame.
-  *  Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
-  *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
-  *  - The function is always blocking, returns when compression is completed.
-- *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
-+ *  NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have
-+ *        enough space to successfully compress the data, though it is possible it fails for other reasons.
-  * @return : compressed size written into `dst` (<= `dstCapacity),
-  *           or an error code if it fails (which can be tested using ZSTD_isError()).
-  */
-@@ -543,13 +609,17 @@ typedef enum {
-      * ZSTD_d_stableOutBuffer
-      * ZSTD_d_forceIgnoreChecksum
-      * ZSTD_d_refMultipleDDicts
-+     * ZSTD_d_disableHuffmanAssembly
-+     * ZSTD_d_maxBlockSize
-      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
-      * note : never ever use experimentalParam? names directly
-      */
-      ZSTD_d_experimentalParam1=1000,
-      ZSTD_d_experimentalParam2=1001,
-      ZSTD_d_experimentalParam3=1002,
--     ZSTD_d_experimentalParam4=1003
-+     ZSTD_d_experimentalParam4=1003,
-+     ZSTD_d_experimentalParam5=1004,
-+     ZSTD_d_experimentalParam6=1005
- 
- } ZSTD_dParameter;
- 
-@@ -604,14 +674,14 @@ typedef struct ZSTD_outBuffer_s {
- *  A ZSTD_CStream object is required to track streaming operation.
- *  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
- *  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
--*  It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
-+*  It is recommended to reuse ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
- *
- *  For parallel execution, use one separate ZSTD_CStream per thread.
- *
- *  note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
- *
- *  Parameters are sticky : when starting a new compression on the same context,
--*  it will re-use the same sticky parameters as previous compression session.
-+*  it will reuse the same sticky parameters as previous compression session.
- *  When in doubt, it's recommended to fully initialize the context before usage.
- *  Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
- *  ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
-@@ -700,6 +770,11 @@ typedef enum {
-  *            only ZSTD_e_end or ZSTD_e_flush operations are allowed.
-  *            Before starting a new compression job, or changing compression parameters,
-  *            it is required to fully flush internal buffers.
-+ *  - note: if an operation ends with an error, it may leave @cctx in an undefined state.
-+ *          Therefore, it's UB to invoke ZSTD_compressStream2() of ZSTD_compressStream() on such a state.
-+ *          In order to be re-employed after an error, a state must be reset,
-+ *          which can be done explicitly (ZSTD_CCtx_reset()),
-+ *          or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx())
-  */
- ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
-                                          ZSTD_outBuffer* output,
-@@ -728,8 +803,6 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /*< recommended size for output
-  * This following is a legacy streaming API, available since v1.0+ .
-  * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
-  * It is redundant, but remains fully supported.
-- * Streaming in combination with advanced parameters and dictionary compression
-- * can only be used through the new API.
-  ******************************************************************************/
- 
- /*!
-@@ -738,6 +811,9 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /*< recommended size for output
-  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
-  *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
-  *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
-+ *
-+ * Note that ZSTD_initCStream() clears any previously set dictionary. Use the new API
-+ * to compress with a dictionary.
-  */
- ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
- /*!
-@@ -758,7 +834,7 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
- *
- *  A ZSTD_DStream object is required to track streaming operations.
- *  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
--*  ZSTD_DStream objects can be re-used multiple times.
-+*  ZSTD_DStream objects can be reused multiple times.
- *
- *  Use ZSTD_initDStream() to start a new decompression operation.
- * @return : recommended first input size
-@@ -788,13 +864,37 @@ ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);  /* accept NULL pointer
- 
- /*===== Streaming decompression functions =====*/
- 
--/* This function is redundant with the advanced API and equivalent to:
-+/*! ZSTD_initDStream() :
-+ * Initialize/reset DStream state for new decompression operation.
-+ * Call before new decompression operation using same DStream.
-  *
-+ * Note : This function is redundant with the advanced API and equivalent to:
-  *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
-  *     ZSTD_DCtx_refDDict(zds, NULL);
-  */
- ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
- 
-+/*! ZSTD_decompressStream() :
-+ * Streaming decompression function.
-+ * Call repetitively to consume full input updating it as necessary.
-+ * Function will update both input and output `pos` fields exposing current state via these fields:
-+ * - `input.pos < input.size`, some input remaining and caller should provide remaining input
-+ *   on the next call.
-+ * - `output.pos < output.size`, decoder finished and flushed all remaining buffers.
-+ * - `output.pos == output.size`, potentially uncflushed data present in the internal buffers,
-+ *   call ZSTD_decompressStream() again to flush remaining data to output.
-+ * Note : with no additional input, amount of data flushed <= ZSTD_BLOCKSIZE_MAX.
-+ *
-+ * @return : 0 when a frame is completely decoded and fully flushed,
-+ *           or an error code, which can be tested using ZSTD_isError(),
-+ *           or any other value > 0, which means there is some decoding or flushing to do to complete current frame.
-+ *
-+ * Note: when an operation returns with an error code, the @zds state may be left in undefined state.
-+ *       It's UB to invoke `ZSTD_decompressStream()` on such a state.
-+ *       In order to re-use such a state, it must be first reset,
-+ *       which can be done explicitly (`ZSTD_DCtx_reset()`),
-+ *       or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`)
-+ */
- ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
- 
- ZSTDLIB_API size_t ZSTD_DStreamInSize(void);    /*!< recommended size for input buffer */
-@@ -913,7 +1013,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
-  *  If @return == 0, the dictID could not be decoded.
-  *  This could for one of the following reasons :
-  *  - The frame does not require a dictionary to be decoded (most common case).
-- *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
-+ *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden piece of information.
-  *    Note : this use case also happens when using a non-conformant dictionary.
-  *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
-  *  - This is not a Zstandard frame.
-@@ -925,9 +1025,11 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
-  * Advanced dictionary and prefix API (Requires v1.4.0+)
-  *
-  * This API allows dictionaries to be used with ZSTD_compress2(),
-- * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). Dictionaries are sticky, and
-- * only reset with the context is reset with ZSTD_reset_parameters or
-- * ZSTD_reset_session_and_parameters. Prefixes are single-use.
-+ * ZSTD_compressStream2(), and ZSTD_decompressDCtx().
-+ * Dictionaries are sticky, they remain valid when same context is reused,
-+ * they only reset when the context is reset
-+ * with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters.
-+ * In contrast, Prefixes are single-use.
-  ******************************************************************************/
- 
- 
-@@ -937,8 +1039,9 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
-  * @result : 0, or an error code (which can be tested with ZSTD_isError()).
-  *  Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
-  *           meaning "return to no-dictionary mode".
-- *  Note 1 : Dictionary is sticky, it will be used for all future compressed frames.
-- *           To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters).
-+ *  Note 1 : Dictionary is sticky, it will be used for all future compressed frames,
-+ *           until parameters are reset, a new dictionary is loaded, or the dictionary
-+ *           is explicitly invalidated by loading a NULL dictionary.
-  *  Note 2 : Loading a dictionary involves building tables.
-  *           It's also a CPU consuming operation, with non-negligible impact on latency.
-  *           Tables are dependent on compression parameters, and for this reason,
-@@ -947,11 +1050,15 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
-  *           Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
-  *           In such a case, dictionary buffer must outlive its users.
-  *  Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
-- *           to precisely select how dictionary content must be interpreted. */
-+ *           to precisely select how dictionary content must be interpreted.
-+ *  Note 5 : This method does not benefit from LDM (long distance mode).
-+ *           If you want to employ LDM on some large dictionary content,
-+ *           prefer employing ZSTD_CCtx_refPrefix() described below.
-+ */
- ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
- 
- /*! ZSTD_CCtx_refCDict() : Requires v1.4.0+
-- *  Reference a prepared dictionary, to be used for all next compressed frames.
-+ *  Reference a prepared dictionary, to be used for all future compressed frames.
-  *  Note that compression parameters are enforced from within CDict,
-  *  and supersede any compression parameter previously set within CCtx.
-  *  The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
-@@ -970,6 +1077,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
-  *  Decompression will need same prefix to properly regenerate data.
-  *  Compressing with a prefix is similar in outcome as performing a diff and compressing it,
-  *  but performs much faster, especially during decompression (compression speed is tunable with compression level).
-+ *  This method is compatible with LDM (long distance mode).
-  * @result : 0, or an error code (which can be tested with ZSTD_isError()).
-  *  Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
-  *  Note 1 : Prefix buffer is referenced. It **must** outlive compression.
-@@ -986,9 +1094,9 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
-                                  const void* prefix, size_t prefixSize);
- 
- /*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+
-- *  Create an internal DDict from dict buffer,
-- *  to be used to decompress next frames.
-- *  The dictionary remains valid for all future frames, until explicitly invalidated.
-+ *  Create an internal DDict from dict buffer, to be used to decompress all future frames.
-+ *  The dictionary remains valid for all future frames, until explicitly invalidated, or
-+ *  a new dictionary is loaded.
-  * @result : 0, or an error code (which can be tested with ZSTD_isError()).
-  *  Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
-  *            meaning "return to no-dictionary mode".
-@@ -1012,9 +1120,10 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, s
-  *  The memory for the table is allocated on the first call to refDDict, and can be
-  *  freed with ZSTD_freeDCtx().
-  *
-+ *  If called with ZSTD_d_refMultipleDDicts disabled (the default), only one dictionary
-+ *  will be managed, and referencing a dictionary effectively "discards" any previous one.
-+ *
-  * @result : 0, or an error code (which can be tested with ZSTD_isError()).
-- *  Note 1 : Currently, only one dictionary can be managed.
-- *           Referencing a new dictionary effectively "discards" any previous one.
-  *  Special: referencing a NULL DDict means "return to no-dictionary mode".
-  *  Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
-  */
-@@ -1071,24 +1180,6 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
- #define ZSTDLIB_STATIC_API ZSTDLIB_VISIBLE
- #endif
- 
--/* Deprecation warnings :
-- * Should these warnings be a problem, it is generally possible to disable them,
-- * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual.
-- * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS.
-- */
--#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS
--#  define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API  /* disable deprecation warnings */
--#else
--#  if (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__)
--#    define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated(message)))
--#  elif (__GNUC__ >= 3)
--#    define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated))
--#  else
--#    pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler")
--#    define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API
--#  endif
--#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */
--
- /* **************************************************************************************
-  *   experimental API (static linking only)
-  ****************************************************************************************
-@@ -1123,6 +1214,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
- #define ZSTD_TARGETLENGTH_MIN     0   /* note : comparing this constant to an unsigned results in a tautological test */
- #define ZSTD_STRATEGY_MIN        ZSTD_fast
- #define ZSTD_STRATEGY_MAX        ZSTD_btultra2
-+#define ZSTD_BLOCKSIZE_MAX_MIN (1 << 10) /* The minimum valid max blocksize. Maximum blocksizes smaller than this make compressBound() inaccurate. */
- 
- 
- #define ZSTD_OVERLAPLOG_MIN       0
-@@ -1146,7 +1238,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
- #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
- 
- /* Advanced parameter bounds */
--#define ZSTD_TARGETCBLOCKSIZE_MIN   64
-+#define ZSTD_TARGETCBLOCKSIZE_MIN   1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */
- #define ZSTD_TARGETCBLOCKSIZE_MAX   ZSTD_BLOCKSIZE_MAX
- #define ZSTD_SRCSIZEHINT_MIN        0
- #define ZSTD_SRCSIZEHINT_MAX        INT_MAX
-@@ -1303,7 +1395,7 @@ typedef enum {
- } ZSTD_paramSwitch_e;
- 
- /* *************************************
--*  Frame size functions
-+*  Frame header and size functions
- ***************************************/
- 
- /*! ZSTD_findDecompressedSize() :
-@@ -1350,29 +1442,122 @@ ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size
-  *           or an error code (if srcSize is too small) */
- ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
- 
-+typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
-+typedef struct {
-+    unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
-+    unsigned long long windowSize;       /* can be very large, up to <= frameContentSize */
-+    unsigned blockSizeMax;
-+    ZSTD_frameType_e frameType;          /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
-+    unsigned headerSize;
-+    unsigned dictID;
-+    unsigned checksumFlag;
-+    unsigned _reserved1;
-+    unsigned _reserved2;
-+} ZSTD_frameHeader;
-+
-+/*! ZSTD_getFrameHeader() :
-+ *  decode Frame Header, or requires larger `srcSize`.
-+ * @return : 0, `zfhPtr` is correctly filled,
-+ *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
-+ *           or an error code, which can be tested using ZSTD_isError() */
-+ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   /*< doesn't consume input */
-+/*! ZSTD_getFrameHeader_advanced() :
-+ *  same as ZSTD_getFrameHeader(),
-+ *  with added capability to select a format (like ZSTD_f_zstd1_magicless) */
-+ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
-+
-+/*! ZSTD_decompressionMargin() :
-+ * Zstd supports in-place decompression, where the input and output buffers overlap.
-+ * In this case, the output buffer must be at least (Margin + Output_Size) bytes large,
-+ * and the input buffer must be at the end of the output buffer.
-+ *
-+ *  _______________________ Output Buffer ________________________
-+ * |                                                              |
-+ * |                                        ____ Input Buffer ____|
-+ * |                                       |                      |
-+ * v                                       v                      v
-+ * |---------------------------------------|-----------|----------|
-+ * ^                                                   ^          ^
-+ * |___________________ Output_Size ___________________|_ Margin _|
-+ *
-+ * NOTE: See also ZSTD_DECOMPRESSION_MARGIN().
-+ * NOTE: This applies only to single-pass decompression through ZSTD_decompress() or
-+ * ZSTD_decompressDCtx().
-+ * NOTE: This function supports multi-frame input.
-+ *
-+ * @param src The compressed frame(s)
-+ * @param srcSize The size of the compressed frame(s)
-+ * @returns The decompression margin or an error that can be checked with ZSTD_isError().
-+ */
-+ZSTDLIB_STATIC_API size_t ZSTD_decompressionMargin(const void* src, size_t srcSize);
-+
-+/*! ZSTD_DECOMPRESS_MARGIN() :
-+ * Similar to ZSTD_decompressionMargin(), but instead of computing the margin from
-+ * the compressed frame, compute it from the original size and the blockSizeLog.
-+ * See ZSTD_decompressionMargin() for details.
-+ *
-+ * WARNING: This macro does not support multi-frame input, the input must be a single
-+ * zstd frame. If you need that support use the function, or implement it yourself.
-+ *
-+ * @param originalSize The original uncompressed size of the data.
-+ * @param blockSize    The block size == MIN(windowSize, ZSTD_BLOCKSIZE_MAX).
-+ *                     Unless you explicitly set the windowLog smaller than
-+ *                     ZSTD_BLOCKSIZELOG_MAX you can just use ZSTD_BLOCKSIZE_MAX.
-+ */
-+#define ZSTD_DECOMPRESSION_MARGIN(originalSize, blockSize) ((size_t)(                                              \
-+        ZSTD_FRAMEHEADERSIZE_MAX                                                              /* Frame header */ + \
-+        4                                                                                         /* checksum */ + \
-+        ((originalSize) == 0 ? 0 : 3 * (((originalSize) + (blockSize) - 1) / blockSize)) /* 3 bytes per block */ + \
-+        (blockSize)                                                                    /* One block of margin */   \
-+    ))
-+
- typedef enum {
-   ZSTD_sf_noBlockDelimiters = 0,         /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
-   ZSTD_sf_explicitBlockDelimiters = 1    /* Representation of ZSTD_Sequence contains explicit block delimiters */
- } ZSTD_sequenceFormat_e;
- 
-+/*! ZSTD_sequenceBound() :
-+ * `srcSize` : size of the input buffer
-+ *  @return : upper-bound for the number of sequences that can be generated
-+ *            from a buffer of srcSize bytes
-+ *
-+ *  note : returns number of sequences - to get bytes, multiply by sizeof(ZSTD_Sequence).
-+ */
-+ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize);
-+
- /*! ZSTD_generateSequences() :
-- * Generate sequences using ZSTD_compress2, given a source buffer.
-+ * WARNING: This function is meant for debugging and informational purposes ONLY!
-+ * Its implementation is flawed, and it will be deleted in a future version.
-+ * It is not guaranteed to succeed, as there are several cases where it will give
-+ * up and fail. You should NOT use this function in production code.
-+ *
-+ * This function is deprecated, and will be removed in a future version.
-+ *
-+ * Generate sequences using ZSTD_compress2(), given a source buffer.
-+ *
-+ * @param zc The compression context to be used for ZSTD_compress2(). Set any
-+ *           compression parameters you need on this context.
-+ * @param outSeqs The output sequences buffer of size @p outSeqsSize
-+ * @param outSeqsSize The size of the output sequences buffer.
-+ *                    ZSTD_sequenceBound(srcSize) is an upper bound on the number
-+ *                    of sequences that can be generated.
-+ * @param src The source buffer to generate sequences from of size @p srcSize.
-+ * @param srcSize The size of the source buffer.
-  *
-  * Each block will end with a dummy sequence
-  * with offset == 0, matchLength == 0, and litLength == length of last literals.
-  * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0)
-  * simply acts as a block delimiter.
-  *
-- * zc can be used to insert custom compression params.
-- * This function invokes ZSTD_compress2
-- *
-- * The output of this function can be fed into ZSTD_compressSequences() with CCtx
-- * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
-- * @return : number of sequences generated
-+ * @returns The number of sequences generated, necessarily less than
-+ *          ZSTD_sequenceBound(srcSize), or an error code that can be checked
-+ *          with ZSTD_isError().
-  */
--
--ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
--                                          size_t outSeqsSize, const void* src, size_t srcSize);
-+ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()")
-+ZSTDLIB_STATIC_API size_t
-+ZSTD_generateSequences(ZSTD_CCtx* zc,
-+                       ZSTD_Sequence* outSeqs, size_t outSeqsSize,
-+                       const void* src, size_t srcSize);
- 
- /*! ZSTD_mergeBlockDelimiters() :
-  * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
-@@ -1388,7 +1573,9 @@ ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* o
- ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
- 
- /*! ZSTD_compressSequences() :
-- * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst.
-+ * Compress an array of ZSTD_Sequence, associated with @src buffer, into dst.
-+ * @src contains the entire input (not just the literals).
-+ * If @srcSize > sum(sequence.length), the remaining bytes are considered all literals
-  * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.)
-  * The entire source is compressed into a single frame.
-  *
-@@ -1413,11 +1600,12 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, si
-  * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
-  * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
-  *         and cannot emit an RLE block that disagrees with the repcode history
-- * @return : final compressed size or a ZSTD error.
-+ * @return : final compressed size, or a ZSTD error code.
-  */
--ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
--                                  const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
--                                  const void* src, size_t srcSize);
-+ZSTDLIB_STATIC_API size_t
-+ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize,
-+                        const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
-+                        const void* src, size_t srcSize);
- 
- 
- /*! ZSTD_writeSkippableFrame() :
-@@ -1464,48 +1652,59 @@ ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size);
- /*! ZSTD_estimate*() :
-  *  These functions make it possible to estimate memory usage
-  *  of a future {D,C}Ctx, before its creation.
-+ *  This is useful in combination with ZSTD_initStatic(),
-+ *  which makes it possible to employ a static buffer for ZSTD_CCtx* state.
-  *
-  *  ZSTD_estimateCCtxSize() will provide a memory budget large enough
-- *  for any compression level up to selected one.
-- *  Note : Unlike ZSTD_estimateCStreamSize*(), this estimate
-- *         does not include space for a window buffer.
-- *         Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
-+ *  to compress data of any size using one-shot compression ZSTD_compressCCtx() or ZSTD_compress2()
-+ *  associated with any compression level up to max specified one.
-  *  The estimate will assume the input may be arbitrarily large,
-  *  which is the worst case.
-  *
-+ *  Note that the size estimation is specific for one-shot compression,
-+ *  it is not valid for streaming (see ZSTD_estimateCStreamSize*())
-+ *  nor other potential ways of using a ZSTD_CCtx* state.
-+ *
-  *  When srcSize can be bound by a known and rather "small" value,
-- *  this fact can be used to provide a tighter estimation
-- *  because the CCtx compression context will need less memory.
-- *  This tighter estimation can be provided by more advanced functions
-+ *  this knowledge can be used to provide a tighter budget estimation
-+ *  because the ZSTD_CCtx* state will need less memory for small inputs.
-+ *  This tighter estimation can be provided by employing more advanced functions
-  *  ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
-  *  and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
-  *  Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
-  *
-- *  Note 2 : only single-threaded compression is supported.
-+ *  Note : only single-threaded compression is supported.
-  *  ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
-  */
--ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
-+ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel);
- ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
- ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
- ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void);
- 
- /*! ZSTD_estimateCStreamSize() :
-- *  ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one.
-- *  It will also consider src size to be arbitrarily "large", which is worst case.
-+ *  ZSTD_estimateCStreamSize() will provide a memory budget large enough for streaming compression
-+ *  using any compression level up to the max specified one.
-+ *  It will also consider src size to be arbitrarily "large", which is a worst case scenario.
-  *  If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
-  *  ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
-  *  ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
-  *  Note : CStream size estimation is only correct for single-threaded compression.
-- *  ZSTD_DStream memory budget depends on window Size.
-+ *  ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
-+ *  Note 2 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
-+ *  Size estimates assume that no external sequence producer is registered.
-+ *
-+ *  ZSTD_DStream memory budget depends on frame's window Size.
-  *  This information can be passed manually, using ZSTD_estimateDStreamSize,
-  *  or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
-+ *  Any frame requesting a window size larger than max specified one will be rejected.
-  *  Note : if streaming is init with function ZSTD_init?Stream_usingDict(),
-  *         an internal ?Dict will be created, which additional size is not estimated here.
-- *         In this case, get total size by adding ZSTD_estimate?DictSize */
--ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
-+ *         In this case, get total size by adding ZSTD_estimate?DictSize
-+ */
-+ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int maxCompressionLevel);
- ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
- ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
--ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
-+ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t maxWindowSize);
- ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
- 
- /*! ZSTD_estimate?DictSize() :
-@@ -1649,22 +1848,45 @@ ZSTDLIB_STATIC_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
-  *  This function never fails (wide contract) */
- ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
- 
-+/*! ZSTD_CCtx_setCParams() :
-+ *  Set all parameters provided within @p cparams into the working @p cctx.
-+ *  Note : if modifying parameters during compression (MT mode only),
-+ *         note that changes to the .windowLog parameter will be ignored.
-+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()).
-+ *         On failure, no parameters are updated.
-+ */
-+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams);
-+
-+/*! ZSTD_CCtx_setFParams() :
-+ *  Set all parameters provided within @p fparams into the working @p cctx.
-+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()).
-+ */
-+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams);
-+
-+/*! ZSTD_CCtx_setParams() :
-+ *  Set all parameters provided within @p params into the working @p cctx.
-+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()).
-+ */
-+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params);
-+
- /*! ZSTD_compress_advanced() :
-  *  Note : this function is now DEPRECATED.
-  *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
-  *  This prototype will generate compilation warnings. */
- ZSTD_DEPRECATED("use ZSTD_compress2")
-+ZSTDLIB_STATIC_API
- size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
--                                          void* dst, size_t dstCapacity,
--                                    const void* src, size_t srcSize,
--                                    const void* dict,size_t dictSize,
--                                          ZSTD_parameters params);
-+                              void* dst, size_t dstCapacity,
-+                        const void* src, size_t srcSize,
-+                        const void* dict,size_t dictSize,
-+                              ZSTD_parameters params);
- 
- /*! ZSTD_compress_usingCDict_advanced() :
-  *  Note : this function is now DEPRECATED.
-  *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
-  *  This prototype will generate compilation warnings. */
- ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary")
-+ZSTDLIB_STATIC_API
- size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
-                                               void* dst, size_t dstCapacity,
-                                         const void* src, size_t srcSize,
-@@ -1737,11 +1959,6 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
-  */
- #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
- 
--/* Tries to fit compressed block size to be around targetCBlockSize.
-- * No target when targetCBlockSize == 0.
-- * There is no guarantee on compressed block size (default:0) */
--#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
--
- /* User's best guess of source size.
-  * Hint is not valid when srcSizeHint == 0.
-  * There is no guarantee that hint is close to actual source size,
-@@ -1808,13 +2025,16 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
-  * Experimental parameter.
-  * Default is 0 == disabled. Set to 1 to enable.
-  *
-- * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same
-- * between calls, except for the modifications that zstd makes to pos (the
-- * caller must not modify pos). This is checked by the compressor, and
-- * compression will fail if it ever changes. This means the only flush
-- * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end
-- * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos)
-- * MUST not be modified during compression or you will get data corruption.
-+ * Tells the compressor that input data presented with ZSTD_inBuffer
-+ * will ALWAYS be the same between calls.
-+ * Technically, the @src pointer must never be changed,
-+ * and the @pos field can only be updated by zstd.
-+ * However, it's possible to increase the @size field,
-+ * allowing scenarios where more data can be appended after compressions starts.
-+ * These conditions are checked by the compressor,
-+ * and compression will fail if they are not respected.
-+ * Also, data in the ZSTD_inBuffer within the range [src, src + pos)
-+ * MUST not be modified during compression or it will result in data corruption.
-  *
-  * When this flag is enabled zstd won't allocate an input window buffer,
-  * because the user guarantees it can reference the ZSTD_inBuffer until
-@@ -1822,18 +2042,15 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
-  * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
-  * avoid the memcpy() from the input buffer to the input window buffer.
-  *
-- * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used.
-- * That means this flag cannot be used with ZSTD_compressStream().
-- *
-  * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
-  * this flag is ALWAYS memory safe, and will never access out-of-bounds
-- * memory. However, compression WILL fail if you violate the preconditions.
-+ * memory. However, compression WILL fail if conditions are not respected.
-  *
-- * WARNING: The data in the ZSTD_inBuffer in the range [dst, dst + pos) MUST
-- * not be modified during compression or you will get data corruption. This
-- * is because zstd needs to reference data in the ZSTD_inBuffer to find
-+ * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST
-+ * not be modified during compression or it will result in data corruption.
-+ * This is because zstd needs to reference data in the ZSTD_inBuffer to find
-  * matches. Normally zstd maintains its own window buffer for this purpose,
-- * but passing this flag tells zstd to use the user provided buffer.
-+ * but passing this flag tells zstd to rely on user provided buffer instead.
-  */
- #define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
- 
-@@ -1878,7 +2095,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
-  * Without validation, providing a sequence that does not conform to the zstd spec will cause
-  * undefined behavior, and may produce a corrupted block.
-  *
-- * With validation enabled, a if sequence is invalid (see doc/zstd_compression_format.md for
-+ * With validation enabled, if sequence is invalid (see doc/zstd_compression_format.md for
-  * specifics regarding offset/matchlength requirements) then the function will bail out and
-  * return an error.
-  *
-@@ -1928,6 +2145,79 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
-  */
- #define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15
- 
-+/* ZSTD_c_prefetchCDictTables
-+ * Controlled with ZSTD_paramSwitch_e enum. Default is ZSTD_ps_auto.
-+ *
-+ * In some situations, zstd uses CDict tables in-place rather than copying them
-+ * into the working context. (See docs on ZSTD_dictAttachPref_e above for details).
-+ * In such situations, compression speed is seriously impacted when CDict tables are
-+ * "cold" (outside CPU cache). This parameter instructs zstd to prefetch CDict tables
-+ * when they are used in-place.
-+ *
-+ * For sufficiently small inputs, the cost of the prefetch will outweigh the benefit.
-+ * For sufficiently large inputs, zstd will by default memcpy() CDict tables
-+ * into the working context, so there is no need to prefetch. This parameter is
-+ * targeted at a middle range of input sizes, where a prefetch is cheap enough to be
-+ * useful but memcpy() is too expensive. The exact range of input sizes where this
-+ * makes sense is best determined by careful experimentation.
-+ *
-+ * Note: for this parameter, ZSTD_ps_auto is currently equivalent to ZSTD_ps_disable,
-+ * but in the future zstd may conditionally enable this feature via an auto-detection
-+ * heuristic for cold CDicts.
-+ * Use ZSTD_ps_disable to opt out of prefetching under any circumstances.
-+ */
-+#define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16
-+
-+/* ZSTD_c_enableSeqProducerFallback
-+ * Allowed values are 0 (disable) and 1 (enable). The default setting is 0.
-+ *
-+ * Controls whether zstd will fall back to an internal sequence producer if an
-+ * external sequence producer is registered and returns an error code. This fallback
-+ * is block-by-block: the internal sequence producer will only be called for blocks
-+ * where the external sequence producer returns an error code. Fallback parsing will
-+ * follow any other cParam settings, such as compression level, the same as in a
-+ * normal (fully-internal) compression operation.
-+ *
-+ * The user is strongly encouraged to read the full Block-Level Sequence Producer API
-+ * documentation (below) before setting this parameter. */
-+#define ZSTD_c_enableSeqProducerFallback ZSTD_c_experimentalParam17
-+
-+/* ZSTD_c_maxBlockSize
-+ * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
-+ * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default.
-+ *
-+ * This parameter can be used to set an upper bound on the blocksize
-+ * that overrides the default ZSTD_BLOCKSIZE_MAX. It cannot be used to set upper
-+ * bounds greater than ZSTD_BLOCKSIZE_MAX or bounds lower than 1KB (will make
-+ * compressBound() inaccurate). Only currently meant to be used for testing.
-+ *
-+ */
-+#define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18
-+
-+/* ZSTD_c_searchForExternalRepcodes
-+ * This parameter affects how zstd parses external sequences, such as sequences
-+ * provided through the compressSequences() API or from an external block-level
-+ * sequence producer.
-+ *
-+ * If set to ZSTD_ps_enable, the library will check for repeated offsets in
-+ * external sequences, even if those repcodes are not explicitly indicated in
-+ * the "rep" field. Note that this is the only way to exploit repcode matches
-+ * while using compressSequences() or an external sequence producer, since zstd
-+ * currently ignores the "rep" field of external sequences.
-+ *
-+ * If set to ZSTD_ps_disable, the library will not exploit repeated offsets in
-+ * external sequences, regardless of whether the "rep" field has been set. This
-+ * reduces sequence compression overhead by about 25% while sacrificing some
-+ * compression ratio.
-+ *
-+ * The default value is ZSTD_ps_auto, for which the library will enable/disable
-+ * based on compression level.
-+ *
-+ * Note: for now, this param only has an effect if ZSTD_c_blockDelimiters is
-+ * set to ZSTD_sf_explicitBlockDelimiters. That may change in the future.
-+ */
-+#define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19
-+
- /*! ZSTD_CCtx_getParameter() :
-  *  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
-  *  and store it into int* value.
-@@ -2084,7 +2374,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete
-  * in the range [dst, dst + pos) MUST not be modified during decompression
-  * or you will get data corruption.
-  *
-- * When this flags is enabled zstd won't allocate an output buffer, because
-+ * When this flag is enabled zstd won't allocate an output buffer, because
-  * it can write directly to the ZSTD_outBuffer, but it will still allocate
-  * an input buffer large enough to fit any compressed block. This will also
-  * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
-@@ -2137,6 +2427,33 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete
-  */
- #define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4
- 
-+/* ZSTD_d_disableHuffmanAssembly
-+ * Set to 1 to disable the Huffman assembly implementation.
-+ * The default value is 0, which allows zstd to use the Huffman assembly
-+ * implementation if available.
-+ *
-+ * This parameter can be used to disable Huffman assembly at runtime.
-+ * If you want to disable it at compile time you can define the macro
-+ * ZSTD_DISABLE_ASM.
-+ */
-+#define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5
-+
-+/* ZSTD_d_maxBlockSize
-+ * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
-+ * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default.
-+ *
-+ * Forces the decompressor to reject blocks whose content size is
-+ * larger than the configured maxBlockSize. When maxBlockSize is
-+ * larger than the windowSize, the windowSize is used instead.
-+ * This saves memory on the decoder when you know all blocks are small.
-+ *
-+ * This option is typically used in conjunction with ZSTD_c_maxBlockSize.
-+ *
-+ * WARNING: This causes the decoder to reject otherwise valid frames
-+ * that have block sizes larger than the configured maxBlockSize.
-+ */
-+#define ZSTD_d_maxBlockSize ZSTD_d_experimentalParam6
-+
- 
- /*! ZSTD_DCtx_setFormat() :
-  *  This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().
-@@ -2145,6 +2462,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete
-  *  such ZSTD_f_zstd1_magicless for example.
-  * @return : 0, or an error code (which can be tested using ZSTD_isError()). */
- ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead")
-+ZSTDLIB_STATIC_API
- size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
- 
- /*! ZSTD_decompressStream_simpleArgs() :
-@@ -2181,6 +2499,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressStream_simpleArgs (
-  * This prototype will generate compilation warnings.
-  */
- ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
-+ZSTDLIB_STATIC_API
- size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
-                          int compressionLevel,
-                          unsigned long long pledgedSrcSize);
-@@ -2198,17 +2517,15 @@ size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
-  * This prototype will generate compilation warnings.
-  */
- ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
-+ZSTDLIB_STATIC_API
- size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
-                      const void* dict, size_t dictSize,
-                            int compressionLevel);
- 
- /*! ZSTD_initCStream_advanced() :
-- * This function is DEPRECATED, and is approximately equivalent to:
-+ * This function is DEPRECATED, and is equivalent to:
-  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
-- *     // Pseudocode: Set each zstd parameter and leave the rest as-is.
-- *     for ((param, value) : params) {
-- *         ZSTD_CCtx_setParameter(zcs, param, value);
-- *     }
-+ *     ZSTD_CCtx_setParams(zcs, params);
-  *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
-  *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
-  *
-@@ -2218,6 +2535,7 @@ size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
-  * This prototype will generate compilation warnings.
-  */
- ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
-+ZSTDLIB_STATIC_API
- size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
-                     const void* dict, size_t dictSize,
-                           ZSTD_parameters params,
-@@ -2232,15 +2550,13 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
-  * This prototype will generate compilation warnings.
-  */
- ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
-+ZSTDLIB_STATIC_API
- size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
- 
- /*! ZSTD_initCStream_usingCDict_advanced() :
-- *   This function is DEPRECATED, and is approximately equivalent to:
-+ *   This function is DEPRECATED, and is equivalent to:
-  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
-- *     // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
-- *     for ((fParam, value) : fParams) {
-- *         ZSTD_CCtx_setParameter(zcs, fParam, value);
-- *     }
-+ *     ZSTD_CCtx_setFParams(zcs, fParams);
-  *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
-  *     ZSTD_CCtx_refCDict(zcs, cdict);
-  *
-@@ -2250,6 +2566,7 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
-  * This prototype will generate compilation warnings.
-  */
- ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
-+ZSTDLIB_STATIC_API
- size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
-                                const ZSTD_CDict* cdict,
-                                      ZSTD_frameParameters fParams,
-@@ -2264,7 +2581,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
-  *       explicitly specified.
-  *
-  *  start a new frame, using same parameters from previous frame.
-- *  This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
-+ *  This is typically useful to skip dictionary loading stage, since it will reuse it in-place.
-  *  Note that zcs must be init at least once before using ZSTD_resetCStream().
-  *  If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
-  *  If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
-@@ -2274,6 +2591,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
-  *  This prototype will generate compilation warnings.
-  */
- ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
-+ZSTDLIB_STATIC_API
- size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
- 
- 
-@@ -2319,8 +2637,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
-  *     ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
-  *
-  * note: no dictionary will be used if dict == NULL or dictSize < 8
-- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
-  */
-+ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_loadDictionary, see zstd.h for detailed instructions")
- ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
- 
- /*!
-@@ -2330,8 +2648,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const vo
-  *     ZSTD_DCtx_refDDict(zds, ddict);
-  *
-  * note : ddict is referenced, it must outlive decompression session
-- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
-  */
-+ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_refDDict, see zstd.h for detailed instructions")
- ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
- 
- /*!
-@@ -2339,18 +2657,202 @@ ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const Z
-  *
-  *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
-  *
-- * re-use decompression parameters from previous init; saves dictionary loading
-- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
-+ * reuse decompression parameters from previous init; saves dictionary loading
-  */
-+ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions")
- ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
- 
- 
-+/* ********************* BLOCK-LEVEL SEQUENCE PRODUCER API *********************
-+ *
-+ * *** OVERVIEW ***
-+ * The Block-Level Sequence Producer API allows users to provide their own custom
-+ * sequence producer which libzstd invokes to process each block. The produced list
-+ * of sequences (literals and matches) is then post-processed by libzstd to produce
-+ * valid compressed blocks.
-+ *
-+ * This block-level offload API is a more granular complement of the existing
-+ * frame-level offload API compressSequences() (introduced in v1.5.1). It offers
-+ * an easier migration story for applications already integrated with libzstd: the
-+ * user application continues to invoke the same compression functions
-+ * ZSTD_compress2() or ZSTD_compressStream2() as usual, and transparently benefits
-+ * from the specific advantages of the external sequence producer. For example,
-+ * the sequence producer could be tuned to take advantage of known characteristics
-+ * of the input, to offer better speed / ratio, or could leverage hardware
-+ * acceleration not available within libzstd itself.
-+ *
-+ * See contrib/externalSequenceProducer for an example program employing the
-+ * Block-Level Sequence Producer API.
-+ *
-+ * *** USAGE ***
-+ * The user is responsible for implementing a function of type
-+ * ZSTD_sequenceProducer_F. For each block, zstd will pass the following
-+ * arguments to the user-provided function:
-+ *
-+ *   - sequenceProducerState: a pointer to a user-managed state for the sequence
-+ *     producer.
-+ *
-+ *   - outSeqs, outSeqsCapacity: an output buffer for the sequence producer.
-+ *     outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). The memory
-+ *     backing outSeqs is managed by the CCtx.
-+ *
-+ *   - src, srcSize: an input buffer for the sequence producer to parse.
-+ *     srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX.
-+ *
-+ *   - dict, dictSize: a history buffer, which may be empty, which the sequence
-+ *     producer may reference as it parses the src buffer. Currently, zstd will
-+ *     always pass dictSize == 0 into external sequence producers, but this will
-+ *     change in the future.
-+ *
-+ *   - compressionLevel: a signed integer representing the zstd compression level
-+ *     set by the user for the current operation. The sequence producer may choose
-+ *     to use this information to change its compression strategy and speed/ratio
-+ *     tradeoff. Note: the compression level does not reflect zstd parameters set
-+ *     through the advanced API.
-+ *
-+ *   - windowSize: a size_t representing the maximum allowed offset for external
-+ *     sequences. Note that sequence offsets are sometimes allowed to exceed the
-+ *     windowSize if a dictionary is present, see doc/zstd_compression_format.md
-+ *     for details.
-+ *
-+ * The user-provided function shall return a size_t representing the number of
-+ * sequences written to outSeqs. This return value will be treated as an error
-+ * code if it is greater than outSeqsCapacity. The return value must be non-zero
-+ * if srcSize is non-zero. The ZSTD_SEQUENCE_PRODUCER_ERROR macro is provided
-+ * for convenience, but any value greater than outSeqsCapacity will be treated as
-+ * an error code.
-+ *
-+ * If the user-provided function does not return an error code, the sequences
-+ * written to outSeqs must be a valid parse of the src buffer. Data corruption may
-+ * occur if the parse is not valid. A parse is defined to be valid if the
-+ * following conditions hold:
-+ *   - The sum of matchLengths and literalLengths must equal srcSize.
-+ *   - All sequences in the parse, except for the final sequence, must have
-+ *     matchLength >= ZSTD_MINMATCH_MIN. The final sequence must have
-+ *     matchLength >= ZSTD_MINMATCH_MIN or matchLength == 0.
-+ *   - All offsets must respect the windowSize parameter as specified in
-+ *     doc/zstd_compression_format.md.
-+ *   - If the final sequence has matchLength == 0, it must also have offset == 0.
-+ *
-+ * zstd will only validate these conditions (and fail compression if they do not
-+ * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence
-+ * validation has a performance cost.
-+ *
-+ * If the user-provided function returns an error, zstd will either fall back
-+ * to an internal sequence producer or fail the compression operation. The user can
-+ * choose between the two behaviors by setting the ZSTD_c_enableSeqProducerFallback
-+ * cParam. Fallback compression will follow any other cParam settings, such as
-+ * compression level, the same as in a normal compression operation.
-+ *
-+ * The user shall instruct zstd to use a particular ZSTD_sequenceProducer_F
-+ * function by calling
-+ *         ZSTD_registerSequenceProducer(cctx,
-+ *                                       sequenceProducerState,
-+ *                                       sequenceProducer)
-+ * This setting will persist until the next parameter reset of the CCtx.
-+ *
-+ * The sequenceProducerState must be initialized by the user before calling
-+ * ZSTD_registerSequenceProducer(). The user is responsible for destroying the
-+ * sequenceProducerState.
-+ *
-+ * *** LIMITATIONS ***
-+ * This API is compatible with all zstd compression APIs which respect advanced parameters.
-+ * However, there are three limitations:
-+ *
-+ * First, the ZSTD_c_enableLongDistanceMatching cParam is not currently supported.
-+ * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with a block-level
-+ * external sequence producer.
-+ *   - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in some
-+ *     cases (see its documentation for details). Users must explicitly set
-+ *     ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an external
-+ *     sequence producer is registered.
-+ *   - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default
-+ *     whenever ZSTD_c_windowLog < 128MB, but that's subject to change. Users should
-+ *     check the docs on ZSTD_c_enableLongDistanceMatching whenever the Block-Level Sequence
-+ *     Producer API is used in conjunction with advanced settings (like ZSTD_c_windowLog).
-+ *
-+ * Second, history buffers are not currently supported. Concretely, zstd will always pass
-+ * dictSize == 0 to the external sequence producer (for now). This has two implications:
-+ *   - Dictionaries are not currently supported. Compression will *not* fail if the user
-+ *     references a dictionary, but the dictionary won't have any effect.
-+ *   - Stream history is not currently supported. All advanced compression APIs, including
-+ *     streaming APIs, work with external sequence producers, but each block is treated as
-+ *     an independent chunk without history from previous blocks.
-+ *
-+ * Third, multi-threading within a single compression is not currently supported. In other words,
-+ * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered.
-+ * Multi-threading across compressions is fine: simply create one CCtx per thread.
-+ *
-+ * Long-term, we plan to overcome all three limitations. There is no technical blocker to
-+ * overcoming them. It is purely a question of engineering effort.
-+ */
-+
-+#define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1))
-+
-+typedef size_t (*ZSTD_sequenceProducer_F) (
-+  void* sequenceProducerState,
-+  ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
-+  const void* src, size_t srcSize,
-+  const void* dict, size_t dictSize,
-+  int compressionLevel,
-+  size_t windowSize
-+);
-+
-+/*! ZSTD_registerSequenceProducer() :
-+ * Instruct zstd to use a block-level external sequence producer function.
-+ *
-+ * The sequenceProducerState must be initialized by the caller, and the caller is
-+ * responsible for managing its lifetime. This parameter is sticky across
-+ * compressions. It will remain set until the user explicitly resets compression
-+ * parameters.
-+ *
-+ * Sequence producer registration is considered to be an "advanced parameter",
-+ * part of the "advanced API". This means it will only have an effect on compression
-+ * APIs which respect advanced parameters, such as compress2() and compressStream2().
-+ * Older compression APIs such as compressCCtx(), which predate the introduction of
-+ * "advanced parameters", will ignore any external sequence producer setting.
-+ *
-+ * The sequence producer can be "cleared" by registering a NULL function pointer. This
-+ * removes all limitations described above in the "LIMITATIONS" section of the API docs.
-+ *
-+ * The user is strongly encouraged to read the full API documentation (above) before
-+ * calling this function. */
-+ZSTDLIB_STATIC_API void
-+ZSTD_registerSequenceProducer(
-+  ZSTD_CCtx* cctx,
-+  void* sequenceProducerState,
-+  ZSTD_sequenceProducer_F sequenceProducer
-+);
-+
-+/*! ZSTD_CCtxParams_registerSequenceProducer() :
-+ * Same as ZSTD_registerSequenceProducer(), but operates on ZSTD_CCtx_params.
-+ * This is used for accurate size estimation with ZSTD_estimateCCtxSize_usingCCtxParams(),
-+ * which is needed when creating a ZSTD_CCtx with ZSTD_initStaticCCtx().
-+ *
-+ * If you are using the external sequence producer API in a scenario where ZSTD_initStaticCCtx()
-+ * is required, then this function is for you. Otherwise, you probably don't need it.
-+ *
-+ * See tests/zstreamtest.c for example usage. */
-+ZSTDLIB_STATIC_API void
-+ZSTD_CCtxParams_registerSequenceProducer(
-+  ZSTD_CCtx_params* params,
-+  void* sequenceProducerState,
-+  ZSTD_sequenceProducer_F sequenceProducer
-+);
-+
-+
- /* *******************************************************************
--*  Buffer-less and synchronous inner streaming functions
-+*  Buffer-less and synchronous inner streaming functions (DEPRECATED)
-+*
-+*  This API is deprecated, and will be removed in a future version.
-+*  It allows streaming (de)compression with user allocated buffers.
-+*  However, it is hard to use, and not as well tested as the rest of
-+*  our API.
- *
--*  This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
--*  But it's also a complex one, with several restrictions, documented below.
--*  Prefer normal streaming API for an easier experience.
-+*  Please use the normal streaming API instead: ZSTD_compressStream2,
-+*  and ZSTD_decompressStream.
-+*  If there is functionality that you need, but it doesn't provide,
-+*  please open an issue on our GitHub.
- ********************************************************************* */
- 
- /*
-@@ -2358,11 +2860,10 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
- 
-   A ZSTD_CCtx object is required to track streaming operations.
-   Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
--  ZSTD_CCtx object can be re-used multiple times within successive compression operations.
-+  ZSTD_CCtx object can be reused multiple times within successive compression operations.
- 
-   Start by initializing a context.
-   Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
--  It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
- 
-   Then, consume your input using ZSTD_compressContinue().
-   There are some important considerations to keep in mind when using this advanced function :
-@@ -2380,36 +2881,46 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
-   It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
-   Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
- 
--  `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
-+  `ZSTD_CCtx` object can be reused (ZSTD_compressBegin()) to compress again.
- */
- 
- /*=====   Buffer-less streaming compression functions  =====*/
-+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
- ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
-+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
- ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
- ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */
--ZSTDLIB_STATIC_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
- 
-+ZSTD_DEPRECATED("This function will likely be removed in a future release. It is misleading and has very limited utility.")
-+ZSTDLIB_STATIC_API
-+size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
-+
-+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
- ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
- ZSTDLIB_STATIC_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
- 
- /* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */
- ZSTD_DEPRECATED("use advanced API to access custom parameters")
-+ZSTDLIB_STATIC_API
- size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
- ZSTD_DEPRECATED("use advanced API to access custom parameters")
-+ZSTDLIB_STATIC_API
- size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
- /*
-   Buffer-less streaming decompression (synchronous mode)
- 
-   A ZSTD_DCtx object is required to track streaming operations.
-   Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
--  A ZSTD_DCtx object can be re-used multiple times.
-+  A ZSTD_DCtx object can be reused multiple times.
- 
-   First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
-   Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
-   Data fragment must be large enough to ensure successful decoding.
-  `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough.
--  @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
--           >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
-+  result  : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
-+           >0 : `srcSize` is too small, please provide at least result bytes on next attempt.
-            errorCode, which can be tested using ZSTD_isError().
- 
-   It fills a ZSTD_frameHeader structure with important information to correctly decode the frame,
-@@ -2428,7 +2939,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_
- 
-   The most memory efficient way is to use a round buffer of sufficient size.
-   Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(),
--  which can @return an error code if required value is too large for current system (in 32-bits mode).
-+  which can return an error code if required value is too large for current system (in 32-bits mode).
-   In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one,
-   up to the moment there is not enough room left in the buffer to guarantee decoding another full block,
-   which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`.
-@@ -2448,7 +2959,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_
-   ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
-   ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail.
- 
-- @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
-+  result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
-   It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item.
-   It can also be an error code, which can be tested with ZSTD_isError().
- 
-@@ -2471,27 +2982,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_
- */
- 
- /*=====   Buffer-less streaming decompression functions  =====*/
--typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
--typedef struct {
--    unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
--    unsigned long long windowSize;       /* can be very large, up to <= frameContentSize */
--    unsigned blockSizeMax;
--    ZSTD_frameType_e frameType;          /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
--    unsigned headerSize;
--    unsigned dictID;
--    unsigned checksumFlag;
--} ZSTD_frameHeader;
- 
--/*! ZSTD_getFrameHeader() :
-- *  decode Frame Header, or requires larger `srcSize`.
-- * @return : 0, `zfhPtr` is correctly filled,
-- *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
-- *           or an error code, which can be tested using ZSTD_isError() */
--ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   /*< doesn't consume input */
--/*! ZSTD_getFrameHeader_advanced() :
-- *  same as ZSTD_getFrameHeader(),
-- *  with added capability to select a format (like ZSTD_f_zstd1_magicless) */
--ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
- ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);  /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
- 
- ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
-@@ -2502,6 +2993,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
- ZSTDLIB_STATIC_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
- 
- /* misc */
-+ZSTD_DEPRECATED("This function will likely be removed in the next minor release. It is misleading and has very limited utility.")
- ZSTDLIB_STATIC_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
- typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
- ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
-@@ -2509,11 +3001,23 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
- 
- 
- 
--/* ============================ */
--/*       Block level API       */
--/* ============================ */
-+/* ========================================= */
-+/*       Block level API (DEPRECATED)       */
-+/* ========================================= */
- 
- /*!
-+
-+    This API is deprecated in favor of the regular compression API.
-+    You can get the frame header down to 2 bytes by setting:
-+      - ZSTD_c_format = ZSTD_f_zstd1_magicless
-+      - ZSTD_c_contentSizeFlag = 0
-+      - ZSTD_c_checksumFlag = 0
-+      - ZSTD_c_dictIDFlag = 0
-+
-+    This API is not as well tested as our normal API, so we recommend not using it.
-+    We will be removing it in a future version. If the normal API doesn't provide
-+    the functionality you need, please open a GitHub issue.
-+
-     Block functions produce and decode raw zstd blocks, without frame metadata.
-     Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
-     But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes.
-@@ -2524,7 +3028,6 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
-     - It is necessary to init context before starting
-       + compression : any ZSTD_compressBegin*() variant, including with dictionary
-       + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
--      + copyCCtx() and copyDCtx() can be used too
-     - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
-       + If input is larger than a block size, it's necessary to split input data into multiple blocks
-       + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
-@@ -2541,11 +3044,14 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
- */
- 
- /*=====   Raw zstd block functions  =====*/
-+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
- ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
-+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
- ZSTDLIB_STATIC_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
- ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
- ZSTDLIB_STATIC_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
- 
--
- #endif   /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
- 
-diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile
-index 20f08c644b71..464c410b2768 100644
---- a/lib/zstd/Makefile
-+++ b/lib/zstd/Makefile
-@@ -1,6 +1,6 @@
- # SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- # ################################################################
--# Copyright (c) Facebook, Inc.
-+# Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under both the BSD-style license (found in the
-diff --git a/lib/zstd/common/allocations.h b/lib/zstd/common/allocations.h
-new file mode 100644
-index 000000000000..16c3d08e8d1a
---- /dev/null
-+++ b/lib/zstd/common/allocations.h
-@@ -0,0 +1,56 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
-+/*
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-+ * All rights reserved.
-+ *
-+ * This source code is licensed under both the BSD-style license (found in the
-+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
-+ * in the COPYING file in the root directory of this source tree).
-+ * You may select, at your option, one of the above-listed licenses.
-+ */
-+
-+/* This file provides custom allocation primitives
-+ */
-+
-+#define ZSTD_DEPS_NEED_MALLOC
-+#include "zstd_deps.h"   /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
-+
-+#include "compiler.h" /* MEM_STATIC */
-+#define ZSTD_STATIC_LINKING_ONLY
-+#include <linux/zstd.h> /* ZSTD_customMem */
-+
-+#ifndef ZSTD_ALLOCATIONS_H
-+#define ZSTD_ALLOCATIONS_H
-+
-+/* custom memory allocation functions */
-+
-+MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
-+{
-+    if (customMem.customAlloc)
-+        return customMem.customAlloc(customMem.opaque, size);
-+    return ZSTD_malloc(size);
-+}
-+
-+MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
-+{
-+    if (customMem.customAlloc) {
-+        /* calloc implemented as malloc+memset;
-+         * not as efficient as calloc, but next best guess for custom malloc */
-+        void* const ptr = customMem.customAlloc(customMem.opaque, size);
-+        ZSTD_memset(ptr, 0, size);
-+        return ptr;
-+    }
-+    return ZSTD_calloc(1, size);
-+}
-+
-+MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
-+{
-+    if (ptr!=NULL) {
-+        if (customMem.customFree)
-+            customMem.customFree(customMem.opaque, ptr);
-+        else
-+            ZSTD_free(ptr);
-+    }
-+}
-+
-+#endif /* ZSTD_ALLOCATIONS_H */
-diff --git a/lib/zstd/common/bits.h b/lib/zstd/common/bits.h
-new file mode 100644
-index 000000000000..aa3487ec4b6a
---- /dev/null
-+++ b/lib/zstd/common/bits.h
-@@ -0,0 +1,149 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
-+/*
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-+ * All rights reserved.
-+ *
-+ * This source code is licensed under both the BSD-style license (found in the
-+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
-+ * in the COPYING file in the root directory of this source tree).
-+ * You may select, at your option, one of the above-listed licenses.
-+ */
-+
-+#ifndef ZSTD_BITS_H
-+#define ZSTD_BITS_H
-+
-+#include "mem.h"
-+
-+MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
-+{
-+    assert(val != 0);
-+    {
-+        static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
-+                                                30, 22, 20, 15, 25, 17, 4, 8,
-+                                                31, 27, 13, 23, 21, 19, 16, 7,
-+                                                26, 12, 18, 6, 11, 5, 10, 9};
-+        return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
-+    }
-+}
-+
-+MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
-+{
-+    assert(val != 0);
-+#   if (__GNUC__ >= 4)
-+        return (unsigned)__builtin_ctz(val);
-+#   else
-+        return ZSTD_countTrailingZeros32_fallback(val);
-+#   endif
-+}
-+
-+MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
-+    assert(val != 0);
-+    {
-+        static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
-+                                            11, 14, 16, 18, 22, 25, 3, 30,
-+                                            8, 12, 20, 28, 15, 17, 24, 7,
-+                                            19, 27, 23, 6, 26, 5, 4, 31};
-+        val |= val >> 1;
-+        val |= val >> 2;
-+        val |= val >> 4;
-+        val |= val >> 8;
-+        val |= val >> 16;
-+        return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
-+    }
-+}
-+
-+MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
-+{
-+    assert(val != 0);
-+#   if (__GNUC__ >= 4)
-+        return (unsigned)__builtin_clz(val);
-+#   else
-+        return ZSTD_countLeadingZeros32_fallback(val);
-+#   endif
-+}
-+
-+MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
-+{
-+    assert(val != 0);
-+#   if (__GNUC__ >= 4) && defined(__LP64__)
-+        return (unsigned)__builtin_ctzll(val);
-+#   else
-+        {
-+            U32 mostSignificantWord = (U32)(val >> 32);
-+            U32 leastSignificantWord = (U32)val;
-+            if (leastSignificantWord == 0) {
-+                return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
-+            } else {
-+                return ZSTD_countTrailingZeros32(leastSignificantWord);
-+            }
-+        }
-+#   endif
-+}
-+
-+MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
-+{
-+    assert(val != 0);
-+#   if (__GNUC__ >= 4)
-+        return (unsigned)(__builtin_clzll(val));
-+#   else
-+        {
-+            U32 mostSignificantWord = (U32)(val >> 32);
-+            U32 leastSignificantWord = (U32)val;
-+            if (mostSignificantWord == 0) {
-+                return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
-+            } else {
-+                return ZSTD_countLeadingZeros32(mostSignificantWord);
-+            }
-+        }
-+#   endif
-+}
-+
-+MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
-+{
-+    if (MEM_isLittleEndian()) {
-+        if (MEM_64bits()) {
-+            return ZSTD_countTrailingZeros64((U64)val) >> 3;
-+        } else {
-+            return ZSTD_countTrailingZeros32((U32)val) >> 3;
-+        }
-+    } else {  /* Big Endian CPU */
-+        if (MEM_64bits()) {
-+            return ZSTD_countLeadingZeros64((U64)val) >> 3;
-+        } else {
-+            return ZSTD_countLeadingZeros32((U32)val) >> 3;
-+        }
-+    }
-+}
-+
-+MEM_STATIC unsigned ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
-+{
-+    assert(val != 0);
-+    return 31 - ZSTD_countLeadingZeros32(val);
-+}
-+
-+/* ZSTD_rotateRight_*():
-+ * Rotates a bitfield to the right by "count" bits.
-+ * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
-+ */
-+MEM_STATIC
-+U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
-+    assert(count < 64);
-+    count &= 0x3F; /* for fickle pattern recognition */
-+    return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
-+}
-+
-+MEM_STATIC
-+U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
-+    assert(count < 32);
-+    count &= 0x1F; /* for fickle pattern recognition */
-+    return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
-+}
-+
-+MEM_STATIC
-+U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
-+    assert(count < 16);
-+    count &= 0x0F; /* for fickle pattern recognition */
-+    return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
-+}
-+
-+#endif /* ZSTD_BITS_H */
-diff --git a/lib/zstd/common/bitstream.h b/lib/zstd/common/bitstream.h
-index feef3a1b1d60..6a13f1f0f1e8 100644
---- a/lib/zstd/common/bitstream.h
-+++ b/lib/zstd/common/bitstream.h
-@@ -1,7 +1,8 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /* ******************************************************************
-  * bitstream
-  * Part of FSE library
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  *
-  * You can contact the author at :
-  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
-@@ -27,6 +28,7 @@
- #include "compiler.h"       /* UNLIKELY() */
- #include "debug.h"          /* assert(), DEBUGLOG(), RAWLOG() */
- #include "error_private.h"  /* error codes and messages */
-+#include "bits.h"           /* ZSTD_highbit32 */
- 
- 
- /*=========================================
-@@ -79,19 +81,20 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
- /*-********************************************
- *  bitStream decoding API (read backward)
- **********************************************/
-+typedef size_t BitContainerType;
- typedef struct {
--    size_t   bitContainer;
-+    BitContainerType bitContainer;
-     unsigned bitsConsumed;
-     const char* ptr;
-     const char* start;
-     const char* limitPtr;
- } BIT_DStream_t;
- 
--typedef enum { BIT_DStream_unfinished = 0,
--               BIT_DStream_endOfBuffer = 1,
--               BIT_DStream_completed = 2,
--               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
--               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
-+typedef enum { BIT_DStream_unfinished = 0,  /* fully refilled */
-+               BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
-+               BIT_DStream_completed = 2,   /* bitstream entirely consumed, bit-exact */
-+               BIT_DStream_overflow = 3     /* user requested more bits than present in bitstream */
-+    } BIT_DStream_status;  /* result of BIT_reloadDStream() */
- 
- MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
- MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
-@@ -101,7 +104,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
- 
- /* Start by invoking BIT_initDStream().
- *  A chunk of the bitStream is then stored into a local register.
--*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
-+*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
- *  You can then retrieve bitFields stored into the local register, **in reverse order**.
- *  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
- *  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
-@@ -122,33 +125,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
- MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
- /* faster, but works only if nbBits >= 1 */
- 
--
--
--/*-**************************************************************
--*  Internal functions
--****************************************************************/
--MEM_STATIC unsigned BIT_highbit32 (U32 val)
--{
--    assert(val != 0);
--    {
--#   if (__GNUC__ >= 3)   /* Use GCC Intrinsic */
--        return __builtin_clz (val) ^ 31;
--#   else   /* Software version */
--        static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
--                                                 11, 14, 16, 18, 22, 25,  3, 30,
--                                                  8, 12, 20, 28, 15, 17, 24,  7,
--                                                 19, 27, 23,  6, 26,  5,  4, 31 };
--        U32 v = val;
--        v |= v >> 1;
--        v |= v >> 2;
--        v |= v >> 4;
--        v |= v >> 8;
--        v |= v >> 16;
--        return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
--#   endif
--    }
--}
--
- /*=====    Local Constants   =====*/
- static const unsigned BIT_mask[] = {
-     0,          1,         3,         7,         0xF,       0x1F,
-@@ -178,6 +154,12 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
-     return 0;
- }
- 
-+FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
-+{
-+    assert(nbBits < BIT_MASK_SIZE);
-+    return bitContainer & BIT_mask[nbBits];
-+}
-+
- /*! BIT_addBits() :
-  *  can add up to 31 bits into `bitC`.
-  *  Note : does not check for register overflow ! */
-@@ -187,7 +169,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
-     DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
-     assert(nbBits < BIT_MASK_SIZE);
-     assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
--    bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
-+    bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
-     bitC->bitPos += nbBits;
- }
- 
-@@ -266,35 +248,35 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
-         bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
-         bitD->bitContainer = MEM_readLEST(bitD->ptr);
-         { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
--          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
-+          bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
-           if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
-     } else {
-         bitD->ptr   = bitD->start;
-         bitD->bitContainer = *(const BYTE*)(bitD->start);
-         switch(srcSize)
-         {
--        case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
-+        case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
-                 ZSTD_FALLTHROUGH;
- 
--        case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
-+        case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
-                 ZSTD_FALLTHROUGH;
- 
--        case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
-+        case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
-                 ZSTD_FALLTHROUGH;
- 
--        case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
-+        case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
-                 ZSTD_FALLTHROUGH;
- 
--        case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
-+        case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
-                 ZSTD_FALLTHROUGH;
- 
--        case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
-+        case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) <<  8;
-                 ZSTD_FALLTHROUGH;
- 
-         default: break;
-         }
-         {   BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
--            bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
-+            bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
-             if (lastByte == 0) return ERROR(corruption_detected);  /* endMark not present */
-         }
-         bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
-@@ -303,12 +285,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
-     return srcSize;
- }
- 
--MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
-+FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
- {
-     return bitContainer >> start;
- }
- 
--MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
-+FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
- {
-     U32 const regMask = sizeof(bitContainer)*8 - 1;
-     /* if start > regMask, bitstream is corrupted, and result is undefined */
-@@ -325,19 +307,13 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
- #endif
- }
- 
--MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
--{
--    assert(nbBits < BIT_MASK_SIZE);
--    return bitContainer & BIT_mask[nbBits];
--}
--
- /*! BIT_lookBits() :
-  *  Provides next n bits from local register.
-  *  local register is not modified.
-  *  On 32-bits, maxNbBits==24.
-  *  On 64-bits, maxNbBits==56.
-  * @return : value extracted */
--MEM_STATIC  FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t*  bitD, U32 nbBits)
-+FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t*  bitD, U32 nbBits)
- {
-     /* arbitrate between double-shift and shift+mask */
- #if 1
-@@ -360,7 +336,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
-     return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
- }
- 
--MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
-+FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
- {
-     bitD->bitsConsumed += nbBits;
- }
-@@ -369,7 +345,7 @@ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
-  *  Read (consume) next n bits from local register and update.
-  *  Pay attention to not read more than nbBits contained into local register.
-  * @return : extracted value. */
--MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
-+FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
- {
-     size_t const value = BIT_lookBits(bitD, nbBits);
-     BIT_skipBits(bitD, nbBits);
-@@ -377,7 +353,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned n
- }
- 
- /*! BIT_readBitsFast() :
-- *  unsafe version; only works only if nbBits >= 1 */
-+ *  unsafe version; only works if nbBits >= 1 */
- MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
- {
-     size_t const value = BIT_lookBitsFast(bitD, nbBits);
-@@ -386,6 +362,21 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
-     return value;
- }
- 
-+/*! BIT_reloadDStream_internal() :
-+ *  Simple variant of BIT_reloadDStream(), with two conditions:
-+ *  1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
-+ *  2. look window is valid after shifted down : bitD->ptr >= bitD->start
-+ */
-+MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
-+{
-+    assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
-+    bitD->ptr -= bitD->bitsConsumed >> 3;
-+    assert(bitD->ptr >= bitD->start);
-+    bitD->bitsConsumed &= 7;
-+    bitD->bitContainer = MEM_readLEST(bitD->ptr);
-+    return BIT_DStream_unfinished;
-+}
-+
- /*! BIT_reloadDStreamFast() :
-  *  Similar to BIT_reloadDStream(), but with two differences:
-  *  1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
-@@ -396,31 +387,35 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
- {
-     if (UNLIKELY(bitD->ptr < bitD->limitPtr))
-         return BIT_DStream_overflow;
--    assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
--    bitD->ptr -= bitD->bitsConsumed >> 3;
--    bitD->bitsConsumed &= 7;
--    bitD->bitContainer = MEM_readLEST(bitD->ptr);
--    return BIT_DStream_unfinished;
-+    return BIT_reloadDStream_internal(bitD);
- }
- 
- /*! BIT_reloadDStream() :
-  *  Refill `bitD` from buffer previously set in BIT_initDStream() .
-- *  This function is safe, it guarantees it will not read beyond src buffer.
-+ *  This function is safe, it guarantees it will not never beyond src buffer.
-  * @return : status of `BIT_DStream_t` internal register.
-  *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
--MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
-+FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
- {
--    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
-+    /* note : once in overflow mode, a bitstream remains in this mode until it's reset */
-+    if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
-+        static const BitContainerType zeroFilled = 0;
-+        bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
-+        /* overflow detected, erroneous scenario or end of stream: no update */
-         return BIT_DStream_overflow;
-+    }
-+
-+    assert(bitD->ptr >= bitD->start);
- 
-     if (bitD->ptr >= bitD->limitPtr) {
--        return BIT_reloadDStreamFast(bitD);
-+        return BIT_reloadDStream_internal(bitD);
-     }
-     if (bitD->ptr == bitD->start) {
-+        /* reached end of bitStream => no update */
-         if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
-         return BIT_DStream_completed;
-     }
--    /* start < ptr < limitPtr */
-+    /* start < ptr < limitPtr => cautious update */
-     {   U32 nbBytes = bitD->bitsConsumed >> 3;
-         BIT_DStream_status result = BIT_DStream_unfinished;
-         if (bitD->ptr - nbBytes < bitD->start) {
-diff --git a/lib/zstd/common/compiler.h b/lib/zstd/common/compiler.h
-index c42d39faf9bd..508ee25537bb 100644
---- a/lib/zstd/common/compiler.h
-+++ b/lib/zstd/common/compiler.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -11,6 +12,8 @@
- #ifndef ZSTD_COMPILER_H
- #define ZSTD_COMPILER_H
- 
-+#include <linux/types.h>
-+
- #include "portability_macros.h"
- 
- /*-*******************************************************
-@@ -41,12 +44,15 @@
- */
- #define WIN_CDECL
- 
-+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
-+#define UNUSED_ATTR __attribute__((unused))
-+
- /*
-  * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
-  * parameters. They must be inlined for the compiler to eliminate the constant
-  * branches.
-  */
--#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
-+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR
- /*
-  * HINT_INLINE is used to help the compiler generate better code. It is *not*
-  * used for "templates", so it can be tweaked based on the compilers
-@@ -61,11 +67,21 @@
- #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
- #  define HINT_INLINE static INLINE_KEYWORD
- #else
--#  define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
-+#  define HINT_INLINE FORCE_INLINE_TEMPLATE
- #endif
- 
--/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
--#define UNUSED_ATTR __attribute__((unused))
-+/* "soft" inline :
-+ * The compiler is free to select if it's a good idea to inline or not.
-+ * The main objective is to silence compiler warnings
-+ * when a defined function in included but not used.
-+ *
-+ * Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit.
-+ * Updating the prefix is probably preferable, but requires a fairly large codemod,
-+ * since this name is used everywhere.
-+ */
-+#ifndef MEM_STATIC  /* already defined in Linux Kernel mem.h */
-+#define MEM_STATIC static __inline UNUSED_ATTR
-+#endif
- 
- /* force no inlining */
- #define FORCE_NOINLINE static __attribute__((__noinline__))
-@@ -86,23 +102,24 @@
- #  define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
- #  define PREFETCH_L2(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
- #elif defined(__aarch64__)
--#  define PREFETCH_L1(ptr)  __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
--#  define PREFETCH_L2(ptr)  __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
-+#  define PREFETCH_L1(ptr)  do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
-+#  define PREFETCH_L2(ptr)  do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
- #else
--#  define PREFETCH_L1(ptr) (void)(ptr)  /* disabled */
--#  define PREFETCH_L2(ptr) (void)(ptr)  /* disabled */
-+#  define PREFETCH_L1(ptr) do { (void)(ptr); } while (0)  /* disabled */
-+#  define PREFETCH_L2(ptr) do { (void)(ptr); } while (0)  /* disabled */
- #endif  /* NO_PREFETCH */
- 
- #define CACHELINE_SIZE 64
- 
--#define PREFETCH_AREA(p, s)  {            \
--    const char* const _ptr = (const char*)(p);  \
--    size_t const _size = (size_t)(s);     \
--    size_t _pos;                          \
--    for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) {  \
--        PREFETCH_L2(_ptr + _pos);         \
--    }                                     \
--}
-+#define PREFETCH_AREA(p, s)                              \
-+    do {                                                 \
-+        const char* const _ptr = (const char*)(p);       \
-+        size_t const _size = (size_t)(s);                \
-+        size_t _pos;                                     \
-+        for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
-+            PREFETCH_L2(_ptr + _pos);                    \
-+        }                                                \
-+    } while (0)
- 
- /* vectorization
-  * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
-@@ -126,9 +143,9 @@
- #define UNLIKELY(x) (__builtin_expect((x), 0))
- 
- #if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
--#  define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); }
-+#  define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0)
- #else
--#  define ZSTD_UNREACHABLE { assert(0); }
-+#  define ZSTD_UNREACHABLE do { assert(0); } while (0)
- #endif
- 
- /* disable warnings */
-@@ -179,6 +196,85 @@
- *  Sanitizer
- *****************************************************************/
- 
-+/*
-+ * Zstd relies on pointer overflow in its decompressor.
-+ * We add this attribute to functions that rely on pointer overflow.
-+ */
-+#ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+#  if __has_attribute(no_sanitize)
-+#    if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8
-+       /* gcc < 8 only has signed-integer-overlow which triggers on pointer overflow */
-+#      define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("signed-integer-overflow")))
-+#    else
-+       /* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */
-+#      define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("pointer-overflow")))
-+#    endif
-+#  else
-+#    define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+#  endif
-+#endif
-+
-+/*
-+ * Helper function to perform a wrapped pointer difference without trigging
-+ * UBSAN.
-+ *
-+ * @returns lhs - rhs with wrapping
-+ */
-+MEM_STATIC
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs)
-+{
-+    return lhs - rhs;
-+}
-+
-+/*
-+ * Helper function to perform a wrapped pointer add without triggering UBSAN.
-+ *
-+ * @return ptr + add with wrapping
-+ */
-+MEM_STATIC
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add)
-+{
-+    return ptr + add;
-+}
-+
-+/*
-+ * Helper function to perform a wrapped pointer subtraction without triggering
-+ * UBSAN.
-+ *
-+ * @return ptr - sub with wrapping
-+ */
-+MEM_STATIC
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub)
-+{
-+    return ptr - sub;
-+}
-+
-+/*
-+ * Helper function to add to a pointer that works around C's undefined behavior
-+ * of adding 0 to NULL.
-+ *
-+ * @returns `ptr + add` except it defines `NULL + 0 == NULL`.
-+ */
-+MEM_STATIC
-+unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add)
-+{
-+    return add > 0 ? ptr + add : ptr;
-+}
-+
-+/* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
-+ * abundance of caution, disable our custom poisoning on mingw. */
-+#ifdef __MINGW32__
-+#ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE
-+#define ZSTD_ASAN_DONT_POISON_WORKSPACE 1
-+#endif
-+#ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE
-+#define ZSTD_MSAN_DONT_POISON_WORKSPACE 1
-+#endif
-+#endif
-+
- 
- 
- #endif /* ZSTD_COMPILER_H */
-diff --git a/lib/zstd/common/cpu.h b/lib/zstd/common/cpu.h
-index 0db7b42407ee..d8319a2bef4c 100644
---- a/lib/zstd/common/cpu.h
-+++ b/lib/zstd/common/cpu.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-diff --git a/lib/zstd/common/debug.c b/lib/zstd/common/debug.c
-index bb863c9ea616..8eb6aa9a3b20 100644
---- a/lib/zstd/common/debug.c
-+++ b/lib/zstd/common/debug.c
-@@ -1,7 +1,8 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /* ******************************************************************
-  * debug
-  * Part of FSE library
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  *
-  * You can contact the author at :
-  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
-@@ -21,4 +22,10 @@
- 
- #include "debug.h"
- 
-+#if (DEBUGLEVEL>=2)
-+/* We only use this when DEBUGLEVEL>=2, but we get -Werror=pedantic errors if a
-+ * translation unit is empty. So remove this from Linux kernel builds, but
-+ * otherwise just leave it in.
-+ */
- int g_debuglevel = DEBUGLEVEL;
-+#endif
-diff --git a/lib/zstd/common/debug.h b/lib/zstd/common/debug.h
-index 6dd88d1fbd02..226ba3c57ec3 100644
---- a/lib/zstd/common/debug.h
-+++ b/lib/zstd/common/debug.h
-@@ -1,7 +1,8 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /* ******************************************************************
-  * debug
-  * Part of FSE library
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  *
-  * You can contact the author at :
-  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
-@@ -82,18 +83,27 @@ extern int g_debuglevel; /* the variable is only declared,
-                             It's useful when enabling very verbose levels
-                             on selective conditions (such as position in src) */
- 
--#  define RAWLOG(l, ...) {                                       \
--                if (l<=g_debuglevel) {                           \
--                    ZSTD_DEBUG_PRINT(__VA_ARGS__);               \
--            }   }
--#  define DEBUGLOG(l, ...) {                                     \
--                if (l<=g_debuglevel) {                           \
--                    ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
--                    ZSTD_DEBUG_PRINT(" \n");                     \
--            }   }
-+#  define RAWLOG(l, ...)                   \
-+    do {                                   \
-+        if (l<=g_debuglevel) {             \
-+            ZSTD_DEBUG_PRINT(__VA_ARGS__); \
-+        }                                  \
-+    } while (0)
-+
-+#define STRINGIFY(x) #x
-+#define TOSTRING(x) STRINGIFY(x)
-+#define LINE_AS_STRING TOSTRING(__LINE__)
-+
-+#  define DEBUGLOG(l, ...)                               \
-+    do {                                                 \
-+        if (l<=g_debuglevel) {                           \
-+            ZSTD_DEBUG_PRINT(__FILE__ ":" LINE_AS_STRING ": " __VA_ARGS__); \
-+            ZSTD_DEBUG_PRINT(" \n");                     \
-+        }                                                \
-+    } while (0)
- #else
--#  define RAWLOG(l, ...)      {}    /* disabled */
--#  define DEBUGLOG(l, ...)    {}    /* disabled */
-+#  define RAWLOG(l, ...)   do { } while (0)    /* disabled */
-+#  define DEBUGLOG(l, ...) do { } while (0)    /* disabled */
- #endif
- 
- 
-diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c
-index fef67056f052..6cdd82233fb5 100644
---- a/lib/zstd/common/entropy_common.c
-+++ b/lib/zstd/common/entropy_common.c
-@@ -1,6 +1,7 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /* ******************************************************************
-  * Common functions of New Generation Entropy library
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  *
-  *  You can contact the author at :
-  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
-@@ -19,8 +20,8 @@
- #include "error_private.h"       /* ERR_*, ERROR */
- #define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
- #include "fse.h"
--#define HUF_STATIC_LINKING_ONLY  /* HUF_TABLELOG_ABSOLUTEMAX */
- #include "huf.h"
-+#include "bits.h"                /* ZSDT_highbit32, ZSTD_countTrailingZeros32 */
- 
- 
- /*===   Version   ===*/
-@@ -38,23 +39,6 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
- /*-**************************************************************
- *  FSE NCount encoding-decoding
- ****************************************************************/
--static U32 FSE_ctz(U32 val)
--{
--    assert(val != 0);
--    {
--#   if (__GNUC__ >= 3)   /* GCC Intrinsic */
--        return __builtin_ctz(val);
--#   else   /* Software version */
--        U32 count = 0;
--        while ((val & 1) == 0) {
--            val >>= 1;
--            ++count;
--        }
--        return count;
--#   endif
--    }
--}
--
- FORCE_INLINE_TEMPLATE
- size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
-                            const void* headerBuffer, size_t hbSize)
-@@ -102,7 +86,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
-              * repeat.
-              * Avoid UB by setting the high bit to 1.
-              */
--            int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
-+            int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
-             while (repeats >= 12) {
-                 charnum += 3 * 12;
-                 if (LIKELY(ip <= iend-7)) {
-@@ -113,7 +97,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
-                     ip = iend - 4;
-                 }
-                 bitStream = MEM_readLE32(ip) >> bitCount;
--                repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
-+                repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
-             }
-             charnum += 3 * repeats;
-             bitStream >>= 2 * repeats;
-@@ -178,7 +162,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
-                  * know that threshold > 1.
-                  */
-                 if (remaining <= 1) break;
--                nbBits = BIT_highbit32(remaining) + 1;
-+                nbBits = ZSTD_highbit32(remaining) + 1;
-                 threshold = 1 << (nbBits - 1);
-             }
-             if (charnum >= maxSV1) break;
-@@ -253,7 +237,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
-                      const void* src, size_t srcSize)
- {
-     U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
--    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
-+    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0);
- }
- 
- FORCE_INLINE_TEMPLATE size_t
-@@ -301,14 +285,14 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
-     if (weightTotal == 0) return ERROR(corruption_detected);
- 
-     /* get last non-null symbol weight (implied, total must be 2^n) */
--    {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
-+    {   U32 const tableLog = ZSTD_highbit32(weightTotal) + 1;
-         if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
-         *tableLogPtr = tableLog;
-         /* determine last weight */
-         {   U32 const total = 1 << tableLog;
-             U32 const rest = total - weightTotal;
--            U32 const verif = 1 << BIT_highbit32(rest);
--            U32 const lastWeight = BIT_highbit32(rest) + 1;
-+            U32 const verif = 1 << ZSTD_highbit32(rest);
-+            U32 const lastWeight = ZSTD_highbit32(rest) + 1;
-             if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
-             huffWeight[oSize] = (BYTE)lastWeight;
-             rankStats[lastWeight]++;
-@@ -345,13 +329,13 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
-                      U32* nbSymbolsPtr, U32* tableLogPtr,
-                      const void* src, size_t srcSize,
-                      void* workSpace, size_t wkspSize,
--                     int bmi2)
-+                     int flags)
- {
- #if DYNAMIC_BMI2
--    if (bmi2) {
-+    if (flags & HUF_flags_bmi2) {
-         return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
-     }
- #endif
--    (void)bmi2;
-+    (void)flags;
-     return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
- }
-diff --git a/lib/zstd/common/error_private.c b/lib/zstd/common/error_private.c
-index 6d1135f8c373..a4062d30d170 100644
---- a/lib/zstd/common/error_private.c
-+++ b/lib/zstd/common/error_private.c
-@@ -1,5 +1,6 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -27,9 +28,11 @@ const char* ERR_getErrorString(ERR_enum code)
-     case PREFIX(version_unsupported): return "Version not supported";
-     case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
-     case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
--    case PREFIX(corruption_detected): return "Corrupted block detected";
-+    case PREFIX(corruption_detected): return "Data corruption detected";
-     case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
-+    case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification";
-     case PREFIX(parameter_unsupported): return "Unsupported parameter";
-+    case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters";
-     case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
-     case PREFIX(init_missing): return "Context should be init first";
-     case PREFIX(memory_allocation): return "Allocation error : not enough memory";
-@@ -38,17 +41,22 @@ const char* ERR_getErrorString(ERR_enum code)
-     case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
-     case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
-     case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
-+    case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected";
-     case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
-     case PREFIX(dictionary_wrong): return "Dictionary mismatch";
-     case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
-     case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
-     case PREFIX(srcSize_wrong): return "Src size is incorrect";
-     case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
-+    case PREFIX(noForwardProgress_destFull): return "Operation made no progress over multiple calls, due to output buffer being full";
-+    case PREFIX(noForwardProgress_inputEmpty): return "Operation made no progress over multiple calls, due to input being empty";
-         /* following error codes are not stable and may be removed or changed in a future version */
-     case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
-     case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
-     case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
-     case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
-+    case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code";
-+    case PREFIX(externalSequences_invalid): return "External sequences are not valid";
-     case PREFIX(maxCode):
-     default: return notErrorCode;
-     }
-diff --git a/lib/zstd/common/error_private.h b/lib/zstd/common/error_private.h
-index ca5101e542fa..0410ca415b54 100644
---- a/lib/zstd/common/error_private.h
-+++ b/lib/zstd/common/error_private.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -49,8 +50,13 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
- ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
- 
- /* check and forward error code */
--#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
--#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
-+#define CHECK_V_F(e, f)     \
-+    size_t const e = f;     \
-+    do {                    \
-+        if (ERR_isError(e)) \
-+            return e;       \
-+    } while (0)
-+#define CHECK_F(f)   do { CHECK_V_F(_var_err__, f); } while (0)
- 
- 
- /*-****************************************
-@@ -84,10 +90,12 @@ void _force_has_format_string(const char *format, ...) {
-  * We want to force this function invocation to be syntactically correct, but
-  * we don't want to force runtime evaluation of its arguments.
-  */
--#define _FORCE_HAS_FORMAT_STRING(...) \
--  if (0) { \
--    _force_has_format_string(__VA_ARGS__); \
--  }
-+#define _FORCE_HAS_FORMAT_STRING(...)              \
-+    do {                                           \
-+        if (0) {                                   \
-+            _force_has_format_string(__VA_ARGS__); \
-+        }                                          \
-+    } while (0)
- 
- #define ERR_QUOTE(str) #str
- 
-@@ -98,48 +106,50 @@ void _force_has_format_string(const char *format, ...) {
-  * In order to do that (particularly, printing the conditional that failed),
-  * this can't just wrap RETURN_ERROR().
-  */
--#define RETURN_ERROR_IF(cond, err, ...) \
--  if (cond) { \
--    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
--           __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
--    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
--    RAWLOG(3, ": " __VA_ARGS__); \
--    RAWLOG(3, "\n"); \
--    return ERROR(err); \
--  }
-+#define RETURN_ERROR_IF(cond, err, ...)                                        \
-+    do {                                                                       \
-+        if (cond) {                                                            \
-+            RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s",          \
-+                  __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
-+            _FORCE_HAS_FORMAT_STRING(__VA_ARGS__);                             \
-+            RAWLOG(3, ": " __VA_ARGS__);                                       \
-+            RAWLOG(3, "\n");                                                   \
-+            return ERROR(err);                                                 \
-+        }                                                                      \
-+    } while (0)
- 
- /*
-  * Unconditionally return the specified error.
-  *
-  * In debug modes, prints additional information.
-  */
--#define RETURN_ERROR(err, ...) \
--  do { \
--    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
--           __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
--    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
--    RAWLOG(3, ": " __VA_ARGS__); \
--    RAWLOG(3, "\n"); \
--    return ERROR(err); \
--  } while(0);
-+#define RETURN_ERROR(err, ...)                                               \
-+    do {                                                                     \
-+        RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
-+              __FILE__, __LINE__, ERR_QUOTE(ERROR(err)));                    \
-+        _FORCE_HAS_FORMAT_STRING(__VA_ARGS__);                               \
-+        RAWLOG(3, ": " __VA_ARGS__);                                         \
-+        RAWLOG(3, "\n");                                                     \
-+        return ERROR(err);                                                   \
-+    } while(0)
- 
- /*
-  * If the provided expression evaluates to an error code, returns that error code.
-  *
-  * In debug modes, prints additional information.
-  */
--#define FORWARD_IF_ERROR(err, ...) \
--  do { \
--    size_t const err_code = (err); \
--    if (ERR_isError(err_code)) { \
--      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
--             __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
--      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
--      RAWLOG(3, ": " __VA_ARGS__); \
--      RAWLOG(3, "\n"); \
--      return err_code; \
--    } \
--  } while(0);
-+#define FORWARD_IF_ERROR(err, ...)                                                 \
-+    do {                                                                           \
-+        size_t const err_code = (err);                                             \
-+        if (ERR_isError(err_code)) {                                               \
-+            RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s",                 \
-+                  __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
-+            _FORCE_HAS_FORMAT_STRING(__VA_ARGS__);                                 \
-+            RAWLOG(3, ": " __VA_ARGS__);                                           \
-+            RAWLOG(3, "\n");                                                       \
-+            return err_code;                                                       \
-+        }                                                                          \
-+    } while(0)
- 
- 
- #endif /* ERROR_H_MODULE */
-diff --git a/lib/zstd/common/fse.h b/lib/zstd/common/fse.h
-index 4507043b2287..2185a578617d 100644
---- a/lib/zstd/common/fse.h
-+++ b/lib/zstd/common/fse.h
-@@ -1,7 +1,8 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /* ******************************************************************
-  * FSE : Finite State Entropy codec
-  * Public Prototypes declaration
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  *
-  * You can contact the author at :
-  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
-@@ -50,34 +51,6 @@
- FSE_PUBLIC_API unsigned FSE_versionNumber(void);   /*< library version number; to be used when checking dll version */
- 
- 
--/*-****************************************
--*  FSE simple functions
--******************************************/
--/*! FSE_compress() :
--    Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
--    'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
--    @return : size of compressed data (<= dstCapacity).
--    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
--                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
--                     if FSE_isError(return), compression failed (more details using FSE_getErrorName())
--*/
--FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
--                             const void* src, size_t srcSize);
--
--/*! FSE_decompress():
--    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
--    into already allocated destination buffer 'dst', of size 'dstCapacity'.
--    @return : size of regenerated data (<= maxDstSize),
--              or an error code, which can be tested using FSE_isError() .
--
--    ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
--    Why ? : making this distinction requires a header.
--    Header management is intentionally delegated to the user layer, which can better manage special cases.
--*/
--FSE_PUBLIC_API size_t FSE_decompress(void* dst,  size_t dstCapacity,
--                               const void* cSrc, size_t cSrcSize);
--
--
- /*-*****************************************
- *  Tool functions
- ******************************************/
-@@ -88,20 +61,6 @@ FSE_PUBLIC_API unsigned    FSE_isError(size_t code);        /* tells if a return
- FSE_PUBLIC_API const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
- 
- 
--/*-*****************************************
--*  FSE advanced functions
--******************************************/
--/*! FSE_compress2() :
--    Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
--    Both parameters can be defined as '0' to mean : use default value
--    @return : size of compressed data
--    Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
--                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
--                     if FSE_isError(return), it's an error code.
--*/
--FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
--
--
- /*-*****************************************
- *  FSE detailed API
- ******************************************/
-@@ -161,8 +120,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
- /*! Constructor and Destructor of FSE_CTable.
-     Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
- typedef unsigned FSE_CTable;   /* don't allocate that. It's only meant to be more restrictive than void* */
--FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
--FSE_PUBLIC_API void        FSE_freeCTable (FSE_CTable* ct);
- 
- /*! FSE_buildCTable():
-     Builds `ct`, which must be already allocated, using FSE_createCTable().
-@@ -238,23 +195,7 @@ FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
-                            unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
-                            const void* rBuffer, size_t rBuffSize, int bmi2);
- 
--/*! Constructor and Destructor of FSE_DTable.
--    Note that its size depends on 'tableLog' */
- typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
--FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
--FSE_PUBLIC_API void        FSE_freeDTable(FSE_DTable* dt);
--
--/*! FSE_buildDTable():
--    Builds 'dt', which must be already allocated, using FSE_createDTable().
--    return : 0, or an errorCode, which can be tested using FSE_isError() */
--FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
--
--/*! FSE_decompress_usingDTable():
--    Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
--    into `dst` which must be already allocated.
--    @return : size of regenerated data (necessarily <= `dstCapacity`),
--              or an errorCode, which can be tested using FSE_isError() */
--FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
- 
- /*!
- Tutorial :
-@@ -286,6 +227,7 @@ If there is an error, the function will return an error code, which can be teste
- 
- #endif  /* FSE_H */
- 
-+
- #if !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
- #define FSE_H_FSE_STATIC_LINKING_ONLY
- 
-@@ -317,16 +259,6 @@ If there is an error, the function will return an error code, which can be teste
- unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
- /*< same as FSE_optimalTableLog(), which used `minus==2` */
- 
--/* FSE_compress_wksp() :
-- * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
-- * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
-- */
--#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
--size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
--
--size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
--/*< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
--
- size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
- /*< build a fake FSE_CTable, designed to compress always the same symbolValue */
- 
-@@ -344,19 +276,11 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
- FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
- /*< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
- 
--size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
--/*< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
--
--size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
--/*< build a fake FSE_DTable, designed to always generate the same symbolValue */
--
--#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
-+#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
- #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
--size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
--/*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
--
- size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
--/*< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
-+/*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`.
-+ * Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */
- 
- typedef enum {
-    FSE_repeat_none,  /*< Cannot use the previous table */
-@@ -539,20 +463,20 @@ MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, un
-     FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
-     const U16* const stateTable = (const U16*)(statePtr->stateTable);
-     U32 const nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
--    BIT_addBits(bitC, statePtr->value, nbBitsOut);
-+    BIT_addBits(bitC,  (size_t)statePtr->value, nbBitsOut);
-     statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
- }
- 
- MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
- {
--    BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
-+    BIT_addBits(bitC, (size_t)statePtr->value, statePtr->stateLog);
-     BIT_flushBits(bitC);
- }
- 
- 
- /* FSE_getMaxNbBits() :
-  * Approximate maximum cost of a symbol, in bits.
-- * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
-+ * Fractional get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
-  * note 1 : assume symbolValue is valid (<= maxSymbolValue)
-  * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
- MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
-diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c
-index 8dcb8ca39767..3a17e84f27bf 100644
---- a/lib/zstd/common/fse_decompress.c
-+++ b/lib/zstd/common/fse_decompress.c
-@@ -1,6 +1,7 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /* ******************************************************************
-  * FSE : Finite State Entropy decoder
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  *
-  *  You can contact the author at :
-  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
-@@ -22,8 +23,8 @@
- #define FSE_STATIC_LINKING_ONLY
- #include "fse.h"
- #include "error_private.h"
--#define ZSTD_DEPS_NEED_MALLOC
--#include "zstd_deps.h"
-+#include "zstd_deps.h"  /* ZSTD_memcpy */
-+#include "bits.h"       /* ZSTD_highbit32 */
- 
- 
- /* **************************************************************
-@@ -55,19 +56,6 @@
- #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
- #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
- 
--
--/* Function templates */
--FSE_DTable* FSE_createDTable (unsigned tableLog)
--{
--    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
--    return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
--}
--
--void FSE_freeDTable (FSE_DTable* dt)
--{
--    ZSTD_free(dt);
--}
--
- static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
- {
-     void* const tdPtr = dt+1;   /* because *dt is unsigned, 32-bits aligned on 32-bits */
-@@ -96,7 +84,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
-                     symbolNext[s] = 1;
-                 } else {
-                     if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
--                    symbolNext[s] = normalizedCounter[s];
-+                    symbolNext[s] = (U16)normalizedCounter[s];
-         }   }   }
-         ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
-     }
-@@ -111,8 +99,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
-          * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
-          * our buffer to handle the over-write.
-          */
--        {
--            U64 const add = 0x0101010101010101ull;
-+        {   U64 const add = 0x0101010101010101ull;
-             size_t pos = 0;
-             U64 sv = 0;
-             U32 s;
-@@ -123,14 +110,13 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
-                 for (i = 8; i < n; i += 8) {
-                     MEM_write64(spread + pos + i, sv);
-                 }
--                pos += n;
--            }
--        }
-+                pos += (size_t)n;
-+        }   }
-         /* Now we spread those positions across the table.
--         * The benefit of doing it in two stages is that we avoid the the
-+         * The benefit of doing it in two stages is that we avoid the
-          * variable size inner loop, which caused lots of branch misses.
-          * Now we can run through all the positions without any branch misses.
--         * We unroll the loop twice, since that is what emperically worked best.
-+         * We unroll the loop twice, since that is what empirically worked best.
-          */
-         {
-             size_t position = 0;
-@@ -166,7 +152,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
-         for (u=0; u<tableSize; u++) {
-             FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
-             U32 const nextState = symbolNext[symbol]++;
--            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
-+            tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
-             tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
-     }   }
- 
-@@ -184,49 +170,6 @@ size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsi
- /*-*******************************************************
- *  Decompression (Byte symbols)
- *********************************************************/
--size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
--{
--    void* ptr = dt;
--    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
--    void* dPtr = dt + 1;
--    FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
--
--    DTableH->tableLog = 0;
--    DTableH->fastMode = 0;
--
--    cell->newState = 0;
--    cell->symbol = symbolValue;
--    cell->nbBits = 0;
--
--    return 0;
--}
--
--
--size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
--{
--    void* ptr = dt;
--    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
--    void* dPtr = dt + 1;
--    FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
--    const unsigned tableSize = 1 << nbBits;
--    const unsigned tableMask = tableSize - 1;
--    const unsigned maxSV1 = tableMask+1;
--    unsigned s;
--
--    /* Sanity checks */
--    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
--
--    /* Build Decoding Table */
--    DTableH->tableLog = (U16)nbBits;
--    DTableH->fastMode = 1;
--    for (s=0; s<maxSV1; s++) {
--        dinfo[s].newState = 0;
--        dinfo[s].symbol = (BYTE)s;
--        dinfo[s].nbBits = (BYTE)nbBits;
--    }
--
--    return 0;
--}
- 
- FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
-           void* dst, size_t maxDstSize,
-@@ -287,32 +230,12 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
-             break;
-     }   }
- 
--    return op-ostart;
--}
--
--
--size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
--                            const void* cSrc, size_t cSrcSize,
--                            const FSE_DTable* dt)
--{
--    const void* ptr = dt;
--    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
--    const U32 fastMode = DTableH->fastMode;
--
--    /* select fast mode (static) */
--    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
--    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
--}
--
--
--size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
--{
--    return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
-+    assert(op >= ostart);
-+    return (size_t)(op-ostart);
- }
- 
- typedef struct {
-     short ncount[FSE_MAX_SYMBOL_VALUE + 1];
--    FSE_DTable dtable[]; /* Dynamically sized */
- } FSE_DecompressWksp;
- 
- 
-@@ -327,13 +250,18 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
-     unsigned tableLog;
-     unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
-     FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
-+    size_t const dtablePos = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable);
-+    FSE_DTable* const dtable = (FSE_DTable*)workSpace + dtablePos;
- 
--    DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
-+    FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
-     if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
- 
-+    /* correct offset to dtable depends on this property */
-+    FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0);
-+
-     /* normal FSE decoding mode */
--    {
--        size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
-+    {   size_t const NCountLength =
-+            FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
-         if (FSE_isError(NCountLength)) return NCountLength;
-         if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
-         assert(NCountLength <= cSrcSize);
-@@ -342,19 +270,20 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
-     }
- 
-     if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
--    workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
-+    assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
-+    workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
-     wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
- 
--    CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
-+    CHECK_F( FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
- 
-     {
--        const void* ptr = wksp->dtable;
-+        const void* ptr = dtable;
-         const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
-         const U32 fastMode = DTableH->fastMode;
- 
-         /* select fast mode (static) */
--        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
--        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
-+        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
-+        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
-     }
- }
- 
-@@ -382,9 +311,4 @@ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc,
-     return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
- }
- 
--
--typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
--
--
--
- #endif   /* FSE_COMMONDEFS_ONLY */
-diff --git a/lib/zstd/common/huf.h b/lib/zstd/common/huf.h
-index 5042ff870308..57462466e188 100644
---- a/lib/zstd/common/huf.h
-+++ b/lib/zstd/common/huf.h
-@@ -1,7 +1,8 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /* ******************************************************************
-  * huff0 huffman codec,
-  * part of Finite State Entropy library
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  *
-  * You can contact the author at :
-  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
-@@ -18,99 +19,22 @@
- 
- /* *** Dependencies *** */
- #include "zstd_deps.h"    /* size_t */
--
--
--/* *** library symbols visibility *** */
--/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
-- *        HUF symbols remain "private" (internal symbols for library only).
-- *        Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
--#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
--#  define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
--#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
--#  define HUF_PUBLIC_API __declspec(dllexport)
--#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
--#  define HUF_PUBLIC_API __declspec(dllimport)  /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
--#else
--#  define HUF_PUBLIC_API
--#endif
--
--
--/* ========================== */
--/* ***  simple functions  *** */
--/* ========================== */
--
--/* HUF_compress() :
-- *  Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
-- * 'dst' buffer must be already allocated.
-- *  Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
-- * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
-- * @return : size of compressed data (<= `dstCapacity`).
-- *  Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
-- *                   if HUF_isError(return), compression failed (more details using HUF_getErrorName())
-- */
--HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
--                             const void* src, size_t srcSize);
--
--/* HUF_decompress() :
-- *  Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
-- *  into already allocated buffer 'dst', of minimum size 'dstSize'.
-- * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
-- *  Note : in contrast with FSE, HUF_decompress can regenerate
-- *         RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
-- *         because it knows size to regenerate (originalSize).
-- * @return : size of regenerated data (== originalSize),
-- *           or an error code, which can be tested using HUF_isError()
-- */
--HUF_PUBLIC_API size_t HUF_decompress(void* dst,  size_t originalSize,
--                               const void* cSrc, size_t cSrcSize);
-+#include "mem.h"          /* U32 */
-+#define FSE_STATIC_LINKING_ONLY
-+#include "fse.h"
- 
- 
- /* ***   Tool functions *** */
--#define HUF_BLOCKSIZE_MAX (128 * 1024)                  /*< maximum input size for a single block compressed with HUF_compress */
--HUF_PUBLIC_API size_t HUF_compressBound(size_t size);   /*< maximum compressed size (worst case) */
-+#define HUF_BLOCKSIZE_MAX (128 * 1024)   /*< maximum input size for a single block compressed with HUF_compress */
-+size_t HUF_compressBound(size_t size);   /*< maximum compressed size (worst case) */
- 
- /* Error Management */
--HUF_PUBLIC_API unsigned    HUF_isError(size_t code);       /*< tells if a return value is an error code */
--HUF_PUBLIC_API const char* HUF_getErrorName(size_t code);  /*< provides error code string (useful for debugging) */
--
-+unsigned    HUF_isError(size_t code);       /*< tells if a return value is an error code */
-+const char* HUF_getErrorName(size_t code);  /*< provides error code string (useful for debugging) */
- 
--/* ***   Advanced function   *** */
- 
--/* HUF_compress2() :
-- *  Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
-- * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
-- * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
--HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
--                               const void* src, size_t srcSize,
--                               unsigned maxSymbolValue, unsigned tableLog);
--
--/* HUF_compress4X_wksp() :
-- *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
-- * `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
- #define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
- #define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
--HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
--                                     const void* src, size_t srcSize,
--                                     unsigned maxSymbolValue, unsigned tableLog,
--                                     void* workSpace, size_t wkspSize);
--
--#endif   /* HUF_H_298734234 */
--
--/* ******************************************************************
-- *  WARNING !!
-- *  The following section contains advanced and experimental definitions
-- *  which shall never be used in the context of a dynamic library,
-- *  because they are not guaranteed to remain stable in the future.
-- *  Only consider them in association with static linking.
-- * *****************************************************************/
--#if !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
--#define HUF_H_HUF_STATIC_LINKING_ONLY
--
--/* *** Dependencies *** */
--#include "mem.h"   /* U32 */
--#define FSE_STATIC_LINKING_ONLY
--#include "fse.h"
--
- 
- /* *** Constants *** */
- #define HUF_TABLELOG_MAX      12      /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
-@@ -151,25 +75,49 @@ typedef U32 HUF_DTable;
- /* ****************************************
- *  Advanced decompression functions
- ******************************************/
--size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< single-symbol decoder */
--#ifndef HUF_FORCE_DECOMPRESS_X1
--size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< double-symbols decoder */
--#endif
- 
--size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< decodes RLE and uncompressed */
--size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< considers RLE and uncompressed as errors */
--size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< considers RLE and uncompressed as errors */
--size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< single-symbol decoder */
--size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< single-symbol decoder */
--#ifndef HUF_FORCE_DECOMPRESS_X1
--size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< double-symbols decoder */
--size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< double-symbols decoder */
--#endif
-+/*
-+ * Huffman flags bitset.
-+ * For all flags, 0 is the default value.
-+ */
-+typedef enum {
-+    /*
-+     * If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime.
-+     * Otherwise: Ignored.
-+     */
-+    HUF_flags_bmi2 = (1 << 0),
-+    /*
-+     * If set: Test possible table depths to find the one that produces the smallest header + encoded size.
-+     * If unset: Use heuristic to find the table depth.
-+     */
-+    HUF_flags_optimalDepth = (1 << 1),
-+    /*
-+     * If set: If the previous table can encode the input, always reuse the previous table.
-+     * If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output.
-+     */
-+    HUF_flags_preferRepeat = (1 << 2),
-+    /*
-+     * If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress.
-+     * If unset: Always histogram the entire input.
-+     */
-+    HUF_flags_suspectUncompressible = (1 << 3),
-+    /*
-+     * If set: Don't use assembly implementations
-+     * If unset: Allow using assembly implementations
-+     */
-+    HUF_flags_disableAsm = (1 << 4),
-+    /*
-+     * If set: Don't use the fast decoding loop, always use the fallback decoding loop.
-+     * If unset: Use the fast decoding loop when possible.
-+     */
-+    HUF_flags_disableFast = (1 << 5)
-+} HUF_flags_e;
- 
- 
- /* ****************************************
-  *  HUF detailed API
-  * ****************************************/
-+#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra
- 
- /*! HUF_compress() does the following:
-  *  1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
-@@ -182,12 +130,12 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
-  *  For example, it's possible to compress several blocks using the same 'CTable',
-  *  or to save and regenerate 'CTable' using external methods.
-  */
--unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
--size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);   /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
--size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
-+unsigned HUF_minTableLog(unsigned symbolCardinality);
-+unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue);
-+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace,
-+ size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */
- size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
--size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
--size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
-+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
- size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
- int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
- 
-@@ -196,6 +144,7 @@ typedef enum {
-    HUF_repeat_check, /*< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
-    HUF_repeat_valid  /*< Can use the previous table and it is assumed to be valid */
-  } HUF_repeat;
-+
- /* HUF_compress4X_repeat() :
-  *  Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
-  *  If it uses hufTable it does not modify hufTable or repeat.
-@@ -206,13 +155,13 @@ size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
-                        const void* src, size_t srcSize,
-                        unsigned maxSymbolValue, unsigned tableLog,
-                        void* workSpace, size_t wkspSize,    /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
--                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
-+                       HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
- 
- /* HUF_buildCTable_wksp() :
-  *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
-  * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
-  */
--#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
-+#define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192)
- #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
- size_t HUF_buildCTable_wksp (HUF_CElt* tree,
-                        const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
-@@ -238,7 +187,7 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
-                           U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
-                           const void* src, size_t srcSize,
-                           void* workspace, size_t wkspSize,
--                          int bmi2);
-+                          int flags);
- 
- /* HUF_readCTable() :
-  *  Loading a CTable saved with HUF_writeCTable() */
-@@ -246,9 +195,22 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
- 
- /* HUF_getNbBitsFromCTable() :
-  *  Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
-- *  Note 1 : is not inlined, as HUF_CElt definition is private */
-+ *  Note 1 : If symbolValue > HUF_readCTableHeader(symbolTable).maxSymbolValue, returns 0
-+ *  Note 2 : is not inlined, as HUF_CElt definition is private
-+ */
- U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
- 
-+typedef struct {
-+    BYTE tableLog;
-+    BYTE maxSymbolValue;
-+    BYTE unused[sizeof(size_t) - 2];
-+} HUF_CTableHeader;
-+
-+/* HUF_readCTableHeader() :
-+ *  @returns The header from the CTable specifying the tableLog and the maxSymbolValue.
-+ */
-+HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable);
-+
- /*
-  * HUF_decompress() does the following:
-  * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
-@@ -276,32 +238,12 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
- #define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
- #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
- 
--#ifndef HUF_FORCE_DECOMPRESS_X2
--size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
--size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
--#endif
--#ifndef HUF_FORCE_DECOMPRESS_X1
--size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
--size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
--#endif
--
--size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
--#ifndef HUF_FORCE_DECOMPRESS_X2
--size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
--#endif
--#ifndef HUF_FORCE_DECOMPRESS_X1
--size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
--#endif
--
- 
- /* ====================== */
- /* single stream variants */
- /* ====================== */
- 
--size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
--size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
--size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
--size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
-+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
- /* HUF_compress1X_repeat() :
-  *  Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
-  *  If it uses hufTable it does not modify hufTable or repeat.
-@@ -312,47 +254,28 @@ size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
-                        const void* src, size_t srcSize,
-                        unsigned maxSymbolValue, unsigned tableLog,
-                        void* workSpace, size_t wkspSize,   /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
--                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
-+                       HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
- 
--size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
--#ifndef HUF_FORCE_DECOMPRESS_X1
--size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
--#endif
--
--size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
--size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
--#ifndef HUF_FORCE_DECOMPRESS_X2
--size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< single-symbol decoder */
--size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< single-symbol decoder */
--#endif
-+size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
- #ifndef HUF_FORCE_DECOMPRESS_X1
--size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< double-symbols decoder */
--size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< double-symbols decoder */
--#endif
--
--size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);   /*< automatic selection of sing or double symbol decoder, based on DTable */
--#ifndef HUF_FORCE_DECOMPRESS_X2
--size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
--#endif
--#ifndef HUF_FORCE_DECOMPRESS_X1
--size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
-+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);   /*< double-symbols decoder */
- #endif
- 
- /* BMI2 variants.
-  * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
-  */
--size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
-+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
- #ifndef HUF_FORCE_DECOMPRESS_X2
--size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
-+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
- #endif
--size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
--size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
-+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
-+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
- #ifndef HUF_FORCE_DECOMPRESS_X2
--size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
-+size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
- #endif
- #ifndef HUF_FORCE_DECOMPRESS_X1
--size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
-+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
- #endif
- 
--#endif /* HUF_STATIC_LINKING_ONLY */
-+#endif   /* HUF_H_298734234 */
- 
-diff --git a/lib/zstd/common/mem.h b/lib/zstd/common/mem.h
-index 1d9cc03924ca..2e91e7780c1f 100644
---- a/lib/zstd/common/mem.h
-+++ b/lib/zstd/common/mem.h
-@@ -1,6 +1,6 @@
- /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -24,6 +24,7 @@
- /*-****************************************
- *  Compiler specifics
- ******************************************/
-+#undef MEM_STATIC /* may be already defined from common/compiler.h */
- #define MEM_STATIC static inline
- 
- /*-**************************************************************
-diff --git a/lib/zstd/common/portability_macros.h b/lib/zstd/common/portability_macros.h
-index 0e3b2c0a527d..f08638cced6c 100644
---- a/lib/zstd/common/portability_macros.h
-+++ b/lib/zstd/common/portability_macros.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -12,7 +13,7 @@
- #define ZSTD_PORTABILITY_MACROS_H
- 
- /*
-- * This header file contains macro defintions to support portability.
-+ * This header file contains macro definitions to support portability.
-  * This header is shared between C and ASM code, so it MUST only
-  * contain macro definitions. It MUST not contain any C code.
-  *
-@@ -45,6 +46,8 @@
- /* Mark the internal assembly functions as hidden  */
- #ifdef __ELF__
- # define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
-+#elif defined(__APPLE__)
-+# define ZSTD_HIDE_ASM_FUNCTION(func) .private_extern func
- #else
- # define ZSTD_HIDE_ASM_FUNCTION(func)
- #endif
-@@ -65,7 +68,7 @@
- #endif
- 
- /*
-- * Only enable assembly for GNUC comptabile compilers,
-+ * Only enable assembly for GNUC compatible compilers,
-  * because other platforms may not support GAS assembly syntax.
-  *
-  * Only enable assembly for Linux / MacOS, other platforms may
-@@ -90,4 +93,23 @@
-  */
- #define ZSTD_ENABLE_ASM_X86_64_BMI2 0
- 
-+/*
-+ * For x86 ELF targets, add .note.gnu.property section for Intel CET in
-+ * assembly sources when CET is enabled.
-+ *
-+ * Additionally, any function that may be called indirectly must begin
-+ * with ZSTD_CET_ENDBRANCH.
-+ */
-+#if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \
-+    && defined(__has_include)
-+# if __has_include(<cet.h>)
-+#  include <cet.h>
-+#  define ZSTD_CET_ENDBRANCH _CET_ENDBR
-+# endif
-+#endif
-+
-+#ifndef ZSTD_CET_ENDBRANCH
-+# define ZSTD_CET_ENDBRANCH
-+#endif
-+
- #endif /* ZSTD_PORTABILITY_MACROS_H */
-diff --git a/lib/zstd/common/zstd_common.c b/lib/zstd/common/zstd_common.c
-index 3d7e35b309b5..44b95b25344a 100644
---- a/lib/zstd/common/zstd_common.c
-+++ b/lib/zstd/common/zstd_common.c
-@@ -1,5 +1,6 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -14,7 +15,6 @@
- *  Dependencies
- ***************************************/
- #define ZSTD_DEPS_NEED_MALLOC
--#include "zstd_deps.h"   /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
- #include "error_private.h"
- #include "zstd_internal.h"
- 
-@@ -47,37 +47,3 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
- /*! ZSTD_getErrorString() :
-  *  provides error code string from enum */
- const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
--
--
--
--/*=**************************************************************
--*  Custom allocator
--****************************************************************/
--void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
--{
--    if (customMem.customAlloc)
--        return customMem.customAlloc(customMem.opaque, size);
--    return ZSTD_malloc(size);
--}
--
--void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
--{
--    if (customMem.customAlloc) {
--        /* calloc implemented as malloc+memset;
--         * not as efficient as calloc, but next best guess for custom malloc */
--        void* const ptr = customMem.customAlloc(customMem.opaque, size);
--        ZSTD_memset(ptr, 0, size);
--        return ptr;
--    }
--    return ZSTD_calloc(1, size);
--}
--
--void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
--{
--    if (ptr!=NULL) {
--        if (customMem.customFree)
--            customMem.customFree(customMem.opaque, ptr);
--        else
--            ZSTD_free(ptr);
--    }
--}
-diff --git a/lib/zstd/common/zstd_deps.h b/lib/zstd/common/zstd_deps.h
-index 2c34e8a33a1c..f931f7d0e294 100644
---- a/lib/zstd/common/zstd_deps.h
-+++ b/lib/zstd/common/zstd_deps.h
-@@ -1,6 +1,6 @@
- /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -105,3 +105,17 @@ static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) {
- 
- #endif /* ZSTD_DEPS_IO */
- #endif /* ZSTD_DEPS_NEED_IO */
-+
-+/*
-+ * Only requested when MSAN is enabled.
-+ * Need:
-+ * intptr_t
-+ */
-+#ifdef ZSTD_DEPS_NEED_STDINT
-+#ifndef ZSTD_DEPS_STDINT
-+#define ZSTD_DEPS_STDINT
-+
-+/* intptr_t already provided by ZSTD_DEPS_COMMON */
-+
-+#endif /* ZSTD_DEPS_STDINT */
-+#endif /* ZSTD_DEPS_NEED_STDINT */
-diff --git a/lib/zstd/common/zstd_internal.h b/lib/zstd/common/zstd_internal.h
-index 93305d9b41bb..11da1233e890 100644
---- a/lib/zstd/common/zstd_internal.h
-+++ b/lib/zstd/common/zstd_internal.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -28,7 +29,6 @@
- #include <linux/zstd.h>
- #define FSE_STATIC_LINKING_ONLY
- #include "fse.h"
--#define HUF_STATIC_LINKING_ONLY
- #include "huf.h"
- #include <linux/xxhash.h>                /* XXH_reset, update, digest */
- #define ZSTD_TRACE 0
-@@ -83,9 +83,9 @@ typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
- #define ZSTD_FRAMECHECKSUMSIZE 4
- 
- #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
--#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
-+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */)   /* for a non-null block */
-+#define MIN_LITERALS_FOR_4_STREAMS 6
- 
--#define HufLog 12
- typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
- 
- #define LONGNBSEQ 0x7F00
-@@ -93,6 +93,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
- #define MINMATCH 3
- 
- #define Litbits  8
-+#define LitHufLog 11
- #define MaxLit ((1<<Litbits) - 1)
- #define MaxML   52
- #define MaxLL   35
-@@ -103,6 +104,8 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
- #define LLFSELog    9
- #define OffFSELog   8
- #define MaxFSELog  MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
-+#define MaxMLBits 16
-+#define MaxLLBits 16
- 
- #define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
- /* Each table cannot take more than #symbols * FSELog bits */
-@@ -166,7 +169,7 @@ static void ZSTD_copy8(void* dst, const void* src) {
-     ZSTD_memcpy(dst, src, 8);
- #endif
- }
--#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
-+#define COPY8(d,s) do { ZSTD_copy8(d,s); d+=8; s+=8; } while (0)
- 
- /* Need to use memmove here since the literal buffer can now be located within
-    the dst buffer. In circumstances where the op "catches up" to where the
-@@ -186,7 +189,7 @@ static void ZSTD_copy16(void* dst, const void* src) {
-     ZSTD_memcpy(dst, copy16_buf, 16);
- #endif
- }
--#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
-+#define COPY16(d,s) do { ZSTD_copy16(d,s); d+=16; s+=16; } while (0)
- 
- #define WILDCOPY_OVERLENGTH 32
- #define WILDCOPY_VECLEN 16
-@@ -215,7 +218,7 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
-     if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
-         /* Handle short offset copies. */
-         do {
--            COPY8(op, ip)
-+            COPY8(op, ip);
-         } while (op < oend);
-     } else {
-         assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
-@@ -225,12 +228,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
-          * one COPY16() in the first call. Then, do two calls per loop since
-          * at that point it is more likely to have a high trip count.
-          */
--#ifdef __aarch64__
--        do {
--            COPY16(op, ip);
--        }
--        while (op < oend);
--#else
-         ZSTD_copy16(op, ip);
-         if (16 >= length) return;
-         op += 16;
-@@ -240,7 +237,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
-             COPY16(op, ip);
-         }
-         while (op < oend);
--#endif
-     }
- }
- 
-@@ -289,11 +285,11 @@ typedef enum {
- typedef struct {
-     seqDef* sequencesStart;
-     seqDef* sequences;      /* ptr to end of sequences */
--    BYTE* litStart;
--    BYTE* lit;              /* ptr to end of literals */
--    BYTE* llCode;
--    BYTE* mlCode;
--    BYTE* ofCode;
-+    BYTE*  litStart;
-+    BYTE*  lit;             /* ptr to end of literals */
-+    BYTE*  llCode;
-+    BYTE*  mlCode;
-+    BYTE*  ofCode;
-     size_t maxNbSeq;
-     size_t maxNbLit;
- 
-@@ -301,8 +297,8 @@ typedef struct {
-      * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
-      * the existing value of the litLength or matchLength by 0x10000.
-      */
--    ZSTD_longLengthType_e   longLengthType;
--    U32                     longLengthPos;  /* Index of the sequence to apply long length modification to */
-+    ZSTD_longLengthType_e longLengthType;
-+    U32                   longLengthPos;  /* Index of the sequence to apply long length modification to */
- } seqStore_t;
- 
- typedef struct {
-@@ -321,10 +317,10 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
-     seqLen.matchLength = seq->mlBase + MINMATCH;
-     if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
-         if (seqStore->longLengthType == ZSTD_llt_literalLength) {
--            seqLen.litLength += 0xFFFF;
-+            seqLen.litLength += 0x10000;
-         }
-         if (seqStore->longLengthType == ZSTD_llt_matchLength) {
--            seqLen.matchLength += 0xFFFF;
-+            seqLen.matchLength += 0x10000;
-         }
-     }
-     return seqLen;
-@@ -337,72 +333,13 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
-  *          `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
-  */
- typedef struct {
-+    size_t nbBlocks;
-     size_t compressedSize;
-     unsigned long long decompressedBound;
- } ZSTD_frameSizeInfo;   /* decompress & legacy */
- 
- const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
--void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
--
--/* custom memory allocation functions */
--void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
--void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
--void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
--
--
--MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
--{
--    assert(val != 0);
--    {
--#   if (__GNUC__ >= 3)   /* GCC Intrinsic */
--        return __builtin_clz (val) ^ 31;
--#   else   /* Software version */
--        static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
--        U32 v = val;
--        v |= v >> 1;
--        v |= v >> 2;
--        v |= v >> 4;
--        v |= v >> 8;
--        v |= v >> 16;
--        return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
--#   endif
--    }
--}
--
--/*
-- * Counts the number of trailing zeros of a `size_t`.
-- * Most compilers should support CTZ as a builtin. A backup
-- * implementation is provided if the builtin isn't supported, but
-- * it may not be terribly efficient.
-- */
--MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
--{
--    if (MEM_64bits()) {
--#       if (__GNUC__ >= 4)
--            return __builtin_ctzll((U64)val);
--#       else
--            static const int DeBruijnBytePos[64] = {  0,  1,  2,  7,  3, 13,  8, 19,
--                                                      4, 25, 14, 28,  9, 34, 20, 56,
--                                                      5, 17, 26, 54, 15, 41, 29, 43,
--                                                      10, 31, 38, 35, 21, 45, 49, 57,
--                                                      63,  6, 12, 18, 24, 27, 33, 55,
--                                                      16, 53, 40, 42, 30, 37, 44, 48,
--                                                      62, 11, 23, 32, 52, 39, 36, 47,
--                                                      61, 22, 51, 46, 60, 50, 59, 58 };
--            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
--#       endif
--    } else { /* 32 bits */
--#       if (__GNUC__ >= 3)
--            return __builtin_ctz((U32)val);
--#       else
--            static const int DeBruijnBytePos[32] = {  0,  1, 28,  2, 29, 14, 24,  3,
--                                                     30, 22, 20, 15, 25, 17,  4,  8,
--                                                     31, 27, 13, 23, 21, 19, 16,  7,
--                                                     26, 12, 18,  6, 11,  5, 10,  9 };
--            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
--#       endif
--    }
--}
-+int ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
- 
- 
- /* ZSTD_invalidateRepCodes() :
-@@ -420,13 +357,13 @@ typedef struct {
- 
- /*! ZSTD_getcBlockSize() :
-  *  Provides the size of compressed block from block header `src` */
--/* Used by: decompress, fullbench (does not get its definition from here) */
-+/*  Used by: decompress, fullbench */
- size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
-                           blockProperties_t* bpPtr);
- 
- /*! ZSTD_decodeSeqHeaders() :
-  *  decode sequence header from src */
--/* Used by: decompress, fullbench (does not get its definition from here) */
-+/*  Used by: zstd_decompress_block, fullbench */
- size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
-                        const void* src, size_t srcSize);
- 
-diff --git a/lib/zstd/compress/clevels.h b/lib/zstd/compress/clevels.h
-index d9a76112ec3a..6ab8be6532ef 100644
---- a/lib/zstd/compress/clevels.h
-+++ b/lib/zstd/compress/clevels.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-diff --git a/lib/zstd/compress/fse_compress.c b/lib/zstd/compress/fse_compress.c
-index ec5b1ca6d71a..44a3c10becf2 100644
---- a/lib/zstd/compress/fse_compress.c
-+++ b/lib/zstd/compress/fse_compress.c
-@@ -1,6 +1,7 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /* ******************************************************************
-  * FSE : Finite State Entropy encoder
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  *
-  *  You can contact the author at :
-  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
-@@ -25,7 +26,8 @@
- #include "../common/error_private.h"
- #define ZSTD_DEPS_NEED_MALLOC
- #define ZSTD_DEPS_NEED_MATH64
--#include "../common/zstd_deps.h"  /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
-+#include "../common/zstd_deps.h"  /* ZSTD_memset */
-+#include "../common/bits.h" /* ZSTD_highbit32 */
- 
- 
- /* **************************************************************
-@@ -90,7 +92,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
-     assert(tableLog < 16);   /* required for threshold strategy to work */
- 
-     /* For explanations on how to distribute symbol values over the table :
--     * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
-+     * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
- 
-      #ifdef __clang_analyzer__
-      ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
-@@ -191,7 +193,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
-                 break;
-             default :
-                 assert(normalizedCounter[s] > 1);
--                {   U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
-+                {   U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
-                     U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
-                     symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
-                     symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
-@@ -224,8 +226,8 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
-     size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
-                                    + 4 /* bitCount initialized at 4 */
-                                    + 2 /* first two symbols may use one additional bit each */) / 8)
--                                    + 1 /* round up to whole nb bytes */
--                                    + 2 /* additional two bytes for bitstream flush */;
-+                                   + 1 /* round up to whole nb bytes */
-+                                   + 2 /* additional two bytes for bitstream flush */;
-     return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
- }
- 
-@@ -254,7 +256,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
-     /* Init */
-     remaining = tableSize+1;   /* +1 for extra accuracy */
-     threshold = tableSize;
--    nbBits = tableLog+1;
-+    nbBits = (int)tableLog+1;
- 
-     while ((symbol < alphabetSize) && (remaining>1)) {  /* stops at 1 */
-         if (previousIs0) {
-@@ -273,7 +275,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
-             }
-             while (symbol >= start+3) {
-                 start+=3;
--                bitStream += 3 << bitCount;
-+                bitStream += 3U << bitCount;
-                 bitCount += 2;
-             }
-             bitStream += (symbol-start) << bitCount;
-@@ -293,7 +295,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
-             count++;   /* +1 for extra accuracy */
-             if (count>=threshold)
-                 count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
--            bitStream += count << bitCount;
-+            bitStream += (U32)count << bitCount;
-             bitCount  += nbBits;
-             bitCount  -= (count<max);
-             previousIs0  = (count==1);
-@@ -321,7 +323,8 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
-     out[1] = (BYTE)(bitStream>>8);
-     out+= (bitCount+7) /8;
- 
--    return (out-ostart);
-+    assert(out >= ostart);
-+    return (size_t)(out-ostart);
- }
- 
- 
-@@ -342,21 +345,11 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize,
- *  FSE Compression Code
- ****************************************************************/
- 
--FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
--{
--    size_t size;
--    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
--    size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
--    return (FSE_CTable*)ZSTD_malloc(size);
--}
--
--void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
--
- /* provides the minimum logSize to safely represent a distribution */
- static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
- {
--    U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
--    U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
-+    U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
-+    U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
-     U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
-     assert(srcSize > 1); /* Not supported, RLE should be used instead */
-     return minBits;
-@@ -364,7 +357,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
- 
- unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
- {
--    U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
-+    U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
-     U32 tableLog = maxTableLog;
-     U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
-     assert(srcSize > 1); /* Not supported, RLE should be used instead */
-@@ -532,40 +525,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
-     return tableLog;
- }
- 
--
--/* fake FSE_CTable, for raw (uncompressed) input */
--size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
--{
--    const unsigned tableSize = 1 << nbBits;
--    const unsigned tableMask = tableSize - 1;
--    const unsigned maxSymbolValue = tableMask;
--    void* const ptr = ct;
--    U16* const tableU16 = ( (U16*) ptr) + 2;
--    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1);   /* assumption : tableLog >= 1 */
--    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
--    unsigned s;
--
--    /* Sanity checks */
--    if (nbBits < 1) return ERROR(GENERIC);             /* min size */
--
--    /* header */
--    tableU16[-2] = (U16) nbBits;
--    tableU16[-1] = (U16) maxSymbolValue;
--
--    /* Build table */
--    for (s=0; s<tableSize; s++)
--        tableU16[s] = (U16)(tableSize + s);
--
--    /* Build Symbol Transformation Table */
--    {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
--        for (s=0; s<=maxSymbolValue; s++) {
--            symbolTT[s].deltaNbBits = deltaNbBits;
--            symbolTT[s].deltaFindState = s-1;
--    }   }
--
--    return 0;
--}
--
- /* fake FSE_CTable, for rle input (always same symbol) */
- size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
- {
-@@ -664,5 +623,4 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
- 
- size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
- 
--
- #endif   /* FSE_COMMONDEFS_ONLY */
-diff --git a/lib/zstd/compress/hist.c b/lib/zstd/compress/hist.c
-index 3ddc6dfb6894..0b12587cc14b 100644
---- a/lib/zstd/compress/hist.c
-+++ b/lib/zstd/compress/hist.c
-@@ -1,7 +1,8 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /* ******************************************************************
-  * hist : Histogram functions
-  * part of Finite State Entropy project
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  *
-  *  You can contact the author at :
-  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
-diff --git a/lib/zstd/compress/hist.h b/lib/zstd/compress/hist.h
-index fc1830abc9c6..f7687b0fc20a 100644
---- a/lib/zstd/compress/hist.h
-+++ b/lib/zstd/compress/hist.h
-@@ -1,7 +1,8 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /* ******************************************************************
-  * hist : Histogram functions
-  * part of Finite State Entropy project
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  *
-  *  You can contact the author at :
-  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
-diff --git a/lib/zstd/compress/huf_compress.c b/lib/zstd/compress/huf_compress.c
-index 74ef0db47621..0b229f5d2ae2 100644
---- a/lib/zstd/compress/huf_compress.c
-+++ b/lib/zstd/compress/huf_compress.c
-@@ -1,6 +1,7 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /* ******************************************************************
-  * Huffman encoder, part of New Generation Entropy library
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  *
-  *  You can contact the author at :
-  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
-@@ -26,9 +27,9 @@
- #include "hist.h"
- #define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
- #include "../common/fse.h"        /* header compression */
--#define HUF_STATIC_LINKING_ONLY
- #include "../common/huf.h"
- #include "../common/error_private.h"
-+#include "../common/bits.h"       /* ZSTD_highbit32 */
- 
- 
- /* **************************************************************
-@@ -39,13 +40,67 @@
- 
- 
- /* **************************************************************
--*  Utils
-+*  Required declarations
- ****************************************************************/
--unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
-+typedef struct nodeElt_s {
-+    U32 count;
-+    U16 parent;
-+    BYTE byte;
-+    BYTE nbBits;
-+} nodeElt;
-+
-+
-+/* **************************************************************
-+*  Debug Traces
-+****************************************************************/
-+
-+#if DEBUGLEVEL >= 2
-+
-+static size_t showU32(const U32* arr, size_t size)
- {
--    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
-+    size_t u;
-+    for (u=0; u<size; u++) {
-+        RAWLOG(6, " %u", arr[u]); (void)arr;
-+    }
-+    RAWLOG(6, " \n");
-+    return size;
- }
- 
-+static size_t HUF_getNbBits(HUF_CElt elt);
-+
-+static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
-+{
-+    size_t u;
-+    for (u=0; u<size; u++) {
-+        RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
-+    }
-+    RAWLOG(6, " \n");
-+    return size;
-+
-+}
-+
-+static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
-+{
-+    size_t u;
-+    for (u=0; u<size; u++) {
-+        RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
-+    }
-+    RAWLOG(6, " \n");
-+    return size;
-+}
-+
-+static size_t showHNodeBits(const nodeElt* hnode, size_t size)
-+{
-+    size_t u;
-+    for (u=0; u<size; u++) {
-+        RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
-+    }
-+    RAWLOG(6, " \n");
-+    return size;
-+}
-+
-+#endif
-+
- 
- /* *******************************************************
- *  HUF : Huffman block compression
-@@ -86,7 +141,10 @@ typedef struct {
-     S16 norm[HUF_TABLELOG_MAX+1];
- } HUF_CompressWeightsWksp;
- 
--static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
-+static size_t
-+HUF_compressWeights(void* dst, size_t dstSize,
-+              const void* weightTable, size_t wtSize,
-+                    void* workspace, size_t workspaceSize)
- {
-     BYTE* const ostart = (BYTE*) dst;
-     BYTE* op = ostart;
-@@ -137,7 +195,7 @@ static size_t HUF_getNbBitsFast(HUF_CElt elt)
- 
- static size_t HUF_getValue(HUF_CElt elt)
- {
--    return elt & ~0xFF;
-+    return elt & ~(size_t)0xFF;
- }
- 
- static size_t HUF_getValueFast(HUF_CElt elt)
-@@ -160,6 +218,25 @@ static void HUF_setValue(HUF_CElt* elt, size_t value)
-     }
- }
- 
-+HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable)
-+{
-+    HUF_CTableHeader header;
-+    ZSTD_memcpy(&header, ctable, sizeof(header));
-+    return header;
-+}
-+
-+static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue)
-+{
-+    HUF_CTableHeader header;
-+    HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header));
-+    ZSTD_memset(&header, 0, sizeof(header));
-+    assert(tableLog < 256);
-+    header.tableLog = (BYTE)tableLog;
-+    assert(maxSymbolValue < 256);
-+    header.maxSymbolValue = (BYTE)maxSymbolValue;
-+    ZSTD_memcpy(ctable, &header, sizeof(header));
-+}
-+
- typedef struct {
-     HUF_CompressWeightsWksp wksp;
-     BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
-@@ -175,6 +252,11 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
-     U32 n;
-     HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
- 
-+    HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
-+
-+    assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue);
-+    assert(HUF_readCTableHeader(CTable).tableLog == huffLog);
-+
-     /* check conditions */
-     if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
-     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
-@@ -204,16 +286,6 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
-     return ((maxSymbolValue+1)/2) + 1;
- }
- 
--/*! HUF_writeCTable() :
--    `CTable` : Huffman tree to save, using huf representation.
--    @return : size of saved CTable */
--size_t HUF_writeCTable (void* dst, size_t maxDstSize,
--                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
--{
--    HUF_WriteCTableWksp wksp;
--    return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
--}
--
- 
- size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
- {
-@@ -231,7 +303,9 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
-     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
-     if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
- 
--    CTable[0] = tableLog;
-+    *maxSymbolValuePtr = nbSymbols - 1;
-+
-+    HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr);
- 
-     /* Prepare base value per rank */
-     {   U32 n, nextRankStart = 0;
-@@ -263,74 +337,71 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
-         { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
-     }
- 
--    *maxSymbolValuePtr = nbSymbols - 1;
-     return readSize;
- }
- 
- U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
- {
--    const HUF_CElt* ct = CTable + 1;
-+    const HUF_CElt* const ct = CTable + 1;
-     assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
-+    if (symbolValue > HUF_readCTableHeader(CTable).maxSymbolValue)
-+        return 0;
-     return (U32)HUF_getNbBits(ct[symbolValue]);
- }
- 
- 
--typedef struct nodeElt_s {
--    U32 count;
--    U16 parent;
--    BYTE byte;
--    BYTE nbBits;
--} nodeElt;
--
- /*
-  * HUF_setMaxHeight():
-- * Enforces maxNbBits on the Huffman tree described in huffNode.
-+ * Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
-  *
-- * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
-- * the tree to so that it is a valid canonical Huffman tree.
-+ * It attempts to convert all nodes with nbBits > @targetNbBits
-+ * to employ @targetNbBits instead. Then it adjusts the tree
-+ * so that it remains a valid canonical Huffman tree.
-  *
-  * @pre               The sum of the ranks of each symbol == 2^largestBits,
-  *                    where largestBits == huffNode[lastNonNull].nbBits.
-  * @post              The sum of the ranks of each symbol == 2^largestBits,
-- *                    where largestBits is the return value <= maxNbBits.
-+ *                    where largestBits is the return value (expected <= targetNbBits).
-  *
-- * @param huffNode    The Huffman tree modified in place to enforce maxNbBits.
-+ * @param huffNode    The Huffman tree modified in place to enforce targetNbBits.
-+ *                    It's presumed sorted, from most frequent to rarest symbol.
-  * @param lastNonNull The symbol with the lowest count in the Huffman tree.
-- * @param maxNbBits   The maximum allowed number of bits, which the Huffman tree
-+ * @param targetNbBits  The allowed number of bits, which the Huffman tree
-  *                    may not respect. After this function the Huffman tree will
-- *                    respect maxNbBits.
-- * @return            The maximum number of bits of the Huffman tree after adjustment,
-- *                    necessarily no more than maxNbBits.
-+ *                    respect targetNbBits.
-+ * @return            The maximum number of bits of the Huffman tree after adjustment.
-  */
--static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
-+static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
- {
-     const U32 largestBits = huffNode[lastNonNull].nbBits;
--    /* early exit : no elt > maxNbBits, so the tree is already valid. */
--    if (largestBits <= maxNbBits) return largestBits;
-+    /* early exit : no elt > targetNbBits, so the tree is already valid. */
-+    if (largestBits <= targetNbBits) return largestBits;
-+
-+    DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
- 
-     /* there are several too large elements (at least >= 2) */
-     {   int totalCost = 0;
--        const U32 baseCost = 1 << (largestBits - maxNbBits);
-+        const U32 baseCost = 1 << (largestBits - targetNbBits);
-         int n = (int)lastNonNull;
- 
--        /* Adjust any ranks > maxNbBits to maxNbBits.
-+        /* Adjust any ranks > targetNbBits to targetNbBits.
-          * Compute totalCost, which is how far the sum of the ranks is
-          * we are over 2^largestBits after adjust the offending ranks.
-          */
--        while (huffNode[n].nbBits > maxNbBits) {
-+        while (huffNode[n].nbBits > targetNbBits) {
-             totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
--            huffNode[n].nbBits = (BYTE)maxNbBits;
-+            huffNode[n].nbBits = (BYTE)targetNbBits;
-             n--;
-         }
--        /* n stops at huffNode[n].nbBits <= maxNbBits */
--        assert(huffNode[n].nbBits <= maxNbBits);
--        /* n end at index of smallest symbol using < maxNbBits */
--        while (huffNode[n].nbBits == maxNbBits) --n;
-+        /* n stops at huffNode[n].nbBits <= targetNbBits */
-+        assert(huffNode[n].nbBits <= targetNbBits);
-+        /* n end at index of smallest symbol using < targetNbBits */
-+        while (huffNode[n].nbBits == targetNbBits) --n;
- 
--        /* renorm totalCost from 2^largestBits to 2^maxNbBits
-+        /* renorm totalCost from 2^largestBits to 2^targetNbBits
-          * note : totalCost is necessarily a multiple of baseCost */
--        assert((totalCost & (baseCost - 1)) == 0);
--        totalCost >>= (largestBits - maxNbBits);
-+        assert(((U32)totalCost & (baseCost - 1)) == 0);
-+        totalCost >>= (largestBits - targetNbBits);
-         assert(totalCost > 0);
- 
-         /* repay normalized cost */
-@@ -339,19 +410,19 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
- 
-             /* Get pos of last (smallest = lowest cum. count) symbol per rank */
-             ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
--            {   U32 currentNbBits = maxNbBits;
-+            {   U32 currentNbBits = targetNbBits;
-                 int pos;
-                 for (pos=n ; pos >= 0; pos--) {
-                     if (huffNode[pos].nbBits >= currentNbBits) continue;
--                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
--                    rankLast[maxNbBits-currentNbBits] = (U32)pos;
-+                    currentNbBits = huffNode[pos].nbBits;   /* < targetNbBits */
-+                    rankLast[targetNbBits-currentNbBits] = (U32)pos;
-             }   }
- 
-             while (totalCost > 0) {
-                 /* Try to reduce the next power of 2 above totalCost because we
-                  * gain back half the rank.
-                  */
--                U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
-+                U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
-                 for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
-                     U32 const highPos = rankLast[nBitsToDecrease];
-                     U32 const lowPos = rankLast[nBitsToDecrease-1];
-@@ -391,7 +462,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
-                     rankLast[nBitsToDecrease] = noSymbol;
-                 else {
-                     rankLast[nBitsToDecrease]--;
--                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
-+                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
-                         rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
-                 }
-             }   /* while (totalCost > 0) */
-@@ -403,11 +474,11 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
-              * TODO.
-              */
-             while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
--                /* special case : no rank 1 symbol (using maxNbBits-1);
--                 * let's create one from largest rank 0 (using maxNbBits).
-+                /* special case : no rank 1 symbol (using targetNbBits-1);
-+                 * let's create one from largest rank 0 (using targetNbBits).
-                  */
-                 if (rankLast[1] == noSymbol) {
--                    while (huffNode[n].nbBits == maxNbBits) n--;
-+                    while (huffNode[n].nbBits == targetNbBits) n--;
-                     huffNode[n+1].nbBits--;
-                     assert(n >= 0);
-                     rankLast[1] = (U32)(n+1);
-@@ -421,7 +492,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
-         }   /* repay normalized cost */
-     }   /* there are several too large elements (at least >= 2) */
- 
--    return maxNbBits;
-+    return targetNbBits;
- }
- 
- typedef struct {
-@@ -429,7 +500,7 @@ typedef struct {
-     U16 curr;
- } rankPos;
- 
--typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
-+typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];
- 
- /* Number of buckets available for HUF_sort() */
- #define RANK_POSITION_TABLE_SIZE 192
-@@ -448,8 +519,8 @@ typedef struct {
-  * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
-  */
- #define RANK_POSITION_MAX_COUNT_LOG 32
--#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */
--#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */
-+#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
-+#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
- 
- /* Return the appropriate bucket index for a given count. See definition of
-  * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
-@@ -457,7 +528,7 @@ typedef struct {
- static U32 HUF_getIndex(U32 const count) {
-     return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
-         ? count
--        : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
-+        : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
- }
- 
- /* Helper swap function for HUF_quickSortPartition() */
-@@ -580,7 +651,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy
- 
-     /* Sort each bucket. */
-     for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
--        U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
-+        int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
-         U32 const bucketStartIdx = rankPosition[n].base;
-         if (bucketSize > 1) {
-             assert(bucketStartIdx < maxSymbolValue1);
-@@ -591,6 +662,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy
-     assert(HUF_isSorted(huffNode, maxSymbolValue1));
- }
- 
-+
- /* HUF_buildCTable_wksp() :
-  *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
-  *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
-@@ -611,6 +683,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
-     int lowS, lowN;
-     int nodeNb = STARTNODE;
-     int n, nodeRoot;
-+    DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
-     /* init for parents */
-     nonNullRank = (int)maxSymbolValue;
-     while(huffNode[nonNullRank].count == 0) nonNullRank--;
-@@ -637,6 +710,8 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
-     for (n=0; n<=nonNullRank; n++)
-         huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
- 
-+    DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
-+
-     return nonNullRank;
- }
- 
-@@ -671,31 +746,40 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
-         HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
-     for (n=0; n<alphabetSize; n++)
-         HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */
--    CTable[0] = maxNbBits;
-+
-+    HUF_writeCTableHeader(CTable, maxNbBits, maxSymbolValue);
- }
- 
--size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
-+size_t
-+HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
-+                     void* workSpace, size_t wkspSize)
- {
--    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
-+    HUF_buildCTable_wksp_tables* const wksp_tables =
-+        (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
-     nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
-     nodeElt* const huffNode = huffNode0+1;
-     int nonNullRank;
- 
-+    HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));
-+
-+    DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
-+
-     /* safety checks */
-     if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
--      return ERROR(workSpace_tooSmall);
-+        return ERROR(workSpace_tooSmall);
-     if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
-     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
--      return ERROR(maxSymbolValue_tooLarge);
-+        return ERROR(maxSymbolValue_tooLarge);
-     ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
- 
-     /* sort, decreasing order */
-     HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
-+    DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
- 
-     /* build tree */
-     nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
- 
--    /* enforce maxTableLog */
-+    /* determine and enforce maxTableLog */
-     maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
-     if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
- 
-@@ -716,13 +800,20 @@ size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count,
- }
- 
- int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
--  HUF_CElt const* ct = CTable + 1;
--  int bad = 0;
--  int s;
--  for (s = 0; s <= (int)maxSymbolValue; ++s) {
--    bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
--  }
--  return !bad;
-+    HUF_CTableHeader header = HUF_readCTableHeader(CTable);
-+    HUF_CElt const* ct = CTable + 1;
-+    int bad = 0;
-+    int s;
-+
-+    assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX);
-+
-+    if (header.maxSymbolValue < maxSymbolValue)
-+        return 0;
-+
-+    for (s = 0; s <= (int)maxSymbolValue; ++s) {
-+        bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
-+    }
-+    return !bad;
- }
- 
- size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
-@@ -804,7 +895,7 @@ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int id
- #if DEBUGLEVEL >= 1
-     {
-         size_t const nbBits = HUF_getNbBits(elt);
--        size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
-+        size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
-         (void)dirtyBits;
-         /* Middle bits are 0. */
-         assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
-@@ -884,7 +975,7 @@ static size_t HUF_closeCStream(HUF_CStream_t* bitC)
-     {
-         size_t const nbBits = bitC->bitPos[0] & 0xFF;
-         if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
--        return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
-+        return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
-     }
- }
- 
-@@ -964,17 +1055,17 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
-                                    const void* src, size_t srcSize,
-                                    const HUF_CElt* CTable)
- {
--    U32 const tableLog = (U32)CTable[0];
-+    U32 const tableLog = HUF_readCTableHeader(CTable).tableLog;
-     HUF_CElt const* ct = CTable + 1;
-     const BYTE* ip = (const BYTE*) src;
-     BYTE* const ostart = (BYTE*)dst;
-     BYTE* const oend = ostart + dstSize;
--    BYTE* op = ostart;
-     HUF_CStream_t bitC;
- 
-     /* init */
-     if (dstSize < 8) return 0;   /* not enough space to compress */
--    { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
-+    { BYTE* op = ostart;
-+      size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
-       if (HUF_isError(initErr)) return 0; }
- 
-     if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
-@@ -1045,9 +1136,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
- static size_t
- HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
-                               const void* src, size_t srcSize,
--                              const HUF_CElt* CTable, const int bmi2)
-+                              const HUF_CElt* CTable, const int flags)
- {
--    if (bmi2) {
-+    if (flags & HUF_flags_bmi2) {
-         return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
-     }
-     return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
-@@ -1058,28 +1149,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
- static size_t
- HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
-                               const void* src, size_t srcSize,
--                              const HUF_CElt* CTable, const int bmi2)
-+                              const HUF_CElt* CTable, const int flags)
- {
--    (void)bmi2;
-+    (void)flags;
-     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
- }
- 
- #endif
- 
--size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
-+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
- {
--    return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
--}
--
--size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
--{
--    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
-+    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
- }
- 
- static size_t
- HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
-                               const void* src, size_t srcSize,
--                              const HUF_CElt* CTable, int bmi2)
-+                              const HUF_CElt* CTable, int flags)
- {
-     size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
-     const BYTE* ip = (const BYTE*) src;
-@@ -1093,7 +1179,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
-     op += 6;   /* jumpTable */
- 
-     assert(op <= oend);
--    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
-         if (cSize == 0 || cSize > 65535) return 0;
-         MEM_writeLE16(ostart, (U16)cSize);
-         op += cSize;
-@@ -1101,7 +1187,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
- 
-     ip += segmentSize;
-     assert(op <= oend);
--    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
-         if (cSize == 0 || cSize > 65535) return 0;
-         MEM_writeLE16(ostart+2, (U16)cSize);
-         op += cSize;
-@@ -1109,7 +1195,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
- 
-     ip += segmentSize;
-     assert(op <= oend);
--    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
-         if (cSize == 0 || cSize > 65535) return 0;
-         MEM_writeLE16(ostart+4, (U16)cSize);
-         op += cSize;
-@@ -1118,7 +1204,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
-     ip += segmentSize;
-     assert(op <= oend);
-     assert(ip <= iend);
--    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
-+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
-         if (cSize == 0 || cSize > 65535) return 0;
-         op += cSize;
-     }
-@@ -1126,14 +1212,9 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
-     return (size_t)(op-ostart);
- }
- 
--size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
--{
--    return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
--}
--
--size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
-+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
- {
--    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
-+    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
- }
- 
- typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
-@@ -1141,11 +1222,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
- static size_t HUF_compressCTable_internal(
-                 BYTE* const ostart, BYTE* op, BYTE* const oend,
-                 const void* src, size_t srcSize,
--                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
-+                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
- {
-     size_t const cSize = (nbStreams==HUF_singleStream) ?
--                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
--                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
-+                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
-+                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
-     if (HUF_isError(cSize)) { return cSize; }
-     if (cSize==0) { return 0; }   /* uncompressible */
-     op += cSize;
-@@ -1168,6 +1249,81 @@ typedef struct {
- #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
- #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10  /* Must be >= 2 */
- 
-+unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
-+{
-+    unsigned cardinality = 0;
-+    unsigned i;
-+
-+    for (i = 0; i < maxSymbolValue + 1; i++) {
-+        if (count[i] != 0) cardinality += 1;
-+    }
-+
-+    return cardinality;
-+}
-+
-+unsigned HUF_minTableLog(unsigned symbolCardinality)
-+{
-+    U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
-+    return minBitsSymbols;
-+}
-+
-+unsigned HUF_optimalTableLog(
-+            unsigned maxTableLog,
-+            size_t srcSize,
-+            unsigned maxSymbolValue,
-+            void* workSpace, size_t wkspSize,
-+            HUF_CElt* table,
-+      const unsigned* count,
-+            int flags)
-+{
-+    assert(srcSize > 1); /* Not supported, RLE should be used instead */
-+    assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));
-+
-+    if (!(flags & HUF_flags_optimalDepth)) {
-+        /* cheap evaluation, based on FSE */
-+        return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
-+    }
-+
-+    {   BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
-+        size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
-+        size_t hSize, newSize;
-+        const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
-+        const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
-+        size_t optSize = ((size_t) ~0) - 1;
-+        unsigned optLog = maxTableLog, optLogGuess;
-+
-+        DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
-+
-+        /* Search until size increases */
-+        for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
-+            DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
-+
-+            {   size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
-+                if (ERR_isError(maxBits)) continue;
-+
-+                if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
-+
-+                hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
-+            }
-+
-+            if (ERR_isError(hSize)) continue;
-+
-+            newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;
-+
-+            if (newSize > optSize + 1) {
-+                break;
-+            }
-+
-+            if (newSize < optSize) {
-+                optSize = newSize;
-+                optLog = optLogGuess;
-+            }
-+        }
-+        assert(optLog <= HUF_TABLELOG_MAX);
-+        return optLog;
-+    }
-+}
-+
- /* HUF_compress_internal() :
-  * `workSpace_align4` must be aligned on 4-bytes boundaries,
-  * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
-@@ -1177,14 +1333,14 @@ HUF_compress_internal (void* dst, size_t dstSize,
-                        unsigned maxSymbolValue, unsigned huffLog,
-                        HUF_nbStreams_e nbStreams,
-                        void* workSpace, size_t wkspSize,
--                       HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
--                 const int bmi2, unsigned suspectUncompressible)
-+                       HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
- {
-     HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
-     BYTE* const ostart = (BYTE*)dst;
-     BYTE* const oend = ostart + dstSize;
-     BYTE* op = ostart;
- 
-+    DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
-     HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
- 
-     /* checks & inits */
-@@ -1198,16 +1354,17 @@ HUF_compress_internal (void* dst, size_t dstSize,
-     if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
- 
-     /* Heuristic : If old table is valid, use it for small inputs */
--    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
-+    if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
-         return HUF_compressCTable_internal(ostart, op, oend,
-                                            src, srcSize,
--                                           nbStreams, oldHufTable, bmi2);
-+                                           nbStreams, oldHufTable, flags);
-     }
- 
-     /* If uncompressible data is suspected, do a smaller sampling first */
-     DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
--    if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
-+    if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
-         size_t largestTotal = 0;
-+        DEBUGLOG(5, "input suspected incompressible : sampling to check");
-         {   unsigned maxSymbolValueBegin = maxSymbolValue;
-             CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
-             largestTotal += largestBegin;
-@@ -1224,6 +1381,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
-         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
-         if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
-     }
-+    DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
- 
-     /* Check validity of previous table */
-     if ( repeat
-@@ -1232,25 +1390,20 @@ HUF_compress_internal (void* dst, size_t dstSize,
-         *repeat = HUF_repeat_none;
-     }
-     /* Heuristic : use existing table for small inputs */
--    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
-+    if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
-         return HUF_compressCTable_internal(ostart, op, oend,
-                                            src, srcSize,
--                                           nbStreams, oldHufTable, bmi2);
-+                                           nbStreams, oldHufTable, flags);
-     }
- 
-     /* Build Huffman Tree */
--    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
-+    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
-     {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
-                                             maxSymbolValue, huffLog,
-                                             &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
-         CHECK_F(maxBits);
-         huffLog = (U32)maxBits;
--    }
--    /* Zero unused symbols in CTable, so we can check it for validity */
--    {
--        size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
--        size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
--        ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
-+        DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
-     }
- 
-     /* Write table description header */
-@@ -1263,7 +1416,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
-             if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
-                 return HUF_compressCTable_internal(ostart, op, oend,
-                                                    src, srcSize,
--                                                   nbStreams, oldHufTable, bmi2);
-+                                                   nbStreams, oldHufTable, flags);
-         }   }
- 
-         /* Use the new huffman table */
-@@ -1275,61 +1428,35 @@ HUF_compress_internal (void* dst, size_t dstSize,
-     }
-     return HUF_compressCTable_internal(ostart, op, oend,
-                                        src, srcSize,
--                                       nbStreams, table->CTable, bmi2);
--}
--
--
--size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
--                      const void* src, size_t srcSize,
--                      unsigned maxSymbolValue, unsigned huffLog,
--                      void* workSpace, size_t wkspSize)
--{
--    return HUF_compress_internal(dst, dstSize, src, srcSize,
--                                 maxSymbolValue, huffLog, HUF_singleStream,
--                                 workSpace, wkspSize,
--                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
-+                                       nbStreams, table->CTable, flags);
- }
- 
- size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
-                       const void* src, size_t srcSize,
-                       unsigned maxSymbolValue, unsigned huffLog,
-                       void* workSpace, size_t wkspSize,
--                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
--                      int bmi2, unsigned suspectUncompressible)
-+                      HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
- {
-+    DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
-     return HUF_compress_internal(dst, dstSize, src, srcSize,
-                                  maxSymbolValue, huffLog, HUF_singleStream,
-                                  workSpace, wkspSize, hufTable,
--                                 repeat, preferRepeat, bmi2, suspectUncompressible);
--}
--
--/* HUF_compress4X_repeat():
-- * compress input using 4 streams.
-- * provide workspace to generate compression tables */
--size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
--                      const void* src, size_t srcSize,
--                      unsigned maxSymbolValue, unsigned huffLog,
--                      void* workSpace, size_t wkspSize)
--{
--    return HUF_compress_internal(dst, dstSize, src, srcSize,
--                                 maxSymbolValue, huffLog, HUF_fourStreams,
--                                 workSpace, wkspSize,
--                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
-+                                 repeat, flags);
- }
- 
- /* HUF_compress4X_repeat():
-  * compress input using 4 streams.
-  * consider skipping quickly
-- * re-use an existing huffman compression table */
-+ * reuse an existing huffman compression table */
- size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
-                       const void* src, size_t srcSize,
-                       unsigned maxSymbolValue, unsigned huffLog,
-                       void* workSpace, size_t wkspSize,
--                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
-+                      HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
- {
-+    DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
-     return HUF_compress_internal(dst, dstSize, src, srcSize,
-                                  maxSymbolValue, huffLog, HUF_fourStreams,
-                                  workSpace, wkspSize,
--                                 hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
-+                                 hufTable, repeat, flags);
- }
--
-diff --git a/lib/zstd/compress/zstd_compress.c b/lib/zstd/compress/zstd_compress.c
-index f620cafca633..0d139727cd39 100644
---- a/lib/zstd/compress/zstd_compress.c
-+++ b/lib/zstd/compress/zstd_compress.c
-@@ -1,5 +1,6 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -11,12 +12,12 @@
- /*-*************************************
- *  Dependencies
- ***************************************/
-+#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
- #include "../common/zstd_deps.h"  /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
- #include "../common/mem.h"
- #include "hist.h"           /* HIST_countFast_wksp */
- #define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
- #include "../common/fse.h"
--#define HUF_STATIC_LINKING_ONLY
- #include "../common/huf.h"
- #include "zstd_compress_internal.h"
- #include "zstd_compress_sequences.h"
-@@ -27,6 +28,7 @@
- #include "zstd_opt.h"
- #include "zstd_ldm.h"
- #include "zstd_compress_superblock.h"
-+#include  "../common/bits.h"      /* ZSTD_highbit32, ZSTD_rotateRight_U64 */
- 
- /* ***************************************************************
- *  Tuning parameters
-@@ -55,14 +57,17 @@
- *  Helper functions
- ***************************************/
- /* ZSTD_compressBound()
-- * Note that the result from this function is only compatible with the "normal"
-- * full-block strategy.
-- * When there are a lot of small blocks due to frequent flush in streaming mode
-- * the overhead of headers can make the compressed data to be larger than the
-- * return value of ZSTD_compressBound().
-+ * Note that the result from this function is only valid for
-+ * the one-pass compression functions.
-+ * When employing the streaming mode,
-+ * if flushes are frequently altering the size of blocks,
-+ * the overhead from block headers can make the compressed data larger
-+ * than the return value of ZSTD_compressBound().
-  */
- size_t ZSTD_compressBound(size_t srcSize) {
--    return ZSTD_COMPRESSBOUND(srcSize);
-+    size_t const r = ZSTD_COMPRESSBOUND(srcSize);
-+    if (r==0) return ERROR(srcSize_wrong);
-+    return r;
- }
- 
- 
-@@ -168,15 +173,13 @@ static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
- 
- size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
- {
-+    DEBUGLOG(3, "ZSTD_freeCCtx (address: %p)", (void*)cctx);
-     if (cctx==NULL) return 0;   /* support free on NULL */
-     RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
-                     "not compatible with static CCtx");
--    {
--        int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
-+    {   int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
-         ZSTD_freeCCtxContent(cctx);
--        if (!cctxInWorkspace) {
--            ZSTD_customFree(cctx, cctx->customMem);
--        }
-+        if (!cctxInWorkspace) ZSTD_customFree(cctx, cctx->customMem);
-     }
-     return 0;
- }
-@@ -257,9 +260,9 @@ static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
-     return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));
- }
- 
--/* Returns 1 if compression parameters are such that we should
-+/* Returns ZSTD_ps_enable if compression parameters are such that we should
-  * enable long distance matching (wlog >= 27, strategy >= btopt).
-- * Returns 0 otherwise.
-+ * Returns ZSTD_ps_disable otherwise.
-  */
- static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
-                                  const ZSTD_compressionParameters* const cParams) {
-@@ -267,6 +270,34 @@ static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
-     return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
- }
- 
-+static int ZSTD_resolveExternalSequenceValidation(int mode) {
-+    return mode;
-+}
-+
-+/* Resolves maxBlockSize to the default if no value is present. */
-+static size_t ZSTD_resolveMaxBlockSize(size_t maxBlockSize) {
-+    if (maxBlockSize == 0) {
-+        return ZSTD_BLOCKSIZE_MAX;
-+    } else {
-+        return maxBlockSize;
-+    }
-+}
-+
-+static ZSTD_paramSwitch_e ZSTD_resolveExternalRepcodeSearch(ZSTD_paramSwitch_e value, int cLevel) {
-+    if (value != ZSTD_ps_auto) return value;
-+    if (cLevel < 10) {
-+        return ZSTD_ps_disable;
-+    } else {
-+        return ZSTD_ps_enable;
-+    }
-+}
-+
-+/* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged.
-+ * If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. */
-+static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) {
-+    return cParams->strategy == ZSTD_fast || cParams->strategy == ZSTD_dfast;
-+}
-+
- static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
-         ZSTD_compressionParameters cParams)
- {
-@@ -284,6 +315,10 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
-     }
-     cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);
-     cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
-+    cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences);
-+    cctxParams.maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams.maxBlockSize);
-+    cctxParams.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams.searchForExternalRepcodes,
-+                                                                             cctxParams.compressionLevel);
-     assert(!ZSTD_checkCParams(cParams));
-     return cctxParams;
- }
-@@ -329,10 +364,13 @@ size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel)
- #define ZSTD_NO_CLEVEL 0
- 
- /*
-- * Initializes the cctxParams from params and compressionLevel.
-+ * Initializes `cctxParams` from `params` and `compressionLevel`.
-  * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
-  */
--static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel)
-+static void
-+ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams,
-+                        const ZSTD_parameters* params,
-+                              int compressionLevel)
- {
-     assert(!ZSTD_checkCParams(params->cParams));
-     ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
-@@ -345,6 +383,9 @@ static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_par
-     cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);
-     cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams);
-     cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams);
-+    cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences);
-+    cctxParams->maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams->maxBlockSize);
-+    cctxParams->searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams->searchForExternalRepcodes, compressionLevel);
-     DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
-                 cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
- }
-@@ -359,7 +400,7 @@ size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_paramete
- 
- /*
-  * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
-- * @param param Validated zstd parameters.
-+ * @param params Validated zstd parameters.
-  */
- static void ZSTD_CCtxParams_setZstdParams(
-         ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
-@@ -455,8 +496,8 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
-         return bounds;
- 
-     case ZSTD_c_enableLongDistanceMatching:
--        bounds.lowerBound = 0;
--        bounds.upperBound = 1;
-+        bounds.lowerBound = (int)ZSTD_ps_auto;
-+        bounds.upperBound = (int)ZSTD_ps_disable;
-         return bounds;
- 
-     case ZSTD_c_ldmHashLog:
-@@ -549,6 +590,26 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
-         bounds.upperBound = 1;
-         return bounds;
- 
-+    case ZSTD_c_prefetchCDictTables:
-+        bounds.lowerBound = (int)ZSTD_ps_auto;
-+        bounds.upperBound = (int)ZSTD_ps_disable;
-+        return bounds;
-+
-+    case ZSTD_c_enableSeqProducerFallback:
-+        bounds.lowerBound = 0;
-+        bounds.upperBound = 1;
-+        return bounds;
-+
-+    case ZSTD_c_maxBlockSize:
-+        bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN;
-+        bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
-+        return bounds;
-+
-+    case ZSTD_c_searchForExternalRepcodes:
-+        bounds.lowerBound = (int)ZSTD_ps_auto;
-+        bounds.upperBound = (int)ZSTD_ps_disable;
-+        return bounds;
-+
-     default:
-         bounds.error = ERROR(parameter_unsupported);
-         return bounds;
-@@ -567,10 +628,11 @@ static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
-     return 0;
- }
- 
--#define BOUNDCHECK(cParam, val) { \
--    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
--                    parameter_outOfBound, "Param out of bounds"); \
--}
-+#define BOUNDCHECK(cParam, val)                                       \
-+    do {                                                              \
-+        RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val),        \
-+                        parameter_outOfBound, "Param out of bounds"); \
-+    } while (0)
- 
- 
- static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
-@@ -613,6 +675,10 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
-     case ZSTD_c_useBlockSplitter:
-     case ZSTD_c_useRowMatchFinder:
-     case ZSTD_c_deterministicRefPrefix:
-+    case ZSTD_c_prefetchCDictTables:
-+    case ZSTD_c_enableSeqProducerFallback:
-+    case ZSTD_c_maxBlockSize:
-+    case ZSTD_c_searchForExternalRepcodes:
-     default:
-         return 0;
-     }
-@@ -625,7 +691,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
-         if (ZSTD_isUpdateAuthorized(param)) {
-             cctx->cParamsChanged = 1;
-         } else {
--            RETURN_ERROR(stage_wrong, "can only set params in ctx init stage");
-+            RETURN_ERROR(stage_wrong, "can only set params in cctx init stage");
-     }   }
- 
-     switch(param)
-@@ -668,6 +734,10 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
-     case ZSTD_c_useBlockSplitter:
-     case ZSTD_c_useRowMatchFinder:
-     case ZSTD_c_deterministicRefPrefix:
-+    case ZSTD_c_prefetchCDictTables:
-+    case ZSTD_c_enableSeqProducerFallback:
-+    case ZSTD_c_maxBlockSize:
-+    case ZSTD_c_searchForExternalRepcodes:
-         break;
- 
-     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
-@@ -723,12 +793,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
-     case ZSTD_c_minMatch :
-         if (value!=0)   /* 0 => use default */
-             BOUNDCHECK(ZSTD_c_minMatch, value);
--        CCtxParams->cParams.minMatch = value;
-+        CCtxParams->cParams.minMatch = (U32)value;
-         return CCtxParams->cParams.minMatch;
- 
-     case ZSTD_c_targetLength :
-         BOUNDCHECK(ZSTD_c_targetLength, value);
--        CCtxParams->cParams.targetLength = value;
-+        CCtxParams->cParams.targetLength = (U32)value;
-         return CCtxParams->cParams.targetLength;
- 
-     case ZSTD_c_strategy :
-@@ -741,12 +811,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
-         /* Content size written in frame header _when known_ (default:1) */
-         DEBUGLOG(4, "set content size flag = %u", (value!=0));
-         CCtxParams->fParams.contentSizeFlag = value != 0;
--        return CCtxParams->fParams.contentSizeFlag;
-+        return (size_t)CCtxParams->fParams.contentSizeFlag;
- 
-     case ZSTD_c_checksumFlag :
-         /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
-         CCtxParams->fParams.checksumFlag = value != 0;
--        return CCtxParams->fParams.checksumFlag;
-+        return (size_t)CCtxParams->fParams.checksumFlag;
- 
-     case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
-         DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
-@@ -755,18 +825,18 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
- 
-     case ZSTD_c_forceMaxWindow :
-         CCtxParams->forceWindow = (value != 0);
--        return CCtxParams->forceWindow;
-+        return (size_t)CCtxParams->forceWindow;
- 
-     case ZSTD_c_forceAttachDict : {
-         const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
--        BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
-+        BOUNDCHECK(ZSTD_c_forceAttachDict, (int)pref);
-         CCtxParams->attachDictPref = pref;
-         return CCtxParams->attachDictPref;
-     }
- 
-     case ZSTD_c_literalCompressionMode : {
-         const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value;
--        BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
-+        BOUNDCHECK(ZSTD_c_literalCompressionMode, (int)lcm);
-         CCtxParams->literalCompressionMode = lcm;
-         return CCtxParams->literalCompressionMode;
-     }
-@@ -789,47 +859,50 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
- 
-     case ZSTD_c_enableDedicatedDictSearch :
-         CCtxParams->enableDedicatedDictSearch = (value!=0);
--        return CCtxParams->enableDedicatedDictSearch;
-+        return (size_t)CCtxParams->enableDedicatedDictSearch;
- 
-     case ZSTD_c_enableLongDistanceMatching :
-+        BOUNDCHECK(ZSTD_c_enableLongDistanceMatching, value);
-         CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value;
-         return CCtxParams->ldmParams.enableLdm;
- 
-     case ZSTD_c_ldmHashLog :
-         if (value!=0)   /* 0 ==> auto */
-             BOUNDCHECK(ZSTD_c_ldmHashLog, value);
--        CCtxParams->ldmParams.hashLog = value;
-+        CCtxParams->ldmParams.hashLog = (U32)value;
-         return CCtxParams->ldmParams.hashLog;
- 
-     case ZSTD_c_ldmMinMatch :
-         if (value!=0)   /* 0 ==> default */
-             BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
--        CCtxParams->ldmParams.minMatchLength = value;
-+        CCtxParams->ldmParams.minMatchLength = (U32)value;
-         return CCtxParams->ldmParams.minMatchLength;
- 
-     case ZSTD_c_ldmBucketSizeLog :
-         if (value!=0)   /* 0 ==> default */
-             BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
--        CCtxParams->ldmParams.bucketSizeLog = value;
-+        CCtxParams->ldmParams.bucketSizeLog = (U32)value;
-         return CCtxParams->ldmParams.bucketSizeLog;
- 
-     case ZSTD_c_ldmHashRateLog :
-         if (value!=0)   /* 0 ==> default */
-             BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);
--        CCtxParams->ldmParams.hashRateLog = value;
-+        CCtxParams->ldmParams.hashRateLog = (U32)value;
-         return CCtxParams->ldmParams.hashRateLog;
- 
-     case ZSTD_c_targetCBlockSize :
--        if (value!=0)   /* 0 ==> default */
-+        if (value!=0) {  /* 0 ==> default */
-+            value = MAX(value, ZSTD_TARGETCBLOCKSIZE_MIN);
-             BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
--        CCtxParams->targetCBlockSize = value;
-+        }
-+        CCtxParams->targetCBlockSize = (U32)value;
-         return CCtxParams->targetCBlockSize;
- 
-     case ZSTD_c_srcSizeHint :
-         if (value!=0)    /* 0 ==> default */
-             BOUNDCHECK(ZSTD_c_srcSizeHint, value);
-         CCtxParams->srcSizeHint = value;
--        return CCtxParams->srcSizeHint;
-+        return (size_t)CCtxParams->srcSizeHint;
- 
-     case ZSTD_c_stableInBuffer:
-         BOUNDCHECK(ZSTD_c_stableInBuffer, value);
-@@ -849,7 +922,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
-     case ZSTD_c_validateSequences:
-         BOUNDCHECK(ZSTD_c_validateSequences, value);
-         CCtxParams->validateSequences = value;
--        return CCtxParams->validateSequences;
-+        return (size_t)CCtxParams->validateSequences;
- 
-     case ZSTD_c_useBlockSplitter:
-         BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
-@@ -864,7 +937,28 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
-     case ZSTD_c_deterministicRefPrefix:
-         BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
-         CCtxParams->deterministicRefPrefix = !!value;
--        return CCtxParams->deterministicRefPrefix;
-+        return (size_t)CCtxParams->deterministicRefPrefix;
-+
-+    case ZSTD_c_prefetchCDictTables:
-+        BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);
-+        CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value;
-+        return CCtxParams->prefetchCDictTables;
-+
-+    case ZSTD_c_enableSeqProducerFallback:
-+        BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value);
-+        CCtxParams->enableMatchFinderFallback = value;
-+        return (size_t)CCtxParams->enableMatchFinderFallback;
-+
-+    case ZSTD_c_maxBlockSize:
-+        if (value!=0)    /* 0 ==> default */
-+            BOUNDCHECK(ZSTD_c_maxBlockSize, value);
-+        CCtxParams->maxBlockSize = value;
-+        return CCtxParams->maxBlockSize;
-+
-+    case ZSTD_c_searchForExternalRepcodes:
-+        BOUNDCHECK(ZSTD_c_searchForExternalRepcodes, value);
-+        CCtxParams->searchForExternalRepcodes = (ZSTD_paramSwitch_e)value;
-+        return CCtxParams->searchForExternalRepcodes;
- 
-     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
-     }
-@@ -980,6 +1074,18 @@ size_t ZSTD_CCtxParams_getParameter(
-     case ZSTD_c_deterministicRefPrefix:
-         *value = (int)CCtxParams->deterministicRefPrefix;
-         break;
-+    case ZSTD_c_prefetchCDictTables:
-+        *value = (int)CCtxParams->prefetchCDictTables;
-+        break;
-+    case ZSTD_c_enableSeqProducerFallback:
-+        *value = CCtxParams->enableMatchFinderFallback;
-+        break;
-+    case ZSTD_c_maxBlockSize:
-+        *value = (int)CCtxParams->maxBlockSize;
-+        break;
-+    case ZSTD_c_searchForExternalRepcodes:
-+        *value = (int)CCtxParams->searchForExternalRepcodes;
-+        break;
-     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
-     }
-     return 0;
-@@ -1006,9 +1112,47 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
-     return 0;
- }
- 
-+size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams)
-+{
-+    ZSTD_STATIC_ASSERT(sizeof(cparams) == 7 * 4 /* all params are listed below */);
-+    DEBUGLOG(4, "ZSTD_CCtx_setCParams");
-+    /* only update if all parameters are valid */
-+    FORWARD_IF_ERROR(ZSTD_checkCParams(cparams), "");
-+    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, cparams.windowLog), "");
-+    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, cparams.chainLog), "");
-+    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, cparams.hashLog), "");
-+    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, cparams.searchLog), "");
-+    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, cparams.minMatch), "");
-+    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, cparams.targetLength), "");
-+    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, cparams.strategy), "");
-+    return 0;
-+}
-+
-+size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams)
-+{
-+    ZSTD_STATIC_ASSERT(sizeof(fparams) == 3 * 4 /* all params are listed below */);
-+    DEBUGLOG(4, "ZSTD_CCtx_setFParams");
-+    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, fparams.contentSizeFlag != 0), "");
-+    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, fparams.checksumFlag != 0), "");
-+    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_dictIDFlag, fparams.noDictIDFlag == 0), "");
-+    return 0;
-+}
-+
-+size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params)
-+{
-+    DEBUGLOG(4, "ZSTD_CCtx_setParams");
-+    /* First check cParams, because we want to update all or none. */
-+    FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
-+    /* Next set fParams, because this could fail if the cctx isn't in init stage. */
-+    FORWARD_IF_ERROR(ZSTD_CCtx_setFParams(cctx, params.fParams), "");
-+    /* Finally set cParams, which should succeed. */
-+    FORWARD_IF_ERROR(ZSTD_CCtx_setCParams(cctx, params.cParams), "");
-+    return 0;
-+}
-+
- size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
- {
--    DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
-+    DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %llu bytes", pledgedSrcSize);
-     RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
-                     "Can't set pledgedSrcSize when not in init stage.");
-     cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
-@@ -1024,9 +1168,9 @@ static void ZSTD_dedicatedDictSearch_revertCParams(
-         ZSTD_compressionParameters* cParams);
- 
- /*
-- * Initializes the local dict using the requested parameters.
-- * NOTE: This does not use the pledged src size, because it may be used for more
-- * than one compression.
-+ * Initializes the local dictionary using requested parameters.
-+ * NOTE: Initialization does not employ the pledged src size,
-+ * because the dictionary may be used for multiple compressions.
-  */
- static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
- {
-@@ -1039,8 +1183,8 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
-         return 0;
-     }
-     if (dl->cdict != NULL) {
--        assert(cctx->cdict == dl->cdict);
-         /* Local dictionary already initialized. */
-+        assert(cctx->cdict == dl->cdict);
-         return 0;
-     }
-     assert(dl->dictSize > 0);
-@@ -1060,26 +1204,30 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
- }
- 
- size_t ZSTD_CCtx_loadDictionary_advanced(
--        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
--        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
-+        ZSTD_CCtx* cctx,
-+        const void* dict, size_t dictSize,
-+        ZSTD_dictLoadMethod_e dictLoadMethod,
-+        ZSTD_dictContentType_e dictContentType)
- {
--    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
--                    "Can't load a dictionary when ctx is not in init stage.");
-     DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
--    ZSTD_clearAllDicts(cctx);  /* in case one already exists */
--    if (dict == NULL || dictSize == 0)  /* no dictionary mode */
-+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
-+                    "Can't load a dictionary when cctx is not in init stage.");
-+    ZSTD_clearAllDicts(cctx);  /* erase any previously set dictionary */
-+    if (dict == NULL || dictSize == 0)  /* no dictionary */
-         return 0;
-     if (dictLoadMethod == ZSTD_dlm_byRef) {
-         cctx->localDict.dict = dict;
-     } else {
-+        /* copy dictionary content inside CCtx to own its lifetime */
-         void* dictBuffer;
-         RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
--                        "no malloc for static CCtx");
-+                        "static CCtx can't allocate for an internal copy of dictionary");
-         dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);
--        RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
-+        RETURN_ERROR_IF(dictBuffer==NULL, memory_allocation,
-+                        "allocation failed for dictionary content");
-         ZSTD_memcpy(dictBuffer, dict, dictSize);
--        cctx->localDict.dictBuffer = dictBuffer;
--        cctx->localDict.dict = dictBuffer;
-+        cctx->localDict.dictBuffer = dictBuffer;  /* owned ptr to free */
-+        cctx->localDict.dict = dictBuffer;        /* read-only reference */
-     }
-     cctx->localDict.dictSize = dictSize;
-     cctx->localDict.dictContentType = dictContentType;
-@@ -1149,7 +1297,7 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
-     if ( (reset == ZSTD_reset_parameters)
-       || (reset == ZSTD_reset_session_and_parameters) ) {
-         RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
--                        "Can't reset parameters only when not in init stage.");
-+                        "Reset parameters is only possible during init stage.");
-         ZSTD_clearAllDicts(cctx);
-         return ZSTD_CCtxParams_reset(&cctx->requestedParams);
-     }
-@@ -1178,11 +1326,12 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
- static ZSTD_compressionParameters
- ZSTD_clampCParams(ZSTD_compressionParameters cParams)
- {
--#   define CLAMP_TYPE(cParam, val, type) {                                \
--        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
--        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
--        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
--    }
-+#   define CLAMP_TYPE(cParam, val, type)                                      \
-+        do {                                                                  \
-+            ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
-+            if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
-+            else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
-+        } while (0)
- #   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
-     CLAMP(ZSTD_c_windowLog, cParams.windowLog);
-     CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
-@@ -1247,12 +1396,55 @@ static ZSTD_compressionParameters
- ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
-                             unsigned long long srcSize,
-                             size_t dictSize,
--                            ZSTD_cParamMode_e mode)
-+                            ZSTD_cParamMode_e mode,
-+                            ZSTD_paramSwitch_e useRowMatchFinder)
- {
-     const U64 minSrcSize = 513; /* (1<<9) + 1 */
-     const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
-     assert(ZSTD_checkCParams(cPar)==0);
- 
-+    /* Cascade the selected strategy down to the next-highest one built into
-+     * this binary. */
-+#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
-+    if (cPar.strategy == ZSTD_btultra2) {
-+        cPar.strategy = ZSTD_btultra;
-+    }
-+    if (cPar.strategy == ZSTD_btultra) {
-+        cPar.strategy = ZSTD_btopt;
-+    }
-+#endif
-+#ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
-+    if (cPar.strategy == ZSTD_btopt) {
-+        cPar.strategy = ZSTD_btlazy2;
-+    }
-+#endif
-+#ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
-+    if (cPar.strategy == ZSTD_btlazy2) {
-+        cPar.strategy = ZSTD_lazy2;
-+    }
-+#endif
-+#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
-+    if (cPar.strategy == ZSTD_lazy2) {
-+        cPar.strategy = ZSTD_lazy;
-+    }
-+#endif
-+#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
-+    if (cPar.strategy == ZSTD_lazy) {
-+        cPar.strategy = ZSTD_greedy;
-+    }
-+#endif
-+#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
-+    if (cPar.strategy == ZSTD_greedy) {
-+        cPar.strategy = ZSTD_dfast;
-+    }
-+#endif
-+#ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
-+    if (cPar.strategy == ZSTD_dfast) {
-+        cPar.strategy = ZSTD_fast;
-+        cPar.targetLength = 0;
-+    }
-+#endif
-+
-     switch (mode) {
-     case ZSTD_cpm_unknown:
-     case ZSTD_cpm_noAttachDict:
-@@ -1281,8 +1473,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
-     }
- 
-     /* resize windowLog if input is small enough, to use less memory */
--    if ( (srcSize < maxWindowResize)
--      && (dictSize < maxWindowResize) )  {
-+    if ( (srcSize <= maxWindowResize)
-+      && (dictSize <= maxWindowResize) )  {
-         U32 const tSize = (U32)(srcSize + dictSize);
-         static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
-         U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
-@@ -1300,6 +1492,42 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
-     if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
-         cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* minimum wlog required for valid frame header */
- 
-+    /* We can't use more than 32 bits of hash in total, so that means that we require:
-+     * (hashLog + 8) <= 32 && (chainLog + 8) <= 32
-+     */
-+    if (mode == ZSTD_cpm_createCDict && ZSTD_CDictIndicesAreTagged(&cPar)) {
-+        U32 const maxShortCacheHashLog = 32 - ZSTD_SHORT_CACHE_TAG_BITS;
-+        if (cPar.hashLog > maxShortCacheHashLog) {
-+            cPar.hashLog = maxShortCacheHashLog;
-+        }
-+        if (cPar.chainLog > maxShortCacheHashLog) {
-+            cPar.chainLog = maxShortCacheHashLog;
-+        }
-+    }
-+
-+
-+    /* At this point, we aren't 100% sure if we are using the row match finder.
-+     * Unless it is explicitly disabled, conservatively assume that it is enabled.
-+     * In this case it will only be disabled for small sources, so shrinking the
-+     * hash log a little bit shouldn't result in any ratio loss.
-+     */
-+    if (useRowMatchFinder == ZSTD_ps_auto)
-+        useRowMatchFinder = ZSTD_ps_enable;
-+
-+    /* We can't hash more than 32-bits in total. So that means that we require:
-+     * (hashLog - rowLog + 8) <= 32
-+     */
-+    if (ZSTD_rowMatchFinderUsed(cPar.strategy, useRowMatchFinder)) {
-+        /* Switch to 32-entry rows if searchLog is 5 (or more) */
-+        U32 const rowLog = BOUNDED(4, cPar.searchLog, 6);
-+        U32 const maxRowHashLog = 32 - ZSTD_ROW_HASH_TAG_BITS;
-+        U32 const maxHashLog = maxRowHashLog + rowLog;
-+        assert(cPar.hashLog >= rowLog);
-+        if (cPar.hashLog > maxHashLog) {
-+            cPar.hashLog = maxHashLog;
-+        }
-+    }
-+
-     return cPar;
- }
- 
-@@ -1310,7 +1538,7 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
- {
-     cPar = ZSTD_clampCParams(cPar);   /* resulting cPar is necessarily valid (all parameters within range) */
-     if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
--    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
-+    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto);
- }
- 
- static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
-@@ -1341,7 +1569,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
-     ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
-     assert(!ZSTD_checkCParams(cParams));
-     /* srcSizeHint == 0 means 0 */
--    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
-+    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder);
- }
- 
- static size_t
-@@ -1367,10 +1595,10 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
-       + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
-       + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
-       + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
--      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
--      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
-+      + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_match_t))
-+      + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
-     size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
--                                            ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
-+                                            ? ZSTD_cwksp_aligned_alloc_size(hSize)
-                                             : 0;
-     size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
-                                 ? optPotentialSpace
-@@ -1386,6 +1614,13 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
-     return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
- }
- 
-+/* Helper function for calculating memory requirements.
-+ * Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account. */
-+static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useSequenceProducer) {
-+    U32 const divider = (minMatch==3 || useSequenceProducer) ? 3 : 4;
-+    return blockSize / divider;
-+}
-+
- static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-         const ZSTD_compressionParameters* cParams,
-         const ldmParams_t* ldmParams,
-@@ -1393,12 +1628,13 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-         const ZSTD_paramSwitch_e useRowMatchFinder,
-         const size_t buffInSize,
-         const size_t buffOutSize,
--        const U64 pledgedSrcSize)
-+        const U64 pledgedSrcSize,
-+        int useSequenceProducer,
-+        size_t maxBlockSize)
- {
-     size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
--    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
--    U32    const divider = (cParams->minMatch==3) ? 3 : 4;
--    size_t const maxNbSeq = blockSize / divider;
-+    size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize);
-+    size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer);
-     size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
-                             + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
-                             + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
-@@ -1417,6 +1653,11 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
- 
-     size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
- 
-+    size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
-+    size_t const externalSeqSpace = useSequenceProducer
-+        ? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
-+        : 0;
-+
-     size_t const neededSpace =
-         cctxSpace +
-         entropySpace +
-@@ -1425,7 +1666,8 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-         ldmSeqSpace +
-         matchStateSize +
-         tokenSpace +
--        bufferSpace;
-+        bufferSpace +
-+        externalSeqSpace;
- 
-     DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
-     return neededSpace;
-@@ -1443,7 +1685,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
-      * be needed. However, we still allocate two 0-sized buffers, which can
-      * take space under ASAN. */
-     return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
--        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
-+        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
- }
- 
- size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
-@@ -1493,7 +1735,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
-     RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
-     {   ZSTD_compressionParameters const cParams =
-                 ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
--        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
-+        size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(params->maxBlockSize), (size_t)1 << cParams.windowLog);
-         size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
-                 ? ((size_t)1 << cParams.windowLog) + blockSize
-                 : 0;
-@@ -1504,7 +1746,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
- 
-         return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-             &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
--            ZSTD_CONTENTSIZE_UNKNOWN);
-+            ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
-     }
- }
- 
-@@ -1637,6 +1879,19 @@ typedef enum {
-     ZSTD_resetTarget_CCtx
- } ZSTD_resetTarget_e;
- 
-+/* Mixes bits in a 64 bits in a value, based on XXH3_rrmxmx */
-+static U64 ZSTD_bitmix(U64 val, U64 len) {
-+    val ^= ZSTD_rotateRight_U64(val, 49) ^ ZSTD_rotateRight_U64(val, 24);
-+    val *= 0x9FB21C651E98DF25ULL;
-+    val ^= (val >> 35) + len ;
-+    val *= 0x9FB21C651E98DF25ULL;
-+    return val ^ (val >> 28);
-+}
-+
-+/* Mixes in the hashSalt and hashSaltEntropy to create a new hashSalt */
-+static void ZSTD_advanceHashSalt(ZSTD_matchState_t* ms) {
-+    ms->hashSalt = ZSTD_bitmix(ms->hashSalt, 8) ^ ZSTD_bitmix((U64) ms->hashSaltEntropy, 4);
-+}
- 
- static size_t
- ZSTD_reset_matchState(ZSTD_matchState_t* ms,
-@@ -1664,6 +1919,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
-     }
- 
-     ms->hashLog3 = hashLog3;
-+    ms->lazySkipping = 0;
- 
-     ZSTD_invalidateMatchState(ms);
- 
-@@ -1685,22 +1941,19 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
-         ZSTD_cwksp_clean_tables(ws);
-     }
- 
--    /* opt parser space */
--    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
--        DEBUGLOG(4, "reserving optimal parser space");
--        ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
--        ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
--        ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
--        ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
--        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
--        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
--    }
--
-     if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
--        {   /* Row match finder needs an additional table of hashes ("tags") */
--            size_t const tagTableSize = hSize*sizeof(U16);
--            ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
--            if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
-+        /* Row match finder needs an additional table of hashes ("tags") */
-+        size_t const tagTableSize = hSize;
-+        /* We want to generate a new salt in case we reset a Cctx, but we always want to use
-+         * 0 when we reset a Cdict */
-+        if(forWho == ZSTD_resetTarget_CCtx) {
-+            ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned_init_once(ws, tagTableSize);
-+            ZSTD_advanceHashSalt(ms);
-+        } else {
-+            /* When we are not salting we want to always memset the memory */
-+            ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
-+            ZSTD_memset(ms->tagTable, 0, tagTableSize);
-+            ms->hashSalt = 0;
-         }
-         {   /* Switch to 32-entry rows if searchLog is 5 (or more) */
-             U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
-@@ -1709,6 +1962,17 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
-         }
-     }
- 
-+    /* opt parser space */
-+    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
-+        DEBUGLOG(4, "reserving optimal parser space");
-+        ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
-+        ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
-+        ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
-+        ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
-+        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_match_t));
-+        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
-+    }
-+
-     ms->cParams = *cParams;
- 
-     RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
-@@ -1768,6 +2032,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
-     assert(params->useRowMatchFinder != ZSTD_ps_auto);
-     assert(params->useBlockSplitter != ZSTD_ps_auto);
-     assert(params->ldmParams.enableLdm != ZSTD_ps_auto);
-+    assert(params->maxBlockSize != 0);
-     if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
-         /* Adjust long distance matching parameters */
-         ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
-@@ -1776,9 +2041,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
-     }
- 
-     {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
--        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
--        U32    const divider = (params->cParams.minMatch==3) ? 3 : 4;
--        size_t const maxNbSeq = blockSize / divider;
-+        size_t const blockSize = MIN(params->maxBlockSize, windowSize);
-+        size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, ZSTD_hasExtSeqProd(params));
-         size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
-                 ? ZSTD_compressBound(blockSize) + 1
-                 : 0;
-@@ -1795,8 +2059,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
-         size_t const neededSpace =
-             ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-                 &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
--                buffInSize, buffOutSize, pledgedSrcSize);
--        int resizeWorkspace;
-+                buffInSize, buffOutSize, pledgedSrcSize, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
- 
-         FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
- 
-@@ -1805,7 +2068,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
-         {   /* Check if workspace is large enough, alloc a new one if needed */
-             int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
-             int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
--            resizeWorkspace = workspaceTooSmall || workspaceWasteful;
-+            int resizeWorkspace = workspaceTooSmall || workspaceWasteful;
-             DEBUGLOG(4, "Need %zu B workspace", neededSpace);
-             DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
- 
-@@ -1838,6 +2101,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
- 
-         /* init params */
-         zc->blockState.matchState.cParams = params->cParams;
-+        zc->blockState.matchState.prefetchCDictTables = params->prefetchCDictTables == ZSTD_ps_enable;
-         zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
-         zc->consumedSrcSize = 0;
-         zc->producedCSize = 0;
-@@ -1854,13 +2118,46 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
- 
-         ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
- 
-+        FORWARD_IF_ERROR(ZSTD_reset_matchState(
-+                &zc->blockState.matchState,
-+                ws,
-+                &params->cParams,
-+                params->useRowMatchFinder,
-+                crp,
-+                needsIndexReset,
-+                ZSTD_resetTarget_CCtx), "");
-+
-+        zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
-+
-+        /* ldm hash table */
-+        if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
-+            /* TODO: avoid memset? */
-+            size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
-+            zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
-+            ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
-+            zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
-+            zc->maxNbLdmSequences = maxNbLdmSeq;
-+
-+            ZSTD_window_init(&zc->ldmState.window);
-+            zc->ldmState.loadedDictEnd = 0;
-+        }
-+
-+        /* reserve space for block-level external sequences */
-+        if (ZSTD_hasExtSeqProd(params)) {
-+            size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
-+            zc->extSeqBufCapacity = maxNbExternalSeq;
-+            zc->extSeqBuf =
-+                (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
-+        }
-+
-+        /* buffers */
-+
-         /* ZSTD_wildcopy() is used to copy into the literals buffer,
-          * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
-          */
-         zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
-         zc->seqStore.maxNbLit = blockSize;
- 
--        /* buffers */
-         zc->bufferedPolicy = zbuff;
-         zc->inBuffSize = buffInSize;
-         zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
-@@ -1883,32 +2180,9 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
-         zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
-         zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
-         zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
--        zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
--
--        FORWARD_IF_ERROR(ZSTD_reset_matchState(
--            &zc->blockState.matchState,
--            ws,
--            &params->cParams,
--            params->useRowMatchFinder,
--            crp,
--            needsIndexReset,
--            ZSTD_resetTarget_CCtx), "");
--
--        /* ldm hash table */
--        if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
--            /* TODO: avoid memset? */
--            size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
--            zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
--            ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
--            zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
--            zc->maxNbLdmSequences = maxNbLdmSeq;
--
--            ZSTD_window_init(&zc->ldmState.window);
--            zc->ldmState.loadedDictEnd = 0;
--        }
- 
-         DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
--        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
-+        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace));
- 
-         zc->initialized = 1;
- 
-@@ -1980,7 +2254,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
-         }
- 
-         params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
--                                                     cdict->dictContentSize, ZSTD_cpm_attachDict);
-+                                                     cdict->dictContentSize, ZSTD_cpm_attachDict,
-+                                                     params.useRowMatchFinder);
-         params.cParams.windowLog = windowLog;
-         params.useRowMatchFinder = cdict->useRowMatchFinder;    /* cdict overrides */
-         FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
-@@ -2019,6 +2294,22 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
-     return 0;
- }
- 
-+static void ZSTD_copyCDictTableIntoCCtx(U32* dst, U32 const* src, size_t tableSize,
-+                                        ZSTD_compressionParameters const* cParams) {
-+    if (ZSTD_CDictIndicesAreTagged(cParams)){
-+        /* Remove tags from the CDict table if they are present.
-+         * See docs on "short cache" in zstd_compress_internal.h for context. */
-+        size_t i;
-+        for (i = 0; i < tableSize; i++) {
-+            U32 const taggedIndex = src[i];
-+            U32 const index = taggedIndex >> ZSTD_SHORT_CACHE_TAG_BITS;
-+            dst[i] = index;
-+        }
-+    } else {
-+        ZSTD_memcpy(dst, src, tableSize * sizeof(U32));
-+    }
-+}
-+
- static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
-                             const ZSTD_CDict* cdict,
-                             ZSTD_CCtx_params params,
-@@ -2054,21 +2345,23 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
-                                                             : 0;
-         size_t const hSize =  (size_t)1 << cdict_cParams->hashLog;
- 
--        ZSTD_memcpy(cctx->blockState.matchState.hashTable,
--               cdict->matchState.hashTable,
--               hSize * sizeof(U32));
-+        ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.hashTable,
-+                                cdict->matchState.hashTable,
-+                                hSize, cdict_cParams);
-+
-         /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
-         if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
--            ZSTD_memcpy(cctx->blockState.matchState.chainTable,
--               cdict->matchState.chainTable,
--               chainSize * sizeof(U32));
-+            ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.chainTable,
-+                                    cdict->matchState.chainTable,
-+                                    chainSize, cdict_cParams);
-         }
-         /* copy tag table */
-         if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
--            size_t const tagTableSize = hSize*sizeof(U16);
-+            size_t const tagTableSize = hSize;
-             ZSTD_memcpy(cctx->blockState.matchState.tagTable,
--                cdict->matchState.tagTable,
--                tagTableSize);
-+                        cdict->matchState.tagTable,
-+                        tagTableSize);
-+            cctx->blockState.matchState.hashSalt = cdict->matchState.hashSalt;
-         }
-     }
- 
-@@ -2147,6 +2440,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
-         params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter;
-         params.ldmParams = srcCCtx->appliedParams.ldmParams;
-         params.fParams = fParams;
-+        params.maxBlockSize = srcCCtx->appliedParams.maxBlockSize;
-         ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
-                                 /* loadedDictSize */ 0,
-                                 ZSTDcrp_leaveDirty, zbuff);
-@@ -2294,7 +2588,7 @@ static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* par
- 
- /* See doc/zstd_compression_format.md for detailed format description */
- 
--void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
-+int ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
- {
-     const seqDef* const sequences = seqStorePtr->sequencesStart;
-     BYTE* const llCodeTable = seqStorePtr->llCode;
-@@ -2302,18 +2596,24 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
-     BYTE* const mlCodeTable = seqStorePtr->mlCode;
-     U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-     U32 u;
-+    int longOffsets = 0;
-     assert(nbSeq <= seqStorePtr->maxNbSeq);
-     for (u=0; u<nbSeq; u++) {
-         U32 const llv = sequences[u].litLength;
-+        U32 const ofCode = ZSTD_highbit32(sequences[u].offBase);
-         U32 const mlv = sequences[u].mlBase;
-         llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
--        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offBase);
-+        ofCodeTable[u] = (BYTE)ofCode;
-         mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
-+        assert(!(MEM_64bits() && ofCode >= STREAM_ACCUMULATOR_MIN));
-+        if (MEM_32bits() && ofCode >= STREAM_ACCUMULATOR_MIN)
-+            longOffsets = 1;
-     }
-     if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
-         llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
-     if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
-         mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
-+    return longOffsets;
- }
- 
- /* ZSTD_useTargetCBlockSize():
-@@ -2347,6 +2647,7 @@ typedef struct {
-     U32 MLtype;
-     size_t size;
-     size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
-+    int longOffsets;
- } ZSTD_symbolEncodingTypeStats_t;
- 
- /* ZSTD_buildSequencesStatistics():
-@@ -2357,11 +2658,13 @@ typedef struct {
-  * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
-  */
- static ZSTD_symbolEncodingTypeStats_t
--ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
--                        const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
--                              BYTE* dst, const BYTE* const dstEnd,
--                              ZSTD_strategy strategy, unsigned* countWorkspace,
--                              void* entropyWorkspace, size_t entropyWkspSize) {
-+ZSTD_buildSequencesStatistics(
-+                const seqStore_t* seqStorePtr, size_t nbSeq,
-+                const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
-+                      BYTE* dst, const BYTE* const dstEnd,
-+                      ZSTD_strategy strategy, unsigned* countWorkspace,
-+                      void* entropyWorkspace, size_t entropyWkspSize)
-+{
-     BYTE* const ostart = dst;
-     const BYTE* const oend = dstEnd;
-     BYTE* op = ostart;
-@@ -2375,7 +2678,7 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
- 
-     stats.lastCountSize = 0;
-     /* convert length/distances into codes */
--    ZSTD_seqToCodes(seqStorePtr);
-+    stats.longOffsets = ZSTD_seqToCodes(seqStorePtr);
-     assert(op <= oend);
-     assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
-     /* build CTable for Literal Lengths */
-@@ -2480,22 +2783,22 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
-  */
- #define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20
- MEM_STATIC size_t
--ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
--                          const ZSTD_entropyCTables_t* prevEntropy,
--                                ZSTD_entropyCTables_t* nextEntropy,
--                          const ZSTD_CCtx_params* cctxParams,
--                                void* dst, size_t dstCapacity,
--                                void* entropyWorkspace, size_t entropyWkspSize,
--                          const int bmi2)
-+ZSTD_entropyCompressSeqStore_internal(
-+                        const seqStore_t* seqStorePtr,
-+                        const ZSTD_entropyCTables_t* prevEntropy,
-+                              ZSTD_entropyCTables_t* nextEntropy,
-+                        const ZSTD_CCtx_params* cctxParams,
-+                              void* dst, size_t dstCapacity,
-+                              void* entropyWorkspace, size_t entropyWkspSize,
-+                        const int bmi2)
- {
--    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
-     ZSTD_strategy const strategy = cctxParams->cParams.strategy;
-     unsigned* count = (unsigned*)entropyWorkspace;
-     FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
-     FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
-     FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
-     const seqDef* const sequences = seqStorePtr->sequencesStart;
--    const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
-+    const size_t nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-     const BYTE* const ofCodeTable = seqStorePtr->ofCode;
-     const BYTE* const llCodeTable = seqStorePtr->llCode;
-     const BYTE* const mlCodeTable = seqStorePtr->mlCode;
-@@ -2503,29 +2806,31 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
-     BYTE* const oend = ostart + dstCapacity;
-     BYTE* op = ostart;
-     size_t lastCountSize;
-+    int longOffsets = 0;
- 
-     entropyWorkspace = count + (MaxSeq + 1);
-     entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
- 
--    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq);
-+    DEBUGLOG(5, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu, dstCapacity=%zu)", nbSeq, dstCapacity);
-     ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
-     assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
- 
-     /* Compress literals */
-     {   const BYTE* const literals = seqStorePtr->litStart;
--        size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart;
--        size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart;
-+        size_t const numSequences = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-+        size_t const numLiterals = (size_t)(seqStorePtr->lit - seqStorePtr->litStart);
-         /* Base suspicion of uncompressibility on ratio of literals to sequences */
-         unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO);
-         size_t const litSize = (size_t)(seqStorePtr->lit - literals);
-+
-         size_t const cSize = ZSTD_compressLiterals(
--                                    &prevEntropy->huf, &nextEntropy->huf,
--                                    cctxParams->cParams.strategy,
--                                    ZSTD_literalsCompressionIsDisabled(cctxParams),
-                                     op, dstCapacity,
-                                     literals, litSize,
-                                     entropyWorkspace, entropyWkspSize,
--                                    bmi2, suspectUncompressible);
-+                                    &prevEntropy->huf, &nextEntropy->huf,
-+                                    cctxParams->cParams.strategy,
-+                                    ZSTD_literalsCompressionIsDisabled(cctxParams),
-+                                    suspectUncompressible, bmi2);
-         FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
-         assert(cSize <= dstCapacity);
-         op += cSize;
-@@ -2551,11 +2856,10 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
-         ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
-         return (size_t)(op - ostart);
-     }
--    {
--        ZSTD_symbolEncodingTypeStats_t stats;
--        BYTE* seqHead = op++;
-+    {   BYTE* const seqHead = op++;
-         /* build stats for sequences */
--        stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
-+        const ZSTD_symbolEncodingTypeStats_t stats =
-+                ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
-                                              &prevEntropy->fse, &nextEntropy->fse,
-                                               op, oend,
-                                               strategy, count,
-@@ -2564,6 +2868,7 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
-         *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
-         lastCountSize = stats.lastCountSize;
-         op += stats.size;
-+        longOffsets = stats.longOffsets;
-     }
- 
-     {   size_t const bitstreamSize = ZSTD_encodeSequences(
-@@ -2598,14 +2903,15 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
- }
- 
- MEM_STATIC size_t
--ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
--                       const ZSTD_entropyCTables_t* prevEntropy,
--                             ZSTD_entropyCTables_t* nextEntropy,
--                       const ZSTD_CCtx_params* cctxParams,
--                             void* dst, size_t dstCapacity,
--                             size_t srcSize,
--                             void* entropyWorkspace, size_t entropyWkspSize,
--                             int bmi2)
-+ZSTD_entropyCompressSeqStore(
-+                    const seqStore_t* seqStorePtr,
-+                    const ZSTD_entropyCTables_t* prevEntropy,
-+                          ZSTD_entropyCTables_t* nextEntropy,
-+                    const ZSTD_CCtx_params* cctxParams,
-+                          void* dst, size_t dstCapacity,
-+                          size_t srcSize,
-+                          void* entropyWorkspace, size_t entropyWkspSize,
-+                          int bmi2)
- {
-     size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
-                             seqStorePtr, prevEntropy, nextEntropy, cctxParams,
-@@ -2615,15 +2921,21 @@ ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
-     /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
-      * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
-      */
--    if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
-+    if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) {
-+        DEBUGLOG(4, "not enough dstCapacity (%zu) for ZSTD_entropyCompressSeqStore_internal()=> do not compress block", dstCapacity);
-         return 0;  /* block not compressed */
-+    }
-     FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");
- 
-     /* Check compressibility */
-     {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
-         if (cSize >= maxCSize) return 0;  /* block not compressed */
-     }
--    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
-+    DEBUGLOG(5, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
-+    /* libzstd decoder before  > v1.5.4 is not compatible with compressed blocks of size ZSTD_BLOCKSIZE_MAX exactly.
-+     * This restriction is indirectly already fulfilled by respecting ZSTD_minGain() condition above.
-+     */
-+    assert(cSize < ZSTD_BLOCKSIZE_MAX);
-     return cSize;
- }
- 
-@@ -2635,40 +2947,43 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS
-     static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
-         { ZSTD_compressBlock_fast  /* default for 0 */,
-           ZSTD_compressBlock_fast,
--          ZSTD_compressBlock_doubleFast,
--          ZSTD_compressBlock_greedy,
--          ZSTD_compressBlock_lazy,
--          ZSTD_compressBlock_lazy2,
--          ZSTD_compressBlock_btlazy2,
--          ZSTD_compressBlock_btopt,
--          ZSTD_compressBlock_btultra,
--          ZSTD_compressBlock_btultra2 },
-+          ZSTD_COMPRESSBLOCK_DOUBLEFAST,
-+          ZSTD_COMPRESSBLOCK_GREEDY,
-+          ZSTD_COMPRESSBLOCK_LAZY,
-+          ZSTD_COMPRESSBLOCK_LAZY2,
-+          ZSTD_COMPRESSBLOCK_BTLAZY2,
-+          ZSTD_COMPRESSBLOCK_BTOPT,
-+          ZSTD_COMPRESSBLOCK_BTULTRA,
-+          ZSTD_COMPRESSBLOCK_BTULTRA2
-+        },
-         { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
-           ZSTD_compressBlock_fast_extDict,
--          ZSTD_compressBlock_doubleFast_extDict,
--          ZSTD_compressBlock_greedy_extDict,
--          ZSTD_compressBlock_lazy_extDict,
--          ZSTD_compressBlock_lazy2_extDict,
--          ZSTD_compressBlock_btlazy2_extDict,
--          ZSTD_compressBlock_btopt_extDict,
--          ZSTD_compressBlock_btultra_extDict,
--          ZSTD_compressBlock_btultra_extDict },
-+          ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT,
-+          ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT,
-+          ZSTD_COMPRESSBLOCK_LAZY_EXTDICT,
-+          ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT,
-+          ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT,
-+          ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT,
-+          ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT,
-+          ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT
-+        },
-         { ZSTD_compressBlock_fast_dictMatchState  /* default for 0 */,
-           ZSTD_compressBlock_fast_dictMatchState,
--          ZSTD_compressBlock_doubleFast_dictMatchState,
--          ZSTD_compressBlock_greedy_dictMatchState,
--          ZSTD_compressBlock_lazy_dictMatchState,
--          ZSTD_compressBlock_lazy2_dictMatchState,
--          ZSTD_compressBlock_btlazy2_dictMatchState,
--          ZSTD_compressBlock_btopt_dictMatchState,
--          ZSTD_compressBlock_btultra_dictMatchState,
--          ZSTD_compressBlock_btultra_dictMatchState },
-+          ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE,
-+          ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE,
-+          ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE,
-+          ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE,
-+          ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE,
-+          ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE,
-+          ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE,
-+          ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE
-+        },
-         { NULL  /* default for 0 */,
-           NULL,
-           NULL,
--          ZSTD_compressBlock_greedy_dedicatedDictSearch,
--          ZSTD_compressBlock_lazy_dedicatedDictSearch,
--          ZSTD_compressBlock_lazy2_dedicatedDictSearch,
-+          ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH,
-+          ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH,
-+          ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH,
-           NULL,
-           NULL,
-           NULL,
-@@ -2681,18 +2996,26 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS
-     DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);
-     if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {
-         static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {
--            { ZSTD_compressBlock_greedy_row,
--            ZSTD_compressBlock_lazy_row,
--            ZSTD_compressBlock_lazy2_row },
--            { ZSTD_compressBlock_greedy_extDict_row,
--            ZSTD_compressBlock_lazy_extDict_row,
--            ZSTD_compressBlock_lazy2_extDict_row },
--            { ZSTD_compressBlock_greedy_dictMatchState_row,
--            ZSTD_compressBlock_lazy_dictMatchState_row,
--            ZSTD_compressBlock_lazy2_dictMatchState_row },
--            { ZSTD_compressBlock_greedy_dedicatedDictSearch_row,
--            ZSTD_compressBlock_lazy_dedicatedDictSearch_row,
--            ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }
-+            {
-+                ZSTD_COMPRESSBLOCK_GREEDY_ROW,
-+                ZSTD_COMPRESSBLOCK_LAZY_ROW,
-+                ZSTD_COMPRESSBLOCK_LAZY2_ROW
-+            },
-+            {
-+                ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW,
-+                ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW,
-+                ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW
-+            },
-+            {
-+                ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW,
-+                ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW,
-+                ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW
-+            },
-+            {
-+                ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW,
-+                ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW,
-+                ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW
-+            }
-         };
-         DEBUGLOG(4, "Selecting a row-based matchfinder");
-         assert(useRowMatchFinder != ZSTD_ps_auto);
-@@ -2718,6 +3041,72 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
-     ssPtr->longLengthType = ZSTD_llt_none;
- }
- 
-+/* ZSTD_postProcessSequenceProducerResult() :
-+ * Validates and post-processes sequences obtained through the external matchfinder API:
-+ *   - Checks whether nbExternalSeqs represents an error condition.
-+ *   - Appends a block delimiter to outSeqs if one is not already present.
-+ *     See zstd.h for context regarding block delimiters.
-+ * Returns the number of sequences after post-processing, or an error code. */
-+static size_t ZSTD_postProcessSequenceProducerResult(
-+    ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize
-+) {
-+    RETURN_ERROR_IF(
-+        nbExternalSeqs > outSeqsCapacity,
-+        sequenceProducer_failed,
-+        "External sequence producer returned error code %lu",
-+        (unsigned long)nbExternalSeqs
-+    );
-+
-+    RETURN_ERROR_IF(
-+        nbExternalSeqs == 0 && srcSize > 0,
-+        sequenceProducer_failed,
-+        "Got zero sequences from external sequence producer for a non-empty src buffer!"
-+    );
-+
-+    if (srcSize == 0) {
-+        ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence));
-+        return 1;
-+    }
-+
-+    {
-+        ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1];
-+
-+        /* We can return early if lastSeq is already a block delimiter. */
-+        if (lastSeq.offset == 0 && lastSeq.matchLength == 0) {
-+            return nbExternalSeqs;
-+        }
-+
-+        /* This error condition is only possible if the external matchfinder
-+         * produced an invalid parse, by definition of ZSTD_sequenceBound(). */
-+        RETURN_ERROR_IF(
-+            nbExternalSeqs == outSeqsCapacity,
-+            sequenceProducer_failed,
-+            "nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!"
-+        );
-+
-+        /* lastSeq is not a block delimiter, so we need to append one. */
-+        ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence));
-+        return nbExternalSeqs + 1;
-+    }
-+}
-+
-+/* ZSTD_fastSequenceLengthSum() :
-+ * Returns sum(litLen) + sum(matchLen) + lastLits for *seqBuf*.
-+ * Similar to another function in zstd_compress.c (determine_blockSize),
-+ * except it doesn't check for a block delimiter to end summation.
-+ * Removing the early exit allows the compiler to auto-vectorize (https://godbolt.org/z/cY1cajz9P).
-+ * This function can be deleted and replaced by determine_blockSize after we resolve issue #3456. */
-+static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seqBufSize) {
-+    size_t matchLenSum, litLenSum, i;
-+    matchLenSum = 0;
-+    litLenSum = 0;
-+    for (i = 0; i < seqBufSize; i++) {
-+        litLenSum += seqBuf[i].litLength;
-+        matchLenSum += seqBuf[i].matchLength;
-+    }
-+    return litLenSum + matchLenSum;
-+}
-+
- typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
- 
- static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
-@@ -2727,7 +3116,9 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
-     assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
-     /* Assert that we have correctly flushed the ctx params into the ms's copy */
-     ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
--    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
-+    /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding
-+     * additional 1. We need to revisit and change this logic to be more consistent */
-+    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) {
-         if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
-             ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
-         } else {
-@@ -2763,6 +3154,15 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
-         }
-         if (zc->externSeqStore.pos < zc->externSeqStore.size) {
-             assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable);
-+
-+            /* External matchfinder + LDM is technically possible, just not implemented yet.
-+             * We need to revisit soon and implement it. */
-+            RETURN_ERROR_IF(
-+                ZSTD_hasExtSeqProd(&zc->appliedParams),
-+                parameter_combination_unsupported,
-+                "Long-distance matching with external sequence producer enabled is not currently supported."
-+            );
-+
-             /* Updates ldmSeqStore.pos */
-             lastLLSize =
-                 ZSTD_ldm_blockCompress(&zc->externSeqStore,
-@@ -2774,6 +3174,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
-         } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
-             rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
- 
-+            /* External matchfinder + LDM is technically possible, just not implemented yet.
-+             * We need to revisit soon and implement it. */
-+            RETURN_ERROR_IF(
-+                ZSTD_hasExtSeqProd(&zc->appliedParams),
-+                parameter_combination_unsupported,
-+                "Long-distance matching with external sequence producer enabled is not currently supported."
-+            );
-+
-             ldmSeqStore.seq = zc->ldmSequences;
-             ldmSeqStore.capacity = zc->maxNbLdmSequences;
-             /* Updates ldmSeqStore.size */
-@@ -2788,10 +3196,74 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
-                                        zc->appliedParams.useRowMatchFinder,
-                                        src, srcSize);
-             assert(ldmSeqStore.pos == ldmSeqStore.size);
--        } else {   /* not long range mode */
--            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
--                                                                                    zc->appliedParams.useRowMatchFinder,
--                                                                                    dictMode);
-+        } else if (ZSTD_hasExtSeqProd(&zc->appliedParams)) {
-+            assert(
-+                zc->extSeqBufCapacity >= ZSTD_sequenceBound(srcSize)
-+            );
-+            assert(zc->appliedParams.extSeqProdFunc != NULL);
-+
-+            {   U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog;
-+
-+                size_t const nbExternalSeqs = (zc->appliedParams.extSeqProdFunc)(
-+                    zc->appliedParams.extSeqProdState,
-+                    zc->extSeqBuf,
-+                    zc->extSeqBufCapacity,
-+                    src, srcSize,
-+                    NULL, 0,  /* dict and dictSize, currently not supported */
-+                    zc->appliedParams.compressionLevel,
-+                    windowSize
-+                );
-+
-+                size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult(
-+                    zc->extSeqBuf,
-+                    nbExternalSeqs,
-+                    zc->extSeqBufCapacity,
-+                    srcSize
-+                );
-+
-+                /* Return early if there is no error, since we don't need to worry about last literals */
-+                if (!ZSTD_isError(nbPostProcessedSeqs)) {
-+                    ZSTD_sequencePosition seqPos = {0,0,0};
-+                    size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->extSeqBuf, nbPostProcessedSeqs);
-+                    RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!");
-+                    FORWARD_IF_ERROR(
-+                        ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
-+                            zc, &seqPos,
-+                            zc->extSeqBuf, nbPostProcessedSeqs,
-+                            src, srcSize,
-+                            zc->appliedParams.searchForExternalRepcodes
-+                        ),
-+                        "Failed to copy external sequences to seqStore!"
-+                    );
-+                    ms->ldmSeqStore = NULL;
-+                    DEBUGLOG(5, "Copied %lu sequences from external sequence producer to internal seqStore.", (unsigned long)nbExternalSeqs);
-+                    return ZSTDbss_compress;
-+                }
-+
-+                /* Propagate the error if fallback is disabled */
-+                if (!zc->appliedParams.enableMatchFinderFallback) {
-+                    return nbPostProcessedSeqs;
-+                }
-+
-+                /* Fallback to software matchfinder */
-+                {   ZSTD_blockCompressor const blockCompressor =
-+                        ZSTD_selectBlockCompressor(
-+                            zc->appliedParams.cParams.strategy,
-+                            zc->appliedParams.useRowMatchFinder,
-+                            dictMode);
-+                    ms->ldmSeqStore = NULL;
-+                    DEBUGLOG(
-+                        5,
-+                        "External sequence producer returned error code %lu. Falling back to internal parser.",
-+                        (unsigned long)nbExternalSeqs
-+                    );
-+                    lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
-+            }   }
-+        } else {   /* not long range mode and no external matchfinder */
-+            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(
-+                    zc->appliedParams.cParams.strategy,
-+                    zc->appliedParams.useRowMatchFinder,
-+                    dictMode);
-             ms->ldmSeqStore = NULL;
-             lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
-         }
-@@ -2801,29 +3273,38 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
-     return ZSTDbss_compress;
- }
- 
--static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
-+static size_t ZSTD_copyBlockSequences(SeqCollector* seqCollector, const seqStore_t* seqStore, const U32 prevRepcodes[ZSTD_REP_NUM])
- {
--    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
--    const seqDef* seqStoreSeqs = seqStore->sequencesStart;
--    size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
--    size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
--    size_t literalsRead = 0;
--    size_t lastLLSize;
-+    const seqDef* inSeqs = seqStore->sequencesStart;
-+    const size_t nbInSequences = seqStore->sequences - inSeqs;
-+    const size_t nbInLiterals = (size_t)(seqStore->lit - seqStore->litStart);
- 
--    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
-+    ZSTD_Sequence* outSeqs = seqCollector->seqIndex == 0 ? seqCollector->seqStart : seqCollector->seqStart + seqCollector->seqIndex;
-+    const size_t nbOutSequences = nbInSequences + 1;
-+    size_t nbOutLiterals = 0;
-+    repcodes_t repcodes;
-     size_t i;
--    repcodes_t updatedRepcodes;
- 
--    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
--    /* Ensure we have enough space for last literals "sequence" */
--    assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
--    ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
--    for (i = 0; i < seqStoreSeqSize; ++i) {
--        U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;
--        outSeqs[i].litLength = seqStoreSeqs[i].litLength;
--        outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;
-+    /* Bounds check that we have enough space for every input sequence
-+     * and the block delimiter
-+     */
-+    assert(seqCollector->seqIndex <= seqCollector->maxSequences);
-+    RETURN_ERROR_IF(
-+        nbOutSequences > (size_t)(seqCollector->maxSequences - seqCollector->seqIndex),
-+        dstSize_tooSmall,
-+        "Not enough space to copy sequences");
-+
-+    ZSTD_memcpy(&repcodes, prevRepcodes, sizeof(repcodes));
-+    for (i = 0; i < nbInSequences; ++i) {
-+        U32 rawOffset;
-+        outSeqs[i].litLength = inSeqs[i].litLength;
-+        outSeqs[i].matchLength = inSeqs[i].mlBase + MINMATCH;
-         outSeqs[i].rep = 0;
- 
-+        /* Handle the possible single length >= 64K
-+         * There can only be one because we add MINMATCH to every match length,
-+         * and blocks are at most 128K.
-+         */
-         if (i == seqStore->longLengthPos) {
-             if (seqStore->longLengthType == ZSTD_llt_literalLength) {
-                 outSeqs[i].litLength += 0x10000;
-@@ -2832,37 +3313,55 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
-             }
-         }
- 
--        if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {
--            /* Derive the correct offset corresponding to a repcode */
--            outSeqs[i].rep = seqStoreSeqs[i].offBase;
-+        /* Determine the raw offset given the offBase, which may be a repcode. */
-+        if (OFFBASE_IS_REPCODE(inSeqs[i].offBase)) {
-+            const U32 repcode = OFFBASE_TO_REPCODE(inSeqs[i].offBase);
-+            assert(repcode > 0);
-+            outSeqs[i].rep = repcode;
-             if (outSeqs[i].litLength != 0) {
--                rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
-+                rawOffset = repcodes.rep[repcode - 1];
-             } else {
--                if (outSeqs[i].rep == 3) {
--                    rawOffset = updatedRepcodes.rep[0] - 1;
-+                if (repcode == 3) {
-+                    assert(repcodes.rep[0] > 1);
-+                    rawOffset = repcodes.rep[0] - 1;
-                 } else {
--                    rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
-+                    rawOffset = repcodes.rep[repcode];
-                 }
-             }
-+        } else {
-+            rawOffset = OFFBASE_TO_OFFSET(inSeqs[i].offBase);
-         }
-         outSeqs[i].offset = rawOffset;
--        /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
--           so we provide seqStoreSeqs[i].offset - 1 */
--        ZSTD_updateRep(updatedRepcodes.rep,
--                       seqStoreSeqs[i].offBase - 1,
--                       seqStoreSeqs[i].litLength == 0);
--        literalsRead += outSeqs[i].litLength;
-+
-+        /* Update repcode history for the sequence */
-+        ZSTD_updateRep(repcodes.rep,
-+                       inSeqs[i].offBase,
-+                       inSeqs[i].litLength == 0);
-+
-+        nbOutLiterals += outSeqs[i].litLength;
-     }
-     /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
-      * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
-      * for the block boundary, according to the API.
-      */
--    assert(seqStoreLiteralsSize >= literalsRead);
--    lastLLSize = seqStoreLiteralsSize - literalsRead;
--    outSeqs[i].litLength = (U32)lastLLSize;
--    outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
--    seqStoreSeqSize++;
--    zc->seqCollector.seqIndex += seqStoreSeqSize;
-+    assert(nbInLiterals >= nbOutLiterals);
-+    {
-+        const size_t lastLLSize = nbInLiterals - nbOutLiterals;
-+        outSeqs[nbInSequences].litLength = (U32)lastLLSize;
-+        outSeqs[nbInSequences].matchLength = 0;
-+        outSeqs[nbInSequences].offset = 0;
-+        assert(nbOutSequences == nbInSequences + 1);
-+    }
-+    seqCollector->seqIndex += nbOutSequences;
-+    assert(seqCollector->seqIndex <= seqCollector->maxSequences);
-+
-+    return 0;
-+}
-+
-+size_t ZSTD_sequenceBound(size_t srcSize) {
-+    const size_t maxNbSeq = (srcSize / ZSTD_MINMATCH_MIN) + 1;
-+    const size_t maxNbDelims = (srcSize / ZSTD_BLOCKSIZE_MAX_MIN) + 1;
-+    return maxNbSeq + maxNbDelims;
- }
- 
- size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
-@@ -2871,6 +3370,16 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
-     const size_t dstCapacity = ZSTD_compressBound(srcSize);
-     void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
-     SeqCollector seqCollector;
-+    {
-+        int targetCBlockSize;
-+        FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_targetCBlockSize, &targetCBlockSize), "");
-+        RETURN_ERROR_IF(targetCBlockSize != 0, parameter_unsupported, "targetCBlockSize != 0");
-+    }
-+    {
-+        int nbWorkers;
-+        FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_nbWorkers, &nbWorkers), "");
-+        RETURN_ERROR_IF(nbWorkers != 0, parameter_unsupported, "nbWorkers != 0");
-+    }
- 
-     RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
- 
-@@ -2880,8 +3389,12 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
-     seqCollector.maxSequences = outSeqsSize;
-     zc->seqCollector = seqCollector;
- 
--    ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
--    ZSTD_customFree(dst, ZSTD_defaultCMem);
-+    {
-+        const size_t ret = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
-+        ZSTD_customFree(dst, ZSTD_defaultCMem);
-+        FORWARD_IF_ERROR(ret, "ZSTD_compress2 failed");
-+    }
-+    assert(zc->seqCollector.seqIndex <= ZSTD_sequenceBound(srcSize));
-     return zc->seqCollector.seqIndex;
- }
- 
-@@ -2910,19 +3423,17 @@ static int ZSTD_isRLE(const BYTE* src, size_t length) {
-     const size_t unrollMask = unrollSize - 1;
-     const size_t prefixLength = length & unrollMask;
-     size_t i;
--    size_t u;
-     if (length == 1) return 1;
-     /* Check if prefix is RLE first before using unrolled loop */
-     if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
-         return 0;
-     }
-     for (i = prefixLength; i != length; i += unrollSize) {
-+        size_t u;
-         for (u = 0; u < unrollSize; u += sizeof(size_t)) {
-             if (MEM_readST(ip + i + u) != valueST) {
-                 return 0;
--            }
--        }
--    }
-+    }   }   }
-     return 1;
- }
- 
-@@ -2938,7 +3449,8 @@ static int ZSTD_maybeRLE(seqStore_t const* seqStore)
-     return nbSeqs < 4 && nbLits < 10;
- }
- 
--static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
-+static void
-+ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
- {
-     ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;
-     bs->prevCBlock = bs->nextCBlock;
-@@ -2946,7 +3458,9 @@ static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* c
- }
- 
- /* Writes the block header */
--static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) {
-+static void
-+writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock)
-+{
-     U32 const cBlockHeader = cSize == 1 ?
-                         lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
-                         lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
-@@ -2959,13 +3473,16 @@ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastB
-  *  Stores literals block type (raw, rle, compressed, repeat) and
-  *  huffman description table to hufMetadata.
-  *  Requires ENTROPY_WORKSPACE_SIZE workspace
-- *  @return : size of huffman description table or error code */
--static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
--                                            const ZSTD_hufCTables_t* prevHuf,
--                                                  ZSTD_hufCTables_t* nextHuf,
--                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
--                                                  const int literalsCompressionIsDisabled,
--                                                  void* workspace, size_t wkspSize)
-+ * @return : size of huffman description table, or an error code
-+ */
-+static size_t
-+ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
-+                               const ZSTD_hufCTables_t* prevHuf,
-+                                     ZSTD_hufCTables_t* nextHuf,
-+                                     ZSTD_hufCTablesMetadata_t* hufMetadata,
-+                               const int literalsCompressionIsDisabled,
-+                                     void* workspace, size_t wkspSize,
-+                                     int hufFlags)
- {
-     BYTE* const wkspStart = (BYTE*)workspace;
-     BYTE* const wkspEnd = wkspStart + wkspSize;
-@@ -2973,9 +3490,9 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
-     unsigned* const countWksp = (unsigned*)workspace;
-     const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
-     BYTE* const nodeWksp = countWkspStart + countWkspSize;
--    const size_t nodeWkspSize = wkspEnd-nodeWksp;
-+    const size_t nodeWkspSize = (size_t)(wkspEnd - nodeWksp);
-     unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
--    unsigned huffLog = HUF_TABLELOG_DEFAULT;
-+    unsigned huffLog = LitHufLog;
-     HUF_repeat repeat = prevHuf->repeatMode;
-     DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);
- 
-@@ -2990,73 +3507,77 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
- 
-     /* small ? don't even attempt compression (speed opt) */
- #ifndef COMPRESS_LITERALS_SIZE_MIN
--#define COMPRESS_LITERALS_SIZE_MIN 63
-+# define COMPRESS_LITERALS_SIZE_MIN 63  /* heuristic */
- #endif
-     {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
-         if (srcSize <= minLitSize) {
-             DEBUGLOG(5, "set_basic - too small");
-             hufMetadata->hType = set_basic;
-             return 0;
--        }
--    }
-+    }   }
- 
-     /* Scan input and build symbol stats */
--    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
-+    {   size_t const largest =
-+            HIST_count_wksp (countWksp, &maxSymbolValue,
-+                            (const BYTE*)src, srcSize,
-+                            workspace, wkspSize);
-         FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
-         if (largest == srcSize) {
-+            /* only one literal symbol */
-             DEBUGLOG(5, "set_rle");
-             hufMetadata->hType = set_rle;
-             return 0;
-         }
-         if (largest <= (srcSize >> 7)+4) {
-+            /* heuristic: likely not compressible */
-             DEBUGLOG(5, "set_basic - no gain");
-             hufMetadata->hType = set_basic;
-             return 0;
--        }
--    }
-+    }   }
- 
-     /* Validate the previous Huffman table */
--    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
-+    if (repeat == HUF_repeat_check
-+      && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
-         repeat = HUF_repeat_none;
-     }
- 
-     /* Build Huffman Tree */
-     ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
--    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
-+    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, nodeWksp, nodeWkspSize, nextHuf->CTable, countWksp, hufFlags);
-+    assert(huffLog <= LitHufLog);
-     {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
-                                                     maxSymbolValue, huffLog,
-                                                     nodeWksp, nodeWkspSize);
-         FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
-         huffLog = (U32)maxBits;
--        {   /* Build and write the CTable */
--            size_t const newCSize = HUF_estimateCompressedSize(
--                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
--            size_t const hSize = HUF_writeCTable_wksp(
--                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
--                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
--                    nodeWksp, nodeWkspSize);
--            /* Check against repeating the previous CTable */
--            if (repeat != HUF_repeat_none) {
--                size_t const oldCSize = HUF_estimateCompressedSize(
--                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
--                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
--                    DEBUGLOG(5, "set_repeat - smaller");
--                    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
--                    hufMetadata->hType = set_repeat;
--                    return 0;
--                }
--            }
--            if (newCSize + hSize >= srcSize) {
--                DEBUGLOG(5, "set_basic - no gains");
-+    }
-+    {   /* Build and write the CTable */
-+        size_t const newCSize = HUF_estimateCompressedSize(
-+                (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
-+        size_t const hSize = HUF_writeCTable_wksp(
-+                hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
-+                (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
-+                nodeWksp, nodeWkspSize);
-+        /* Check against repeating the previous CTable */
-+        if (repeat != HUF_repeat_none) {
-+            size_t const oldCSize = HUF_estimateCompressedSize(
-+                    (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
-+            if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
-+                DEBUGLOG(5, "set_repeat - smaller");
-                 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
--                hufMetadata->hType = set_basic;
-+                hufMetadata->hType = set_repeat;
-                 return 0;
--            }
--            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
--            hufMetadata->hType = set_compressed;
--            nextHuf->repeatMode = HUF_repeat_check;
--            return hSize;
-+        }   }
-+        if (newCSize + hSize >= srcSize) {
-+            DEBUGLOG(5, "set_basic - no gains");
-+            ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-+            hufMetadata->hType = set_basic;
-+            return 0;
-         }
-+        DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
-+        hufMetadata->hType = set_compressed;
-+        nextHuf->repeatMode = HUF_repeat_check;
-+        return hSize;
-     }
- }
- 
-@@ -3066,8 +3587,9 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
-  * and updates nextEntropy to the appropriate repeatMode.
-  */
- static ZSTD_symbolEncodingTypeStats_t
--ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {
--    ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0};
-+ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy)
-+{
-+    ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0, 0};
-     nextEntropy->litlength_repeatMode = FSE_repeat_none;
-     nextEntropy->offcode_repeatMode = FSE_repeat_none;
-     nextEntropy->matchlength_repeatMode = FSE_repeat_none;
-@@ -3078,16 +3600,18 @@ ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {
-  *  Builds entropy for the sequences.
-  *  Stores symbol compression modes and fse table to fseMetadata.
-  *  Requires ENTROPY_WORKSPACE_SIZE wksp.
-- *  @return : size of fse tables or error code */
--static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
--                                              const ZSTD_fseCTables_t* prevEntropy,
--                                                    ZSTD_fseCTables_t* nextEntropy,
--                                              const ZSTD_CCtx_params* cctxParams,
--                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
--                                                    void* workspace, size_t wkspSize)
-+ * @return : size of fse tables or error code */
-+static size_t
-+ZSTD_buildBlockEntropyStats_sequences(
-+                const seqStore_t* seqStorePtr,
-+                const ZSTD_fseCTables_t* prevEntropy,
-+                      ZSTD_fseCTables_t* nextEntropy,
-+                const ZSTD_CCtx_params* cctxParams,
-+                      ZSTD_fseCTablesMetadata_t* fseMetadata,
-+                      void* workspace, size_t wkspSize)
- {
-     ZSTD_strategy const strategy = cctxParams->cParams.strategy;
--    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
-+    size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-     BYTE* const ostart = fseMetadata->fseTablesBuffer;
-     BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
-     BYTE* op = ostart;
-@@ -3114,23 +3638,28 @@ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
- /* ZSTD_buildBlockEntropyStats() :
-  *  Builds entropy for the block.
-  *  Requires workspace size ENTROPY_WORKSPACE_SIZE
-- *
-- *  @return : 0 on success or error code
-+ * @return : 0 on success, or an error code
-+ *  Note : also employed in superblock
-  */
--size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
--                             const ZSTD_entropyCTables_t* prevEntropy,
--                                   ZSTD_entropyCTables_t* nextEntropy,
--                             const ZSTD_CCtx_params* cctxParams,
--                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
--                                   void* workspace, size_t wkspSize)
--{
--    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
-+size_t ZSTD_buildBlockEntropyStats(
-+            const seqStore_t* seqStorePtr,
-+            const ZSTD_entropyCTables_t* prevEntropy,
-+                  ZSTD_entropyCTables_t* nextEntropy,
-+            const ZSTD_CCtx_params* cctxParams,
-+                  ZSTD_entropyCTablesMetadata_t* entropyMetadata,
-+                  void* workspace, size_t wkspSize)
-+{
-+    size_t const litSize = (size_t)(seqStorePtr->lit - seqStorePtr->litStart);
-+    int const huf_useOptDepth = (cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD);
-+    int const hufFlags = huf_useOptDepth ? HUF_flags_optimalDepth : 0;
-+
-     entropyMetadata->hufMetadata.hufDesSize =
-         ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
-                                             &prevEntropy->huf, &nextEntropy->huf,
-                                             &entropyMetadata->hufMetadata,
-                                             ZSTD_literalsCompressionIsDisabled(cctxParams),
--                                            workspace, wkspSize);
-+                                            workspace, wkspSize, hufFlags);
-+
-     FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
-     entropyMetadata->fseMetadata.fseTablesSize =
-         ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
-@@ -3143,11 +3672,12 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
- }
- 
- /* Returns the size estimate for the literals section (header + content) of a block */
--static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
--                                                const ZSTD_hufCTables_t* huf,
--                                                const ZSTD_hufCTablesMetadata_t* hufMetadata,
--                                                void* workspace, size_t wkspSize,
--                                                int writeEntropy)
-+static size_t
-+ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
-+                               const ZSTD_hufCTables_t* huf,
-+                               const ZSTD_hufCTablesMetadata_t* hufMetadata,
-+                               void* workspace, size_t wkspSize,
-+                               int writeEntropy)
- {
-     unsigned* const countWksp = (unsigned*)workspace;
-     unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
-@@ -3169,12 +3699,13 @@ static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSiz
- }
- 
- /* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
--static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
--                        const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
--                        const FSE_CTable* fseCTable,
--                        const U8* additionalBits,
--                        short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
--                        void* workspace, size_t wkspSize)
-+static size_t
-+ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
-+                    const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
-+                    const FSE_CTable* fseCTable,
-+                    const U8* additionalBits,
-+                    short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
-+                    void* workspace, size_t wkspSize)
- {
-     unsigned* const countWksp = (unsigned*)workspace;
-     const BYTE* ctp = codeTable;
-@@ -3206,99 +3737,107 @@ static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
- }
- 
- /* Returns the size estimate for the sequences section (header + content) of a block */
--static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
--                                                  const BYTE* llCodeTable,
--                                                  const BYTE* mlCodeTable,
--                                                  size_t nbSeq,
--                                                  const ZSTD_fseCTables_t* fseTables,
--                                                  const ZSTD_fseCTablesMetadata_t* fseMetadata,
--                                                  void* workspace, size_t wkspSize,
--                                                  int writeEntropy)
-+static size_t
-+ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
-+                                 const BYTE* llCodeTable,
-+                                 const BYTE* mlCodeTable,
-+                                 size_t nbSeq,
-+                                 const ZSTD_fseCTables_t* fseTables,
-+                                 const ZSTD_fseCTablesMetadata_t* fseMetadata,
-+                                 void* workspace, size_t wkspSize,
-+                                 int writeEntropy)
- {
-     size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
-     size_t cSeqSizeEstimate = 0;
-     cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,
--                                         fseTables->offcodeCTable, NULL,
--                                         OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
--                                         workspace, wkspSize);
-+                                    fseTables->offcodeCTable, NULL,
-+                                    OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
-+                                    workspace, wkspSize);
-     cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,
--                                         fseTables->litlengthCTable, LL_bits,
--                                         LL_defaultNorm, LL_defaultNormLog, MaxLL,
--                                         workspace, wkspSize);
-+                                    fseTables->litlengthCTable, LL_bits,
-+                                    LL_defaultNorm, LL_defaultNormLog, MaxLL,
-+                                    workspace, wkspSize);
-     cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,
--                                         fseTables->matchlengthCTable, ML_bits,
--                                         ML_defaultNorm, ML_defaultNormLog, MaxML,
--                                         workspace, wkspSize);
-+                                    fseTables->matchlengthCTable, ML_bits,
-+                                    ML_defaultNorm, ML_defaultNormLog, MaxML,
-+                                    workspace, wkspSize);
-     if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
-     return cSeqSizeEstimate + sequencesSectionHeaderSize;
- }
- 
- /* Returns the size estimate for a given stream of literals, of, ll, ml */
--static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
--                                     const BYTE* ofCodeTable,
--                                     const BYTE* llCodeTable,
--                                     const BYTE* mlCodeTable,
--                                     size_t nbSeq,
--                                     const ZSTD_entropyCTables_t* entropy,
--                                     const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
--                                     void* workspace, size_t wkspSize,
--                                     int writeLitEntropy, int writeSeqEntropy) {
-+static size_t
-+ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
-+                       const BYTE* ofCodeTable,
-+                       const BYTE* llCodeTable,
-+                       const BYTE* mlCodeTable,
-+                       size_t nbSeq,
-+                       const ZSTD_entropyCTables_t* entropy,
-+                       const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
-+                       void* workspace, size_t wkspSize,
-+                       int writeLitEntropy, int writeSeqEntropy)
-+{
-     size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
--                                                         &entropy->huf, &entropyMetadata->hufMetadata,
--                                                         workspace, wkspSize, writeLitEntropy);
-+                                    &entropy->huf, &entropyMetadata->hufMetadata,
-+                                    workspace, wkspSize, writeLitEntropy);
-     size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
--                                                         nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
--                                                         workspace, wkspSize, writeSeqEntropy);
-+                                    nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
-+                                    workspace, wkspSize, writeSeqEntropy);
-     return seqSize + literalsSize + ZSTD_blockHeaderSize;
- }
- 
- /* Builds entropy statistics and uses them for blocksize estimation.
-  *
-- * Returns the estimated compressed size of the seqStore, or a zstd error.
-+ * @return: estimated compressed size of the seqStore, or a zstd error.
-  */
--static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) {
--    ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
-+static size_t
-+ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc)
-+{
-+    ZSTD_entropyCTablesMetadata_t* const entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
-     DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()");
-     FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
-                     &zc->blockState.prevCBlock->entropy,
-                     &zc->blockState.nextCBlock->entropy,
-                     &zc->appliedParams,
-                     entropyMetadata,
--                    zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
--    return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
-+                    zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE), "");
-+    return ZSTD_estimateBlockSize(
-+                    seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
-                     seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
-                     (size_t)(seqStore->sequences - seqStore->sequencesStart),
--                    &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
-+                    &zc->blockState.nextCBlock->entropy,
-+                    entropyMetadata,
-+                    zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
-                     (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);
- }
- 
- /* Returns literals bytes represented in a seqStore */
--static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) {
-+static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore)
-+{
-     size_t literalsBytes = 0;
--    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
-+    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
-     size_t i;
-     for (i = 0; i < nbSeqs; ++i) {
--        seqDef seq = seqStore->sequencesStart[i];
-+        seqDef const seq = seqStore->sequencesStart[i];
-         literalsBytes += seq.litLength;
-         if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {
-             literalsBytes += 0x10000;
--        }
--    }
-+    }   }
-     return literalsBytes;
- }
- 
- /* Returns match bytes represented in a seqStore */
--static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
-+static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore)
-+{
-     size_t matchBytes = 0;
--    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
-+    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
-     size_t i;
-     for (i = 0; i < nbSeqs; ++i) {
-         seqDef seq = seqStore->sequencesStart[i];
-         matchBytes += seq.mlBase + MINMATCH;
-         if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
-             matchBytes += 0x10000;
--        }
--    }
-+    }   }
-     return matchBytes;
- }
- 
-@@ -3307,15 +3846,12 @@ static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
-  */
- static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
-                                const seqStore_t* originalSeqStore,
--                                     size_t startIdx, size_t endIdx) {
--    BYTE* const litEnd = originalSeqStore->lit;
--    size_t literalsBytes;
--    size_t literalsBytesPreceding = 0;
--
-+                                     size_t startIdx, size_t endIdx)
-+{
-     *resultSeqStore = *originalSeqStore;
-     if (startIdx > 0) {
-         resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
--        literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
-+        resultSeqStore->litStart += ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
-     }
- 
-     /* Move longLengthPos into the correct position if necessary */
-@@ -3328,13 +3864,12 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
-     }
-     resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
-     resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
--    literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
--    resultSeqStore->litStart += literalsBytesPreceding;
-     if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
-         /* This accounts for possible last literals if the derived chunk reaches the end of the block */
--        resultSeqStore->lit = litEnd;
-+        assert(resultSeqStore->lit == originalSeqStore->lit);
-     } else {
--        resultSeqStore->lit = resultSeqStore->litStart+literalsBytes;
-+        size_t const literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
-+        resultSeqStore->lit = resultSeqStore->litStart + literalsBytes;
-     }
-     resultSeqStore->llCode += startIdx;
-     resultSeqStore->mlCode += startIdx;
-@@ -3342,20 +3877,26 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
- }
- 
- /*
-- * Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
-- * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq().
-+ * Returns the raw offset represented by the combination of offBase, ll0, and repcode history.
-+ * offBase must represent a repcode in the numeric representation of ZSTD_storeSeq().
-  */
- static U32
--ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0)
--{
--    U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0;  /* [ 0 - 3 ] */
--    assert(STORED_IS_REPCODE(offCode));
--    if (adjustedOffCode == ZSTD_REP_NUM) {
--        /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
--        assert(rep[0] > 0);
-+ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offBase, const U32 ll0)
-+{
-+    U32 const adjustedRepCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0;  /* [ 0 - 3 ] */
-+    assert(OFFBASE_IS_REPCODE(offBase));
-+    if (adjustedRepCode == ZSTD_REP_NUM) {
-+        assert(ll0);
-+        /* litlength == 0 and offCode == 2 implies selection of first repcode - 1
-+         * This is only valid if it results in a valid offset value, aka > 0.
-+         * Note : it may happen that `rep[0]==1` in exceptional circumstances.
-+         * In which case this function will return 0, which is an invalid offset.
-+         * It's not an issue though, since this value will be
-+         * compared and discarded within ZSTD_seqStore_resolveOffCodes().
-+         */
-         return rep[0] - 1;
-     }
--    return rep[adjustedOffCode];
-+    return rep[adjustedRepCode];
- }
- 
- /*
-@@ -3371,30 +3912,33 @@ ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, c
-  *        1-3 : repcode 1-3
-  *        4+ : real_offset+3
-  */
--static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
--                                          seqStore_t* const seqStore, U32 const nbSeq) {
-+static void
-+ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
-+                        const seqStore_t* const seqStore, U32 const nbSeq)
-+{
-     U32 idx = 0;
-+    U32 const longLitLenIdx = seqStore->longLengthType == ZSTD_llt_literalLength ? seqStore->longLengthPos : nbSeq;
-     for (; idx < nbSeq; ++idx) {
-         seqDef* const seq = seqStore->sequencesStart + idx;
--        U32 const ll0 = (seq->litLength == 0);
--        U32 const offCode = OFFBASE_TO_STORED(seq->offBase);
--        assert(seq->offBase > 0);
--        if (STORED_IS_REPCODE(offCode)) {
--            U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
--            U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
-+        U32 const ll0 = (seq->litLength == 0) && (idx != longLitLenIdx);
-+        U32 const offBase = seq->offBase;
-+        assert(offBase > 0);
-+        if (OFFBASE_IS_REPCODE(offBase)) {
-+            U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offBase, ll0);
-+            U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offBase, ll0);
-             /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
-              * the repcode with the offset it actually references, determined by the compression
-              * repcode history.
-              */
-             if (dRawOffset != cRawOffset) {
--                seq->offBase = cRawOffset + ZSTD_REP_NUM;
-+                seq->offBase = OFFSET_TO_OFFBASE(cRawOffset);
-             }
-         }
-         /* Compression repcode history is always updated with values directly from the unmodified seqStore.
-          * Decompression repcode history may use modified seq->offset value taken from compression repcode history.
-          */
--        ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0);
--        ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
-+        ZSTD_updateRep(dRepcodes->rep, seq->offBase, ll0);
-+        ZSTD_updateRep(cRepcodes->rep, offBase, ll0);
-     }
- }
- 
-@@ -3404,10 +3948,11 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
-  * Returns the total size of that block (including header) or a ZSTD error code.
-  */
- static size_t
--ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
-+ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc,
-+                            const seqStore_t* const seqStore,
-                                   repcodes_t* const dRep, repcodes_t* const cRep,
-                                   void* dst, size_t dstCapacity,
--                                  const void* src, size_t srcSize,
-+                            const void* src, size_t srcSize,
-                                   U32 lastBlock, U32 isPartition)
- {
-     const U32 rleMaxLength = 25;
-@@ -3442,8 +3987,9 @@ ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
-         cSeqsSize = 1;
-     }
- 
-+    /* Sequence collection not supported when block splitting */
-     if (zc->seqCollector.collectSequences) {
--        ZSTD_copyBlockSequences(zc);
-+        FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, seqStore, dRepOriginal.rep), "copyBlockSequences failed");
-         ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
-         return 0;
-     }
-@@ -3481,45 +4027,49 @@ typedef struct {
- 
- /* Helper function to perform the recursive search for block splits.
-  * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
-- * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then
-- * we do not recurse.
-+ * If advantageous to split, then we recurse down the two sub-blocks.
-+ * If not, or if an error occurred in estimation, then we do not recurse.
-  *
-- * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
-+ * Note: The recursion depth is capped by a heuristic minimum number of sequences,
-+ * defined by MIN_SEQUENCES_BLOCK_SPLITTING.
-  * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
-  * In practice, recursion depth usually doesn't go beyond 4.
-  *
-- * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
-+ * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS.
-+ * At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
-  * maximum of 128 KB, this value is actually impossible to reach.
-  */
- static void
- ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
-                              ZSTD_CCtx* zc, const seqStore_t* origSeqStore)
- {
--    seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
--    seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
--    seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
-+    seqStore_t* const fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
-+    seqStore_t* const firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
-+    seqStore_t* const secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
-     size_t estimatedOriginalSize;
-     size_t estimatedFirstHalfSize;
-     size_t estimatedSecondHalfSize;
-     size_t midIdx = (startIdx + endIdx)/2;
- 
-+    DEBUGLOG(5, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
-+    assert(endIdx >= startIdx);
-     if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {
--        DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences");
-+        DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences (%zu)", endIdx - startIdx);
-         return;
-     }
--    DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
-     ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
-     ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);
-     ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);
-     estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc);
-     estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc);
-     estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc);
--    DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
-+    DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
-              estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
-     if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
-         return;
-     }
-     if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
-+        DEBUGLOG(5, "split decided at seqNb:%zu", midIdx);
-         ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
-         splits->splitLocations[splits->idx] = (U32)midIdx;
-         splits->idx++;
-@@ -3527,14 +4077,18 @@ ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t end
-     }
- }
- 
--/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio.
-+/* Base recursive function.
-+ * Populates a table with intra-block partition indices that can improve compression ratio.
-  *
-- * Returns the number of splits made (which equals the size of the partition table - 1).
-+ * @return: number of splits made (which equals the size of the partition table - 1).
-  */
--static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) {
--    seqStoreSplits splits = {partitions, 0};
-+static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
-+{
-+    seqStoreSplits splits;
-+    splits.splitLocations = partitions;
-+    splits.idx = 0;
-     if (nbSeq <= 4) {
--        DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split");
-+        DEBUGLOG(5, "ZSTD_deriveBlockSplits: Too few sequences to split (%u <= 4)", nbSeq);
-         /* Refuse to try and split anything with less than 4 sequences */
-         return 0;
-     }
-@@ -3550,18 +4104,20 @@ static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
-  * Returns combined size of all blocks (which includes headers), or a ZSTD error code.
-  */
- static size_t
--ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
--                                       const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq)
-+ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc,
-+                                    void* dst, size_t dstCapacity,
-+                              const void* src, size_t blockSize,
-+                                    U32 lastBlock, U32 nbSeq)
- {
-     size_t cSize = 0;
-     const BYTE* ip = (const BYTE*)src;
-     BYTE* op = (BYTE*)dst;
-     size_t i = 0;
-     size_t srcBytesTotal = 0;
--    U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
--    seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
--    seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore;
--    size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
-+    U32* const partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
-+    seqStore_t* const nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
-+    seqStore_t* const currSeqStore = &zc->blockSplitCtx.currSeqStore;
-+    size_t const numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
- 
-     /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
-      * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
-@@ -3583,30 +4139,31 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapac
-     ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
-     ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t));
- 
--    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
-+    DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
-                 (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
-                 (unsigned)zc->blockState.matchState.nextToUpdate);
- 
-     if (numSplits == 0) {
--        size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
--                                                                   &dRep, &cRep,
--                                                                    op, dstCapacity,
--                                                                    ip, blockSize,
--                                                                    lastBlock, 0 /* isPartition */);
-+        size_t cSizeSingleBlock =
-+            ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
-+                                            &dRep, &cRep,
-+                                            op, dstCapacity,
-+                                            ip, blockSize,
-+                                            lastBlock, 0 /* isPartition */);
-         FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
-         DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
--        assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
-+        assert(zc->blockSize <= ZSTD_BLOCKSIZE_MAX);
-+        assert(cSizeSingleBlock <= zc->blockSize + ZSTD_blockHeaderSize);
-         return cSizeSingleBlock;
-     }
- 
-     ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]);
-     for (i = 0; i <= numSplits; ++i) {
--        size_t srcBytes;
-         size_t cSizeChunk;
-         U32 const lastPartition = (i == numSplits);
-         U32 lastBlockEntireSrc = 0;
- 
--        srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
-+        size_t srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
-         srcBytesTotal += srcBytes;
-         if (lastPartition) {
-             /* This is the final partition, need to account for possible last literals */
-@@ -3621,7 +4178,8 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapac
-                                                        op, dstCapacity,
-                                                        ip, srcBytes,
-                                                        lastBlockEntireSrc, 1 /* isPartition */);
--        DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
-+        DEBUGLOG(5, "Estimated size: %zu vs %zu : actual size",
-+                    ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
-         FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
- 
-         ip += srcBytes;
-@@ -3629,10 +4187,10 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapac
-         dstCapacity -= cSizeChunk;
-         cSize += cSizeChunk;
-         *currSeqStore = *nextSeqStore;
--        assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
-+        assert(cSizeChunk <= zc->blockSize + ZSTD_blockHeaderSize);
-     }
--    /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes
--     * for the next block.
-+    /* cRep and dRep may have diverged during the compression.
-+     * If so, we use the dRep repcodes for the next block.
-      */
-     ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
-     return cSize;
-@@ -3643,8 +4201,6 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
-                               void* dst, size_t dstCapacity,
-                               const void* src, size_t srcSize, U32 lastBlock)
- {
--    const BYTE* ip = (const BYTE*)src;
--    BYTE* op = (BYTE*)dst;
-     U32 nbSeq;
-     size_t cSize;
-     DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
-@@ -3655,7 +4211,8 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
-         if (bss == ZSTDbss_noCompress) {
-             if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
-                 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
--            cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
-+            RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block");
-+            cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
-             FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
-             DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
-             return cSize;
-@@ -3673,9 +4230,9 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
-                             void* dst, size_t dstCapacity,
-                             const void* src, size_t srcSize, U32 frame)
- {
--    /* This the upper bound for the length of an rle block.
--     * This isn't the actual upper bound. Finding the real threshold
--     * needs further investigation.
-+    /* This is an estimated upper bound for the length of an rle block.
-+     * This isn't the actual upper bound.
-+     * Finding the real threshold needs further investigation.
-      */
-     const U32 rleMaxLength = 25;
-     size_t cSize;
-@@ -3687,11 +4244,15 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
- 
-     {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
-         FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
--        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
-+        if (bss == ZSTDbss_noCompress) {
-+            RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block");
-+            cSize = 0;
-+            goto out;
-+        }
-     }
- 
-     if (zc->seqCollector.collectSequences) {
--        ZSTD_copyBlockSequences(zc);
-+        FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, ZSTD_getSeqStore(zc), zc->blockState.prevCBlock->rep), "copyBlockSequences failed");
-         ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
-         return 0;
-     }
-@@ -3767,10 +4328,11 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
-          *   * cSize >= blockBound(srcSize): We have expanded the block too much so
-          *     emit an uncompressed block.
-          */
--        {
--            size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
-+        {   size_t const cSize =
-+                ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
-             if (cSize != ERROR(dstSize_tooSmall)) {
--                size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
-+                size_t const maxCSize =
-+                    srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
-                 FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
-                 if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
-                     ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
-@@ -3778,7 +4340,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
-                 }
-             }
-         }
--    }
-+    } /* if (bss == ZSTDbss_compress)*/
- 
-     DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
-     /* Superblock compression failed, attempt to emit a single no compress block.
-@@ -3836,7 +4398,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
- *   All blocks will be terminated, all input will be consumed.
- *   Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
- *   Frame is supposed already started (header already produced)
--*   @return : compressed size, or an error code
-+*  @return : compressed size, or an error code
- */
- static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
-                                      void* dst, size_t dstCapacity,
-@@ -3860,7 +4422,9 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
-         ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
-         U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
- 
--        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
-+        /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding
-+         * additional 1. We need to revisit and change this logic to be more consistent */
-+        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE + 1,
-                         dstSize_tooSmall,
-                         "not enough space to store compressed block");
-         if (remaining < blockSize) blockSize = remaining;
-@@ -3899,7 +4463,7 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
-                     MEM_writeLE24(op, cBlockHeader);
-                     cSize += ZSTD_blockHeaderSize;
-                 }
--            }
-+            }  /* if (ZSTD_useTargetCBlockSize(&cctx->appliedParams))*/
- 
- 
-             ip += blockSize;
-@@ -4001,19 +4565,15 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
-     }
- }
- 
--size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
-+void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
- {
--    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
--                    "wrong cctx stage");
--    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable,
--                    parameter_unsupported,
--                    "incompatible with ldm");
-+    assert(cctx->stage == ZSTDcs_init);
-+    assert(nbSeq == 0 || cctx->appliedParams.ldmParams.enableLdm != ZSTD_ps_enable);
-     cctx->externSeqStore.seq = seq;
-     cctx->externSeqStore.size = nbSeq;
-     cctx->externSeqStore.capacity = nbSeq;
-     cctx->externSeqStore.pos = 0;
-     cctx->externSeqStore.posInSequence = 0;
--    return 0;
- }
- 
- 
-@@ -4078,31 +4638,51 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
-     }
- }
- 
--size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
--                              void* dst, size_t dstCapacity,
--                        const void* src, size_t srcSize)
-+size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
-+                                        void* dst, size_t dstCapacity,
-+                                  const void* src, size_t srcSize)
- {
-     DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
-     return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
- }
- 
-+/* NOTE: Must just wrap ZSTD_compressContinue_public() */
-+size_t ZSTD_compressContinue(ZSTD_CCtx* cctx,
-+                             void* dst, size_t dstCapacity,
-+                       const void* src, size_t srcSize)
-+{
-+    return ZSTD_compressContinue_public(cctx, dst, dstCapacity, src, srcSize);
-+}
- 
--size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
-+static size_t ZSTD_getBlockSize_deprecated(const ZSTD_CCtx* cctx)
- {
-     ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
-     assert(!ZSTD_checkCParams(cParams));
--    return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
-+    return MIN(cctx->appliedParams.maxBlockSize, (size_t)1 << cParams.windowLog);
- }
- 
--size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-+/* NOTE: Must just wrap ZSTD_getBlockSize_deprecated() */
-+size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
-+{
-+    return ZSTD_getBlockSize_deprecated(cctx);
-+}
-+
-+/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */
-+size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
- {
-     DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
--    { size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
-+    { size_t const blockSizeMax = ZSTD_getBlockSize_deprecated(cctx);
-       RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); }
- 
-     return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
- }
- 
-+/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */
-+size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-+{
-+    return ZSTD_compressBlock_deprecated(cctx, dst, dstCapacity, src, srcSize);
-+}
-+
- /*! ZSTD_loadDictionaryContent() :
-  *  @return : 0, or an error code
-  */
-@@ -4111,25 +4691,36 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
-                                          ZSTD_cwksp* ws,
-                                          ZSTD_CCtx_params const* params,
-                                          const void* src, size_t srcSize,
--                                         ZSTD_dictTableLoadMethod_e dtlm)
-+                                         ZSTD_dictTableLoadMethod_e dtlm,
-+                                         ZSTD_tableFillPurpose_e tfp)
- {
-     const BYTE* ip = (const BYTE*) src;
-     const BYTE* const iend = ip + srcSize;
-     int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL;
- 
--    /* Assert that we the ms params match the params we're being given */
-+    /* Assert that the ms params match the params we're being given */
-     ZSTD_assertEqualCParams(params->cParams, ms->cParams);
- 
--    if (srcSize > ZSTD_CHUNKSIZE_MAX) {
-+    {   /* Ensure large dictionaries can't cause index overflow */
-+
-         /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
-          * Dictionaries right at the edge will immediately trigger overflow
-          * correction, but I don't want to insert extra constraints here.
-          */
--        U32 const maxDictSize = ZSTD_CURRENT_MAX - 1;
--        /* We must have cleared our windows when our source is this large. */
--        assert(ZSTD_window_isEmpty(ms->window));
--        if (loadLdmDict)
--            assert(ZSTD_window_isEmpty(ls->window));
-+        U32 maxDictSize = ZSTD_CURRENT_MAX - ZSTD_WINDOW_START_INDEX;
-+
-+        int const CDictTaggedIndices = ZSTD_CDictIndicesAreTagged(&params->cParams);
-+        if (CDictTaggedIndices && tfp == ZSTD_tfp_forCDict) {
-+            /* Some dictionary matchfinders in zstd use "short cache",
-+             * which treats the lower ZSTD_SHORT_CACHE_TAG_BITS of each
-+             * CDict hashtable entry as a tag rather than as part of an index.
-+             * When short cache is used, we need to truncate the dictionary
-+             * so that its indices don't overlap with the tag. */
-+            U32 const shortCacheMaxDictSize = (1u << (32 - ZSTD_SHORT_CACHE_TAG_BITS)) - ZSTD_WINDOW_START_INDEX;
-+            maxDictSize = MIN(maxDictSize, shortCacheMaxDictSize);
-+            assert(!loadLdmDict);
-+        }
-+
-         /* If the dictionary is too large, only load the suffix of the dictionary. */
-         if (srcSize > maxDictSize) {
-             ip = iend - maxDictSize;
-@@ -4138,35 +4729,58 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
-         }
-     }
- 
--    DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
-+    if (srcSize > ZSTD_CHUNKSIZE_MAX) {
-+        /* We must have cleared our windows when our source is this large. */
-+        assert(ZSTD_window_isEmpty(ms->window));
-+        if (loadLdmDict) assert(ZSTD_window_isEmpty(ls->window));
-+    }
-     ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
--    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
--    ms->forceNonContiguous = params->deterministicRefPrefix;
- 
--    if (loadLdmDict) {
-+    DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
-+
-+    if (loadLdmDict) { /* Load the entire dict into LDM matchfinders. */
-         ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
-         ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
-+        ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
-     }
- 
-+    /* If the dict is larger than we can reasonably index in our tables, only load the suffix. */
-+    if (params->cParams.strategy < ZSTD_btultra) {
-+        U32 maxDictSize = 8U << MIN(MAX(params->cParams.hashLog, params->cParams.chainLog), 28);
-+        if (srcSize > maxDictSize) {
-+            ip = iend - maxDictSize;
-+            src = ip;
-+            srcSize = maxDictSize;
-+        }
-+    }
-+
-+    ms->nextToUpdate = (U32)(ip - ms->window.base);
-+    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
-+    ms->forceNonContiguous = params->deterministicRefPrefix;
-+
-     if (srcSize <= HASH_READ_SIZE) return 0;
- 
-     ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);
- 
--    if (loadLdmDict)
--        ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
--
-     switch(params->cParams.strategy)
-     {
-     case ZSTD_fast:
--        ZSTD_fillHashTable(ms, iend, dtlm);
-+        ZSTD_fillHashTable(ms, iend, dtlm, tfp);
-         break;
-     case ZSTD_dfast:
--        ZSTD_fillDoubleHashTable(ms, iend, dtlm);
-+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
-+        ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp);
-+#else
-+        assert(0); /* shouldn't be called: cparams should've been adjusted. */
-+#endif
-         break;
- 
-     case ZSTD_greedy:
-     case ZSTD_lazy:
-     case ZSTD_lazy2:
-+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR)
-         assert(srcSize >= HASH_READ_SIZE);
-         if (ms->dedicatedDictSearch) {
-             assert(ms->chainTable != NULL);
-@@ -4174,7 +4788,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
-         } else {
-             assert(params->useRowMatchFinder != ZSTD_ps_auto);
-             if (params->useRowMatchFinder == ZSTD_ps_enable) {
--                size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
-+                size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog);
-                 ZSTD_memset(ms->tagTable, 0, tagTableSize);
-                 ZSTD_row_update(ms, iend-HASH_READ_SIZE);
-                 DEBUGLOG(4, "Using row-based hash table for lazy dict");
-@@ -4183,14 +4797,23 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
-                 DEBUGLOG(4, "Using chain-based hash table for lazy dict");
-             }
-         }
-+#else
-+        assert(0); /* shouldn't be called: cparams should've been adjusted. */
-+#endif
-         break;
- 
-     case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
-     case ZSTD_btopt:
-     case ZSTD_btultra:
-     case ZSTD_btultra2:
-+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
-         assert(srcSize >= HASH_READ_SIZE);
-         ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
-+#else
-+        assert(0); /* shouldn't be called: cparams should've been adjusted. */
-+#endif
-         break;
- 
-     default:
-@@ -4237,11 +4860,10 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
- 
-         /* We only set the loaded table as valid if it contains all non-zero
-          * weights. Otherwise, we set it to check */
--        if (!hasZeroWeights)
-+        if (!hasZeroWeights && maxSymbolValue == 255)
-             bs->entropy.huf.repeatMode = HUF_repeat_valid;
- 
-         RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
--        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
-         dictPtr += hufHeaderSize;
-     }
- 
-@@ -4327,6 +4949,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
-                                       ZSTD_CCtx_params const* params,
-                                       const void* dict, size_t dictSize,
-                                       ZSTD_dictTableLoadMethod_e dtlm,
-+                                      ZSTD_tableFillPurpose_e tfp,
-                                       void* workspace)
- {
-     const BYTE* dictPtr = (const BYTE*)dict;
-@@ -4345,7 +4968,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
-     {
-         size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
-         FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
--            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
-+            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm, tfp), "");
-     }
-     return dictID;
- }
-@@ -4361,6 +4984,7 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
-                          const void* dict, size_t dictSize,
-                                ZSTD_dictContentType_e dictContentType,
-                                ZSTD_dictTableLoadMethod_e dtlm,
-+                               ZSTD_tableFillPurpose_e tfp,
-                                void* workspace)
- {
-     DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
-@@ -4373,13 +4997,13 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
- 
-     /* dict restricted modes */
-     if (dictContentType == ZSTD_dct_rawContent)
--        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
-+        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm, tfp);
- 
-     if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
-         if (dictContentType == ZSTD_dct_auto) {
-             DEBUGLOG(4, "raw content dictionary detected");
-             return ZSTD_loadDictionaryContent(
--                ms, ls, ws, params, dict, dictSize, dtlm);
-+                ms, ls, ws, params, dict, dictSize, dtlm, tfp);
-         }
-         RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
-         assert(0);   /* impossible */
-@@ -4387,13 +5011,14 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
- 
-     /* dict as full zstd dictionary */
-     return ZSTD_loadZstdDictionary(
--        bs, ms, ws, params, dict, dictSize, dtlm, workspace);
-+        bs, ms, ws, params, dict, dictSize, dtlm, tfp, workspace);
- }
- 
- #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
- #define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)
- 
- /*! ZSTD_compressBegin_internal() :
-+ * Assumption : either @dict OR @cdict (or none) is non-NULL, never both
-  * @return : 0, or an error code */
- static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
-                                     const void* dict, size_t dictSize,
-@@ -4426,11 +5051,11 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
-                         cctx->blockState.prevCBlock, &cctx->blockState.matchState,
-                         &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
-                         cdict->dictContentSize, cdict->dictContentType, dtlm,
--                        cctx->entropyWorkspace)
-+                        ZSTD_tfp_forCCtx, cctx->entropyWorkspace)
-               : ZSTD_compress_insertDictionary(
-                         cctx->blockState.prevCBlock, &cctx->blockState.matchState,
-                         &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
--                        dictContentType, dtlm, cctx->entropyWorkspace);
-+                        dictContentType, dtlm, ZSTD_tfp_forCCtx, cctx->entropyWorkspace);
-         FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
-         assert(dictID <= UINT_MAX);
-         cctx->dictID = (U32)dictID;
-@@ -4471,11 +5096,11 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
-                                             &cctxParams, pledgedSrcSize);
- }
- 
--size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
-+static size_t
-+ZSTD_compressBegin_usingDict_deprecated(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
- {
-     ZSTD_CCtx_params cctxParams;
--    {
--        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
-+    {   ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
-         ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
-     }
-     DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
-@@ -4483,9 +5108,15 @@ size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t di
-                                        &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
- }
- 
-+size_t
-+ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
-+{
-+    return ZSTD_compressBegin_usingDict_deprecated(cctx, dict, dictSize, compressionLevel);
-+}
-+
- size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
- {
--    return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
-+    return ZSTD_compressBegin_usingDict_deprecated(cctx, NULL, 0, compressionLevel);
- }
- 
- 
-@@ -4496,14 +5127,13 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
- {
-     BYTE* const ostart = (BYTE*)dst;
-     BYTE* op = ostart;
--    size_t fhSize = 0;
- 
-     DEBUGLOG(4, "ZSTD_writeEpilogue");
-     RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
- 
-     /* special case : empty frame */
-     if (cctx->stage == ZSTDcs_init) {
--        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
-+        size_t fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
-         FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
-         dstCapacity -= fhSize;
-         op += fhSize;
-@@ -4513,8 +5143,9 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
-     if (cctx->stage != ZSTDcs_ending) {
-         /* write one last empty block, make it the "last" block */
-         U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
--        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
--        MEM_writeLE32(op, cBlockHeader24);
-+        ZSTD_STATIC_ASSERT(ZSTD_BLOCKHEADERSIZE == 3);
-+        RETURN_ERROR_IF(dstCapacity<3, dstSize_tooSmall, "no room for epilogue");
-+        MEM_writeLE24(op, cBlockHeader24);
-         op += ZSTD_blockHeaderSize;
-         dstCapacity -= ZSTD_blockHeaderSize;
-     }
-@@ -4537,9 +5168,9 @@ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
-     (void)extraCSize;
- }
- 
--size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
--                         void* dst, size_t dstCapacity,
--                   const void* src, size_t srcSize)
-+size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
-+                               void* dst, size_t dstCapacity,
-+                         const void* src, size_t srcSize)
- {
-     size_t endResult;
-     size_t const cSize = ZSTD_compressContinue_internal(cctx,
-@@ -4563,6 +5194,14 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
-     return cSize + endResult;
- }
- 
-+/* NOTE: Must just wrap ZSTD_compressEnd_public() */
-+size_t ZSTD_compressEnd(ZSTD_CCtx* cctx,
-+                        void* dst, size_t dstCapacity,
-+                  const void* src, size_t srcSize)
-+{
-+    return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
-+}
-+
- size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
-                                void* dst, size_t dstCapacity,
-                          const void* src, size_t srcSize,
-@@ -4591,7 +5230,7 @@ size_t ZSTD_compress_advanced_internal(
-     FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
-                          dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
-                          params, srcSize, ZSTDb_not_buffered) , "");
--    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
-+    return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
- }
- 
- size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
-@@ -4709,7 +5348,7 @@ static size_t ZSTD_initCDict_internal(
-         {   size_t const dictID = ZSTD_compress_insertDictionary(
-                     &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
-                     &params, cdict->dictContent, cdict->dictContentSize,
--                    dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
-+                    dictContentType, ZSTD_dtlm_full, ZSTD_tfp_forCDict, cdict->entropyWorkspace);
-             FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
-             assert(dictID <= (size_t)(U32)-1);
-             cdict->dictID = (U32)dictID;
-@@ -4811,7 +5450,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced2(
-                         cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
-                         customMem);
- 
--    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
-+    if (!cdict || ZSTD_isError( ZSTD_initCDict_internal(cdict,
-                                     dict, dictSize,
-                                     dictLoadMethod, dictContentType,
-                                     cctxParams) )) {
-@@ -4906,6 +5545,7 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
-     params.cParams = cParams;
-     params.useRowMatchFinder = useRowMatchFinder;
-     cdict->useRowMatchFinder = useRowMatchFinder;
-+    cdict->compressionLevel = ZSTD_NO_CLEVEL;
- 
-     if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
-                                               dict, dictSize,
-@@ -4985,12 +5625,17 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
- 
- /* ZSTD_compressBegin_usingCDict() :
-  * cdict must be != NULL */
--size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
-+size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
- {
-     ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
-     return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
- }
- 
-+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
-+{
-+    return ZSTD_compressBegin_usingCDict_deprecated(cctx, cdict);
-+}
-+
- /*! ZSTD_compress_usingCDict_internal():
-  * Implementation of various ZSTD_compress_usingCDict* functions.
-  */
-@@ -5000,7 +5645,7 @@ static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,
-                                 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
- {
-     FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
--    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
-+    return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
- }
- 
- /*! ZSTD_compress_usingCDict_advanced():
-@@ -5197,30 +5842,41 @@ size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
- 
- static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
- {
--    size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
--    if (hintInSize==0) hintInSize = cctx->blockSize;
--    return hintInSize;
-+    if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
-+        return cctx->blockSize - cctx->stableIn_notConsumed;
-+    }
-+    assert(cctx->appliedParams.inBufferMode == ZSTD_bm_buffered);
-+    {   size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
-+        if (hintInSize==0) hintInSize = cctx->blockSize;
-+        return hintInSize;
-+    }
- }
- 
- /* ZSTD_compressStream_generic():
-  *  internal function for all *compressStream*() variants
-- *  non-static, because can be called from zstdmt_compress.c
-- * @return : hint size for next input */
-+ * @return : hint size for next input to complete ongoing block */
- static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
-                                           ZSTD_outBuffer* output,
-                                           ZSTD_inBuffer* input,
-                                           ZSTD_EndDirective const flushMode)
- {
--    const char* const istart = (const char*)input->src;
--    const char* const iend = input->size != 0 ? istart + input->size : istart;
--    const char* ip = input->pos != 0 ? istart + input->pos : istart;
--    char* const ostart = (char*)output->dst;
--    char* const oend = output->size != 0 ? ostart + output->size : ostart;
--    char* op = output->pos != 0 ? ostart + output->pos : ostart;
-+    const char* const istart = (assert(input != NULL), (const char*)input->src);
-+    const char* const iend = (istart != NULL) ? istart + input->size : istart;
-+    const char* ip = (istart != NULL) ? istart + input->pos : istart;
-+    char* const ostart = (assert(output != NULL), (char*)output->dst);
-+    char* const oend = (ostart != NULL) ? ostart + output->size : ostart;
-+    char* op = (ostart != NULL) ? ostart + output->pos : ostart;
-     U32 someMoreWork = 1;
- 
-     /* check expectations */
--    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
-+    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%i, srcSize = %zu", (int)flushMode, input->size - input->pos);
-+    assert(zcs != NULL);
-+    if (zcs->appliedParams.inBufferMode == ZSTD_bm_stable) {
-+        assert(input->pos >= zcs->stableIn_notConsumed);
-+        input->pos -= zcs->stableIn_notConsumed;
-+        if (ip) ip -= zcs->stableIn_notConsumed;
-+        zcs->stableIn_notConsumed = 0;
-+    }
-     if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
-         assert(zcs->inBuff != NULL);
-         assert(zcs->inBuffSize > 0);
-@@ -5229,8 +5885,10 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
-         assert(zcs->outBuff !=  NULL);
-         assert(zcs->outBuffSize > 0);
-     }
--    assert(output->pos <= output->size);
-+    if (input->src == NULL) assert(input->size == 0);
-     assert(input->pos <= input->size);
-+    if (output->dst == NULL) assert(output->size == 0);
-+    assert(output->pos <= output->size);
-     assert((U32)flushMode <= (U32)ZSTD_e_end);
- 
-     while (someMoreWork) {
-@@ -5245,7 +5903,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
-                 || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)  /* OR we are allowed to return dstSizeTooSmall */
-               && (zcs->inBuffPos == 0) ) {
-                 /* shortcut to compression pass directly into output buffer */
--                size_t const cSize = ZSTD_compressEnd(zcs,
-+                size_t const cSize = ZSTD_compressEnd_public(zcs,
-                                                 op, oend-op, ip, iend-ip);
-                 DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
-                 FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
-@@ -5262,8 +5920,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
-                                         zcs->inBuff + zcs->inBuffPos, toLoad,
-                                         ip, iend-ip);
-                 zcs->inBuffPos += loaded;
--                if (loaded != 0)
--                    ip += loaded;
-+                if (ip) ip += loaded;
-                 if ( (flushMode == ZSTD_e_continue)
-                   && (zcs->inBuffPos < zcs->inBuffTarget) ) {
-                     /* not enough input to fill full block : stop here */
-@@ -5274,6 +5931,20 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
-                     /* empty */
-                     someMoreWork = 0; break;
-                 }
-+            } else {
-+                assert(zcs->appliedParams.inBufferMode == ZSTD_bm_stable);
-+                if ( (flushMode == ZSTD_e_continue)
-+                  && ( (size_t)(iend - ip) < zcs->blockSize) ) {
-+                    /* can't compress a full block : stop here */
-+                    zcs->stableIn_notConsumed = (size_t)(iend - ip);
-+                    ip = iend;  /* pretend to have consumed input */
-+                    someMoreWork = 0; break;
-+                }
-+                if ( (flushMode == ZSTD_e_flush)
-+                  && (ip == iend) ) {
-+                    /* empty */
-+                    someMoreWork = 0; break;
-+                }
-             }
-             /* compress current block (note : this stage cannot be stopped in the middle) */
-             DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
-@@ -5281,9 +5952,8 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
-                 void* cDst;
-                 size_t cSize;
-                 size_t oSize = oend-op;
--                size_t const iSize = inputBuffered
--                    ? zcs->inBuffPos - zcs->inToCompress
--                    : MIN((size_t)(iend - ip), zcs->blockSize);
-+                size_t const iSize = inputBuffered ? zcs->inBuffPos - zcs->inToCompress
-+                                                   : MIN((size_t)(iend - ip), zcs->blockSize);
-                 if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
-                     cDst = op;   /* compress into output buffer, to skip flush stage */
-                 else
-@@ -5291,9 +5961,9 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
-                 if (inputBuffered) {
-                     unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
-                     cSize = lastBlock ?
--                            ZSTD_compressEnd(zcs, cDst, oSize,
-+                            ZSTD_compressEnd_public(zcs, cDst, oSize,
-                                         zcs->inBuff + zcs->inToCompress, iSize) :
--                            ZSTD_compressContinue(zcs, cDst, oSize,
-+                            ZSTD_compressContinue_public(zcs, cDst, oSize,
-                                         zcs->inBuff + zcs->inToCompress, iSize);
-                     FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
-                     zcs->frameEnded = lastBlock;
-@@ -5306,19 +5976,16 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
-                     if (!lastBlock)
-                         assert(zcs->inBuffTarget <= zcs->inBuffSize);
-                     zcs->inToCompress = zcs->inBuffPos;
--                } else {
--                    unsigned const lastBlock = (ip + iSize == iend);
--                    assert(flushMode == ZSTD_e_end /* Already validated */);
-+                } else { /* !inputBuffered, hence ZSTD_bm_stable */
-+                    unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip + iSize == iend);
-                     cSize = lastBlock ?
--                            ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) :
--                            ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize);
-+                            ZSTD_compressEnd_public(zcs, cDst, oSize, ip, iSize) :
-+                            ZSTD_compressContinue_public(zcs, cDst, oSize, ip, iSize);
-                     /* Consume the input prior to error checking to mirror buffered mode. */
--                    if (iSize > 0)
--                        ip += iSize;
-+                    if (ip) ip += iSize;
-                     FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
-                     zcs->frameEnded = lastBlock;
--                    if (lastBlock)
--                        assert(ip == iend);
-+                    if (lastBlock) assert(ip == iend);
-                 }
-                 if (cDst == op) {  /* no need to flush */
-                     op += cSize;
-@@ -5388,8 +6055,10 @@ size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuf
- /* After a compression call set the expected input/output buffer.
-  * This is validated at the start of the next compression call.
-  */
--static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input)
-+static void
-+ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, const ZSTD_outBuffer* output, const ZSTD_inBuffer* input)
- {
-+    DEBUGLOG(5, "ZSTD_setBufferExpectations (for advanced stable in/out modes)");
-     if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
-         cctx->expectedInBuffer = *input;
-     }
-@@ -5408,22 +6077,22 @@ static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,
- {
-     if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
-         ZSTD_inBuffer const expect = cctx->expectedInBuffer;
--        if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size)
--            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!");
--        if (endOp != ZSTD_e_end)
--            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
-+        if (expect.src != input->src || expect.pos != input->pos)
-+            RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableInBuffer enabled but input differs!");
-     }
-+    (void)endOp;
-     if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
-         size_t const outBufferSize = output->size - output->pos;
-         if (cctx->expectedOutBufferSize != outBufferSize)
--            RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!");
-+            RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableOutBuffer enabled but output size differs!");
-     }
-     return 0;
- }
- 
- static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
-                                              ZSTD_EndDirective endOp,
--                                             size_t inSize) {
-+                                             size_t inSize)
-+{
-     ZSTD_CCtx_params params = cctx->requestedParams;
-     ZSTD_prefixDict const prefixDict = cctx->prefixDict;
-     FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
-@@ -5437,9 +6106,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
-         params.compressionLevel = cctx->cdict->compressionLevel;
-     }
-     DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
--    if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-fix pledgedSrcSize */
--    {
--        size_t const dictSize = prefixDict.dict
-+    if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-determine pledgedSrcSize */
-+
-+    {   size_t const dictSize = prefixDict.dict
-                 ? prefixDict.dictSize
-                 : (cctx->cdict ? cctx->cdict->dictContentSize : 0);
-         ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
-@@ -5451,6 +6120,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
-     params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, &params.cParams);
-     params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, &params.cParams);
-     params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);
-+    params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences);
-+    params.maxBlockSize = ZSTD_resolveMaxBlockSize(params.maxBlockSize);
-+    params.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(params.searchForExternalRepcodes, params.compressionLevel);
- 
-     {   U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
-         assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
-@@ -5477,6 +6149,8 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
-     return 0;
- }
- 
-+/* @return provides a minimum amount of data remaining to be flushed from internal buffers
-+ */
- size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
-                              ZSTD_outBuffer* output,
-                              ZSTD_inBuffer* input,
-@@ -5491,8 +6165,27 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
- 
-     /* transparent initialization stage */
-     if (cctx->streamStage == zcss_init) {
--        FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed");
--        ZSTD_setBufferExpectations(cctx, output, input);    /* Set initial buffer expectations now that we've initialized */
-+        size_t const inputSize = input->size - input->pos;  /* no obligation to start from pos==0 */
-+        size_t const totalInputSize = inputSize + cctx->stableIn_notConsumed;
-+        if ( (cctx->requestedParams.inBufferMode == ZSTD_bm_stable) /* input is presumed stable, across invocations */
-+          && (endOp == ZSTD_e_continue)                             /* no flush requested, more input to come */
-+          && (totalInputSize < ZSTD_BLOCKSIZE_MAX) ) {              /* not even reached one block yet */
-+            if (cctx->stableIn_notConsumed) {  /* not the first time */
-+                /* check stable source guarantees */
-+                RETURN_ERROR_IF(input->src != cctx->expectedInBuffer.src, stabilityCondition_notRespected, "stableInBuffer condition not respected: wrong src pointer");
-+                RETURN_ERROR_IF(input->pos != cctx->expectedInBuffer.size, stabilityCondition_notRespected, "stableInBuffer condition not respected: externally modified pos");
-+            }
-+            /* pretend input was consumed, to give a sense forward progress */
-+            input->pos = input->size;
-+            /* save stable inBuffer, for later control, and flush/end */
-+            cctx->expectedInBuffer = *input;
-+            /* but actually input wasn't consumed, so keep track of position from where compression shall resume */
-+            cctx->stableIn_notConsumed += inputSize;
-+            /* don't initialize yet, wait for the first block of flush() order, for better parameters adaptation */
-+            return ZSTD_FRAMEHEADERSIZE_MIN(cctx->requestedParams.format);  /* at least some header to produce */
-+        }
-+        FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, totalInputSize), "compressStream2 initialization failed");
-+        ZSTD_setBufferExpectations(cctx, output, input);   /* Set initial buffer expectations now that we've initialized */
-     }
-     /* end of transparent initialization stage */
- 
-@@ -5510,13 +6203,20 @@ size_t ZSTD_compressStream2_simpleArgs (
-                       const void* src, size_t srcSize, size_t* srcPos,
-                             ZSTD_EndDirective endOp)
- {
--    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
--    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
-+    ZSTD_outBuffer output;
-+    ZSTD_inBuffer  input;
-+    output.dst = dst;
-+    output.size = dstCapacity;
-+    output.pos = *dstPos;
-+    input.src = src;
-+    input.size = srcSize;
-+    input.pos = *srcPos;
-     /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
--    size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
--    *dstPos = output.pos;
--    *srcPos = input.pos;
--    return cErr;
-+    {   size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
-+        *dstPos = output.pos;
-+        *srcPos = input.pos;
-+        return cErr;
-+    }
- }
- 
- size_t ZSTD_compress2(ZSTD_CCtx* cctx,
-@@ -5539,6 +6239,7 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
-         /* Reset to the original values. */
-         cctx->requestedParams.inBufferMode = originalInBufferMode;
-         cctx->requestedParams.outBufferMode = originalOutBufferMode;
-+
-         FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");
-         if (result != 0) {  /* compression not completed, due to lack of output space */
-             assert(oPos == dstCapacity);
-@@ -5549,64 +6250,61 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
-     }
- }
- 
--typedef struct {
--    U32 idx;             /* Index in array of ZSTD_Sequence */
--    U32 posInSequence;   /* Position within sequence at idx */
--    size_t posInSrc;        /* Number of bytes given by sequences provided so far */
--} ZSTD_sequencePosition;
--
- /* ZSTD_validateSequence() :
-  * @offCode : is presumed to follow format required by ZSTD_storeSeq()
-  * @returns a ZSTD error code if sequence is not valid
-  */
- static size_t
--ZSTD_validateSequence(U32 offCode, U32 matchLength,
--                      size_t posInSrc, U32 windowLog, size_t dictSize)
-+ZSTD_validateSequence(U32 offCode, U32 matchLength, U32 minMatch,
-+                      size_t posInSrc, U32 windowLog, size_t dictSize, int useSequenceProducer)
- {
--    U32 const windowSize = 1 << windowLog;
-+    U32 const windowSize = 1u << windowLog;
-     /* posInSrc represents the amount of data the decoder would decode up to this point.
-      * As long as the amount of data decoded is less than or equal to window size, offsets may be
-      * larger than the total length of output decoded in order to reference the dict, even larger than
-      * window size. After output surpasses windowSize, we're limited to windowSize offsets again.
-      */
-     size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
--    RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!");
--    RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small");
-+    size_t const matchLenLowerBound = (minMatch == 3 || useSequenceProducer) ? 3 : 4;
-+    RETURN_ERROR_IF(offCode > OFFSET_TO_OFFBASE(offsetBound), externalSequences_invalid, "Offset too large!");
-+    /* Validate maxNbSeq is large enough for the given matchLength and minMatch */
-+    RETURN_ERROR_IF(matchLength < matchLenLowerBound, externalSequences_invalid, "Matchlength too small for the minMatch");
-     return 0;
- }
- 
- /* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
--static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)
-+static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)
- {
--    U32 offCode = STORE_OFFSET(rawOffset);
-+    U32 offBase = OFFSET_TO_OFFBASE(rawOffset);
- 
-     if (!ll0 && rawOffset == rep[0]) {
--        offCode = STORE_REPCODE_1;
-+        offBase = REPCODE1_TO_OFFBASE;
-     } else if (rawOffset == rep[1]) {
--        offCode = STORE_REPCODE(2 - ll0);
-+        offBase = REPCODE_TO_OFFBASE(2 - ll0);
-     } else if (rawOffset == rep[2]) {
--        offCode = STORE_REPCODE(3 - ll0);
-+        offBase = REPCODE_TO_OFFBASE(3 - ll0);
-     } else if (ll0 && rawOffset == rep[0] - 1) {
--        offCode = STORE_REPCODE_3;
-+        offBase = REPCODE3_TO_OFFBASE;
-     }
--    return offCode;
-+    return offBase;
- }
- 
--/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
-- * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
-- */
--static size_t
-+size_t
- ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
-                                               ZSTD_sequencePosition* seqPos,
-                                         const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
--                                        const void* src, size_t blockSize)
-+                                        const void* src, size_t blockSize,
-+                                        ZSTD_paramSwitch_e externalRepSearch)
- {
-     U32 idx = seqPos->idx;
-+    U32 const startIdx = idx;
-     BYTE const* ip = (BYTE const*)(src);
-     const BYTE* const iend = ip + blockSize;
-     repcodes_t updatedRepcodes;
-     U32 dictSize;
- 
-+    DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreExplicitBlockDelim (blockSize = %zu)", blockSize);
-+
-     if (cctx->cdict) {
-         dictSize = (U32)cctx->cdict->dictContentSize;
-     } else if (cctx->prefixDict.dict) {
-@@ -5615,25 +6313,55 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
-         dictSize = 0;
-     }
-     ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
--    for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) {
-+    for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) {
-         U32 const litLength = inSeqs[idx].litLength;
--        U32 const ll0 = (litLength == 0);
-         U32 const matchLength = inSeqs[idx].matchLength;
--        U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
--        ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
-+        U32 offBase;
-+
-+        if (externalRepSearch == ZSTD_ps_disable) {
-+            offBase = OFFSET_TO_OFFBASE(inSeqs[idx].offset);
-+        } else {
-+            U32 const ll0 = (litLength == 0);
-+            offBase = ZSTD_finalizeOffBase(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
-+            ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
-+        }
- 
--        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
-+        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
-         if (cctx->appliedParams.validateSequences) {
-             seqPos->posInSrc += litLength + matchLength;
--            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
--                                                cctx->appliedParams.cParams.windowLog, dictSize),
-+            FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
-+                                                cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)),
-                                                 "Sequence validation failed");
-         }
--        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
-+        RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
-                         "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
--        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
-+        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);
-         ip += matchLength + litLength;
-     }
-+
-+    /* If we skipped repcode search while parsing, we need to update repcodes now */
-+    assert(externalRepSearch != ZSTD_ps_auto);
-+    assert(idx >= startIdx);
-+    if (externalRepSearch == ZSTD_ps_disable && idx != startIdx) {
-+        U32* const rep = updatedRepcodes.rep;
-+        U32 lastSeqIdx = idx - 1; /* index of last non-block-delimiter sequence */
-+
-+        if (lastSeqIdx >= startIdx + 2) {
-+            rep[2] = inSeqs[lastSeqIdx - 2].offset;
-+            rep[1] = inSeqs[lastSeqIdx - 1].offset;
-+            rep[0] = inSeqs[lastSeqIdx].offset;
-+        } else if (lastSeqIdx == startIdx + 1) {
-+            rep[2] = rep[0];
-+            rep[1] = inSeqs[lastSeqIdx - 1].offset;
-+            rep[0] = inSeqs[lastSeqIdx].offset;
-+        } else {
-+            assert(lastSeqIdx == startIdx);
-+            rep[2] = rep[1];
-+            rep[1] = rep[0];
-+            rep[0] = inSeqs[lastSeqIdx].offset;
-+        }
-+    }
-+
-     ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
- 
-     if (inSeqs[idx].litLength) {
-@@ -5642,26 +6370,15 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
-         ip += inSeqs[idx].litLength;
-         seqPos->posInSrc += inSeqs[idx].litLength;
-     }
--    RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!");
-+    RETURN_ERROR_IF(ip != iend, externalSequences_invalid, "Blocksize doesn't agree with block delimiter!");
-     seqPos->idx = idx+1;
-     return 0;
- }
- 
--/* Returns the number of bytes to move the current read position back by. Only non-zero
-- * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something
-- * went wrong.
-- *
-- * This function will attempt to scan through blockSize bytes represented by the sequences
-- * in inSeqs, storing any (partial) sequences.
-- *
-- * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
-- * avoid splitting a match, or to avoid splitting a match such that it would produce a match
-- * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
-- */
--static size_t
-+size_t
- ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
-                                    const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
--                                   const void* src, size_t blockSize)
-+                                   const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch)
- {
-     U32 idx = seqPos->idx;
-     U32 startPosInSequence = seqPos->posInSequence;
-@@ -5673,6 +6390,9 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
-     U32 bytesAdjustment = 0;
-     U32 finalMatchSplit = 0;
- 
-+    /* TODO(embg) support fast parsing mode in noBlockDelim mode */
-+    (void)externalRepSearch;
-+
-     if (cctx->cdict) {
-         dictSize = cctx->cdict->dictContentSize;
-     } else if (cctx->prefixDict.dict) {
-@@ -5680,7 +6400,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
-     } else {
-         dictSize = 0;
-     }
--    DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
-+    DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreNoBlockDelim: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
-     DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
-     ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
-     while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
-@@ -5688,7 +6408,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
-         U32 litLength = currSeq.litLength;
-         U32 matchLength = currSeq.matchLength;
-         U32 const rawOffset = currSeq.offset;
--        U32 offCode;
-+        U32 offBase;
- 
-         /* Modify the sequence depending on where endPosInSequence lies */
-         if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
-@@ -5702,7 +6422,6 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
-             /* Move to the next sequence */
-             endPosInSequence -= currSeq.litLength + currSeq.matchLength;
-             startPosInSequence = 0;
--            idx++;
-         } else {
-             /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
-                does not reach the end of the match. So, we have to split the sequence */
-@@ -5742,21 +6461,23 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
-         }
-         /* Check if this offset can be represented with a repcode */
-         {   U32 const ll0 = (litLength == 0);
--            offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
--            ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
-+            offBase = ZSTD_finalizeOffBase(rawOffset, updatedRepcodes.rep, ll0);
-+            ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
-         }
- 
-         if (cctx->appliedParams.validateSequences) {
-             seqPos->posInSrc += litLength + matchLength;
--            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
--                                                   cctx->appliedParams.cParams.windowLog, dictSize),
-+            FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
-+                                                   cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)),
-                                                    "Sequence validation failed");
-         }
--        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
--        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
-+        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
-+        RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
-                         "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
--        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
-+        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);
-         ip += matchLength + litLength;
-+        if (!finalMatchSplit)
-+            idx++; /* Next Sequence */
-     }
-     DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
-     assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
-@@ -5779,7 +6500,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
- 
- typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
-                                        const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
--                                       const void* src, size_t blockSize);
-+                                       const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
- static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
- {
-     ZSTD_sequenceCopier sequenceCopier = NULL;
-@@ -5793,6 +6514,57 @@ static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
-     return sequenceCopier;
- }
- 
-+/* Discover the size of next block by searching for the delimiter.
-+ * Note that a block delimiter **must** exist in this mode,
-+ * otherwise it's an input error.
-+ * The block size retrieved will be later compared to ensure it remains within bounds */
-+static size_t
-+blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos)
-+{
-+    int end = 0;
-+    size_t blockSize = 0;
-+    size_t spos = seqPos.idx;
-+    DEBUGLOG(6, "blockSize_explicitDelimiter : seq %zu / %zu", spos, inSeqsSize);
-+    assert(spos <= inSeqsSize);
-+    while (spos < inSeqsSize) {
-+        end = (inSeqs[spos].offset == 0);
-+        blockSize += inSeqs[spos].litLength + inSeqs[spos].matchLength;
-+        if (end) {
-+            if (inSeqs[spos].matchLength != 0)
-+                RETURN_ERROR(externalSequences_invalid, "delimiter format error : both matchlength and offset must be == 0");
-+            break;
-+        }
-+        spos++;
-+    }
-+    if (!end)
-+        RETURN_ERROR(externalSequences_invalid, "Reached end of sequences without finding a block delimiter");
-+    return blockSize;
-+}
-+
-+/* More a "target" block size */
-+static size_t blockSize_noDelimiter(size_t blockSize, size_t remaining)
-+{
-+    int const lastBlock = (remaining <= blockSize);
-+    return lastBlock ? remaining : blockSize;
-+}
-+
-+static size_t determine_blockSize(ZSTD_sequenceFormat_e mode,
-+                           size_t blockSize, size_t remaining,
-+                     const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos)
-+{
-+    DEBUGLOG(6, "determine_blockSize : remainingSize = %zu", remaining);
-+    if (mode == ZSTD_sf_noBlockDelimiters)
-+        return blockSize_noDelimiter(blockSize, remaining);
-+    {   size_t const explicitBlockSize = blockSize_explicitDelimiter(inSeqs, inSeqsSize, seqPos);
-+        FORWARD_IF_ERROR(explicitBlockSize, "Error while determining block size with explicit delimiters");
-+        if (explicitBlockSize > blockSize)
-+            RETURN_ERROR(externalSequences_invalid, "sequences incorrectly define a too large block");
-+        if (explicitBlockSize > remaining)
-+            RETURN_ERROR(externalSequences_invalid, "sequences define a frame longer than source");
-+        return explicitBlockSize;
-+    }
-+}
-+
- /* Compress, block-by-block, all of the sequences given.
-  *
-  * Returns the cumulative size of all compressed blocks (including their headers),
-@@ -5805,9 +6577,6 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
-                           const void* src, size_t srcSize)
- {
-     size_t cSize = 0;
--    U32 lastBlock;
--    size_t blockSize;
--    size_t compressedSeqsSize;
-     size_t remaining = srcSize;
-     ZSTD_sequencePosition seqPos = {0, 0, 0};
- 
-@@ -5827,22 +6596,29 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
-     }
- 
-     while (remaining) {
-+        size_t compressedSeqsSize;
-         size_t cBlockSize;
-         size_t additionalByteAdjustment;
--        lastBlock = remaining <= cctx->blockSize;
--        blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
-+        size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters,
-+                                        cctx->blockSize, remaining,
-+                                        inSeqs, inSeqsSize, seqPos);
-+        U32 const lastBlock = (blockSize == remaining);
-+        FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
-+        assert(blockSize <= remaining);
-         ZSTD_resetSeqStore(&cctx->seqStore);
--        DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize);
-+        DEBUGLOG(5, "Working on new block. Blocksize: %zu (total:%zu)", blockSize, (ip - (const BYTE*)src) + blockSize);
- 
--        additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
-+        additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize, cctx->appliedParams.searchForExternalRepcodes);
-         FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
-         blockSize -= additionalByteAdjustment;
- 
-         /* If blocks are too small, emit as a nocompress block */
--        if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
-+        /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding
-+         * additional 1. We need to revisit and change this logic to be more consistent */
-+        if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) {
-             cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
-             FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
--            DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
-+            DEBUGLOG(5, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
-             cSize += cBlockSize;
-             ip += blockSize;
-             op += cBlockSize;
-@@ -5851,6 +6627,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
-             continue;
-         }
- 
-+        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block");
-         compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,
-                                 &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
-                                 &cctx->appliedParams,
-@@ -5859,11 +6636,11 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
-                                 cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
-                                 cctx->bmi2);
-         FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
--        DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);
-+        DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize);
- 
-         if (!cctx->isFirstBlock &&
-             ZSTD_maybeRLE(&cctx->seqStore) &&
--            ZSTD_isRLE((BYTE const*)src, srcSize)) {
-+            ZSTD_isRLE(ip, blockSize)) {
-             /* We don't want to emit our first block as a RLE even if it qualifies because
-             * doing so will cause the decoder (cli only) to throw a "should consume all input error."
-             * This is only an issue for zstd <= v1.4.3
-@@ -5874,12 +6651,12 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
-         if (compressedSeqsSize == 0) {
-             /* ZSTD_noCompressBlock writes the block header as well */
-             cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
--            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
--            DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize);
-+            FORWARD_IF_ERROR(cBlockSize, "ZSTD_noCompressBlock failed");
-+            DEBUGLOG(5, "Writing out nocompress block, size: %zu", cBlockSize);
-         } else if (compressedSeqsSize == 1) {
-             cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
--            FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed");
--            DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize);
-+            FORWARD_IF_ERROR(cBlockSize, "ZSTD_rleCompressBlock failed");
-+            DEBUGLOG(5, "Writing out RLE block, size: %zu", cBlockSize);
-         } else {
-             U32 cBlockHeader;
-             /* Error checking and repcodes update */
-@@ -5891,11 +6668,10 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
-             cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
-             MEM_writeLE24(op, cBlockHeader);
-             cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
--            DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize);
-+            DEBUGLOG(5, "Writing out compressed block, size: %zu", cBlockSize);
-         }
- 
-         cSize += cBlockSize;
--        DEBUGLOG(4, "cSize running total: %zu", cSize);
- 
-         if (lastBlock) {
-             break;
-@@ -5906,12 +6682,15 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
-             dstCapacity -= cBlockSize;
-             cctx->isFirstBlock = 0;
-         }
-+        DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity);
-     }
- 
-+    DEBUGLOG(4, "cSize final total: %zu", cSize);
-     return cSize;
- }
- 
--size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
-+size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
-+                              void* dst, size_t dstCapacity,
-                               const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
-                               const void* src, size_t srcSize)
- {
-@@ -5921,7 +6700,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapaci
-     size_t frameHeaderSize = 0;
- 
-     /* Transparent initialization stage, same as compressStream2() */
--    DEBUGLOG(3, "ZSTD_compressSequences()");
-+    DEBUGLOG(4, "ZSTD_compressSequences (dstCapacity=%zu)", dstCapacity);
-     assert(cctx != NULL);
-     FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
-     /* Begin writing output, starting with frame header */
-@@ -5949,26 +6728,34 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapaci
-         cSize += 4;
-     }
- 
--    DEBUGLOG(3, "Final compressed size: %zu", cSize);
-+    DEBUGLOG(4, "Final compressed size: %zu", cSize);
-     return cSize;
- }
- 
- /*======   Finalize   ======*/
- 
-+static ZSTD_inBuffer inBuffer_forEndFlush(const ZSTD_CStream* zcs)
-+{
-+    const ZSTD_inBuffer nullInput = { NULL, 0, 0 };
-+    const int stableInput = (zcs->appliedParams.inBufferMode == ZSTD_bm_stable);
-+    return stableInput ? zcs->expectedInBuffer : nullInput;
-+}
-+
- /*! ZSTD_flushStream() :
-  * @return : amount of data remaining to flush */
- size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
- {
--    ZSTD_inBuffer input = { NULL, 0, 0 };
-+    ZSTD_inBuffer input = inBuffer_forEndFlush(zcs);
-+    input.size = input.pos; /* do not ingest more input during flush */
-     return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
- }
- 
- 
- size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
- {
--    ZSTD_inBuffer input = { NULL, 0, 0 };
-+    ZSTD_inBuffer input = inBuffer_forEndFlush(zcs);
-     size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
--    FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed");
-+    FORWARD_IF_ERROR(remainingToFlush , "ZSTD_compressStream2(,,ZSTD_e_end) failed");
-     if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush;   /* minimal estimation */
-     /* single thread mode : attempt to calculate remaining to flush more precisely */
-     {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
-@@ -6090,7 +6877,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel,
-             cp.targetLength = (unsigned)(-clampedCompressionLevel);
-         }
-         /* refine parameters based on srcSize & dictSize */
--        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
-+        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode, ZSTD_ps_auto);
-     }
- }
- 
-@@ -6125,3 +6912,29 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH
-     if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
-     return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
- }
-+
-+void ZSTD_registerSequenceProducer(
-+    ZSTD_CCtx* zc,
-+    void* extSeqProdState,
-+    ZSTD_sequenceProducer_F extSeqProdFunc
-+) {
-+    assert(zc != NULL);
-+    ZSTD_CCtxParams_registerSequenceProducer(
-+        &zc->requestedParams, extSeqProdState, extSeqProdFunc
-+    );
-+}
-+
-+void ZSTD_CCtxParams_registerSequenceProducer(
-+  ZSTD_CCtx_params* params,
-+  void* extSeqProdState,
-+  ZSTD_sequenceProducer_F extSeqProdFunc
-+) {
-+    assert(params != NULL);
-+    if (extSeqProdFunc != NULL) {
-+        params->extSeqProdFunc = extSeqProdFunc;
-+        params->extSeqProdState = extSeqProdState;
-+    } else {
-+        params->extSeqProdFunc = NULL;
-+        params->extSeqProdState = NULL;
-+    }
-+}
-diff --git a/lib/zstd/compress/zstd_compress_internal.h b/lib/zstd/compress/zstd_compress_internal.h
-index 71697a11ae30..53cb582a8d2b 100644
---- a/lib/zstd/compress/zstd_compress_internal.h
-+++ b/lib/zstd/compress/zstd_compress_internal.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -20,6 +21,7 @@
- ***************************************/
- #include "../common/zstd_internal.h"
- #include "zstd_cwksp.h"
-+#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */
- 
- 
- /*-*************************************
-@@ -32,7 +34,7 @@
-                                        It's not a big deal though : candidate will just be sorted again.
-                                        Additionally, candidate position 1 will be lost.
-                                        But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
--                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
-+                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy.
-                                        This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
- 
- 
-@@ -111,12 +113,13 @@ typedef struct {
- /* ZSTD_buildBlockEntropyStats() :
-  *  Builds entropy for the block.
-  *  @return : 0 on success or error code */
--size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
--                             const ZSTD_entropyCTables_t* prevEntropy,
--                                   ZSTD_entropyCTables_t* nextEntropy,
--                             const ZSTD_CCtx_params* cctxParams,
--                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
--                                   void* workspace, size_t wkspSize);
-+size_t ZSTD_buildBlockEntropyStats(
-+                    const seqStore_t* seqStorePtr,
-+                    const ZSTD_entropyCTables_t* prevEntropy,
-+                          ZSTD_entropyCTables_t* nextEntropy,
-+                    const ZSTD_CCtx_params* cctxParams,
-+                          ZSTD_entropyCTablesMetadata_t* entropyMetadata,
-+                          void* workspace, size_t wkspSize);
- 
- /* *******************************
- *  Compression internals structs *
-@@ -142,26 +145,33 @@ typedef struct {
-   size_t capacity;      /* The capacity starting from `seq` pointer */
- } rawSeqStore_t;
- 
-+typedef struct {
-+    U32 idx;            /* Index in array of ZSTD_Sequence */
-+    U32 posInSequence;  /* Position within sequence at idx */
-+    size_t posInSrc;    /* Number of bytes given by sequences provided so far */
-+} ZSTD_sequencePosition;
-+
- UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
- 
- typedef struct {
--    int price;
--    U32 off;
--    U32 mlen;
--    U32 litlen;
--    U32 rep[ZSTD_REP_NUM];
-+    int price;  /* price from beginning of segment to this position */
-+    U32 off;    /* offset of previous match */
-+    U32 mlen;   /* length of previous match */
-+    U32 litlen; /* nb of literals since previous match */
-+    U32 rep[ZSTD_REP_NUM];  /* offset history after previous match */
- } ZSTD_optimal_t;
- 
- typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
- 
-+#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3)
- typedef struct {
-     /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
-     unsigned* litFreq;           /* table of literals statistics, of size 256 */
-     unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
-     unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
-     unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
--    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
--    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
-+    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_SIZE */
-+    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */
- 
-     U32  litSum;                 /* nb of literals */
-     U32  litLengthSum;           /* nb of litLength codes */
-@@ -212,8 +222,10 @@ struct ZSTD_matchState_t {
-     U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
- 
-     U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
--    U16* tagTable;                           /* For row-based matchFinder: A row-based table containing the hashes and head index. */
-+    BYTE* tagTable;                          /* For row-based matchFinder: A row-based table containing the hashes and head index. */
-     U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
-+    U64 hashSalt;                            /* For row-based matchFinder: salts the hash for reuse of tag table */
-+    U32 hashSaltEntropy;                     /* For row-based matchFinder: collects entropy for salt generation */
- 
-     U32* hashTable;
-     U32* hashTable3;
-@@ -228,6 +240,18 @@ struct ZSTD_matchState_t {
-     const ZSTD_matchState_t* dictMatchState;
-     ZSTD_compressionParameters cParams;
-     const rawSeqStore_t* ldmSeqStore;
-+
-+    /* Controls prefetching in some dictMatchState matchfinders.
-+     * This behavior is controlled from the cctx ms.
-+     * This parameter has no effect in the cdict ms. */
-+    int prefetchCDictTables;
-+
-+    /* When == 0, lazy match finders insert every position.
-+     * When != 0, lazy match finders only insert positions they search.
-+     * This allows them to skip much faster over incompressible data,
-+     * at a small cost to compression ratio.
-+     */
-+    int lazySkipping;
- };
- 
- typedef struct {
-@@ -324,6 +348,25 @@ struct ZSTD_CCtx_params_s {
- 
-     /* Internal use, for createCCtxParams() and freeCCtxParams() only */
-     ZSTD_customMem customMem;
-+
-+    /* Controls prefetching in some dictMatchState matchfinders */
-+    ZSTD_paramSwitch_e prefetchCDictTables;
-+
-+    /* Controls whether zstd will fall back to an internal matchfinder
-+     * if the external matchfinder returns an error code. */
-+    int enableMatchFinderFallback;
-+
-+    /* Parameters for the external sequence producer API.
-+     * Users set these parameters through ZSTD_registerSequenceProducer().
-+     * It is not possible to set these parameters individually through the public API. */
-+    void* extSeqProdState;
-+    ZSTD_sequenceProducer_F extSeqProdFunc;
-+
-+    /* Adjust the max block size*/
-+    size_t maxBlockSize;
-+
-+    /* Controls repcode search in external sequence parsing */
-+    ZSTD_paramSwitch_e searchForExternalRepcodes;
- };  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
- 
- #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
-@@ -404,6 +447,7 @@ struct ZSTD_CCtx_s {
- 
-     /* Stable in/out buffer verification */
-     ZSTD_inBuffer expectedInBuffer;
-+    size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */
-     size_t expectedOutBufferSize;
- 
-     /* Dictionary */
-@@ -417,9 +461,14 @@ struct ZSTD_CCtx_s {
- 
-     /* Workspace for block splitter */
-     ZSTD_blockSplitCtx blockSplitCtx;
-+
-+    /* Buffer for output from external sequence producer */
-+    ZSTD_Sequence* extSeqBuf;
-+    size_t extSeqBufCapacity;
- };
- 
- typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
-+typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e;
- 
- typedef enum {
-     ZSTD_noDict = 0,
-@@ -441,7 +490,7 @@ typedef enum {
-                                  * In this mode we take both the source size and the dictionary size
-                                  * into account when selecting and adjusting the parameters.
-                                  */
--    ZSTD_cpm_unknown = 3,       /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
-+    ZSTD_cpm_unknown = 3        /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
-                                  * We don't know what these parameters are for. We default to the legacy
-                                  * behavior of taking both the source size and the dict size into account
-                                  * when selecting and adjusting parameters.
-@@ -500,9 +549,11 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
- /* ZSTD_noCompressBlock() :
-  * Writes uncompressed block to dst buffer from given src.
-  * Returns the size of the block */
--MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
-+MEM_STATIC size_t
-+ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
- {
-     U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
-+    DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity);
-     RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
-                     dstSize_tooSmall, "dst buf too small for uncompressed block");
-     MEM_writeLE24(dst, cBlockHeader24);
-@@ -510,7 +561,8 @@ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const voi
-     return ZSTD_blockHeaderSize + srcSize;
- }
- 
--MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
-+MEM_STATIC size_t
-+ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
- {
-     BYTE* const op = (BYTE*)dst;
-     U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
-@@ -529,7 +581,7 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
- {
-     U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
-     ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
--    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
-+    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat));
-     return (srcSize >> minlog) + 2;
- }
- 
-@@ -565,29 +617,27 @@ ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE con
-     while (ip < iend) *op++ = *ip++;
- }
- 
--#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
--#define STORE_REPCODE_1 STORE_REPCODE(1)
--#define STORE_REPCODE_2 STORE_REPCODE(2)
--#define STORE_REPCODE_3 STORE_REPCODE(3)
--#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
--#define STORE_OFFSET(o)  (assert((o)>0), o + ZSTD_REP_MOVE)
--#define STORED_IS_OFFSET(o)  ((o) > ZSTD_REP_MOVE)
--#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
--#define STORED_OFFSET(o)  (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
--#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1)  /* returns ID 1,2,3 */
--#define STORED_TO_OFFBASE(o) ((o)+1)
--#define OFFBASE_TO_STORED(o) ((o)-1)
-+
-+#define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1)
-+#define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2)
-+#define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3)
-+#define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */
-+#define OFFSET_TO_OFFBASE(o)  (assert((o)>0), o + ZSTD_REP_NUM)
-+#define OFFBASE_IS_OFFSET(o)  ((o) > ZSTD_REP_NUM)
-+#define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM)
-+#define OFFBASE_TO_OFFSET(o)  (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
-+#define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o))  /* returns ID 1,2,3 */
- 
- /*! ZSTD_storeSeq() :
-- *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
-- *  @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET().
-+ *  Store a sequence (litlen, litPtr, offBase and matchLength) into seqStore_t.
-+ *  @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
-  *  @matchLength : must be >= MINMATCH
-- *  Allowed to overread literals up to litLimit.
-+ *  Allowed to over-read literals up to litLimit.
- */
- HINT_INLINE UNUSED_ATTR void
- ZSTD_storeSeq(seqStore_t* seqStorePtr,
-               size_t litLength, const BYTE* literals, const BYTE* litLimit,
--              U32 offBase_minus1,
-+              U32 offBase,
-               size_t matchLength)
- {
-     BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
-@@ -596,8 +646,8 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
-     static const BYTE* g_start = NULL;
-     if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
-     {   U32 const pos = (U32)((const BYTE*)literals - g_start);
--        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
--               pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
-+        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u",
-+               pos, (U32)litLength, (U32)matchLength, (U32)offBase);
-     }
- #endif
-     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
-@@ -607,9 +657,9 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
-     assert(literals + litLength <= litLimit);
-     if (litEnd <= litLimit_w) {
-         /* Common case we can use wildcopy.
--	 * First copy 16 bytes, because literals are likely short.
--	 */
--        assert(WILDCOPY_OVERLENGTH >= 16);
-+         * First copy 16 bytes, because literals are likely short.
-+         */
-+        ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16);
-         ZSTD_copy16(seqStorePtr->lit, literals);
-         if (litLength > 16) {
-             ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
-@@ -628,7 +678,7 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
-     seqStorePtr->sequences[0].litLength = (U16)litLength;
- 
-     /* match offset */
--    seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);
-+    seqStorePtr->sequences[0].offBase = offBase;
- 
-     /* match Length */
-     assert(matchLength >= MINMATCH);
-@@ -646,17 +696,17 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
- 
- /* ZSTD_updateRep() :
-  * updates in-place @rep (array of repeat offsets)
-- * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
-+ * @offBase : sum-type, using numeric representation of ZSTD_storeSeq()
-  */
- MEM_STATIC void
--ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
-+ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
- {
--    if (STORED_IS_OFFSET(offBase_minus1)) {  /* full offset */
-+    if (OFFBASE_IS_OFFSET(offBase)) {  /* full offset */
-         rep[2] = rep[1];
-         rep[1] = rep[0];
--        rep[0] = STORED_OFFSET(offBase_minus1);
-+        rep[0] = OFFBASE_TO_OFFSET(offBase);
-     } else {   /* repcode */
--        U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
-+        U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0;
-         if (repCode > 0) {  /* note : if repCode==0, no change */
-             U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
-             rep[2] = (repCode >= 2) ? rep[1] : rep[2];
-@@ -673,11 +723,11 @@ typedef struct repcodes_s {
- } repcodes_t;
- 
- MEM_STATIC repcodes_t
--ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
-+ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
- {
-     repcodes_t newReps;
-     ZSTD_memcpy(&newReps, rep, sizeof(newReps));
--    ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
-+    ZSTD_updateRep(newReps.rep, offBase, ll0);
-     return newReps;
- }
- 
-@@ -685,59 +735,6 @@ ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0
- /*-*************************************
- *  Match length counter
- ***************************************/
--static unsigned ZSTD_NbCommonBytes (size_t val)
--{
--    if (MEM_isLittleEndian()) {
--        if (MEM_64bits()) {
--#       if (__GNUC__ >= 4)
--            return (__builtin_ctzll((U64)val) >> 3);
--#       else
--            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
--                                                     0, 3, 1, 3, 1, 4, 2, 7,
--                                                     0, 2, 3, 6, 1, 5, 3, 5,
--                                                     1, 3, 4, 4, 2, 5, 6, 7,
--                                                     7, 0, 1, 2, 3, 3, 4, 6,
--                                                     2, 6, 5, 5, 3, 4, 5, 6,
--                                                     7, 1, 2, 4, 6, 4, 4, 5,
--                                                     7, 2, 6, 5, 7, 6, 7, 7 };
--            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
--#       endif
--        } else { /* 32 bits */
--#       if (__GNUC__ >= 3)
--            return (__builtin_ctz((U32)val) >> 3);
--#       else
--            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
--                                                     3, 2, 2, 1, 3, 2, 0, 1,
--                                                     3, 3, 1, 2, 2, 2, 2, 0,
--                                                     3, 1, 2, 0, 1, 0, 1, 1 };
--            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
--#       endif
--        }
--    } else {  /* Big Endian CPU */
--        if (MEM_64bits()) {
--#       if (__GNUC__ >= 4)
--            return (__builtin_clzll(val) >> 3);
--#       else
--            unsigned r;
--            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
--            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
--            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
--            r += (!val);
--            return r;
--#       endif
--        } else { /* 32 bits */
--#       if (__GNUC__ >= 3)
--            return (__builtin_clz((U32)val) >> 3);
--#       else
--            unsigned r;
--            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
--            r += (!val);
--            return r;
--#       endif
--    }   }
--}
--
--
- MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
- {
-     const BYTE* const pStart = pIn;
-@@ -783,32 +780,43 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
-  *  Hashes
-  ***************************************/
- static const U32 prime3bytes = 506832829U;
--static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
--MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
-+static U32    ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s)  >> (32-h) ; }
-+MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
-+MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }
- 
- static const U32 prime4bytes = 2654435761U;
--static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
--static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
-+static U32    ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
-+static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
-+static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }
- 
- static const U64 prime5bytes = 889523592379ULL;
--static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
--static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
-+static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
-+static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
-+static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }
- 
- static const U64 prime6bytes = 227718039650203ULL;
--static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
--static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
-+static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
-+static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
-+static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }
- 
- static const U64 prime7bytes = 58295818150454627ULL;
--static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
--static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
-+static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
-+static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
-+static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }
- 
- static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
--static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
--static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
-+static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes)  ^ s) >> (64-h)) ; }
-+static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
-+static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }
-+
- 
- MEM_STATIC FORCE_INLINE_ATTR
- size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
- {
-+    /* Although some of these hashes do support hBits up to 64, some do not.
-+     * To be on the safe side, always avoid hBits > 32. */
-+    assert(hBits <= 32);
-+
-     switch(mls)
-     {
-     default:
-@@ -820,6 +828,24 @@ size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
-     }
- }
- 
-+MEM_STATIC FORCE_INLINE_ATTR
-+size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) {
-+    /* Although some of these hashes do support hBits up to 64, some do not.
-+     * To be on the safe side, always avoid hBits > 32. */
-+    assert(hBits <= 32);
-+
-+    switch(mls)
-+    {
-+        default:
-+        case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt);
-+        case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt);
-+        case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt);
-+        case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt);
-+        case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt);
-+    }
-+}
-+
-+
- /* ZSTD_ipow() :
-  * Return base^exponent.
-  */
-@@ -1011,7 +1037,9 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
-  * The least significant cycleLog bits of the indices must remain the same,
-  * which may be 0. Every index up to maxDist in the past must be valid.
-  */
--MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
-+MEM_STATIC
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
-                                            U32 maxDist, void const* src)
- {
-     /* preemptive overflow correction:
-@@ -1167,10 +1195,15 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
-                     (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
-         assert(blockEndIdx >= loadedDictEnd);
- 
--        if (blockEndIdx > loadedDictEnd + maxDist) {
-+        if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) {
-             /* On reaching window size, dictionaries are invalidated.
-              * For simplification, if window size is reached anywhere within next block,
-              * the dictionary is invalidated for the full block.
-+             *
-+             * We also have to invalidate the dictionary if ZSTD_window_update() has detected
-+             * non-contiguous segments, which means that loadedDictEnd != window->dictLimit.
-+             * loadedDictEnd may be 0, if forceWindow is true, but in that case we never use
-+             * dictMatchState, so setting it to NULL is not a problem.
-              */
-             DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
-             *loadedDictEndPtr = 0;
-@@ -1199,7 +1232,9 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
-  * forget about the extDict. Handles overlap of the prefix and extDict.
-  * Returns non-zero if the segment is contiguous.
-  */
--MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
-+MEM_STATIC
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+U32 ZSTD_window_update(ZSTD_window_t* window,
-                                   void const* src, size_t srcSize,
-                                   int forceNonContiguous)
- {
-@@ -1302,6 +1337,42 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
- 
- #endif
- 
-+/* Short Cache */
-+
-+/* Normally, zstd matchfinders follow this flow:
-+ *     1. Compute hash at ip
-+ *     2. Load index from hashTable[hash]
-+ *     3. Check if *ip == *(base + index)
-+ * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss.
-+ *
-+ * Short cache is an optimization which allows us to avoid step 3 most of the time
-+ * when the data doesn't actually match. With short cache, the flow becomes:
-+ *     1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip.
-+ *     2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works.
-+ *     3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue.
-+ *
-+ * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to
-+ * dictMatchState matchfinders.
-+ */
-+#define ZSTD_SHORT_CACHE_TAG_BITS 8
-+#define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1)
-+
-+/* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable.
-+ * Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */
-+MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) {
-+    size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
-+    U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK);
-+    assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0);
-+    hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag;
-+}
-+
-+/* Helper function for short cache matchfinders.
-+ * Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */
-+MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) {
-+    U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK;
-+    U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK;
-+    return tag1 == tag2;
-+}
- 
- 
- /* ===============================================================
-@@ -1381,11 +1452,10 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
-  * This cannot be used when long range matching is enabled.
-  * Zstd will use these sequences, and pass the literals to a secondary block
-  * compressor.
-- * @return : An error code on failure.
-  * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
-  * access and data corruption.
-  */
--size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
-+void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
- 
- /* ZSTD_cycleLog() :
-  *  condition for correct operation : hashLog > 1 */
-@@ -1396,4 +1466,55 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
-  */
- void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
- 
-+/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
-+ * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
-+ * Note that the block delimiter must include the last literals of the block.
-+ */
-+size_t
-+ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
-+                                              ZSTD_sequencePosition* seqPos,
-+                                        const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
-+                                        const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
-+
-+/* Returns the number of bytes to move the current read position back by.
-+ * Only non-zero if we ended up splitting a sequence.
-+ * Otherwise, it may return a ZSTD error if something went wrong.
-+ *
-+ * This function will attempt to scan through blockSize bytes
-+ * represented by the sequences in @inSeqs,
-+ * storing any (partial) sequences.
-+ *
-+ * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
-+ * avoid splitting a match, or to avoid splitting a match such that it would produce a match
-+ * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
-+ */
-+size_t
-+ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
-+                                   const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
-+                                   const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
-+
-+/* Returns 1 if an external sequence producer is registered, otherwise returns 0. */
-+MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) {
-+    return params->extSeqProdFunc != NULL;
-+}
-+
-+/* ===============================================================
-+ * Deprecated definitions that are still used internally to avoid
-+ * deprecation warnings. These functions are exactly equivalent to
-+ * their public variants, but avoid the deprecation warnings.
-+ * =============================================================== */
-+
-+size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
-+
-+size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
-+                                    void* dst, size_t dstCapacity,
-+                              const void* src, size_t srcSize);
-+
-+size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
-+                               void* dst, size_t dstCapacity,
-+                         const void* src, size_t srcSize);
-+
-+size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-+
-+
- #endif /* ZSTD_COMPRESS_H */
-diff --git a/lib/zstd/compress/zstd_compress_literals.c b/lib/zstd/compress/zstd_compress_literals.c
-index 52b0a8059aba..3e9ea46a670a 100644
---- a/lib/zstd/compress/zstd_compress_literals.c
-+++ b/lib/zstd/compress/zstd_compress_literals.c
-@@ -1,5 +1,6 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -13,11 +14,36 @@
-  ***************************************/
- #include "zstd_compress_literals.h"
- 
-+
-+/* **************************************************************
-+*  Debug Traces
-+****************************************************************/
-+#if DEBUGLEVEL >= 2
-+
-+static size_t showHexa(const void* src, size_t srcSize)
-+{
-+    const BYTE* const ip = (const BYTE*)src;
-+    size_t u;
-+    for (u=0; u<srcSize; u++) {
-+        RAWLOG(5, " %02X", ip[u]); (void)ip;
-+    }
-+    RAWLOG(5, " \n");
-+    return srcSize;
-+}
-+
-+#endif
-+
-+
-+/* **************************************************************
-+*  Literals compression - special cases
-+****************************************************************/
- size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
- {
-     BYTE* const ostart = (BYTE*)dst;
-     U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
- 
-+    DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
-+
-     RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
- 
-     switch(flSize)
-@@ -36,16 +62,30 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
-     }
- 
-     ZSTD_memcpy(ostart + flSize, src, srcSize);
--    DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
-+    DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
-     return srcSize + flSize;
- }
- 
-+static int allBytesIdentical(const void* src, size_t srcSize)
-+{
-+    assert(srcSize >= 1);
-+    assert(src != NULL);
-+    {   const BYTE b = ((const BYTE*)src)[0];
-+        size_t p;
-+        for (p=1; p<srcSize; p++) {
-+            if (((const BYTE*)src)[p] != b) return 0;
-+        }
-+        return 1;
-+    }
-+}
-+
- size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
- {
-     BYTE* const ostart = (BYTE*)dst;
-     U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
- 
--    (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */
-+    assert(dstCapacity >= 4); (void)dstCapacity;
-+    assert(allBytesIdentical(src, srcSize));
- 
-     switch(flSize)
-     {
-@@ -63,28 +103,51 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void*
-     }
- 
-     ostart[flSize] = *(const BYTE*)src;
--    DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
-+    DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
-     return flSize+1;
- }
- 
--size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
--                              ZSTD_hufCTables_t* nextHuf,
--                              ZSTD_strategy strategy, int disableLiteralCompression,
--                              void* dst, size_t dstCapacity,
--                        const void* src, size_t srcSize,
--                              void* entropyWorkspace, size_t entropyWorkspaceSize,
--                        const int bmi2,
--                        unsigned suspectUncompressible)
-+/* ZSTD_minLiteralsToCompress() :
-+ * returns minimal amount of literals
-+ * for literal compression to even be attempted.
-+ * Minimum is made tighter as compression strategy increases.
-+ */
-+static size_t
-+ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
-+{
-+    assert((int)strategy >= 0);
-+    assert((int)strategy <= 9);
-+    /* btultra2 : min 8 bytes;
-+     * then 2x larger for each successive compression strategy
-+     * max threshold 64 bytes */
-+    {   int const shift = MIN(9-(int)strategy, 3);
-+        size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
-+        DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
-+        return mintc;
-+    }
-+}
-+
-+size_t ZSTD_compressLiterals (
-+                  void* dst, size_t dstCapacity,
-+            const void* src, size_t srcSize,
-+                  void* entropyWorkspace, size_t entropyWorkspaceSize,
-+            const ZSTD_hufCTables_t* prevHuf,
-+                  ZSTD_hufCTables_t* nextHuf,
-+                  ZSTD_strategy strategy,
-+                  int disableLiteralCompression,
-+                  int suspectUncompressible,
-+                  int bmi2)
- {
--    size_t const minGain = ZSTD_minGain(srcSize, strategy);
-     size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
-     BYTE*  const ostart = (BYTE*)dst;
-     U32 singleStream = srcSize < 256;
-     symbolEncodingType_e hType = set_compressed;
-     size_t cLitSize;
- 
--    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
--                disableLiteralCompression, (U32)srcSize);
-+    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
-+                disableLiteralCompression, (U32)srcSize, dstCapacity);
-+
-+    DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
- 
-     /* Prepare nextEntropy assuming reusing the existing table */
-     ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-@@ -92,40 +155,51 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
-     if (disableLiteralCompression)
-         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
- 
--    /* small ? don't even attempt compression (speed opt) */
--#   define COMPRESS_LITERALS_SIZE_MIN 63
--    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
--        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
--    }
-+    /* if too small, don't even attempt compression (speed opt) */
-+    if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
-+        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
- 
-     RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
-     {   HUF_repeat repeat = prevHuf->repeatMode;
--        int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
-+        int const flags = 0
-+            | (bmi2 ? HUF_flags_bmi2 : 0)
-+            | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
-+            | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
-+            | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
-+
-+        typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
-+        huf_compress_f huf_compress;
-         if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
--        cLitSize = singleStream ?
--            HUF_compress1X_repeat(
--                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
--                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
--                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) :
--            HUF_compress4X_repeat(
--                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
--                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
--                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
-+        huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
-+        cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
-+                                src, srcSize,
-+                                HUF_SYMBOLVALUE_MAX, LitHufLog,
-+                                entropyWorkspace, entropyWorkspaceSize,
-+                                (HUF_CElt*)nextHuf->CTable,
-+                                &repeat, flags);
-+        DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
-         if (repeat != HUF_repeat_none) {
-             /* reused the existing table */
--            DEBUGLOG(5, "Reusing previous huffman table");
-+            DEBUGLOG(5, "reusing statistics from previous huffman block");
-             hType = set_repeat;
-         }
-     }
- 
--    if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
--        ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
--        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
--    }
-+    {   size_t const minGain = ZSTD_minGain(srcSize, strategy);
-+        if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
-+            ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-+            return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-+    }   }
-     if (cLitSize==1) {
--        ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
--        return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
--    }
-+        /* A return value of 1 signals that the alphabet consists of a single symbol.
-+         * However, in some rare circumstances, it could be the compressed size (a single byte).
-+         * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
-+         * (it's also necessary to not generate statistics).
-+         * Therefore, in such a case, actively check that all bytes are identical. */
-+        if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
-+            ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-+            return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
-+    }   }
- 
-     if (hType == set_compressed) {
-         /* using a newly constructed table */
-@@ -136,16 +210,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
-     switch(lhSize)
-     {
-     case 3: /* 2 - 2 - 10 - 10 */
--        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
-+        if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
-+        {   U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
-             MEM_writeLE24(ostart, lhc);
-             break;
-         }
-     case 4: /* 2 - 2 - 14 - 14 */
-+        assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
-         {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
-             MEM_writeLE32(ostart, lhc);
-             break;
-         }
-     case 5: /* 2 - 2 - 18 - 18 */
-+        assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
-         {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
-             MEM_writeLE32(ostart, lhc);
-             ostart[4] = (BYTE)(cLitSize >> 10);
-diff --git a/lib/zstd/compress/zstd_compress_literals.h b/lib/zstd/compress/zstd_compress_literals.h
-index 9775fb97cb70..a2a85d6b69e5 100644
---- a/lib/zstd/compress/zstd_compress_literals.h
-+++ b/lib/zstd/compress/zstd_compress_literals.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -16,16 +17,24 @@
- 
- size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
- 
-+/* ZSTD_compressRleLiteralsBlock() :
-+ * Conditions :
-+ * - All bytes in @src are identical
-+ * - dstCapacity >= 4 */
- size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
- 
--/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
--size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
--                              ZSTD_hufCTables_t* nextHuf,
--                              ZSTD_strategy strategy, int disableLiteralCompression,
--                              void* dst, size_t dstCapacity,
-+/* ZSTD_compressLiterals():
-+ * @entropyWorkspace: must be aligned on 4-bytes boundaries
-+ * @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE
-+ * @suspectUncompressible: sampling checks, to potentially skip huffman coding
-+ */
-+size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity,
-                         const void* src, size_t srcSize,
-                               void* entropyWorkspace, size_t entropyWorkspaceSize,
--                        const int bmi2,
--                        unsigned suspectUncompressible);
-+                        const ZSTD_hufCTables_t* prevHuf,
-+                              ZSTD_hufCTables_t* nextHuf,
-+                              ZSTD_strategy strategy, int disableLiteralCompression,
-+                              int suspectUncompressible,
-+                              int bmi2);
- 
- #endif /* ZSTD_COMPRESS_LITERALS_H */
-diff --git a/lib/zstd/compress/zstd_compress_sequences.c b/lib/zstd/compress/zstd_compress_sequences.c
-index 21ddc1b37acf..5c028c78d889 100644
---- a/lib/zstd/compress/zstd_compress_sequences.c
-+++ b/lib/zstd/compress/zstd_compress_sequences.c
-@@ -1,5 +1,6 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -58,7 +59,7 @@ static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
- {
-     /* Heuristic: This should cover most blocks <= 16K and
-      * start to fade out after 16K to about 32K depending on
--     * comprssibility.
-+     * compressibility.
-      */
-     return nbSeq >= 2048;
- }
-@@ -166,7 +167,7 @@ ZSTD_selectEncodingType(
-     if (mostFrequent == nbSeq) {
-         *repeatMode = FSE_repeat_none;
-         if (isDefaultAllowed && nbSeq <= 2) {
--            /* Prefer set_basic over set_rle when there are 2 or less symbols,
-+            /* Prefer set_basic over set_rle when there are 2 or fewer symbols,
-              * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
-              * If basic encoding isn't possible, always choose RLE.
-              */
-diff --git a/lib/zstd/compress/zstd_compress_sequences.h b/lib/zstd/compress/zstd_compress_sequences.h
-index 7991364c2f71..7fe6f4ff5cf2 100644
---- a/lib/zstd/compress/zstd_compress_sequences.h
-+++ b/lib/zstd/compress/zstd_compress_sequences.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-diff --git a/lib/zstd/compress/zstd_compress_superblock.c b/lib/zstd/compress/zstd_compress_superblock.c
-index 17d836cc84e8..41f6521b27cd 100644
---- a/lib/zstd/compress/zstd_compress_superblock.c
-+++ b/lib/zstd/compress/zstd_compress_superblock.c
-@@ -1,5 +1,6 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -36,13 +37,14 @@
-  *      If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
-  *      and the following sub-blocks' literals sections will be Treeless_Literals_Block.
-  *  @return : compressed size of literals section of a sub-block
-- *            Or 0 if it unable to compress.
-+ *            Or 0 if unable to compress.
-  *            Or error code */
--static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
--                                    const ZSTD_hufCTablesMetadata_t* hufMetadata,
--                                    const BYTE* literals, size_t litSize,
--                                    void* dst, size_t dstSize,
--                                    const int bmi2, int writeEntropy, int* entropyWritten)
-+static size_t
-+ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
-+                              const ZSTD_hufCTablesMetadata_t* hufMetadata,
-+                              const BYTE* literals, size_t litSize,
-+                              void* dst, size_t dstSize,
-+                              const int bmi2, int writeEntropy, int* entropyWritten)
- {
-     size_t const header = writeEntropy ? 200 : 0;
-     size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
-@@ -53,8 +55,6 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
-     symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
-     size_t cLitSize = 0;
- 
--    (void)bmi2; /* TODO bmi2... */
--
-     DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
- 
-     *entropyWritten = 0;
-@@ -76,9 +76,9 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
-         DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
-     }
- 
--    /* TODO bmi2 */
--    {   const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
--                                          : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
-+    {   int const flags = bmi2 ? HUF_flags_bmi2 : 0;
-+        const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags)
-+                                          : HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags);
-         op += cSize;
-         cLitSize += cSize;
-         if (cSize == 0 || ERR_isError(cSize)) {
-@@ -103,7 +103,7 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
-     switch(lhSize)
-     {
-     case 3: /* 2 - 2 - 10 - 10 */
--        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
-+        {   U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
-             MEM_writeLE24(ostart, lhc);
-             break;
-         }
-@@ -123,26 +123,30 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
-     }
-     *entropyWritten = 1;
-     DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
--    return op-ostart;
-+    return (size_t)(op-ostart);
- }
- 
--static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
--    const seqDef* const sstart = sequences;
--    const seqDef* const send = sequences + nbSeq;
--    const seqDef* sp = sstart;
-+static size_t
-+ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
-+                   const seqDef* sequences, size_t nbSeqs,
-+                         size_t litSize, int lastSubBlock)
-+{
-     size_t matchLengthSum = 0;
-     size_t litLengthSum = 0;
--    (void)(litLengthSum); /* suppress unused variable warning on some environments */
--    while (send-sp > 0) {
--        ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
-+    size_t n;
-+    for (n=0; n<nbSeqs; n++) {
-+        const ZSTD_sequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n);
-         litLengthSum += seqLen.litLength;
-         matchLengthSum += seqLen.matchLength;
--        sp++;
-     }
--    assert(litLengthSum <= litSize);
--    if (!lastSequence) {
-+    DEBUGLOG(5, "ZSTD_seqDecompressedSize: %u sequences from %p: %u literals + %u matchlength",
-+                (unsigned)nbSeqs, (const void*)sequences,
-+                (unsigned)litLengthSum, (unsigned)matchLengthSum);
-+    if (!lastSubBlock)
-         assert(litLengthSum == litSize);
--    }
-+    else
-+        assert(litLengthSum <= litSize);
-+    (void)litLengthSum;
-     return matchLengthSum + litSize;
- }
- 
-@@ -156,13 +160,14 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
-  *  @return : compressed size of sequences section of a sub-block
-  *            Or 0 if it is unable to compress
-  *            Or error code. */
--static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
--                                              const ZSTD_fseCTablesMetadata_t* fseMetadata,
--                                              const seqDef* sequences, size_t nbSeq,
--                                              const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
--                                              const ZSTD_CCtx_params* cctxParams,
--                                              void* dst, size_t dstCapacity,
--                                              const int bmi2, int writeEntropy, int* entropyWritten)
-+static size_t
-+ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
-+                                const ZSTD_fseCTablesMetadata_t* fseMetadata,
-+                                const seqDef* sequences, size_t nbSeq,
-+                                const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
-+                                const ZSTD_CCtx_params* cctxParams,
-+                                void* dst, size_t dstCapacity,
-+                                const int bmi2, int writeEntropy, int* entropyWritten)
- {
-     const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
-     BYTE* const ostart = (BYTE*)dst;
-@@ -176,14 +181,14 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
-     /* Sequences Header */
-     RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
-                     dstSize_tooSmall, "");
--    if (nbSeq < 0x7F)
-+    if (nbSeq < 128)
-         *op++ = (BYTE)nbSeq;
-     else if (nbSeq < LONGNBSEQ)
-         op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
-     else
-         op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
-     if (nbSeq==0) {
--        return op - ostart;
-+        return (size_t)(op - ostart);
-     }
- 
-     /* seqHead : flags for FSE encoding type */
-@@ -205,7 +210,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
-     }
- 
-     {   size_t const bitstreamSize = ZSTD_encodeSequences(
--                                        op, oend - op,
-+                                        op, (size_t)(oend - op),
-                                         fseTables->matchlengthCTable, mlCode,
-                                         fseTables->offcodeCTable, ofCode,
-                                         fseTables->litlengthCTable, llCode,
-@@ -249,7 +254,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
- #endif
- 
-     *entropyWritten = 1;
--    return op - ostart;
-+    return (size_t)(op - ostart);
- }
- 
- /* ZSTD_compressSubBlock() :
-@@ -275,7 +280,8 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
-                 litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
-     {   size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
-                                                         &entropyMetadata->hufMetadata, literals, litSize,
--                                                        op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
-+                                                        op, (size_t)(oend-op),
-+                                                        bmi2, writeLitEntropy, litEntropyWritten);
-         FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
-         if (cLitSize == 0) return 0;
-         op += cLitSize;
-@@ -285,18 +291,18 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
-                                                   sequences, nbSeq,
-                                                   llCode, mlCode, ofCode,
-                                                   cctxParams,
--                                                  op, oend-op,
-+                                                  op, (size_t)(oend-op),
-                                                   bmi2, writeSeqEntropy, seqEntropyWritten);
-         FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
-         if (cSeqSize == 0) return 0;
-         op += cSeqSize;
-     }
-     /* Write block header */
--    {   size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
-+    {   size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize;
-         U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
-         MEM_writeLE24(ostart, cBlockHeader24);
-     }
--    return op-ostart;
-+    return (size_t)(op-ostart);
- }
- 
- static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
-@@ -385,7 +391,11 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
-     return cSeqSizeEstimate + sequencesSectionHeaderSize;
- }
- 
--static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
-+typedef struct {
-+    size_t estLitSize;
-+    size_t estBlockSize;
-+} EstimatedBlockSize;
-+static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
-                                         const BYTE* ofCodeTable,
-                                         const BYTE* llCodeTable,
-                                         const BYTE* mlCodeTable,
-@@ -393,15 +403,17 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
-                                         const ZSTD_entropyCTables_t* entropy,
-                                         const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
-                                         void* workspace, size_t wkspSize,
--                                        int writeLitEntropy, int writeSeqEntropy) {
--    size_t cSizeEstimate = 0;
--    cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
--                                                         &entropy->huf, &entropyMetadata->hufMetadata,
--                                                         workspace, wkspSize, writeLitEntropy);
--    cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
-+                                        int writeLitEntropy, int writeSeqEntropy)
-+{
-+    EstimatedBlockSize ebs;
-+    ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize,
-+                                                        &entropy->huf, &entropyMetadata->hufMetadata,
-+                                                        workspace, wkspSize, writeLitEntropy);
-+    ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
-                                                          nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
-                                                          workspace, wkspSize, writeSeqEntropy);
--    return cSizeEstimate + ZSTD_blockHeaderSize;
-+    ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize;
-+    return ebs;
- }
- 
- static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
-@@ -415,13 +427,56 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe
-     return 0;
- }
- 
-+static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t seqCount)
-+{
-+    size_t n, total = 0;
-+    assert(sp != NULL);
-+    for (n=0; n<seqCount; n++) {
-+        total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
-+    }
-+    DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
-+    return total;
-+}
-+
-+#define BYTESCALE 256
-+
-+static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
-+                size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
-+                int firstSubBlock)
-+{
-+    size_t n, budget = 0, inSize=0;
-+    /* entropy headers */
-+    size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
-+    assert(firstSubBlock==0 || firstSubBlock==1);
-+    budget += headerSize;
-+
-+    /* first sequence => at least one sequence*/
-+    budget += sp[0].litLength * avgLitCost + avgSeqCost;
-+    if (budget > targetBudget) return 1;
-+    inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
-+
-+    /* loop over sequences */
-+    for (n=1; n<nbSeqs; n++) {
-+        size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
-+        budget += currentCost;
-+        inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
-+        /* stop when sub-block budget is reached */
-+        if ( (budget > targetBudget)
-+            /* though continue to expand until the sub-block is deemed compressible */
-+          && (budget < inSize * BYTESCALE) )
-+            break;
-+    }
-+
-+    return n;
-+}
-+
- /* ZSTD_compressSubBlock_multi() :
-  *  Breaks super-block into multiple sub-blocks and compresses them.
-- *  Entropy will be written to the first block.
-- *  The following blocks will use repeat mode to compress.
-- *  All sub-blocks are compressed blocks (no raw or rle blocks).
-- *  @return : compressed size of the super block (which is multiple ZSTD blocks)
-- *            Or 0 if it failed to compress. */
-+ *  Entropy will be written into the first block.
-+ *  The following blocks use repeat_mode to compress.
-+ *  Sub-blocks are all compressed, except the last one when beneficial.
-+ *  @return : compressed size of the super block (which features multiple ZSTD blocks)
-+ *            or 0 if it failed to compress. */
- static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
-                             const ZSTD_compressedBlockState_t* prevCBlock,
-                             ZSTD_compressedBlockState_t* nextCBlock,
-@@ -434,10 +489,12 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
- {
-     const seqDef* const sstart = seqStorePtr->sequencesStart;
-     const seqDef* const send = seqStorePtr->sequences;
--    const seqDef* sp = sstart;
-+    const seqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */
-+    size_t const nbSeqs = (size_t)(send - sstart);
-     const BYTE* const lstart = seqStorePtr->litStart;
-     const BYTE* const lend = seqStorePtr->lit;
-     const BYTE* lp = lstart;
-+    size_t const nbLiterals = (size_t)(lend - lstart);
-     BYTE const* ip = (BYTE const*)src;
-     BYTE const* const iend = ip + srcSize;
-     BYTE* const ostart = (BYTE*)dst;
-@@ -446,112 +503,171 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
-     const BYTE* llCodePtr = seqStorePtr->llCode;
-     const BYTE* mlCodePtr = seqStorePtr->mlCode;
-     const BYTE* ofCodePtr = seqStorePtr->ofCode;
--    size_t targetCBlockSize = cctxParams->targetCBlockSize;
--    size_t litSize, seqCount;
--    int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
-+    size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
-+    size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
-+    int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
-     int writeSeqEntropy = 1;
--    int lastSequence = 0;
--
--    DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
--                (unsigned)(lend-lp), (unsigned)(send-sstart));
--
--    litSize = 0;
--    seqCount = 0;
--    do {
--        size_t cBlockSizeEstimate = 0;
--        if (sstart == send) {
--            lastSequence = 1;
--        } else {
--            const seqDef* const sequence = sp + seqCount;
--            lastSequence = sequence == send - 1;
--            litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
--            seqCount++;
--        }
--        if (lastSequence) {
--            assert(lp <= lend);
--            assert(litSize <= (size_t)(lend - lp));
--            litSize = (size_t)(lend - lp);
-+
-+    DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
-+               (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart));
-+
-+        /* let's start by a general estimation for the full block */
-+    if (nbSeqs > 0) {
-+        EstimatedBlockSize const ebs =
-+                ZSTD_estimateSubBlockSize(lp, nbLiterals,
-+                                        ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
-+                                        &nextCBlock->entropy, entropyMetadata,
-+                                        workspace, wkspSize,
-+                                        writeLitEntropy, writeSeqEntropy);
-+        /* quick estimation */
-+        size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
-+        size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
-+        const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1);
-+        size_t n, avgBlockBudget, blockBudgetSupp=0;
-+        avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks;
-+        DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
-+                    (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
-+                    (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
-+        /* simplification: if estimates states that the full superblock doesn't compress, just bail out immediately
-+         * this will result in the production of a single uncompressed block covering @srcSize.*/
-+        if (ebs.estBlockSize > srcSize) return 0;
-+
-+        /* compress and write sub-blocks */
-+        assert(nbSubBlocks>0);
-+        for (n=0; n < nbSubBlocks-1; n++) {
-+            /* determine nb of sequences for current sub-block + nbLiterals from next sequence */
-+            size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp),
-+                                        avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
-+            /* if reached last sequence : break to last sub-block (simplification) */
-+            assert(seqCount <= (size_t)(send-sp));
-+            if (sp + seqCount == send) break;
-+            assert(seqCount > 0);
-+            /* compress sub-block */
-+            {   int litEntropyWritten = 0;
-+                int seqEntropyWritten = 0;
-+                size_t litSize = countLiterals(seqStorePtr, sp, seqCount);
-+                const size_t decompressedSize =
-+                        ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0);
-+                size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
-+                                                sp, seqCount,
-+                                                lp, litSize,
-+                                                llCodePtr, mlCodePtr, ofCodePtr,
-+                                                cctxParams,
-+                                                op, (size_t)(oend-op),
-+                                                bmi2, writeLitEntropy, writeSeqEntropy,
-+                                                &litEntropyWritten, &seqEntropyWritten,
-+                                                0);
-+                FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
-+
-+                /* check compressibility, update state components */
-+                if (cSize > 0 && cSize < decompressedSize) {
-+                    DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes",
-+                                (unsigned)decompressedSize, (unsigned)cSize);
-+                    assert(ip + decompressedSize <= iend);
-+                    ip += decompressedSize;
-+                    lp += litSize;
-+                    op += cSize;
-+                    llCodePtr += seqCount;
-+                    mlCodePtr += seqCount;
-+                    ofCodePtr += seqCount;
-+                    /* Entropy only needs to be written once */
-+                    if (litEntropyWritten) {
-+                        writeLitEntropy = 0;
-+                    }
-+                    if (seqEntropyWritten) {
-+                        writeSeqEntropy = 0;
-+                    }
-+                    sp += seqCount;
-+                    blockBudgetSupp = 0;
-+            }   }
-+            /* otherwise : do not compress yet, coalesce current sub-block with following one */
-         }
--        /* I think there is an optimization opportunity here.
--         * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
--         * since it recalculates estimate from scratch.
--         * For example, it would recount literal distribution and symbol codes every time.
--         */
--        cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
--                                                       &nextCBlock->entropy, entropyMetadata,
--                                                       workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
--        if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
--            int litEntropyWritten = 0;
--            int seqEntropyWritten = 0;
--            const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
--            const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
--                                                       sp, seqCount,
--                                                       lp, litSize,
--                                                       llCodePtr, mlCodePtr, ofCodePtr,
--                                                       cctxParams,
--                                                       op, oend-op,
--                                                       bmi2, writeLitEntropy, writeSeqEntropy,
--                                                       &litEntropyWritten, &seqEntropyWritten,
--                                                       lastBlock && lastSequence);
--            FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
--            if (cSize > 0 && cSize < decompressedSize) {
--                DEBUGLOG(5, "Committed the sub-block");
--                assert(ip + decompressedSize <= iend);
--                ip += decompressedSize;
--                sp += seqCount;
--                lp += litSize;
--                op += cSize;
--                llCodePtr += seqCount;
--                mlCodePtr += seqCount;
--                ofCodePtr += seqCount;
--                litSize = 0;
--                seqCount = 0;
--                /* Entropy only needs to be written once */
--                if (litEntropyWritten) {
--                    writeLitEntropy = 0;
--                }
--                if (seqEntropyWritten) {
--                    writeSeqEntropy = 0;
--                }
-+    } /* if (nbSeqs > 0) */
-+
-+    /* write last block */
-+    DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
-+    {   int litEntropyWritten = 0;
-+        int seqEntropyWritten = 0;
-+        size_t litSize = (size_t)(lend - lp);
-+        size_t seqCount = (size_t)(send - sp);
-+        const size_t decompressedSize =
-+                ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
-+        size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
-+                                            sp, seqCount,
-+                                            lp, litSize,
-+                                            llCodePtr, mlCodePtr, ofCodePtr,
-+                                            cctxParams,
-+                                            op, (size_t)(oend-op),
-+                                            bmi2, writeLitEntropy, writeSeqEntropy,
-+                                            &litEntropyWritten, &seqEntropyWritten,
-+                                            lastBlock);
-+        FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
-+
-+        /* update pointers, the nb of literals borrowed from next sequence must be preserved */
-+        if (cSize > 0 && cSize < decompressedSize) {
-+            DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes",
-+                        (unsigned)decompressedSize, (unsigned)cSize);
-+            assert(ip + decompressedSize <= iend);
-+            ip += decompressedSize;
-+            lp += litSize;
-+            op += cSize;
-+            llCodePtr += seqCount;
-+            mlCodePtr += seqCount;
-+            ofCodePtr += seqCount;
-+            /* Entropy only needs to be written once */
-+            if (litEntropyWritten) {
-+                writeLitEntropy = 0;
-             }
-+            if (seqEntropyWritten) {
-+                writeSeqEntropy = 0;
-+            }
-+            sp += seqCount;
-         }
--    } while (!lastSequence);
-+    }
-+
-+
-     if (writeLitEntropy) {
--        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
-+        DEBUGLOG(5, "Literal entropy tables were never written");
-         ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
-     }
-     if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
-         /* If we haven't written our entropy tables, then we've violated our contract and
-          * must emit an uncompressed block.
-          */
--        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
-+        DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block");
-         return 0;
-     }
-+
-     if (ip < iend) {
--        size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
--        DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
-+        /* some data left : last part of the block sent uncompressed */
-+        size_t const rSize = (size_t)((iend - ip));
-+        size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
-+        DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
-         FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
-         assert(cSize != 0);
-         op += cSize;
-         /* We have to regenerate the repcodes because we've skipped some sequences */
-         if (sp < send) {
--            seqDef const* seq;
-+            const seqDef* seq;
-             repcodes_t rep;
-             ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
-             for (seq = sstart; seq < sp; ++seq) {
--                ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
-+                ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
-             }
-             ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
-         }
-     }
--    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
--    return op-ostart;
-+
-+    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u",
-+                (unsigned)(op-ostart));
-+    return (size_t)(op-ostart);
- }
- 
- size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
-                                void* dst, size_t dstCapacity,
--                               void const* src, size_t srcSize,
--                               unsigned lastBlock) {
-+                               const void* src, size_t srcSize,
-+                               unsigned lastBlock)
-+{
-     ZSTD_entropyCTablesMetadata_t entropyMetadata;
- 
-     FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
-diff --git a/lib/zstd/compress/zstd_compress_superblock.h b/lib/zstd/compress/zstd_compress_superblock.h
-index 224ece79546e..826bbc9e029b 100644
---- a/lib/zstd/compress/zstd_compress_superblock.h
-+++ b/lib/zstd/compress/zstd_compress_superblock.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-diff --git a/lib/zstd/compress/zstd_cwksp.h b/lib/zstd/compress/zstd_cwksp.h
-index 349fc923c355..86bc3c2c23c7 100644
---- a/lib/zstd/compress/zstd_cwksp.h
-+++ b/lib/zstd/compress/zstd_cwksp.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -14,7 +15,9 @@
- /*-*************************************
- *  Dependencies
- ***************************************/
-+#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customFree */
- #include "../common/zstd_internal.h"
-+#include "../common/portability_macros.h"
- 
- 
- /*-*************************************
-@@ -41,8 +44,9 @@
- ***************************************/
- typedef enum {
-     ZSTD_cwksp_alloc_objects,
--    ZSTD_cwksp_alloc_buffers,
--    ZSTD_cwksp_alloc_aligned
-+    ZSTD_cwksp_alloc_aligned_init_once,
-+    ZSTD_cwksp_alloc_aligned,
-+    ZSTD_cwksp_alloc_buffers
- } ZSTD_cwksp_alloc_phase_e;
- 
- /*
-@@ -95,8 +99,8 @@ typedef enum {
-  *
-  * Workspace Layout:
-  *
-- * [                        ... workspace ...                         ]
-- * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
-+ * [                        ... workspace ...                           ]
-+ * [objects][tables ->] free space [<- buffers][<- aligned][<- init once]
-  *
-  * The various objects that live in the workspace are divided into the
-  * following categories, and are allocated separately:
-@@ -120,9 +124,18 @@ typedef enum {
-  *   uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
-  *   Their sizes depend on the cparams. These tables are 64-byte aligned.
-  *
-- * - Aligned: these buffers are used for various purposes that require 4 byte
-- *   alignment, but don't require any initialization before they're used. These
-- *   buffers are each aligned to 64 bytes.
-+ * - Init once: these buffers require to be initialized at least once before
-+ *   use. They should be used when we want to skip memory initialization
-+ *   while not triggering memory checkers (like Valgrind) when reading from
-+ *   from this memory without writing to it first.
-+ *   These buffers should be used carefully as they might contain data
-+ *   from previous compressions.
-+ *   Buffers are aligned to 64 bytes.
-+ *
-+ * - Aligned: these buffers don't require any initialization before they're
-+ *   used. The user of the buffer should make sure they write into a buffer
-+ *   location before reading from it.
-+ *   Buffers are aligned to 64 bytes.
-  *
-  * - Buffers: these buffers are used for various purposes that don't require
-  *   any alignment or initialization before they're used. This means they can
-@@ -134,8 +147,9 @@ typedef enum {
-  * correctly packed into the workspace buffer. That order is:
-  *
-  * 1. Objects
-- * 2. Buffers
-- * 3. Aligned/Tables
-+ * 2. Init once / Tables
-+ * 3. Aligned / Tables
-+ * 4. Buffers / Tables
-  *
-  * Attempts to reserve objects of different types out of order will fail.
-  */
-@@ -147,6 +161,7 @@ typedef struct {
-     void* tableEnd;
-     void* tableValidEnd;
-     void* allocStart;
-+    void* initOnceStart;
- 
-     BYTE allocFailed;
-     int workspaceOversizedDuration;
-@@ -159,6 +174,7 @@ typedef struct {
- ***************************************/
- 
- MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
-+MEM_STATIC void*  ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws);
- 
- MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
-     (void)ws;
-@@ -168,6 +184,8 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
-     assert(ws->tableEnd <= ws->allocStart);
-     assert(ws->tableValidEnd <= ws->allocStart);
-     assert(ws->allocStart <= ws->workspaceEnd);
-+    assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws));
-+    assert(ws->workspace <= ws->initOnceStart);
- }
- 
- /*
-@@ -210,14 +228,10 @@ MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
-  * for internal purposes (currently only alignment).
-  */
- MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
--    /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
--     * to align the beginning of tables section, as well as another n_2=[0, 63] bytes
--     * to align the beginning of the aligned section.
--     *
--     * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
--     * aligneds being sized in multiples of 64 bytes.
-+    /* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES
-+     * bytes to align the beginning of tables section and end of buffers;
-      */
--    size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
-+    size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2;
-     return slackSpace;
- }
- 
-@@ -230,10 +244,18 @@ MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignByt
-     size_t const alignBytesMask = alignBytes - 1;
-     size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
-     assert((alignBytes & alignBytesMask) == 0);
--    assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
-+    assert(bytes < alignBytes);
-     return bytes;
- }
- 
-+/*
-+ * Returns the initial value for allocStart which is used to determine the position from
-+ * which we can allocate from the end of the workspace.
-+ */
-+MEM_STATIC void*  ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) {
-+    return (void*)((size_t)ws->workspaceEnd & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1));
-+}
-+
- /*
-  * Internal function. Do not use directly.
-  * Reserves the given number of bytes within the aligned/buffer segment of the wksp,
-@@ -274,27 +296,16 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
- {
-     assert(phase >= ws->phase);
-     if (phase > ws->phase) {
--        /* Going from allocating objects to allocating buffers */
--        if (ws->phase < ZSTD_cwksp_alloc_buffers &&
--                phase >= ZSTD_cwksp_alloc_buffers) {
-+        /* Going from allocating objects to allocating initOnce / tables */
-+        if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once &&
-+            phase >= ZSTD_cwksp_alloc_aligned_init_once) {
-             ws->tableValidEnd = ws->objectEnd;
--        }
-+            ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
- 
--        /* Going from allocating buffers to allocating aligneds/tables */
--        if (ws->phase < ZSTD_cwksp_alloc_aligned &&
--                phase >= ZSTD_cwksp_alloc_aligned) {
--            {   /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
--                size_t const bytesToAlign =
--                    ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
--                DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
--                ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
--                RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
--                                memory_allocation, "aligned phase - alignment initial allocation failed!");
--            }
-             {   /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
--                void* const alloc = ws->objectEnd;
-+                void *const alloc = ws->objectEnd;
-                 size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
--                void* const objectEnd = (BYTE*)alloc + bytesToAlign;
-+                void *const objectEnd = (BYTE *) alloc + bytesToAlign;
-                 DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
-                 RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
-                                 "table phase - alignment initial allocation failed!");
-@@ -302,7 +313,9 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
-                 ws->tableEnd = objectEnd;  /* table area starts being empty */
-                 if (ws->tableValidEnd < ws->tableEnd) {
-                     ws->tableValidEnd = ws->tableEnd;
--        }   }   }
-+                }
-+            }
-+        }
-         ws->phase = phase;
-         ZSTD_cwksp_assert_internal_consistency(ws);
-     }
-@@ -314,7 +327,7 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
-  */
- MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
- {
--    return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
-+    return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd);
- }
- 
- /*
-@@ -343,6 +356,33 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
-     return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
- }
- 
-+/*
-+ * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
-+ * This memory has been initialized at least once in the past.
-+ * This doesn't mean it has been initialized this time, and it might contain data from previous
-+ * operations.
-+ * The main usage is for algorithms that might need read access into uninitialized memory.
-+ * The algorithm must maintain safety under these conditions and must make sure it doesn't
-+ * leak any of the past data (directly or in side channels).
-+ */
-+MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes)
-+{
-+    size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES);
-+    void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once);
-+    assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
-+    if(ptr && ptr < ws->initOnceStart) {
-+        /* We assume the memory following the current allocation is either:
-+         * 1. Not usable as initOnce memory (end of workspace)
-+         * 2. Another initOnce buffer that has been allocated before (and so was previously memset)
-+         * 3. An ASAN redzone, in which case we don't want to write on it
-+         * For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart.
-+         * Note that we assume here that MSAN and ASAN cannot run in the same time. */
-+        ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes));
-+        ws->initOnceStart = ptr;
-+    }
-+    return ptr;
-+}
-+
- /*
-  * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
-  */
-@@ -356,18 +396,22 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
- 
- /*
-  * Aligned on 64 bytes. These buffers have the special property that
-- * their values remain constrained, allowing us to re-use them without
-+ * their values remain constrained, allowing us to reuse them without
-  * memset()-ing them.
-  */
- MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
- {
--    const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
-+    const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once;
-     void* alloc;
-     void* end;
-     void* top;
- 
--    if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
--        return NULL;
-+    /* We can only start allocating tables after we are done reserving space for objects at the
-+     * start of the workspace */
-+    if(ws->phase < phase) {
-+        if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
-+            return NULL;
-+        }
-     }
-     alloc = ws->tableEnd;
-     end = (BYTE *)alloc + bytes;
-@@ -451,7 +495,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
-     assert(ws->tableValidEnd >= ws->objectEnd);
-     assert(ws->tableValidEnd <= ws->allocStart);
-     if (ws->tableValidEnd < ws->tableEnd) {
--        ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
-+        ZSTD_memset(ws->tableValidEnd, 0, (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd));
-     }
-     ZSTD_cwksp_mark_tables_clean(ws);
- }
-@@ -478,14 +522,23 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
- 
- 
-     ws->tableEnd = ws->objectEnd;
--    ws->allocStart = ws->workspaceEnd;
-+    ws->allocStart = ZSTD_cwksp_initialAllocStart(ws);
-     ws->allocFailed = 0;
--    if (ws->phase > ZSTD_cwksp_alloc_buffers) {
--        ws->phase = ZSTD_cwksp_alloc_buffers;
-+    if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) {
-+        ws->phase = ZSTD_cwksp_alloc_aligned_init_once;
-     }
-     ZSTD_cwksp_assert_internal_consistency(ws);
- }
- 
-+MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
-+    return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
-+}
-+
-+MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
-+    return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
-+         + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
-+}
-+
- /*
-  * The provided workspace takes ownership of the buffer [start, start+size).
-  * Any existing values in the workspace are ignored (the previously managed
-@@ -498,6 +551,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_c
-     ws->workspaceEnd = (BYTE*)start + size;
-     ws->objectEnd = ws->workspace;
-     ws->tableValidEnd = ws->objectEnd;
-+    ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
-     ws->phase = ZSTD_cwksp_alloc_objects;
-     ws->isStatic = isStatic;
-     ZSTD_cwksp_clear(ws);
-@@ -529,15 +583,6 @@ MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
-     ZSTD_memset(src, 0, sizeof(ZSTD_cwksp));
- }
- 
--MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
--    return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
--}
--
--MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
--    return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
--         + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
--}
--
- MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
-     return ws->allocFailed;
- }
-@@ -550,17 +595,11 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
-  * Returns if the estimated space needed for a wksp is within an acceptable limit of the
-  * actual amount of space used.
-  */
--MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
--                                                        size_t const estimatedSpace, int resizedWorkspace) {
--    if (resizedWorkspace) {
--        /* Resized/newly allocated wksp should have exact bounds */
--        return ZSTD_cwksp_used(ws) == estimatedSpace;
--    } else {
--        /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
--         * than estimatedSpace. See the comments in zstd_cwksp.h for details.
--         */
--        return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
--    }
-+MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) {
-+    /* We have an alignment space between objects and tables between tables and buffers, so we can have up to twice
-+     * the alignment bytes difference between estimation and actual usage */
-+    return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) &&
-+           ZSTD_cwksp_used(ws) <= estimatedSpace;
- }
- 
- 
-diff --git a/lib/zstd/compress/zstd_double_fast.c b/lib/zstd/compress/zstd_double_fast.c
-index 76933dea2624..5ff54f17d92f 100644
---- a/lib/zstd/compress/zstd_double_fast.c
-+++ b/lib/zstd/compress/zstd_double_fast.c
-@@ -1,5 +1,6 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -11,8 +12,49 @@
- #include "zstd_compress_internal.h"
- #include "zstd_double_fast.h"
- 
-+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
- 
--void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
-+static
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
-+                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
-+{
-+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-+    U32* const hashLarge = ms->hashTable;
-+    U32  const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
-+    U32  const mls = cParams->minMatch;
-+    U32* const hashSmall = ms->chainTable;
-+    U32  const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
-+    const BYTE* const base = ms->window.base;
-+    const BYTE* ip = base + ms->nextToUpdate;
-+    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
-+    const U32 fastHashFillStep = 3;
-+
-+    /* Always insert every fastHashFillStep position into the hash tables.
-+     * Insert the other positions into the large hash table if their entry
-+     * is empty.
-+     */
-+    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
-+        U32 const curr = (U32)(ip - base);
-+        U32 i;
-+        for (i = 0; i < fastHashFillStep; ++i) {
-+            size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
-+            size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
-+            if (i == 0) {
-+                ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
-+            }
-+            if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
-+                ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
-+            }
-+            /* Only load extra positions for ZSTD_dtlm_full */
-+            if (dtlm == ZSTD_dtlm_fast)
-+                break;
-+    }   }
-+}
-+
-+static
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
-                               void const* end, ZSTD_dictTableLoadMethod_e dtlm)
- {
-     const ZSTD_compressionParameters* const cParams = &ms->cParams;
-@@ -43,11 +85,24 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
-             /* Only load extra positions for ZSTD_dtlm_full */
-             if (dtlm == ZSTD_dtlm_fast)
-                 break;
--    }   }
-+        }   }
-+}
-+
-+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
-+                        const void* const end,
-+                        ZSTD_dictTableLoadMethod_e dtlm,
-+                        ZSTD_tableFillPurpose_e tfp)
-+{
-+    if (tfp == ZSTD_tfp_forCDict) {
-+        ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
-+    } else {
-+        ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
-+    }
- }
- 
- 
- FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
- size_t ZSTD_compressBlock_doubleFast_noDict_generic(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize, U32 const mls /* template */)
-@@ -67,7 +122,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
-     const BYTE* const iend = istart + srcSize;
-     const BYTE* const ilimit = iend - HASH_READ_SIZE;
-     U32 offset_1=rep[0], offset_2=rep[1];
--    U32 offsetSaved = 0;
-+    U32 offsetSaved1 = 0, offsetSaved2 = 0;
- 
-     size_t mLength;
-     U32 offset;
-@@ -100,8 +155,8 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
-         U32 const current = (U32)(ip - base);
-         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
-         U32 const maxRep = current - windowLow;
--        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
--        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
-+        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
-+        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
-     }
- 
-     /* Outer Loop: one iteration per match found and stored */
-@@ -131,7 +186,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
-             if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
-                 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
-                 ip++;
--                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
-+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
-                 goto _match_stored;
-             }
- 
-@@ -175,9 +230,13 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
-         } while (ip1 <= ilimit);
- 
- _cleanup:
-+        /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
-+         * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
-+        offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
-+
-         /* save reps for next block */
--        rep[0] = offset_1 ? offset_1 : offsetSaved;
--        rep[1] = offset_2 ? offset_2 : offsetSaved;
-+        rep[0] = offset_1 ? offset_1 : offsetSaved1;
-+        rep[1] = offset_2 ? offset_2 : offsetSaved2;
- 
-         /* Return the last literals size */
-         return (size_t)(iend - anchor);
-@@ -217,7 +276,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
-             hashLong[hl1] = (U32)(ip1 - base);
-         }
- 
--        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
-+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
- 
- _match_stored:
-         /* match found */
-@@ -243,7 +302,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
-                 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
-                 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
-                 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
--                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength);
-+                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
-                 ip += rLength;
-                 anchor = ip;
-                 continue;   /* faster when present ... (?) */
-@@ -254,6 +313,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
- 
- 
- FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
- size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize,
-@@ -275,7 +335,6 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
-     const BYTE* const iend = istart + srcSize;
-     const BYTE* const ilimit = iend - HASH_READ_SIZE;
-     U32 offset_1=rep[0], offset_2=rep[1];
--    U32 offsetSaved = 0;
- 
-     const ZSTD_matchState_t* const dms = ms->dictMatchState;
-     const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
-@@ -286,8 +345,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
-     const BYTE* const dictStart    = dictBase + dictStartIndex;
-     const BYTE* const dictEnd      = dms->window.nextSrc;
-     const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
--    const U32 dictHBitsL           = dictCParams->hashLog;
--    const U32 dictHBitsS           = dictCParams->chainLog;
-+    const U32 dictHBitsL           = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
-+    const U32 dictHBitsS           = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
-     const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
- 
-     DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
-@@ -295,6 +354,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
-     /* if a dictionary is attached, it must be within window range */
-     assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
- 
-+    if (ms->prefetchCDictTables) {
-+        size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
-+        size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
-+        PREFETCH_AREA(dictHashLong, hashTableBytes);
-+        PREFETCH_AREA(dictHashSmall, chainTableBytes);
-+    }
-+
-     /* init */
-     ip += (dictAndPrefixLength == 0);
- 
-@@ -309,8 +375,12 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
-         U32 offset;
-         size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
-         size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
--        size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
--        size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
-+        size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
-+        size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
-+        U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
-+        U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
-+        int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
-+        int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
-         U32 const curr = (U32)(ip-base);
-         U32 const matchIndexL = hashLong[h2];
-         U32 matchIndexS = hashSmall[h];
-@@ -328,7 +398,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
-             const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
-             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
-             ip++;
--            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
-+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
-             goto _match_stored;
-         }
- 
-@@ -340,9 +410,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
-                 while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
-                 goto _match_found;
-             }
--        } else {
-+        } else if (dictTagsMatchL) {
-             /* check dictMatchState long match */
--            U32 const dictMatchIndexL = dictHashLong[dictHL];
-+            U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
-             const BYTE* dictMatchL = dictBase + dictMatchIndexL;
-             assert(dictMatchL < dictEnd);
- 
-@@ -358,9 +428,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
-             if (MEM_read32(match) == MEM_read32(ip)) {
-                 goto _search_next_long;
-             }
--        } else {
-+        } else if (dictTagsMatchS) {
-             /* check dictMatchState short match */
--            U32 const dictMatchIndexS = dictHashSmall[dictHS];
-+            U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
-             match = dictBase + dictMatchIndexS;
-             matchIndexS = dictMatchIndexS + dictIndexDelta;
- 
-@@ -375,10 +445,11 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
-         continue;
- 
- _search_next_long:
--
-         {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
--            size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
-+            size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
-             U32 const matchIndexL3 = hashLong[hl3];
-+            U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
-+            int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
-             const BYTE* matchL3 = base + matchIndexL3;
-             hashLong[hl3] = curr + 1;
- 
-@@ -391,9 +462,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
-                     while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
-                     goto _match_found;
-                 }
--            } else {
-+            } else if (dictTagsMatchL3) {
-                 /* check dict long +1 match */
--                U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
-+                U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
-                 const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
-                 assert(dictMatchL3 < dictEnd);
-                 if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
-@@ -419,7 +490,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
-         offset_2 = offset_1;
-         offset_1 = offset;
- 
--        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
-+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
- 
- _match_stored:
-         /* match found */
-@@ -448,7 +519,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
-                     const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
-                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
-                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
--                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
-+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
-                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
-                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
-                     ip += repLength2;
-@@ -461,8 +532,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
-     }   /* while (ip < ilimit) */
- 
-     /* save reps for next block */
--    rep[0] = offset_1 ? offset_1 : offsetSaved;
--    rep[1] = offset_2 ? offset_2 : offsetSaved;
-+    rep[0] = offset_1;
-+    rep[1] = offset_2;
- 
-     /* Return the last literals size */
-     return (size_t)(iend - anchor);
-@@ -527,7 +598,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
- }
- 
- 
--static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
-+static
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+size_t ZSTD_compressBlock_doubleFast_extDict_generic(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize,
-         U32 const mls /* template */)
-@@ -585,7 +658,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
-             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
-             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
-             ip++;
--            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
-+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
-         } else {
-             if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
-                 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
-@@ -596,7 +669,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
-                 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
-                 offset_2 = offset_1;
-                 offset_1 = offset;
--                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
-+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
- 
-             } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
-                 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
-@@ -621,7 +694,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
-                 }
-                 offset_2 = offset_1;
-                 offset_1 = offset;
--                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
-+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
- 
-             } else {
-                 ip += ((ip-anchor) >> kSearchStrength) + 1;
-@@ -653,7 +726,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
-                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
-                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
-                     U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
--                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
-+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
-                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
-                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
-                     ip += repLength2;
-@@ -694,3 +767,5 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
-         return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
-     }
- }
-+
-+#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
-diff --git a/lib/zstd/compress/zstd_double_fast.h b/lib/zstd/compress/zstd_double_fast.h
-index 6822bde65a1d..b7ddc714f13e 100644
---- a/lib/zstd/compress/zstd_double_fast.h
-+++ b/lib/zstd/compress/zstd_double_fast.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -15,8 +16,12 @@
- #include "../common/mem.h"      /* U32 */
- #include "zstd_compress_internal.h"     /* ZSTD_CCtx, size_t */
- 
-+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
-+
- void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
--                              void const* end, ZSTD_dictTableLoadMethod_e dtlm);
-+                              void const* end, ZSTD_dictTableLoadMethod_e dtlm,
-+                              ZSTD_tableFillPurpose_e tfp);
-+
- size_t ZSTD_compressBlock_doubleFast(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
-@@ -27,6 +32,14 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
- 
-+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast
-+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE ZSTD_compressBlock_doubleFast_dictMatchState
-+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT ZSTD_compressBlock_doubleFast_extDict
-+#else
-+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST NULL
-+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE NULL
-+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL
-+#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
- 
- 
- #endif /* ZSTD_DOUBLE_FAST_H */
-diff --git a/lib/zstd/compress/zstd_fast.c b/lib/zstd/compress/zstd_fast.c
-index a752e6beab52..b7a63ba4ce56 100644
---- a/lib/zstd/compress/zstd_fast.c
-+++ b/lib/zstd/compress/zstd_fast.c
-@@ -1,5 +1,6 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -11,8 +12,46 @@
- #include "zstd_compress_internal.h"  /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
- #include "zstd_fast.h"
- 
-+static
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
-+                        const void* const end,
-+                        ZSTD_dictTableLoadMethod_e dtlm)
-+{
-+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-+    U32* const hashTable = ms->hashTable;
-+    U32  const hBits = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
-+    U32  const mls = cParams->minMatch;
-+    const BYTE* const base = ms->window.base;
-+    const BYTE* ip = base + ms->nextToUpdate;
-+    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
-+    const U32 fastHashFillStep = 3;
- 
--void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
-+    /* Currently, we always use ZSTD_dtlm_full for filling CDict tables.
-+     * Feel free to remove this assert if there's a good reason! */
-+    assert(dtlm == ZSTD_dtlm_full);
-+
-+    /* Always insert every fastHashFillStep position into the hash table.
-+     * Insert the other positions if their hash entry is empty.
-+     */
-+    for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
-+        U32 const curr = (U32)(ip - base);
-+        {   size_t const hashAndTag = ZSTD_hashPtr(ip, hBits, mls);
-+            ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr);   }
-+
-+        if (dtlm == ZSTD_dtlm_fast) continue;
-+        /* Only load extra positions for ZSTD_dtlm_full */
-+        {   U32 p;
-+            for (p = 1; p < fastHashFillStep; ++p) {
-+                size_t const hashAndTag = ZSTD_hashPtr(ip + p, hBits, mls);
-+                if (hashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {  /* not yet filled */
-+                    ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr + p);
-+                }   }   }   }
-+}
-+
-+static
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
-                         const void* const end,
-                         ZSTD_dictTableLoadMethod_e dtlm)
- {
-@@ -25,6 +64,10 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
-     const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
-     const U32 fastHashFillStep = 3;
- 
-+    /* Currently, we always use ZSTD_dtlm_fast for filling CCtx tables.
-+     * Feel free to remove this assert if there's a good reason! */
-+    assert(dtlm == ZSTD_dtlm_fast);
-+
-     /* Always insert every fastHashFillStep position into the hash table.
-      * Insert the other positions if their hash entry is empty.
-      */
-@@ -42,6 +85,18 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
-     }   }   }   }
- }
- 
-+void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
-+                        const void* const end,
-+                        ZSTD_dictTableLoadMethod_e dtlm,
-+                        ZSTD_tableFillPurpose_e tfp)
-+{
-+    if (tfp == ZSTD_tfp_forCDict) {
-+        ZSTD_fillHashTableForCDict(ms, end, dtlm);
-+    } else {
-+        ZSTD_fillHashTableForCCtx(ms, end, dtlm);
-+    }
-+}
-+
- 
- /*
-  * If you squint hard enough (and ignore repcodes), the search operation at any
-@@ -89,8 +144,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
-  *
-  * This is also the work we do at the beginning to enter the loop initially.
-  */
--FORCE_INLINE_TEMPLATE size_t
--ZSTD_compressBlock_fast_noDict_generic(
-+FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+size_t ZSTD_compressBlock_fast_noDict_generic(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize,
-         U32 const mls, U32 const hasStep)
-@@ -117,7 +173,7 @@ ZSTD_compressBlock_fast_noDict_generic(
- 
-     U32 rep_offset1 = rep[0];
-     U32 rep_offset2 = rep[1];
--    U32 offsetSaved = 0;
-+    U32 offsetSaved1 = 0, offsetSaved2 = 0;
- 
-     size_t hash0; /* hash for ip0 */
-     size_t hash1; /* hash for ip1 */
-@@ -141,8 +197,8 @@ ZSTD_compressBlock_fast_noDict_generic(
-     {   U32 const curr = (U32)(ip0 - base);
-         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
-         U32 const maxRep = curr - windowLow;
--        if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
--        if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
-+        if (rep_offset2 > maxRep) offsetSaved2 = rep_offset2, rep_offset2 = 0;
-+        if (rep_offset1 > maxRep) offsetSaved1 = rep_offset1, rep_offset1 = 0;
-     }
- 
-     /* start each op */
-@@ -180,8 +236,14 @@ ZSTD_compressBlock_fast_noDict_generic(
-             mLength = ip0[-1] == match0[-1];
-             ip0 -= mLength;
-             match0 -= mLength;
--            offcode = STORE_REPCODE_1;
-+            offcode = REPCODE1_TO_OFFBASE;
-             mLength += 4;
-+
-+            /* First write next hash table entry; we've already calculated it.
-+             * This write is known to be safe because the ip1 is before the
-+             * repcode (ip2). */
-+            hashTable[hash1] = (U32)(ip1 - base);
-+
-             goto _match;
-         }
- 
-@@ -195,6 +257,12 @@ ZSTD_compressBlock_fast_noDict_generic(
-         /* check match at ip[0] */
-         if (MEM_read32(ip0) == mval) {
-             /* found a match! */
-+
-+            /* First write next hash table entry; we've already calculated it.
-+             * This write is known to be safe because the ip1 == ip0 + 1, so
-+             * we know we will resume searching after ip1 */
-+            hashTable[hash1] = (U32)(ip1 - base);
-+
-             goto _offset;
-         }
- 
-@@ -224,6 +292,21 @@ ZSTD_compressBlock_fast_noDict_generic(
-         /* check match at ip[0] */
-         if (MEM_read32(ip0) == mval) {
-             /* found a match! */
-+
-+            /* first write next hash table entry; we've already calculated it */
-+            if (step <= 4) {
-+                /* We need to avoid writing an index into the hash table >= the
-+                 * position at which we will pick up our searching after we've
-+                 * taken this match.
-+                 *
-+                 * The minimum possible match has length 4, so the earliest ip0
-+                 * can be after we take this match will be the current ip0 + 4.
-+                 * ip1 is ip0 + step - 1. If ip1 is >= ip0 + 4, we can't safely
-+                 * write this position.
-+                 */
-+                hashTable[hash1] = (U32)(ip1 - base);
-+            }
-+
-             goto _offset;
-         }
- 
-@@ -254,9 +337,24 @@ ZSTD_compressBlock_fast_noDict_generic(
-      * However, it seems to be a meaningful performance hit to try to search
-      * them. So let's not. */
- 
-+    /* When the repcodes are outside of the prefix, we set them to zero before the loop.
-+     * When the offsets are still zero, we need to restore them after the block to have a correct
-+     * repcode history. If only one offset was invalid, it is easy. The tricky case is when both
-+     * offsets were invalid. We need to figure out which offset to refill with.
-+     *     - If both offsets are zero they are in the same order.
-+     *     - If both offsets are non-zero, we won't restore the offsets from `offsetSaved[12]`.
-+     *     - If only one is zero, we need to decide which offset to restore.
-+     *         - If rep_offset1 is non-zero, then rep_offset2 must be offsetSaved1.
-+     *         - It is impossible for rep_offset2 to be non-zero.
-+     *
-+     * So if rep_offset1 started invalid (offsetSaved1 != 0) and became valid (rep_offset1 != 0), then
-+     * set rep[0] = rep_offset1 and rep[1] = offsetSaved1.
-+     */
-+    offsetSaved2 = ((offsetSaved1 != 0) && (rep_offset1 != 0)) ? offsetSaved1 : offsetSaved2;
-+
-     /* save reps for next block */
--    rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
--    rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;
-+    rep[0] = rep_offset1 ? rep_offset1 : offsetSaved1;
-+    rep[1] = rep_offset2 ? rep_offset2 : offsetSaved2;
- 
-     /* Return the last literals size */
-     return (size_t)(iend - anchor);
-@@ -267,7 +365,7 @@ ZSTD_compressBlock_fast_noDict_generic(
-     match0 = base + idx;
-     rep_offset2 = rep_offset1;
-     rep_offset1 = (U32)(ip0-match0);
--    offcode = STORE_OFFSET(rep_offset1);
-+    offcode = OFFSET_TO_OFFBASE(rep_offset1);
-     mLength = 4;
- 
-     /* Count the backwards match length. */
-@@ -287,11 +385,6 @@ ZSTD_compressBlock_fast_noDict_generic(
-     ip0 += mLength;
-     anchor = ip0;
- 
--    /* write next hash table entry */
--    if (ip1 < ip0) {
--        hashTable[hash1] = (U32)(ip1 - base);
--    }
--
-     /* Fill table and check for immediate repcode. */
-     if (ip0 <= ilimit) {
-         /* Fill Table */
-@@ -306,7 +399,7 @@ ZSTD_compressBlock_fast_noDict_generic(
-                 { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
-                 hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
-                 ip0 += rLength;
--                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength);
-+                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
-                 anchor = ip0;
-                 continue;   /* faster when present (confirmed on gcc-8) ... (?) */
-     }   }   }
-@@ -369,6 +462,7 @@ size_t ZSTD_compressBlock_fast(
- }
- 
- FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
- size_t ZSTD_compressBlock_fast_dictMatchState_generic(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
-@@ -380,14 +474,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
-     U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
-     const BYTE* const base = ms->window.base;
-     const BYTE* const istart = (const BYTE*)src;
--    const BYTE* ip = istart;
-+    const BYTE* ip0 = istart;
-+    const BYTE* ip1 = ip0 + stepSize; /* we assert below that stepSize >= 1 */
-     const BYTE* anchor = istart;
-     const U32   prefixStartIndex = ms->window.dictLimit;
-     const BYTE* const prefixStart = base + prefixStartIndex;
-     const BYTE* const iend = istart + srcSize;
-     const BYTE* const ilimit = iend - HASH_READ_SIZE;
-     U32 offset_1=rep[0], offset_2=rep[1];
--    U32 offsetSaved = 0;
- 
-     const ZSTD_matchState_t* const dms = ms->dictMatchState;
-     const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
-@@ -397,13 +491,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
-     const BYTE* const dictStart    = dictBase + dictStartIndex;
-     const BYTE* const dictEnd      = dms->window.nextSrc;
-     const U32 dictIndexDelta       = prefixStartIndex - (U32)(dictEnd - dictBase);
--    const U32 dictAndPrefixLength  = (U32)(ip - prefixStart + dictEnd - dictStart);
--    const U32 dictHLog             = dictCParams->hashLog;
-+    const U32 dictAndPrefixLength  = (U32)(istart - prefixStart + dictEnd - dictStart);
-+    const U32 dictHBits            = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
- 
-     /* if a dictionary is still attached, it necessarily means that
-      * it is within window size. So we just check it. */
-     const U32 maxDistance = 1U << cParams->windowLog;
--    const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
-+    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
-     assert(endIndex - prefixStartIndex <= maxDistance);
-     (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */
- 
-@@ -413,106 +507,155 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
-      * when translating a dict index into a local index */
-     assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
- 
-+    if (ms->prefetchCDictTables) {
-+        size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
-+        PREFETCH_AREA(dictHashTable, hashTableBytes);
-+    }
-+
-     /* init */
-     DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
--    ip += (dictAndPrefixLength == 0);
-+    ip0 += (dictAndPrefixLength == 0);
-     /* dictMatchState repCode checks don't currently handle repCode == 0
-      * disabling. */
-     assert(offset_1 <= dictAndPrefixLength);
-     assert(offset_2 <= dictAndPrefixLength);
- 
--    /* Main Search Loop */
--    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
-+    /* Outer search loop */
-+    assert(stepSize >= 1);
-+    while (ip1 <= ilimit) {   /* repcode check at (ip0 + 1) is safe because ip0 < ip1 */
-         size_t mLength;
--        size_t const h = ZSTD_hashPtr(ip, hlog, mls);
--        U32 const curr = (U32)(ip-base);
--        U32 const matchIndex = hashTable[h];
--        const BYTE* match = base + matchIndex;
--        const U32 repIndex = curr + 1 - offset_1;
--        const BYTE* repMatch = (repIndex < prefixStartIndex) ?
--                               dictBase + (repIndex - dictIndexDelta) :
--                               base + repIndex;
--        hashTable[h] = curr;   /* update hash table */
--
--        if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
--          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
--            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
--            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
--            ip++;
--            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
--        } else if ( (matchIndex <= prefixStartIndex) ) {
--            size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
--            U32 const dictMatchIndex = dictHashTable[dictHash];
--            const BYTE* dictMatch = dictBase + dictMatchIndex;
--            if (dictMatchIndex <= dictStartIndex ||
--                MEM_read32(dictMatch) != MEM_read32(ip)) {
--                assert(stepSize >= 1);
--                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
--                continue;
--            } else {
--                /* found a dict match */
--                U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
--                mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
--                while (((ip>anchor) & (dictMatch>dictStart))
--                     && (ip[-1] == dictMatch[-1])) {
--                    ip--; dictMatch--; mLength++;
-+        size_t hash0 = ZSTD_hashPtr(ip0, hlog, mls);
-+
-+        size_t const dictHashAndTag0 = ZSTD_hashPtr(ip0, dictHBits, mls);
-+        U32 dictMatchIndexAndTag = dictHashTable[dictHashAndTag0 >> ZSTD_SHORT_CACHE_TAG_BITS];
-+        int dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag0);
-+
-+        U32 matchIndex = hashTable[hash0];
-+        U32 curr = (U32)(ip0 - base);
-+        size_t step = stepSize;
-+        const size_t kStepIncr = 1 << kSearchStrength;
-+        const BYTE* nextStep = ip0 + kStepIncr;
-+
-+        /* Inner search loop */
-+        while (1) {
-+            const BYTE* match = base + matchIndex;
-+            const U32 repIndex = curr + 1 - offset_1;
-+            const BYTE* repMatch = (repIndex < prefixStartIndex) ?
-+                                   dictBase + (repIndex - dictIndexDelta) :
-+                                   base + repIndex;
-+            const size_t hash1 = ZSTD_hashPtr(ip1, hlog, mls);
-+            size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls);
-+            hashTable[hash0] = curr;   /* update hash table */
-+
-+            if (((U32) ((prefixStartIndex - 1) - repIndex) >=
-+                 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
-+                && (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) {
-+                const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
-+                mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4;
-+                ip0++;
-+                ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
-+                break;
-+            }
-+
-+            if (dictTagsMatch) {
-+                /* Found a possible dict match */
-+                const U32 dictMatchIndex = dictMatchIndexAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
-+                const BYTE* dictMatch = dictBase + dictMatchIndex;
-+                if (dictMatchIndex > dictStartIndex &&
-+                    MEM_read32(dictMatch) == MEM_read32(ip0)) {
-+                    /* To replicate extDict parse behavior, we only use dict matches when the normal matchIndex is invalid */
-+                    if (matchIndex <= prefixStartIndex) {
-+                        U32 const offset = (U32) (curr - dictMatchIndex - dictIndexDelta);
-+                        mLength = ZSTD_count_2segments(ip0 + 4, dictMatch + 4, iend, dictEnd, prefixStart) + 4;
-+                        while (((ip0 > anchor) & (dictMatch > dictStart))
-+                            && (ip0[-1] == dictMatch[-1])) {
-+                            ip0--;
-+                            dictMatch--;
-+                            mLength++;
-+                        } /* catch up */
-+                        offset_2 = offset_1;
-+                        offset_1 = offset;
-+                        ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
-+                        break;
-+                    }
-+                }
-+            }
-+
-+            if (matchIndex > prefixStartIndex && MEM_read32(match) == MEM_read32(ip0)) {
-+                /* found a regular match */
-+                U32 const offset = (U32) (ip0 - match);
-+                mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4;
-+                while (((ip0 > anchor) & (match > prefixStart))
-+                       && (ip0[-1] == match[-1])) {
-+                    ip0--;
-+                    match--;
-+                    mLength++;
-                 } /* catch up */
-                 offset_2 = offset_1;
-                 offset_1 = offset;
--                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
-+                ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
-+                break;
-             }
--        } else if (MEM_read32(match) != MEM_read32(ip)) {
--            /* it's not a match, and we're not going to check the dictionary */
--            assert(stepSize >= 1);
--            ip += ((ip-anchor) >> kSearchStrength) + stepSize;
--            continue;
--        } else {
--            /* found a regular match */
--            U32 const offset = (U32)(ip-match);
--            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
--            while (((ip>anchor) & (match>prefixStart))
--                 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
--            offset_2 = offset_1;
--            offset_1 = offset;
--            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
--        }
-+
-+            /* Prepare for next iteration */
-+            dictMatchIndexAndTag = dictHashTable[dictHashAndTag1 >> ZSTD_SHORT_CACHE_TAG_BITS];
-+            dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag1);
-+            matchIndex = hashTable[hash1];
-+
-+            if (ip1 >= nextStep) {
-+                step++;
-+                nextStep += kStepIncr;
-+            }
-+            ip0 = ip1;
-+            ip1 = ip1 + step;
-+            if (ip1 > ilimit) goto _cleanup;
-+
-+            curr = (U32)(ip0 - base);
-+            hash0 = hash1;
-+        }   /* end inner search loop */
- 
-         /* match found */
--        ip += mLength;
--        anchor = ip;
-+        assert(mLength);
-+        ip0 += mLength;
-+        anchor = ip0;
- 
--        if (ip <= ilimit) {
-+        if (ip0 <= ilimit) {
-             /* Fill Table */
-             assert(base+curr+2 > istart);  /* check base overflow */
-             hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;  /* here because curr+2 could be > iend-8 */
--            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
-+            hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
- 
-             /* check immediate repcode */
--            while (ip <= ilimit) {
--                U32 const current2 = (U32)(ip-base);
-+            while (ip0 <= ilimit) {
-+                U32 const current2 = (U32)(ip0-base);
-                 U32 const repIndex2 = current2 - offset_2;
-                 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
-                         dictBase - dictIndexDelta + repIndex2 :
-                         base + repIndex2;
-                 if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
--                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
-+                   && (MEM_read32(repMatch2) == MEM_read32(ip0))) {
-                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
--                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
-+                    size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
-                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
--                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
--                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
--                    ip += repLength2;
--                    anchor = ip;
-+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
-+                    hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = current2;
-+                    ip0 += repLength2;
-+                    anchor = ip0;
-                     continue;
-                 }
-                 break;
-             }
-         }
-+
-+        /* Prepare for next iteration */
-+        assert(ip0 == anchor);
-+        ip1 = ip0 + stepSize;
-     }
- 
-+_cleanup:
-     /* save reps for next block */
--    rep[0] = offset_1 ? offset_1 : offsetSaved;
--    rep[1] = offset_2 ? offset_2 : offsetSaved;
-+    rep[0] = offset_1;
-+    rep[1] = offset_2;
- 
-     /* Return the last literals size */
-     return (size_t)(iend - anchor);
-@@ -545,7 +688,9 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
- }
- 
- 
--static size_t ZSTD_compressBlock_fast_extDict_generic(
-+static
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+size_t ZSTD_compressBlock_fast_extDict_generic(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
- {
-@@ -553,11 +698,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
-     U32* const hashTable = ms->hashTable;
-     U32 const hlog = cParams->hashLog;
-     /* support stepSize of 0 */
--    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
-+    size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
-     const BYTE* const base = ms->window.base;
-     const BYTE* const dictBase = ms->window.dictBase;
-     const BYTE* const istart = (const BYTE*)src;
--    const BYTE* ip = istart;
-     const BYTE* anchor = istart;
-     const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
-     const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
-@@ -570,6 +714,28 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
-     const BYTE* const iend = istart + srcSize;
-     const BYTE* const ilimit = iend - 8;
-     U32 offset_1=rep[0], offset_2=rep[1];
-+    U32 offsetSaved1 = 0, offsetSaved2 = 0;
-+
-+    const BYTE* ip0 = istart;
-+    const BYTE* ip1;
-+    const BYTE* ip2;
-+    const BYTE* ip3;
-+    U32 current0;
-+
-+
-+    size_t hash0; /* hash for ip0 */
-+    size_t hash1; /* hash for ip1 */
-+    U32 idx; /* match idx for ip0 */
-+    const BYTE* idxBase; /* base pointer for idx */
-+
-+    U32 offcode;
-+    const BYTE* match0;
-+    size_t mLength;
-+    const BYTE* matchEnd = 0; /* initialize to avoid warning, assert != 0 later */
-+
-+    size_t step;
-+    const BYTE* nextStep;
-+    const size_t kStepIncr = (1 << (kSearchStrength - 1));
- 
-     (void)hasStep; /* not currently specialized on whether it's accelerated */
- 
-@@ -579,75 +745,202 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
-     if (prefixStartIndex == dictStartIndex)
-         return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
- 
--    /* Search Loop */
--    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
--        const size_t h = ZSTD_hashPtr(ip, hlog, mls);
--        const U32    matchIndex = hashTable[h];
--        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
--        const BYTE*  match = matchBase + matchIndex;
--        const U32    curr = (U32)(ip-base);
--        const U32    repIndex = curr + 1 - offset_1;
--        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
--        const BYTE* const repMatch = repBase + repIndex;
--        hashTable[h] = curr;   /* update hash table */
--        DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
--
--        if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
--             & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
--           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
--            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
--            size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
--            ip++;
--            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength);
--            ip += rLength;
--            anchor = ip;
--        } else {
--            if ( (matchIndex < dictStartIndex) ||
--                 (MEM_read32(match) != MEM_read32(ip)) ) {
--                assert(stepSize >= 1);
--                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
--                continue;
-+    {   U32 const curr = (U32)(ip0 - base);
-+        U32 const maxRep = curr - dictStartIndex;
-+        if (offset_2 >= maxRep) offsetSaved2 = offset_2, offset_2 = 0;
-+        if (offset_1 >= maxRep) offsetSaved1 = offset_1, offset_1 = 0;
-+    }
-+
-+    /* start each op */
-+_start: /* Requires: ip0 */
-+
-+    step = stepSize;
-+    nextStep = ip0 + kStepIncr;
-+
-+    /* calculate positions, ip0 - anchor == 0, so we skip step calc */
-+    ip1 = ip0 + 1;
-+    ip2 = ip0 + step;
-+    ip3 = ip2 + 1;
-+
-+    if (ip3 >= ilimit) {
-+        goto _cleanup;
-+    }
-+
-+    hash0 = ZSTD_hashPtr(ip0, hlog, mls);
-+    hash1 = ZSTD_hashPtr(ip1, hlog, mls);
-+
-+    idx = hashTable[hash0];
-+    idxBase = idx < prefixStartIndex ? dictBase : base;
-+
-+    do {
-+        {   /* load repcode match for ip[2] */
-+            U32 const current2 = (U32)(ip2 - base);
-+            U32 const repIndex = current2 - offset_1;
-+            const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
-+            U32 rval;
-+            if ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */
-+                 & (offset_1 > 0) ) {
-+                rval = MEM_read32(repBase + repIndex);
-+            } else {
-+                rval = MEM_read32(ip2) ^ 1; /* guaranteed to not match. */
-             }
--            {   const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
--                const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
--                U32 const offset = curr - matchIndex;
--                size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
--                while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
--                offset_2 = offset_1; offset_1 = offset;  /* update offset history */
--                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
--                ip += mLength;
--                anchor = ip;
-+
-+            /* write back hash table entry */
-+            current0 = (U32)(ip0 - base);
-+            hashTable[hash0] = current0;
-+
-+            /* check repcode at ip[2] */
-+            if (MEM_read32(ip2) == rval) {
-+                ip0 = ip2;
-+                match0 = repBase + repIndex;
-+                matchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
-+                assert((match0 != prefixStart) & (match0 != dictStart));
-+                mLength = ip0[-1] == match0[-1];
-+                ip0 -= mLength;
-+                match0 -= mLength;
-+                offcode = REPCODE1_TO_OFFBASE;
-+                mLength += 4;
-+                goto _match;
-         }   }
- 
--        if (ip <= ilimit) {
--            /* Fill Table */
--            hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
--            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
--            /* check immediate repcode */
--            while (ip <= ilimit) {
--                U32 const current2 = (U32)(ip-base);
--                U32 const repIndex2 = current2 - offset_2;
--                const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
--                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex))  /* intentional overflow */
--                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
--                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
--                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
--                    { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
--                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
--                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
--                    ip += repLength2;
--                    anchor = ip;
--                    continue;
--                }
--                break;
--    }   }   }
-+        {   /* load match for ip[0] */
-+            U32 const mval = idx >= dictStartIndex ?
-+                    MEM_read32(idxBase + idx) :
-+                    MEM_read32(ip0) ^ 1; /* guaranteed not to match */
-+
-+            /* check match at ip[0] */
-+            if (MEM_read32(ip0) == mval) {
-+                /* found a match! */
-+                goto _offset;
-+        }   }
-+
-+        /* lookup ip[1] */
-+        idx = hashTable[hash1];
-+        idxBase = idx < prefixStartIndex ? dictBase : base;
-+
-+        /* hash ip[2] */
-+        hash0 = hash1;
-+        hash1 = ZSTD_hashPtr(ip2, hlog, mls);
-+
-+        /* advance to next positions */
-+        ip0 = ip1;
-+        ip1 = ip2;
-+        ip2 = ip3;
-+
-+        /* write back hash table entry */
-+        current0 = (U32)(ip0 - base);
-+        hashTable[hash0] = current0;
-+
-+        {   /* load match for ip[0] */
-+            U32 const mval = idx >= dictStartIndex ?
-+                    MEM_read32(idxBase + idx) :
-+                    MEM_read32(ip0) ^ 1; /* guaranteed not to match */
-+
-+            /* check match at ip[0] */
-+            if (MEM_read32(ip0) == mval) {
-+                /* found a match! */
-+                goto _offset;
-+        }   }
-+
-+        /* lookup ip[1] */
-+        idx = hashTable[hash1];
-+        idxBase = idx < prefixStartIndex ? dictBase : base;
-+
-+        /* hash ip[2] */
-+        hash0 = hash1;
-+        hash1 = ZSTD_hashPtr(ip2, hlog, mls);
-+
-+        /* advance to next positions */
-+        ip0 = ip1;
-+        ip1 = ip2;
-+        ip2 = ip0 + step;
-+        ip3 = ip1 + step;
-+
-+        /* calculate step */
-+        if (ip2 >= nextStep) {
-+            step++;
-+            PREFETCH_L1(ip1 + 64);
-+            PREFETCH_L1(ip1 + 128);
-+            nextStep += kStepIncr;
-+        }
-+    } while (ip3 < ilimit);
-+
-+_cleanup:
-+    /* Note that there are probably still a couple positions we could search.
-+     * However, it seems to be a meaningful performance hit to try to search
-+     * them. So let's not. */
-+
-+    /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
-+     * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
-+    offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
- 
-     /* save reps for next block */
--    rep[0] = offset_1;
--    rep[1] = offset_2;
-+    rep[0] = offset_1 ? offset_1 : offsetSaved1;
-+    rep[1] = offset_2 ? offset_2 : offsetSaved2;
- 
-     /* Return the last literals size */
-     return (size_t)(iend - anchor);
-+
-+_offset: /* Requires: ip0, idx, idxBase */
-+
-+    /* Compute the offset code. */
-+    {   U32 const offset = current0 - idx;
-+        const BYTE* const lowMatchPtr = idx < prefixStartIndex ? dictStart : prefixStart;
-+        matchEnd = idx < prefixStartIndex ? dictEnd : iend;
-+        match0 = idxBase + idx;
-+        offset_2 = offset_1;
-+        offset_1 = offset;
-+        offcode = OFFSET_TO_OFFBASE(offset);
-+        mLength = 4;
-+
-+        /* Count the backwards match length. */
-+        while (((ip0>anchor) & (match0>lowMatchPtr)) && (ip0[-1] == match0[-1])) {
-+            ip0--;
-+            match0--;
-+            mLength++;
-+    }   }
-+
-+_match: /* Requires: ip0, match0, offcode, matchEnd */
-+
-+    /* Count the forward length. */
-+    assert(matchEnd != 0);
-+    mLength += ZSTD_count_2segments(ip0 + mLength, match0 + mLength, iend, matchEnd, prefixStart);
-+
-+    ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
-+
-+    ip0 += mLength;
-+    anchor = ip0;
-+
-+    /* write next hash table entry */
-+    if (ip1 < ip0) {
-+        hashTable[hash1] = (U32)(ip1 - base);
-+    }
-+
-+    /* Fill table and check for immediate repcode. */
-+    if (ip0 <= ilimit) {
-+        /* Fill Table */
-+        assert(base+current0+2 > istart);  /* check base overflow */
-+        hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
-+        hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
-+
-+        while (ip0 <= ilimit) {
-+            U32 const repIndex2 = (U32)(ip0-base) - offset_2;
-+            const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
-+            if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 > 0))  /* intentional underflow */
-+                 && (MEM_read32(repMatch2) == MEM_read32(ip0)) ) {
-+                const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
-+                size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
-+                { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
-+                ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
-+                hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
-+                ip0 += repLength2;
-+                anchor = ip0;
-+                continue;
-+            }
-+            break;
-+    }   }
-+
-+    goto _start;
- }
- 
- ZSTD_GEN_FAST_FN(extDict, 4, 0)
-@@ -660,6 +953,7 @@ size_t ZSTD_compressBlock_fast_extDict(
-         void const* src, size_t srcSize)
- {
-     U32 const mls = ms->cParams.minMatch;
-+    assert(ms->dictMatchState == NULL);
-     switch(mls)
-     {
-     default: /* includes case 3 */
-diff --git a/lib/zstd/compress/zstd_fast.h b/lib/zstd/compress/zstd_fast.h
-index fddc2f532d21..e64d9e1b2d39 100644
---- a/lib/zstd/compress/zstd_fast.h
-+++ b/lib/zstd/compress/zstd_fast.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -16,7 +17,8 @@
- #include "zstd_compress_internal.h"
- 
- void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
--                        void const* end, ZSTD_dictTableLoadMethod_e dtlm);
-+                        void const* end, ZSTD_dictTableLoadMethod_e dtlm,
-+                        ZSTD_tableFillPurpose_e tfp);
- size_t ZSTD_compressBlock_fast(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
-diff --git a/lib/zstd/compress/zstd_lazy.c b/lib/zstd/compress/zstd_lazy.c
-index 0298a01a7504..3e88d8a1a136 100644
---- a/lib/zstd/compress/zstd_lazy.c
-+++ b/lib/zstd/compress/zstd_lazy.c
-@@ -1,5 +1,6 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -10,14 +11,23 @@
- 
- #include "zstd_compress_internal.h"
- #include "zstd_lazy.h"
-+#include "../common/bits.h" /* ZSTD_countTrailingZeros64 */
-+
-+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
-+
-+#define kLazySkippingStep 8
- 
- 
- /*-*************************************
- *  Binary Tree search
- ***************************************/
- 
--static void
--ZSTD_updateDUBT(ZSTD_matchState_t* ms,
-+static
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+void ZSTD_updateDUBT(ZSTD_matchState_t* ms,
-                 const BYTE* ip, const BYTE* iend,
-                 U32 mls)
- {
-@@ -60,8 +70,9 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
-  *  sort one already inserted but unsorted position
-  *  assumption : curr >= btlow == (curr - btmask)
-  *  doesn't fail */
--static void
--ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
-+static
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+void ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
-                  U32 curr, const BYTE* inputEnd,
-                  U32 nbCompares, U32 btLow,
-                  const ZSTD_dictMode_e dictMode)
-@@ -149,8 +160,9 @@ ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
- }
- 
- 
--static size_t
--ZSTD_DUBT_findBetterDictMatch (
-+static
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+size_t ZSTD_DUBT_findBetterDictMatch (
-         const ZSTD_matchState_t* ms,
-         const BYTE* const ip, const BYTE* const iend,
-         size_t* offsetPtr,
-@@ -197,8 +209,8 @@ ZSTD_DUBT_findBetterDictMatch (
-             U32 matchIndex = dictMatchIndex + dictIndexDelta;
-             if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
-                 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
--                    curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, STORE_OFFSET(curr - matchIndex), dictMatchIndex, matchIndex);
--                bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex);
-+                    curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, OFFSET_TO_OFFBASE(curr - matchIndex), dictMatchIndex, matchIndex);
-+                bestLength = matchLength, *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
-             }
-             if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
-                 break;   /* drop, to guarantee consistency (miss a little bit of compression) */
-@@ -218,7 +230,7 @@ ZSTD_DUBT_findBetterDictMatch (
-     }
- 
-     if (bestLength >= MINMATCH) {
--        U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex;
-+        U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offsetPtr); (void)mIndex;
-         DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
-                     curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
-     }
-@@ -227,10 +239,11 @@ ZSTD_DUBT_findBetterDictMatch (
- }
- 
- 
--static size_t
--ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
-+static
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+size_t ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
-                         const BYTE* const ip, const BYTE* const iend,
--                        size_t* offsetPtr,
-+                        size_t* offBasePtr,
-                         U32 const mls,
-                         const ZSTD_dictMode_e dictMode)
- {
-@@ -327,8 +340,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
-             if (matchLength > bestLength) {
-                 if (matchLength > matchEndIdx - matchIndex)
-                     matchEndIdx = matchIndex + (U32)matchLength;
--                if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
--                    bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex);
-+                if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)*offBasePtr)) )
-+                    bestLength = matchLength, *offBasePtr = OFFSET_TO_OFFBASE(curr - matchIndex);
-                 if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
-                     if (dictMode == ZSTD_dictMatchState) {
-                         nbCompares = 0; /* in addition to avoiding checking any
-@@ -361,16 +374,16 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
-         if (dictMode == ZSTD_dictMatchState && nbCompares) {
-             bestLength = ZSTD_DUBT_findBetterDictMatch(
-                     ms, ip, iend,
--                    offsetPtr, bestLength, nbCompares,
-+                    offBasePtr, bestLength, nbCompares,
-                     mls, dictMode);
-         }
- 
-         assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
-         ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
-         if (bestLength >= MINMATCH) {
--            U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex;
-+            U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offBasePtr); (void)mIndex;
-             DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
--                        curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
-+                        curr, (U32)bestLength, (U32)*offBasePtr, mIndex);
-         }
-         return bestLength;
-     }
-@@ -378,17 +391,18 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
- 
- 
- /* ZSTD_BtFindBestMatch() : Tree updater, providing best match */
--FORCE_INLINE_TEMPLATE size_t
--ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
-+FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+size_t ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
-                 const BYTE* const ip, const BYTE* const iLimit,
--                      size_t* offsetPtr,
-+                      size_t* offBasePtr,
-                 const U32 mls /* template */,
-                 const ZSTD_dictMode_e dictMode)
- {
-     DEBUGLOG(7, "ZSTD_BtFindBestMatch");
-     if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
-     ZSTD_updateDUBT(ms, ip, iLimit, mls);
--    return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
-+    return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offBasePtr, mls, dictMode);
- }
- 
- /* *********************************
-@@ -561,7 +575,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
-         /* save best solution */
-         if (currentMl > ml) {
-             ml = currentMl;
--            *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta));
-+            *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta));
-             if (ip+currentMl == iLimit) {
-                 /* best possible, avoids read overflow on next attempt */
-                 return ml;
-@@ -598,7 +612,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
-             /* save best solution */
-             if (currentMl > ml) {
-                 ml = currentMl;
--                *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta));
-+                *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta));
-                 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
-             }
-         }
-@@ -614,10 +628,12 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
- 
- /* Update chains up to ip (excluded)
-    Assumption : always within prefix (i.e. not within extDict) */
--FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
-+FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+U32 ZSTD_insertAndFindFirstIndex_internal(
-                         ZSTD_matchState_t* ms,
-                         const ZSTD_compressionParameters* const cParams,
--                        const BYTE* ip, U32 const mls)
-+                        const BYTE* ip, U32 const mls, U32 const lazySkipping)
- {
-     U32* const hashTable  = ms->hashTable;
-     const U32 hashLog = cParams->hashLog;
-@@ -632,6 +648,9 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
-         NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
-         hashTable[h] = idx;
-         idx++;
-+        /* Stop inserting every position when in the lazy skipping mode. */
-+        if (lazySkipping)
-+            break;
-     }
- 
-     ms->nextToUpdate = target;
-@@ -640,11 +659,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
- 
- U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
-     const ZSTD_compressionParameters* const cParams = &ms->cParams;
--    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
-+    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0);
- }
- 
- /* inlining is important to hardwire a hot branch (template emulation) */
- FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
- size_t ZSTD_HcFindBestMatch(
-                         ZSTD_matchState_t* ms,
-                         const BYTE* const ip, const BYTE* const iLimit,
-@@ -684,14 +704,15 @@ size_t ZSTD_HcFindBestMatch(
-     }
- 
-     /* HC4 match finder */
--    matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
-+    matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping);
- 
-     for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
-         size_t currentMl=0;
-         if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
-             const BYTE* const match = base + matchIndex;
-             assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
--            if (match[ml] == ip[ml])   /* potentially better */
-+            /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
-+            if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3))   /* potentially better */
-                 currentMl = ZSTD_count(ip, match, iLimit);
-         } else {
-             const BYTE* const match = dictBase + matchIndex;
-@@ -703,7 +724,7 @@ size_t ZSTD_HcFindBestMatch(
-         /* save best solution */
-         if (currentMl > ml) {
-             ml = currentMl;
--            *offsetPtr = STORE_OFFSET(curr - matchIndex);
-+            *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
-             if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
-         }
- 
-@@ -739,7 +760,7 @@ size_t ZSTD_HcFindBestMatch(
-             if (currentMl > ml) {
-                 ml = currentMl;
-                 assert(curr > matchIndex + dmsIndexDelta);
--                *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta));
-+                *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
-                 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
-             }
- 
-@@ -756,8 +777,6 @@ size_t ZSTD_HcFindBestMatch(
- * (SIMD) Row-based matchfinder
- ***********************************/
- /* Constants for row-based hash */
--#define ZSTD_ROW_HASH_TAG_OFFSET 16     /* byte offset of hashes in the match state's tagTable from the beginning of a row */
--#define ZSTD_ROW_HASH_TAG_BITS 8        /* nb bits to use for the tag */
- #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
- #define ZSTD_ROW_HASH_MAX_ENTRIES 64    /* absolute maximum number of entries per row, for all configurations */
- 
-@@ -769,64 +788,19 @@ typedef U64 ZSTD_VecMask;   /* Clarifies when we are interacting with a U64 repr
-  * Starting from the LSB, returns the idx of the next non-zero bit.
-  * Basically counting the nb of trailing zeroes.
-  */
--static U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
--    assert(val != 0);
--#   if (defined(__GNUC__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))
--    if (sizeof(size_t) == 4) {
--        U32 mostSignificantWord = (U32)(val >> 32);
--        U32 leastSignificantWord = (U32)val;
--        if (leastSignificantWord == 0) {
--            return 32 + (U32)__builtin_ctz(mostSignificantWord);
--        } else {
--            return (U32)__builtin_ctz(leastSignificantWord);
--        }
--    } else {
--        return (U32)__builtin_ctzll(val);
--    }
--#   else
--    /* Software ctz version: http://aggregate.org/MAGIC/#Trailing%20Zero%20Count
--     * and: https://stackoverflow.com/questions/2709430/count-number-of-bits-in-a-64-bit-long-big-integer
--     */
--    val = ~val & (val - 1ULL); /* Lowest set bit mask */
--    val = val - ((val >> 1) & 0x5555555555555555);
--    val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL);
--    return (U32)((((val + (val >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56);
--#   endif
--}
--
--/* ZSTD_rotateRight_*():
-- * Rotates a bitfield to the right by "count" bits.
-- * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
-- */
--FORCE_INLINE_TEMPLATE
--U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
--    assert(count < 64);
--    count &= 0x3F; /* for fickle pattern recognition */
--    return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
--}
--
--FORCE_INLINE_TEMPLATE
--U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
--    assert(count < 32);
--    count &= 0x1F; /* for fickle pattern recognition */
--    return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
--}
--
--FORCE_INLINE_TEMPLATE
--U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
--    assert(count < 16);
--    count &= 0x0F; /* for fickle pattern recognition */
--    return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
-+MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
-+    return ZSTD_countTrailingZeros64(val);
- }
- 
- /* ZSTD_row_nextIndex():
-  * Returns the next index to insert at within a tagTable row, and updates the "head"
-- * value to reflect the update. Essentially cycles backwards from [0, {entries per row})
-+ * value to reflect the update. Essentially cycles backwards from [1, {entries per row})
-  */
- FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
--  U32 const next = (*tagRow - 1) & rowMask;
--  *tagRow = (BYTE)next;
--  return next;
-+    U32 next = (*tagRow-1) & rowMask;
-+    next += (next == 0) ? rowMask : 0; /* skip first position */
-+    *tagRow = (BYTE)next;
-+    return next;
- }
- 
- /* ZSTD_isAligned():
-@@ -840,7 +814,7 @@ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
- /* ZSTD_row_prefetch():
-  * Performs prefetching for the hashTable and tagTable at a given row.
-  */
--FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
-+FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) {
-     PREFETCH_L1(hashTable + relRow);
-     if (rowLog >= 5) {
-         PREFETCH_L1(hashTable + relRow + 16);
-@@ -859,18 +833,20 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* ta
-  * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries,
-  * but not beyond iLimit.
-  */
--FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
-+FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
-                                    U32 const rowLog, U32 const mls,
-                                    U32 idx, const BYTE* const iLimit)
- {
-     U32 const* const hashTable = ms->hashTable;
--    U16 const* const tagTable = ms->tagTable;
-+    BYTE const* const tagTable = ms->tagTable;
-     U32 const hashLog = ms->rowHashLog;
-     U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
-     U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
- 
-     for (; idx < lim; ++idx) {
--        U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
-+        U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
-         U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
-         ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
-         ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
-@@ -885,12 +861,15 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
-  * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at
-  * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
-  */
--FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
--                                                  U16 const* tagTable, BYTE const* base,
-+FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
-+                                                  BYTE const* tagTable, BYTE const* base,
-                                                   U32 idx, U32 const hashLog,
--                                                  U32 const rowLog, U32 const mls)
-+                                                  U32 const rowLog, U32 const mls,
-+                                                  U64 const hashSalt)
- {
--    U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
-+    U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
-     U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
-     ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
-     {   U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
-@@ -902,28 +881,29 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTab
- /* ZSTD_row_update_internalImpl():
-  * Updates the hash table with positions starting from updateStartIdx until updateEndIdx.
-  */
--FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
--                                                        U32 updateStartIdx, U32 const updateEndIdx,
--                                                        U32 const mls, U32 const rowLog,
--                                                        U32 const rowMask, U32 const useCache)
-+FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
-+                                  U32 updateStartIdx, U32 const updateEndIdx,
-+                                  U32 const mls, U32 const rowLog,
-+                                  U32 const rowMask, U32 const useCache)
- {
-     U32* const hashTable = ms->hashTable;
--    U16* const tagTable = ms->tagTable;
-+    BYTE* const tagTable = ms->tagTable;
-     U32 const hashLog = ms->rowHashLog;
-     const BYTE* const base = ms->window.base;
- 
-     DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx);
-     for (; updateStartIdx < updateEndIdx; ++updateStartIdx) {
--        U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls)
--                                  : (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
-+        U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt)
-+                                  : (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
-         U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
-         U32* const row = hashTable + relRow;
--        BYTE* tagRow = (BYTE*)(tagTable + relRow);  /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
--                                                       Explicit cast allows us to get exact desired position within each row */
-+        BYTE* tagRow = tagTable + relRow;
-         U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
- 
--        assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
--        ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
-+        assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt));
-+        tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK;
-         row[pos] = updateStartIdx;
-     }
- }
-@@ -932,9 +912,11 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
-  * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate.
-  * Skips sections of long matches as is necessary.
-  */
--FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
--                                                    U32 const mls, U32 const rowLog,
--                                                    U32 const rowMask, U32 const useCache)
-+FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
-+                              U32 const mls, U32 const rowLog,
-+                              U32 const rowMask, U32 const useCache)
- {
-     U32 idx = ms->nextToUpdate;
-     const BYTE* const base = ms->window.base;
-@@ -971,7 +953,35 @@ void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) {
-     const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);
- 
-     DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog);
--    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */);
-+    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* don't use cache */);
-+}
-+
-+/* Returns the mask width of bits group of which will be set to 1. Given not all
-+ * architectures have easy movemask instruction, this helps to iterate over
-+ * groups of bits easier and faster.
-+ */
-+FORCE_INLINE_TEMPLATE U32
-+ZSTD_row_matchMaskGroupWidth(const U32 rowEntries)
-+{
-+    assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
-+    assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
-+    (void)rowEntries;
-+#if defined(ZSTD_ARCH_ARM_NEON)
-+    /* NEON path only works for little endian */
-+    if (!MEM_isLittleEndian()) {
-+        return 1;
-+    }
-+    if (rowEntries == 16) {
-+        return 4;
-+    }
-+    if (rowEntries == 32) {
-+        return 2;
-+    }
-+    if (rowEntries == 64) {
-+        return 1;
-+    }
-+#endif
-+    return 1;
- }
- 
- #if defined(ZSTD_ARCH_X86_SSE2)
-@@ -994,71 +1004,82 @@ ZSTD_row_getSSEMask(int nbChunks, const BYTE* const src, const BYTE tag, const U
- }
- #endif
- 
--/* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches
-- * the hash at the nth position in a row of the tagTable.
-- * Each row is a circular buffer beginning at the value of "head". So we must rotate the "matches" bitfield
-- * to match up with the actual layout of the entries within the hashTable */
-+#if defined(ZSTD_ARCH_ARM_NEON)
-+FORCE_INLINE_TEMPLATE ZSTD_VecMask
-+ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag, const U32 headGrouped)
-+{
-+    assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
-+    if (rowEntries == 16) {
-+        /* vshrn_n_u16 shifts by 4 every u16 and narrows to 8 lower bits.
-+         * After that groups of 4 bits represent the equalMask. We lower
-+         * all bits except the highest in these groups by doing AND with
-+         * 0x88 = 0b10001000.
-+         */
-+        const uint8x16_t chunk = vld1q_u8(src);
-+        const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
-+        const uint8x8_t res = vshrn_n_u16(equalMask, 4);
-+        const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0);
-+        return ZSTD_rotateRight_U64(matches, headGrouped) & 0x8888888888888888ull;
-+    } else if (rowEntries == 32) {
-+        /* Same idea as with rowEntries == 16 but doing AND with
-+         * 0x55 = 0b01010101.
-+         */
-+        const uint16x8x2_t chunk = vld2q_u16((const uint16_t*)(const void*)src);
-+        const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
-+        const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
-+        const uint8x16_t dup = vdupq_n_u8(tag);
-+        const uint8x8_t t0 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk0, dup)), 6);
-+        const uint8x8_t t1 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk1, dup)), 6);
-+        const uint8x8_t res = vsli_n_u8(t0, t1, 4);
-+        const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0) ;
-+        return ZSTD_rotateRight_U64(matches, headGrouped) & 0x5555555555555555ull;
-+    } else { /* rowEntries == 64 */
-+        const uint8x16x4_t chunk = vld4q_u8(src);
-+        const uint8x16_t dup = vdupq_n_u8(tag);
-+        const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
-+        const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
-+        const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
-+        const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
-+
-+        const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
-+        const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
-+        const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
-+        const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
-+        const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
-+        const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
-+        return ZSTD_rotateRight_U64(matches, headGrouped);
-+    }
-+}
-+#endif
-+
-+/* Returns a ZSTD_VecMask (U64) that has the nth group (determined by
-+ * ZSTD_row_matchMaskGroupWidth) of bits set to 1 if the newly-computed "tag"
-+ * matches the hash at the nth position in a row of the tagTable.
-+ * Each row is a circular buffer beginning at the value of "headGrouped". So we
-+ * must rotate the "matches" bitfield to match up with the actual layout of the
-+ * entries within the hashTable */
- FORCE_INLINE_TEMPLATE ZSTD_VecMask
--ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries)
-+ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
- {
--    const BYTE* const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET;
-+    const BYTE* const src = tagRow;
-     assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
-     assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
-+    assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);
- 
- #if defined(ZSTD_ARCH_X86_SSE2)
- 
--    return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, head);
-+    return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, headGrouped);
- 
- #else /* SW or NEON-LE */
- 
- # if defined(ZSTD_ARCH_ARM_NEON)
-   /* This NEON path only works for little endian - otherwise use SWAR below */
-     if (MEM_isLittleEndian()) {
--        if (rowEntries == 16) {
--            const uint8x16_t chunk = vld1q_u8(src);
--            const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
--            const uint16x8_t t0 = vshlq_n_u16(equalMask, 7);
--            const uint32x4_t t1 = vreinterpretq_u32_u16(vsriq_n_u16(t0, t0, 14));
--            const uint64x2_t t2 = vreinterpretq_u64_u32(vshrq_n_u32(t1, 14));
--            const uint8x16_t t3 = vreinterpretq_u8_u64(vsraq_n_u64(t2, t2, 28));
--            const U16 hi = (U16)vgetq_lane_u8(t3, 8);
--            const U16 lo = (U16)vgetq_lane_u8(t3, 0);
--            return ZSTD_rotateRight_U16((hi << 8) | lo, head);
--        } else if (rowEntries == 32) {
--            const uint16x8x2_t chunk = vld2q_u16((const U16*)(const void*)src);
--            const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
--            const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
--            const uint8x16_t equalMask0 = vceqq_u8(chunk0, vdupq_n_u8(tag));
--            const uint8x16_t equalMask1 = vceqq_u8(chunk1, vdupq_n_u8(tag));
--            const int8x8_t pack0 = vqmovn_s16(vreinterpretq_s16_u8(equalMask0));
--            const int8x8_t pack1 = vqmovn_s16(vreinterpretq_s16_u8(equalMask1));
--            const uint8x8_t t0 = vreinterpret_u8_s8(pack0);
--            const uint8x8_t t1 = vreinterpret_u8_s8(pack1);
--            const uint8x8_t t2 = vsri_n_u8(t1, t0, 2);
--            const uint8x8x2_t t3 = vuzp_u8(t2, t0);
--            const uint8x8_t t4 = vsri_n_u8(t3.val[1], t3.val[0], 4);
--            const U32 matches = vget_lane_u32(vreinterpret_u32_u8(t4), 0);
--            return ZSTD_rotateRight_U32(matches, head);
--        } else { /* rowEntries == 64 */
--            const uint8x16x4_t chunk = vld4q_u8(src);
--            const uint8x16_t dup = vdupq_n_u8(tag);
--            const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
--            const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
--            const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
--            const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
--
--            const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
--            const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
--            const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
--            const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
--            const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
--            const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
--            return ZSTD_rotateRight_U64(matches, head);
--        }
-+        return ZSTD_row_getNEONMask(rowEntries, src, tag, headGrouped);
-     }
- # endif /* ZSTD_ARCH_ARM_NEON */
-     /* SWAR */
--    {   const size_t chunkSize = sizeof(size_t);
-+    {   const int chunkSize = sizeof(size_t);
-         const size_t shiftAmount = ((chunkSize * 8) - chunkSize);
-         const size_t xFF = ~((size_t)0);
-         const size_t x01 = xFF / 0xFF;
-@@ -1091,11 +1112,11 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head,
-         }
-         matches = ~matches;
-         if (rowEntries == 16) {
--            return ZSTD_rotateRight_U16((U16)matches, head);
-+            return ZSTD_rotateRight_U16((U16)matches, headGrouped);
-         } else if (rowEntries == 32) {
--            return ZSTD_rotateRight_U32((U32)matches, head);
-+            return ZSTD_rotateRight_U32((U32)matches, headGrouped);
-         } else {
--            return ZSTD_rotateRight_U64((U64)matches, head);
-+            return ZSTD_rotateRight_U64((U64)matches, headGrouped);
-         }
-     }
- #endif
-@@ -1103,20 +1124,21 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head,
- 
- /* The high-level approach of the SIMD row based match finder is as follows:
-  * - Figure out where to insert the new entry:
-- *      - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag"
-- *      - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines
-+ *      - Generate a hash for current input posistion and split it into a one byte of tag and `rowHashLog` bits of index.
-+ *           - The hash is salted by a value that changes on every contex reset, so when the same table is used
-+ *             we will avoid collisions that would otherwise slow us down by intorducing phantom matches.
-+ *      - The hashTable is effectively split into groups or "rows" of 15 or 31 entries of U32, and the index determines
-  *        which row to insert into.
-- *      - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can
-- *        be considered as a circular buffer with a "head" index that resides in the tagTable.
-- *      - Also insert the "tag" into the equivalent row and position in the tagTable.
-- *          - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry.
-- *                  The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively,
-- *                  for alignment/performance reasons, leaving some bytes unused.
-- * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and
-+ *      - Determine the correct position within the row to insert the entry into. Each row of 15 or 31 can
-+ *        be considered as a circular buffer with a "head" index that resides in the tagTable (overall 16 or 32 bytes
-+ *        per row).
-+ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the position and
-  *   generate a bitfield that we can cycle through to check the collisions in the hash table.
-  * - Pick the longest match.
-+ * - Insert the tag into the equivalent row and position in the tagTable.
-  */
- FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
- size_t ZSTD_RowFindBestMatch(
-                         ZSTD_matchState_t* ms,
-                         const BYTE* const ip, const BYTE* const iLimit,
-@@ -1125,7 +1147,7 @@ size_t ZSTD_RowFindBestMatch(
-                         const U32 rowLog)
- {
-     U32* const hashTable = ms->hashTable;
--    U16* const tagTable = ms->tagTable;
-+    BYTE* const tagTable = ms->tagTable;
-     U32* const hashCache = ms->hashCache;
-     const U32 hashLog = ms->rowHashLog;
-     const ZSTD_compressionParameters* const cParams = &ms->cParams;
-@@ -1143,8 +1165,11 @@ size_t ZSTD_RowFindBestMatch(
-     const U32 rowEntries = (1U << rowLog);
-     const U32 rowMask = rowEntries - 1;
-     const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
-+    const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries);
-+    const U64 hashSalt = ms->hashSalt;
-     U32 nbAttempts = 1U << cappedSearchLog;
-     size_t ml=4-1;
-+    U32 hash;
- 
-     /* DMS/DDS variables that may be referenced laster */
-     const ZSTD_matchState_t* const dms = ms->dictMatchState;
-@@ -1168,7 +1193,7 @@ size_t ZSTD_RowFindBestMatch(
-     if (dictMode == ZSTD_dictMatchState) {
-         /* Prefetch DMS rows */
-         U32* const dmsHashTable = dms->hashTable;
--        U16* const dmsTagTable = dms->tagTable;
-+        BYTE* const dmsTagTable = dms->tagTable;
-         U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
-         U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
-         dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
-@@ -1178,23 +1203,34 @@ size_t ZSTD_RowFindBestMatch(
-     }
- 
-     /* Update the hashTable and tagTable up to (but not including) ip */
--    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
-+    if (!ms->lazySkipping) {
-+        ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
-+        hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt);
-+    } else {
-+        /* Stop inserting every position when in the lazy skipping mode.
-+         * The hash cache is also not kept up to date in this mode.
-+         */
-+        hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
-+        ms->nextToUpdate = curr;
-+    }
-+    ms->hashSaltEntropy += hash; /* collect salt entropy */
-+
-     {   /* Get the hash for ip, compute the appropriate row */
--        U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
-         U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
-         U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
-         U32* const row = hashTable + relRow;
-         BYTE* tagRow = (BYTE*)(tagTable + relRow);
--        U32 const head = *tagRow & rowMask;
-+        U32 const headGrouped = (*tagRow & rowMask) * groupWidth;
-         U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
-         size_t numMatches = 0;
-         size_t currMatch = 0;
--        ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries);
-+        ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries);
- 
-         /* Cycle through the matches and prefetch */
--        for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
--            U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
-+        for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
-+            U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
-             U32 const matchIndex = row[matchPos];
-+            if(matchPos == 0) continue;
-             assert(numMatches < rowEntries);
-             if (matchIndex < lowLimit)
-                 break;
-@@ -1204,13 +1240,14 @@ size_t ZSTD_RowFindBestMatch(
-                 PREFETCH_L1(dictBase + matchIndex);
-             }
-             matchBuffer[numMatches++] = matchIndex;
-+            --nbAttempts;
-         }
- 
-         /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
-            in ZSTD_row_update_internal() at the next search. */
-         {
-             U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
--            tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
-+            tagRow[pos] = (BYTE)tag;
-             row[pos] = ms->nextToUpdate++;
-         }
- 
-@@ -1224,7 +1261,8 @@ size_t ZSTD_RowFindBestMatch(
-             if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
-                 const BYTE* const match = base + matchIndex;
-                 assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
--                if (match[ml] == ip[ml])   /* potentially better */
-+                /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
-+                if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3))   /* potentially better */
-                     currentMl = ZSTD_count(ip, match, iLimit);
-             } else {
-                 const BYTE* const match = dictBase + matchIndex;
-@@ -1236,7 +1274,7 @@ size_t ZSTD_RowFindBestMatch(
-             /* Save best solution */
-             if (currentMl > ml) {
-                 ml = currentMl;
--                *offsetPtr = STORE_OFFSET(curr - matchIndex);
-+                *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
-                 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
-             }
-         }
-@@ -1254,19 +1292,21 @@ size_t ZSTD_RowFindBestMatch(
-         const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
-         const U32 dmsIndexDelta        = dictLimit - dmsSize;
- 
--        {   U32 const head = *dmsTagRow & rowMask;
-+        {   U32 const headGrouped = (*dmsTagRow & rowMask) * groupWidth;
-             U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
-             size_t numMatches = 0;
-             size_t currMatch = 0;
--            ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries);
-+            ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries);
- 
--            for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
--                U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
-+            for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
-+                U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
-                 U32 const matchIndex = dmsRow[matchPos];
-+                if(matchPos == 0) continue;
-                 if (matchIndex < dmsLowestIndex)
-                     break;
-                 PREFETCH_L1(dmsBase + matchIndex);
-                 matchBuffer[numMatches++] = matchIndex;
-+                --nbAttempts;
-             }
- 
-             /* Return the longest match */
-@@ -1285,7 +1325,7 @@ size_t ZSTD_RowFindBestMatch(
-                 if (currentMl > ml) {
-                     ml = currentMl;
-                     assert(curr > matchIndex + dmsIndexDelta);
--                    *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta));
-+                    *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
-                     if (ip+currentMl == iLimit) break;
-                 }
-             }
-@@ -1472,8 +1512,9 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax(
- *  Common parser - lazy strategy
- *********************************/
- 
--FORCE_INLINE_TEMPLATE size_t
--ZSTD_compressBlock_lazy_generic(
-+FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+size_t ZSTD_compressBlock_lazy_generic(
-                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
-                         U32 rep[ZSTD_REP_NUM],
-                         const void* src, size_t srcSize,
-@@ -1491,7 +1532,8 @@ ZSTD_compressBlock_lazy_generic(
-     const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
-     const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
- 
--    U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
-+    U32 offset_1 = rep[0], offset_2 = rep[1];
-+    U32 offsetSaved1 = 0, offsetSaved2 = 0;
- 
-     const int isDMS = dictMode == ZSTD_dictMatchState;
-     const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
-@@ -1512,8 +1554,8 @@ ZSTD_compressBlock_lazy_generic(
-         U32 const curr = (U32)(ip - base);
-         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
-         U32 const maxRep = curr - windowLow;
--        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
--        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
-+        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
-+        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
-     }
-     if (isDxS) {
-         /* dictMatchState repCode checks don't currently handle repCode == 0
-@@ -1522,10 +1564,11 @@ ZSTD_compressBlock_lazy_generic(
-         assert(offset_2 <= dictAndPrefixLength);
-     }
- 
-+    /* Reset the lazy skipping state */
-+    ms->lazySkipping = 0;
-+
-     if (searchMethod == search_rowHash) {
--        ZSTD_row_fillHashCache(ms, base, rowLog,
--                            MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
--                            ms->nextToUpdate, ilimit);
-+        ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
-     }
- 
-     /* Match Loop */
-@@ -1537,7 +1580,7 @@ ZSTD_compressBlock_lazy_generic(
- #endif
-     while (ip < ilimit) {
-         size_t matchLength=0;
--        size_t offcode=STORE_REPCODE_1;
-+        size_t offBase = REPCODE1_TO_OFFBASE;
-         const BYTE* start=ip+1;
-         DEBUGLOG(7, "search baseline (depth 0)");
- 
-@@ -1562,14 +1605,23 @@ ZSTD_compressBlock_lazy_generic(
-         }
- 
-         /* first search (depth 0) */
--        {   size_t offsetFound = 999999999;
--            size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offsetFound, mls, rowLog, searchMethod, dictMode);
-+        {   size_t offbaseFound = 999999999;
-+            size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offbaseFound, mls, rowLog, searchMethod, dictMode);
-             if (ml2 > matchLength)
--                matchLength = ml2, start = ip, offcode=offsetFound;
-+                matchLength = ml2, start = ip, offBase = offbaseFound;
-         }
- 
-         if (matchLength < 4) {
--            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
-+            size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */;
-+            ip += step;
-+            /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
-+             * In this mode we stop inserting every position into our tables, and only insert
-+             * positions that we search, which is one in step positions.
-+             * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
-+             * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
-+             * triggered once we've gone 2KB without finding any matches.
-+             */
-+            ms->lazySkipping = step > kLazySkippingStep;
-             continue;
-         }
- 
-@@ -1579,12 +1631,12 @@ ZSTD_compressBlock_lazy_generic(
-             DEBUGLOG(7, "search depth 1");
-             ip ++;
-             if ( (dictMode == ZSTD_noDict)
--              && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
-+              && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
-                 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
-                 int const gain2 = (int)(mlRep * 3);
--                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
-+                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
-                 if ((mlRep >= 4) && (gain2 > gain1))
--                    matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
-+                    matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
-             }
-             if (isDxS) {
-                 const U32 repIndex = (U32)(ip - base) - offset_1;
-@@ -1596,17 +1648,17 @@ ZSTD_compressBlock_lazy_generic(
-                     const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
-                     size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
-                     int const gain2 = (int)(mlRep * 3);
--                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
-+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
-                     if ((mlRep >= 4) && (gain2 > gain1))
--                        matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
-+                        matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
-                 }
-             }
--            {   size_t offset2=999999999;
--                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, dictMode);
--                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
--                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4);
-+            {   size_t ofbCandidate=999999999;
-+                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
-+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
-+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
-                 if ((ml2 >= 4) && (gain2 > gain1)) {
--                    matchLength = ml2, offcode = offset2, start = ip;
-+                    matchLength = ml2, offBase = ofbCandidate, start = ip;
-                     continue;   /* search a better one */
-             }   }
- 
-@@ -1615,12 +1667,12 @@ ZSTD_compressBlock_lazy_generic(
-                 DEBUGLOG(7, "search depth 2");
-                 ip ++;
-                 if ( (dictMode == ZSTD_noDict)
--                  && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
-+                  && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
-                     size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
-                     int const gain2 = (int)(mlRep * 4);
--                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
-+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
-                     if ((mlRep >= 4) && (gain2 > gain1))
--                        matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
-+                        matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
-                 }
-                 if (isDxS) {
-                     const U32 repIndex = (U32)(ip - base) - offset_1;
-@@ -1632,17 +1684,17 @@ ZSTD_compressBlock_lazy_generic(
-                         const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
-                         size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
-                         int const gain2 = (int)(mlRep * 4);
--                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
-+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
-                         if ((mlRep >= 4) && (gain2 > gain1))
--                            matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
-+                            matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
-                     }
-                 }
--                {   size_t offset2=999999999;
--                    size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, dictMode);
--                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
--                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7);
-+                {   size_t ofbCandidate=999999999;
-+                    size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
-+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
-+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
-                     if ((ml2 >= 4) && (gain2 > gain1)) {
--                        matchLength = ml2, offcode = offset2, start = ip;
-+                        matchLength = ml2, offBase = ofbCandidate, start = ip;
-                         continue;
-             }   }   }
-             break;  /* nothing found : store previous solution */
-@@ -1653,26 +1705,33 @@ ZSTD_compressBlock_lazy_generic(
-          * notably if `value` is unsigned, resulting in a large positive `-value`.
-          */
-         /* catch up */
--        if (STORED_IS_OFFSET(offcode)) {
-+        if (OFFBASE_IS_OFFSET(offBase)) {
-             if (dictMode == ZSTD_noDict) {
--                while ( ((start > anchor) & (start - STORED_OFFSET(offcode) > prefixLowest))
--                     && (start[-1] == (start-STORED_OFFSET(offcode))[-1]) )  /* only search for offset within prefix */
-+                while ( ((start > anchor) & (start - OFFBASE_TO_OFFSET(offBase) > prefixLowest))
-+                     && (start[-1] == (start-OFFBASE_TO_OFFSET(offBase))[-1]) )  /* only search for offset within prefix */
-                     { start--; matchLength++; }
-             }
-             if (isDxS) {
--                U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode));
-+                U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
-                 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
-                 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
-                 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
-             }
--            offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode);
-+            offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase);
-         }
-         /* store sequence */
- _storeSequence:
-         {   size_t const litLength = (size_t)(start - anchor);
--            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength);
-+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
-             anchor = ip = start + matchLength;
-         }
-+        if (ms->lazySkipping) {
-+            /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
-+            if (searchMethod == search_rowHash) {
-+                ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
-+            }
-+            ms->lazySkipping = 0;
-+        }
- 
-         /* check immediate repcode */
-         if (isDxS) {
-@@ -1686,8 +1745,8 @@ ZSTD_compressBlock_lazy_generic(
-                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
-                     const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
-                     matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
--                    offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode;   /* swap offset_2 <=> offset_1 */
--                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
-+                    offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase;   /* swap offset_2 <=> offset_1 */
-+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
-                     ip += matchLength;
-                     anchor = ip;
-                     continue;
-@@ -1701,166 +1760,181 @@ ZSTD_compressBlock_lazy_generic(
-                  && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
-                 /* store sequence */
-                 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
--                offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap repcodes */
--                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
-+                offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap repcodes */
-+                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
-                 ip += matchLength;
-                 anchor = ip;
-                 continue;   /* faster when present ... (?) */
-     }   }   }
- 
--    /* Save reps for next block */
--    rep[0] = offset_1 ? offset_1 : savedOffset;
--    rep[1] = offset_2 ? offset_2 : savedOffset;
-+    /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
-+     * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
-+    offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
-+
-+    /* save reps for next block */
-+    rep[0] = offset_1 ? offset_1 : offsetSaved1;
-+    rep[1] = offset_2 ? offset_2 : offsetSaved2;
- 
-     /* Return the last literals size */
-     return (size_t)(iend - anchor);
- }
-+#endif /* build exclusions */
- 
- 
--size_t ZSTD_compressBlock_btlazy2(
-+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
-+size_t ZSTD_compressBlock_greedy(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
- }
- 
--size_t ZSTD_compressBlock_lazy2(
-+size_t ZSTD_compressBlock_greedy_dictMatchState(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
- }
- 
--size_t ZSTD_compressBlock_lazy(
-+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
- }
- 
--size_t ZSTD_compressBlock_greedy(
-+size_t ZSTD_compressBlock_greedy_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
- }
- 
--size_t ZSTD_compressBlock_btlazy2_dictMatchState(
-+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
- }
- 
--size_t ZSTD_compressBlock_lazy2_dictMatchState(
-+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
- }
-+#endif
- 
--size_t ZSTD_compressBlock_lazy_dictMatchState(
-+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
-+size_t ZSTD_compressBlock_lazy(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
- }
- 
--size_t ZSTD_compressBlock_greedy_dictMatchState(
-+size_t ZSTD_compressBlock_lazy_dictMatchState(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
- }
- 
--
--size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
-+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
- }
- 
--size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
-+size_t ZSTD_compressBlock_lazy_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
- }
- 
--size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
-+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
- }
- 
--/* Row-based matchfinder */
--size_t ZSTD_compressBlock_lazy2_row(
-+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
- }
-+#endif
- 
--size_t ZSTD_compressBlock_lazy_row(
-+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
-+size_t ZSTD_compressBlock_lazy2(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
- }
- 
--size_t ZSTD_compressBlock_greedy_row(
-+size_t ZSTD_compressBlock_lazy2_dictMatchState(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
- }
- 
--size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
-+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
- }
- 
--size_t ZSTD_compressBlock_lazy_dictMatchState_row(
-+size_t ZSTD_compressBlock_lazy2_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
- }
- 
--size_t ZSTD_compressBlock_greedy_dictMatchState_row(
-+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
- }
- 
--
- size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
-     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
- }
-+#endif
- 
--size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
-+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
-+size_t ZSTD_compressBlock_btlazy2(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
- }
- 
--size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
-+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
-+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
- }
-+#endif
- 
-+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
- FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
- size_t ZSTD_compressBlock_lazy_extDict_generic(
-                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
-                         U32 rep[ZSTD_REP_NUM],
-@@ -1886,12 +1960,13 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
- 
-     DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
- 
-+    /* Reset the lazy skipping state */
-+    ms->lazySkipping = 0;
-+
-     /* init */
-     ip += (ip == prefixStart);
-     if (searchMethod == search_rowHash) {
--        ZSTD_row_fillHashCache(ms, base, rowLog,
--                               MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
--                               ms->nextToUpdate, ilimit);
-+        ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
-     }
- 
-     /* Match Loop */
-@@ -1903,7 +1978,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
- #endif
-     while (ip < ilimit) {
-         size_t matchLength=0;
--        size_t offcode=STORE_REPCODE_1;
-+        size_t offBase = REPCODE1_TO_OFFBASE;
-         const BYTE* start=ip+1;
-         U32 curr = (U32)(ip-base);
- 
-@@ -1922,14 +1997,23 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
-         }   }
- 
-         /* first search (depth 0) */
--        {   size_t offsetFound = 999999999;
--            size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offsetFound, mls, rowLog, searchMethod, ZSTD_extDict);
-+        {   size_t ofbCandidate = 999999999;
-+            size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
-             if (ml2 > matchLength)
--                matchLength = ml2, start = ip, offcode=offsetFound;
-+                matchLength = ml2, start = ip, offBase = ofbCandidate;
-         }
- 
-         if (matchLength < 4) {
--            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
-+            size_t const step = ((size_t)(ip-anchor) >> kSearchStrength);
-+            ip += step + 1;   /* jump faster over incompressible sections */
-+            /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
-+             * In this mode we stop inserting every position into our tables, and only insert
-+             * positions that we search, which is one in step positions.
-+             * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
-+             * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
-+             * triggered once we've gone 2KB without finding any matches.
-+             */
-+            ms->lazySkipping = step > kLazySkippingStep;
-             continue;
-         }
- 
-@@ -1939,7 +2023,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
-             ip ++;
-             curr++;
-             /* check repCode */
--            if (offcode) {
-+            if (offBase) {
-                 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
-                 const U32 repIndex = (U32)(curr - offset_1);
-                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
-@@ -1951,18 +2035,18 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
-                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                     size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
-                     int const gain2 = (int)(repLength * 3);
--                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
-+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
-                     if ((repLength >= 4) && (gain2 > gain1))
--                        matchLength = repLength, offcode = STORE_REPCODE_1, start = ip;
-+                        matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip;
-             }   }
- 
-             /* search match, depth 1 */
--            {   size_t offset2=999999999;
--                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, ZSTD_extDict);
--                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
--                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4);
-+            {   size_t ofbCandidate = 999999999;
-+                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
-+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
-+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
-                 if ((ml2 >= 4) && (gain2 > gain1)) {
--                    matchLength = ml2, offcode = offset2, start = ip;
-+                    matchLength = ml2, offBase = ofbCandidate, start = ip;
-                     continue;   /* search a better one */
-             }   }
- 
-@@ -1971,7 +2055,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
-                 ip ++;
-                 curr++;
-                 /* check repCode */
--                if (offcode) {
-+                if (offBase) {
-                     const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
-                     const U32 repIndex = (U32)(curr - offset_1);
-                     const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
-@@ -1983,38 +2067,45 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
-                         const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                         size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
-                         int const gain2 = (int)(repLength * 4);
--                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
-+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
-                         if ((repLength >= 4) && (gain2 > gain1))
--                            matchLength = repLength, offcode = STORE_REPCODE_1, start = ip;
-+                            matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip;
-                 }   }
- 
-                 /* search match, depth 2 */
--                {   size_t offset2=999999999;
--                    size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, ZSTD_extDict);
--                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
--                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7);
-+                {   size_t ofbCandidate = 999999999;
-+                    size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
-+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
-+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
-                     if ((ml2 >= 4) && (gain2 > gain1)) {
--                        matchLength = ml2, offcode = offset2, start = ip;
-+                        matchLength = ml2, offBase = ofbCandidate, start = ip;
-                         continue;
-             }   }   }
-             break;  /* nothing found : store previous solution */
-         }
- 
-         /* catch up */
--        if (STORED_IS_OFFSET(offcode)) {
--            U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode));
-+        if (OFFBASE_IS_OFFSET(offBase)) {
-+            U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
-             const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
-             const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
-             while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
--            offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode);
-+            offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase);
-         }
- 
-         /* store sequence */
- _storeSequence:
-         {   size_t const litLength = (size_t)(start - anchor);
--            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength);
-+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
-             anchor = ip = start + matchLength;
-         }
-+        if (ms->lazySkipping) {
-+            /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
-+            if (searchMethod == search_rowHash) {
-+                ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
-+            }
-+            ms->lazySkipping = 0;
-+        }
- 
-         /* check immediate repcode */
-         while (ip <= ilimit) {
-@@ -2029,8 +2120,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
-                 /* repcode detected we should take it */
-                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
--                offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode;   /* swap offset history */
--                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
-+                offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase;   /* swap offset history */
-+                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
-                 ip += matchLength;
-                 anchor = ip;
-                 continue;   /* faster when present ... (?) */
-@@ -2045,8 +2136,9 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
-     /* Return the last literals size */
-     return (size_t)(iend - anchor);
- }
-+#endif /* build exclusions */
- 
--
-+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
- size_t ZSTD_compressBlock_greedy_extDict(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
-@@ -2054,49 +2146,55 @@ size_t ZSTD_compressBlock_greedy_extDict(
-     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
- }
- 
--size_t ZSTD_compressBlock_lazy_extDict(
-+size_t ZSTD_compressBlock_greedy_extDict_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
--
- {
--    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
-+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
- }
-+#endif
- 
--size_t ZSTD_compressBlock_lazy2_extDict(
-+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
-+size_t ZSTD_compressBlock_lazy_extDict(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- 
- {
--    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
-+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
- }
- 
--size_t ZSTD_compressBlock_btlazy2_extDict(
-+size_t ZSTD_compressBlock_lazy_extDict_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- 
- {
--    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
-+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
- }
-+#endif
- 
--size_t ZSTD_compressBlock_greedy_extDict_row(
-+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
-+size_t ZSTD_compressBlock_lazy2_extDict(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
-+
- {
--    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
-+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
- }
- 
--size_t ZSTD_compressBlock_lazy_extDict_row(
-+size_t ZSTD_compressBlock_lazy2_extDict_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
--
- {
--    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
-+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
- }
-+#endif
- 
--size_t ZSTD_compressBlock_lazy2_extDict_row(
-+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
-+size_t ZSTD_compressBlock_btlazy2_extDict(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize)
- 
- {
--    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
-+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
- }
-+#endif
-diff --git a/lib/zstd/compress/zstd_lazy.h b/lib/zstd/compress/zstd_lazy.h
-index e5bdf4df8dde..22c9201f4e63 100644
---- a/lib/zstd/compress/zstd_lazy.h
-+++ b/lib/zstd/compress/zstd_lazy.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -22,98 +23,175 @@
-  */
- #define ZSTD_LAZY_DDSS_BUCKET_LOG 2
- 
-+#define ZSTD_ROW_HASH_TAG_BITS 8        /* nb bits to use for the tag */
-+
-+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
- U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
- void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
- 
- void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
- 
- void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue);  /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
-+#endif
- 
--size_t ZSTD_compressBlock_btlazy2(
-+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
-+size_t ZSTD_compressBlock_greedy(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_lazy2(
-+size_t ZSTD_compressBlock_greedy_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_lazy(
-+size_t ZSTD_compressBlock_greedy_dictMatchState(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_greedy(
-+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_lazy2_row(
-+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_lazy_row(
-+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_greedy_row(
-+size_t ZSTD_compressBlock_greedy_extDict(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--
--size_t ZSTD_compressBlock_btlazy2_dictMatchState(
-+size_t ZSTD_compressBlock_greedy_extDict_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_lazy2_dictMatchState(
-+
-+#define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy
-+#define ZSTD_COMPRESSBLOCK_GREEDY_ROW ZSTD_compressBlock_greedy_row
-+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE ZSTD_compressBlock_greedy_dictMatchState
-+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW ZSTD_compressBlock_greedy_dictMatchState_row
-+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH ZSTD_compressBlock_greedy_dedicatedDictSearch
-+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_greedy_dedicatedDictSearch_row
-+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT ZSTD_compressBlock_greedy_extDict
-+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW ZSTD_compressBlock_greedy_extDict_row
-+#else
-+#define ZSTD_COMPRESSBLOCK_GREEDY NULL
-+#define ZSTD_COMPRESSBLOCK_GREEDY_ROW NULL
-+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE NULL
-+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW NULL
-+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH NULL
-+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW NULL
-+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT NULL
-+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW NULL
-+#endif
-+
-+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
-+size_t ZSTD_compressBlock_lazy(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_lazy_dictMatchState(
-+size_t ZSTD_compressBlock_lazy_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_greedy_dictMatchState(
-+size_t ZSTD_compressBlock_lazy_dictMatchState(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
-+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_lazy_dictMatchState_row(
-+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_greedy_dictMatchState_row(
-+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--
--size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
-+size_t ZSTD_compressBlock_lazy_extDict(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
-+size_t ZSTD_compressBlock_lazy_extDict_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
-+
-+#define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy
-+#define ZSTD_COMPRESSBLOCK_LAZY_ROW ZSTD_compressBlock_lazy_row
-+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE ZSTD_compressBlock_lazy_dictMatchState
-+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy_dictMatchState_row
-+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy_dedicatedDictSearch
-+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy_dedicatedDictSearch_row
-+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT ZSTD_compressBlock_lazy_extDict
-+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW ZSTD_compressBlock_lazy_extDict_row
-+#else
-+#define ZSTD_COMPRESSBLOCK_LAZY NULL
-+#define ZSTD_COMPRESSBLOCK_LAZY_ROW NULL
-+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE NULL
-+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW NULL
-+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH NULL
-+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW NULL
-+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT NULL
-+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW NULL
-+#endif
-+
-+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
-+size_t ZSTD_compressBlock_lazy2(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
-+size_t ZSTD_compressBlock_lazy2_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
-+size_t ZSTD_compressBlock_lazy2_dictMatchState(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
-+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--
--size_t ZSTD_compressBlock_greedy_extDict(
-+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_lazy_extDict(
-+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
- size_t ZSTD_compressBlock_lazy2_extDict(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_greedy_extDict_row(
-+size_t ZSTD_compressBlock_lazy2_extDict_row(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_lazy_extDict_row(
-+
-+#define ZSTD_COMPRESSBLOCK_LAZY2 ZSTD_compressBlock_lazy2
-+#define ZSTD_COMPRESSBLOCK_LAZY2_ROW ZSTD_compressBlock_lazy2_row
-+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE ZSTD_compressBlock_lazy2_dictMatchState
-+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy2_dictMatchState_row
-+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy2_dedicatedDictSearch
-+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy2_dedicatedDictSearch_row
-+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT ZSTD_compressBlock_lazy2_extDict
-+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW ZSTD_compressBlock_lazy2_extDict_row
-+#else
-+#define ZSTD_COMPRESSBLOCK_LAZY2 NULL
-+#define ZSTD_COMPRESSBLOCK_LAZY2_ROW NULL
-+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE NULL
-+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW NULL
-+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH NULL
-+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW NULL
-+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT NULL
-+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW NULL
-+#endif
-+
-+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
-+size_t ZSTD_compressBlock_btlazy2(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_lazy2_extDict_row(
-+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
- size_t ZSTD_compressBlock_btlazy2_extDict(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--        
-+
-+#define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2
-+#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE ZSTD_compressBlock_btlazy2_dictMatchState
-+#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT ZSTD_compressBlock_btlazy2_extDict
-+#else
-+#define ZSTD_COMPRESSBLOCK_BTLAZY2 NULL
-+#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE NULL
-+#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT NULL
-+#endif
-+
- 
- 
- #endif /* ZSTD_LAZY_H */
-diff --git a/lib/zstd/compress/zstd_ldm.c b/lib/zstd/compress/zstd_ldm.c
-index dd86fc83e7dd..07f3bc6437ce 100644
---- a/lib/zstd/compress/zstd_ldm.c
-+++ b/lib/zstd/compress/zstd_ldm.c
-@@ -1,5 +1,6 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -242,11 +243,15 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
-     switch(ms->cParams.strategy)
-     {
-     case ZSTD_fast:
--        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
-+        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
-         break;
- 
-     case ZSTD_dfast:
--        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
-+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
-+        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
-+#else
-+        assert(0); /* shouldn't be called: cparams should've been adjusted. */
-+#endif
-         break;
- 
-     case ZSTD_greedy:
-@@ -318,7 +323,9 @@ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
-     }
- }
- 
--static size_t ZSTD_ldm_generateSequences_internal(
-+static
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+size_t ZSTD_ldm_generateSequences_internal(
-         ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
-         ldmParams_t const* params, void const* src, size_t srcSize)
- {
-@@ -549,7 +556,7 @@ size_t ZSTD_ldm_generateSequences(
-          * the window through early invalidation.
-          * TODO: * Test the chunk size.
-          *       * Try invalidation after the sequence generation and test the
--         *         the offset against maxDist directly.
-+         *         offset against maxDist directly.
-          *
-          * NOTE: Because of dictionaries + sequence splitting we MUST make sure
-          * that any offset used is valid at the END of the sequence, since it may
-@@ -689,7 +696,6 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
-         /* maybeSplitSequence updates rawSeqStore->pos */
-         rawSeq const sequence = maybeSplitSequence(rawSeqStore,
-                                                    (U32)(iend - ip), minMatch);
--        int i;
-         /* End signal */
-         if (sequence.offset == 0)
-             break;
-@@ -702,6 +708,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
-         /* Run the block compressor */
-         DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
-         {
-+            int i;
-             size_t const newLitLength =
-                 blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
-             ip += sequence.litLength;
-@@ -711,7 +718,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
-             rep[0] = sequence.offset;
-             /* Store the sequence */
-             ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
--                          STORE_OFFSET(sequence.offset),
-+                          OFFSET_TO_OFFBASE(sequence.offset),
-                           sequence.matchLength);
-             ip += sequence.matchLength;
-         }
-diff --git a/lib/zstd/compress/zstd_ldm.h b/lib/zstd/compress/zstd_ldm.h
-index fbc6a5e88fd7..c540731abde7 100644
---- a/lib/zstd/compress/zstd_ldm.h
-+++ b/lib/zstd/compress/zstd_ldm.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-diff --git a/lib/zstd/compress/zstd_ldm_geartab.h b/lib/zstd/compress/zstd_ldm_geartab.h
-index 647f865be290..cfccfc46f6f7 100644
---- a/lib/zstd/compress/zstd_ldm_geartab.h
-+++ b/lib/zstd/compress/zstd_ldm_geartab.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-diff --git a/lib/zstd/compress/zstd_opt.c b/lib/zstd/compress/zstd_opt.c
-index fd82acfda62f..a87b66ac8d24 100644
---- a/lib/zstd/compress/zstd_opt.c
-+++ b/lib/zstd/compress/zstd_opt.c
-@@ -1,5 +1,6 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -12,11 +13,14 @@
- #include "hist.h"
- #include "zstd_opt.h"
- 
-+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
- 
- #define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
- #define ZSTD_MAX_PRICE     (1<<30)
- 
--#define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
-+#define ZSTD_PREDEF_THRESHOLD 8   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
- 
- 
- /*-*************************************
-@@ -26,27 +30,35 @@
- #if 0    /* approximation at bit level (for tests) */
- #  define BITCOST_ACCURACY 0
- #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
--#  define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
-+#  define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
- #elif 0  /* fractional bit accuracy (for tests) */
- #  define BITCOST_ACCURACY 8
- #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
--#  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
-+#  define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
- #else    /* opt==approx, ultra==accurate */
- #  define BITCOST_ACCURACY 8
- #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
--#  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
-+#  define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
- #endif
- 
-+/* ZSTD_bitWeight() :
-+ * provide estimated "cost" of a stat in full bits only */
- MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
- {
-     return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
- }
- 
-+/* ZSTD_fracWeight() :
-+ * provide fractional-bit "cost" of a stat,
-+ * using linear interpolation approximation */
- MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
- {
-     U32 const stat = rawStat + 1;
-     U32 const hb = ZSTD_highbit32(stat);
-     U32 const BWeight = hb * BITCOST_MULTIPLIER;
-+    /* Fweight was meant for "Fractional weight"
-+     * but it's effectively a value between 1 and 2
-+     * using fixed point arithmetic */
-     U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
-     U32 const weight = BWeight + FWeight;
-     assert(hb + BITCOST_ACCURACY < 31);
-@@ -57,7 +69,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
- /* debugging function,
-  * @return price in bytes as fractional value
-  * for debug messages only */
--MEM_STATIC double ZSTD_fCost(U32 price)
-+MEM_STATIC double ZSTD_fCost(int price)
- {
-     return (double)price / (BITCOST_MULTIPLIER*8);
- }
-@@ -88,20 +100,26 @@ static U32 sum_u32(const unsigned table[], size_t nbElts)
-     return total;
- }
- 
--static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
-+typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
-+
-+static U32
-+ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
- {
-     U32 s, sum=0;
--    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
-+    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
-+            (unsigned)lastEltIndex+1, (unsigned)shift );
-     assert(shift < 30);
-     for (s=0; s<lastEltIndex+1; s++) {
--        table[s] = 1 + (table[s] >> shift);
--        sum += table[s];
-+        unsigned const base = base1 ? 1 : (table[s]>0);
-+        unsigned const newStat = base + (table[s] >> shift);
-+        sum += newStat;
-+        table[s] = newStat;
-     }
-     return sum;
- }
- 
- /* ZSTD_scaleStats() :
-- * reduce all elements in table is sum too large
-+ * reduce all elt frequencies in table if sum too large
-  * return the resulting sum of elements */
- static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
- {
-@@ -110,7 +128,7 @@ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
-     DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
-     assert(logTarget < 30);
-     if (factor <= 1) return prevsum;
--    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
-+    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
- }
- 
- /* ZSTD_rescaleFreqs() :
-@@ -129,18 +147,22 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
-     DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
-     optPtr->priceType = zop_dynamic;
- 
--    if (optPtr->litLengthSum == 0) {  /* first block : init */
--        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
--            DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
-+    if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */
-+
-+        /* heuristic: use pre-defined stats for too small inputs */
-+        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
-+            DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
-             optPtr->priceType = zop_predef;
-         }
- 
-         assert(optPtr->symbolCosts != NULL);
-         if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
--            /* huffman table presumed generated by dictionary */
-+
-+            /* huffman stats covering the full value set : table presumed generated by dictionary */
-             optPtr->priceType = zop_dynamic;
- 
-             if (compressedLiterals) {
-+                /* generate literals statistics from huffman table */
-                 unsigned lit;
-                 assert(optPtr->litFreq != NULL);
-                 optPtr->litSum = 0;
-@@ -188,13 +210,14 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
-                     optPtr->offCodeSum += optPtr->offCodeFreq[of];
-             }   }
- 
--        } else {  /* not a dictionary */
-+        } else {  /* first block, no dictionary */
- 
-             assert(optPtr->litFreq != NULL);
-             if (compressedLiterals) {
-+                /* base initial cost of literals on direct frequency within src */
-                 unsigned lit = MaxLit;
-                 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
--                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
-+                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
-             }
- 
-             {   unsigned const baseLLfreqs[MaxLL+1] = {
-@@ -224,10 +247,9 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
-                 optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
-             }
- 
--
-         }
- 
--    } else {   /* new block : re-use previous statistics, scaled down */
-+    } else {   /* new block : scale down accumulated statistics */
- 
-         if (compressedLiterals)
-             optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
-@@ -246,6 +268,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
-                                 const optState_t* const optPtr,
-                                 int optLevel)
- {
-+    DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
-     if (litLength == 0) return 0;
- 
-     if (!ZSTD_compressedLiterals(optPtr))
-@@ -255,11 +278,14 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
-         return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */
- 
-     /* dynamic statistics */
--    {   U32 price = litLength * optPtr->litSumBasePrice;
-+    {   U32 price = optPtr->litSumBasePrice * litLength;
-+        U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
-         U32 u;
-+        assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
-         for (u=0; u < litLength; u++) {
--            assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice);   /* literal cost should never be negative */
--            price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
-+            U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
-+            if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
-+            price -= litPrice;
-         }
-         return price;
-     }
-@@ -272,10 +298,11 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
-     assert(litLength <= ZSTD_BLOCKSIZE_MAX);
-     if (optPtr->priceType == zop_predef)
-         return WEIGHT(litLength, optLevel);
--    /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
--     * because it isn't representable in the zstd format. So instead just
--     * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
--     * would be all literals.
-+
-+    /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
-+     * because it isn't representable in the zstd format.
-+     * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
-+     * In such a case, the block would be all literals.
-      */
-     if (litLength == ZSTD_BLOCKSIZE_MAX)
-         return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
-@@ -289,24 +316,25 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
- }
- 
- /* ZSTD_getMatchPrice() :
-- * Provides the cost of the match part (offset + matchLength) of a sequence
-+ * Provides the cost of the match part (offset + matchLength) of a sequence.
-  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
-- * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
-+ * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
-  * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
-  */
- FORCE_INLINE_TEMPLATE U32
--ZSTD_getMatchPrice(U32 const offcode,
-+ZSTD_getMatchPrice(U32 const offBase,
-                    U32 const matchLength,
-              const optState_t* const optPtr,
-                    int const optLevel)
- {
-     U32 price;
--    U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
-+    U32 const offCode = ZSTD_highbit32(offBase);
-     U32 const mlBase = matchLength - MINMATCH;
-     assert(matchLength >= MINMATCH);
- 
--    if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
--        return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
-+    if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
-+        return WEIGHT(mlBase, optLevel)
-+             + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
- 
-     /* dynamic statistics */
-     price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
-@@ -325,10 +353,10 @@ ZSTD_getMatchPrice(U32 const offcode,
- }
- 
- /* ZSTD_updateStats() :
-- * assumption : literals + litLengtn <= iend */
-+ * assumption : literals + litLength <= iend */
- static void ZSTD_updateStats(optState_t* const optPtr,
-                              U32 litLength, const BYTE* literals,
--                             U32 offsetCode, U32 matchLength)
-+                             U32 offBase, U32 matchLength)
- {
-     /* literals */
-     if (ZSTD_compressedLiterals(optPtr)) {
-@@ -344,8 +372,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
-         optPtr->litLengthSum++;
-     }
- 
--    /* offset code : expected to follow storeSeq() numeric representation */
--    {   U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
-+    /* offset code : follows storeSeq() numeric representation */
-+    {   U32 const offCode = ZSTD_highbit32(offBase);
-         assert(offCode <= MaxOff);
-         optPtr->offCodeFreq[offCode]++;
-         optPtr->offCodeSum++;
-@@ -379,9 +407,11 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
- 
- /* Update hashTable3 up to ip (excluded)
-    Assumption : always within prefix (i.e. not within extDict) */
--static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
--                                              U32* nextToUpdate3,
--                                              const BYTE* const ip)
-+static
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
-+                                       U32* nextToUpdate3,
-+                                       const BYTE* const ip)
- {
-     U32* const hashTable3 = ms->hashTable3;
-     U32 const hashLog3 = ms->hashLog3;
-@@ -408,7 +438,9 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
-  * @param ip assumed <= iend-8 .
-  * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
-  * @return : nb of positions added */
--static U32 ZSTD_insertBt1(
-+static
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+U32 ZSTD_insertBt1(
-                 const ZSTD_matchState_t* ms,
-                 const BYTE* const ip, const BYTE* const iend,
-                 U32 const target,
-@@ -527,6 +559,7 @@ static U32 ZSTD_insertBt1(
- }
- 
- FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
- void ZSTD_updateTree_internal(
-                 ZSTD_matchState_t* ms,
-                 const BYTE* const ip, const BYTE* const iend,
-@@ -535,7 +568,7 @@ void ZSTD_updateTree_internal(
-     const BYTE* const base = ms->window.base;
-     U32 const target = (U32)(ip - base);
-     U32 idx = ms->nextToUpdate;
--    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
-+    DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
-                 idx, target, dictMode);
- 
-     while(idx < target) {
-@@ -553,15 +586,18 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
- }
- 
- FORCE_INLINE_TEMPLATE
--U32 ZSTD_insertBtAndGetAllMatches (
--                    ZSTD_match_t* matches,   /* store result (found matches) in this table (presumed large enough) */
--                    ZSTD_matchState_t* ms,
--                    U32* nextToUpdate3,
--                    const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
--                    const U32 rep[ZSTD_REP_NUM],
--                    U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
--                    const U32 lengthToBeat,
--                    U32 const mls /* template */)
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+U32
-+ZSTD_insertBtAndGetAllMatches (
-+                ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
-+                ZSTD_matchState_t* ms,
-+                U32* nextToUpdate3,
-+                const BYTE* const ip, const BYTE* const iLimit,
-+                const ZSTD_dictMode_e dictMode,
-+                const U32 rep[ZSTD_REP_NUM],
-+                const U32 ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
-+                const U32 lengthToBeat,
-+                const U32 mls /* template */)
- {
-     const ZSTD_compressionParameters* const cParams = &ms->cParams;
-     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
-@@ -644,7 +680,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
-                 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
-                             repCode, ll0, repOffset, repLen);
-                 bestLength = repLen;
--                matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
-+                matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
-                 matches[mnum].len = (U32)repLen;
-                 mnum++;
-                 if ( (repLen > sufficient_len)
-@@ -673,7 +709,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
-                 bestLength = mlen;
-                 assert(curr > matchIndex3);
-                 assert(mnum==0);  /* no prior solution */
--                matches[0].off = STORE_OFFSET(curr - matchIndex3);
-+                matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
-                 matches[0].len = (U32)mlen;
-                 mnum = 1;
-                 if ( (mlen > sufficient_len) |
-@@ -706,13 +742,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
-         }
- 
-         if (matchLength > bestLength) {
--            DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
--                    (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
-+            DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
-+                    (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
-             assert(matchEndIdx > matchIndex);
-             if (matchLength > matchEndIdx - matchIndex)
-                 matchEndIdx = matchIndex + (U32)matchLength;
-             bestLength = matchLength;
--            matches[mnum].off = STORE_OFFSET(curr - matchIndex);
-+            matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
-             matches[mnum].len = (U32)matchLength;
-             mnum++;
-             if ( (matchLength > ZSTD_OPT_NUM)
-@@ -754,12 +790,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
- 
-             if (matchLength > bestLength) {
-                 matchIndex = dictMatchIndex + dmsIndexDelta;
--                DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
--                        (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
-+                DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
-+                        (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
-                 if (matchLength > matchEndIdx - matchIndex)
-                     matchEndIdx = matchIndex + (U32)matchLength;
-                 bestLength = matchLength;
--                matches[mnum].off = STORE_OFFSET(curr - matchIndex);
-+                matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
-                 matches[mnum].len = (U32)matchLength;
-                 mnum++;
-                 if ( (matchLength > ZSTD_OPT_NUM)
-@@ -792,7 +828,9 @@ typedef U32 (*ZSTD_getAllMatchesFn)(
-     U32 const ll0,
-     U32 const lengthToBeat);
- 
--FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
-+FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+U32 ZSTD_btGetAllMatches_internal(
-         ZSTD_match_t* matches,
-         ZSTD_matchState_t* ms,
-         U32* nextToUpdate3,
-@@ -960,7 +998,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
-                                       const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
- {
-     U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
--    /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
-+    /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
-     U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
- 
-     /* Ensure that current block position is not outside of the match */
-@@ -971,11 +1009,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
-     }
- 
-     if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
--        U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
--        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
--                 candidateOffCode, candidateMatchLength, currPosInBlock);
-+        U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
-+        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
-+                 candidateOffBase, candidateMatchLength, currPosInBlock);
-         matches[*nbMatches].len = candidateMatchLength;
--        matches[*nbMatches].off = candidateOffCode;
-+        matches[*nbMatches].off = candidateOffBase;
-         (*nbMatches)++;
-     }
- }
-@@ -1011,11 +1049,6 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
- *  Optimal parser
- *********************************/
- 
--static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
--{
--    return sol.litlen + sol.mlen;
--}
--
- #if 0 /* debug */
- 
- static void
-@@ -1033,7 +1066,13 @@ listStats(const U32* table, int lastEltID)
- 
- #endif
- 
--FORCE_INLINE_TEMPLATE size_t
-+#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
-+#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
-+#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
-+
-+FORCE_INLINE_TEMPLATE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+size_t
- ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
-                                seqStore_t* seqStore,
-                                U32 rep[ZSTD_REP_NUM],
-@@ -1059,9 +1098,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
- 
-     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
-     ZSTD_match_t* const matches = optStatePtr->matchTable;
--    ZSTD_optimal_t lastSequence;
-+    ZSTD_optimal_t lastStretch;
-     ZSTD_optLdm_t optLdm;
- 
-+    ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
-+
-     optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
-     optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
-     ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
-@@ -1082,103 +1123,139 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
-             U32 const ll0 = !litlen;
-             U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
-             ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
--                                              (U32)(ip-istart), (U32)(iend - ip));
--            if (!nbMatches) { ip++; continue; }
-+                                              (U32)(ip-istart), (U32)(iend-ip));
-+            if (!nbMatches) {
-+                DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
-+                ip++;
-+                continue;
-+            }
-+
-+            /* Match found: let's store this solution, and eventually find more candidates.
-+             * During this forward pass, @opt is used to store stretches,
-+             * defined as "a match followed by N literals".
-+             * Note how this is different from a Sequence, which is "N literals followed by a match".
-+             * Storing stretches allows us to store different match predecessors
-+             * for each literal position part of a literals run. */
- 
-             /* initialize opt[0] */
--            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
--            opt[0].mlen = 0;  /* means is_a_literal */
-+            opt[0].mlen = 0;  /* there are only literals so far */
-             opt[0].litlen = litlen;
--            /* We don't need to include the actual price of the literals because
--             * it is static for the duration of the forward pass, and is included
--             * in every price. We include the literal length to avoid negative
--             * prices when we subtract the previous literal length.
-+            /* No need to include the actual price of the literals before the first match
-+             * because it is static for the duration of the forward pass, and is included
-+             * in every subsequent price. But, we include the literal length because
-+             * the cost variation of litlen depends on the value of litlen.
-              */
--            opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
-+            opt[0].price = LL_PRICE(litlen);
-+            ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
-+            ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
- 
-             /* large match -> immediate encoding */
-             {   U32 const maxML = matches[nbMatches-1].len;
--                U32 const maxOffcode = matches[nbMatches-1].off;
--                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
--                            nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));
-+                U32 const maxOffBase = matches[nbMatches-1].off;
-+                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
-+                            nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
- 
-                 if (maxML > sufficient_len) {
--                    lastSequence.litlen = litlen;
--                    lastSequence.mlen = maxML;
--                    lastSequence.off = maxOffcode;
--                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
-+                    lastStretch.litlen = 0;
-+                    lastStretch.mlen = maxML;
-+                    lastStretch.off = maxOffBase;
-+                    DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
-                                 maxML, sufficient_len);
-                     cur = 0;
--                    last_pos = ZSTD_totalLen(lastSequence);
-+                    last_pos = maxML;
-                     goto _shortestPath;
-             }   }
- 
-             /* set prices for first matches starting position == 0 */
-             assert(opt[0].price >= 0);
--            {   U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
--                U32 pos;
-+            {   U32 pos;
-                 U32 matchNb;
-                 for (pos = 1; pos < minMatch; pos++) {
--                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
-+                    opt[pos].price = ZSTD_MAX_PRICE;
-+                    opt[pos].mlen = 0;
-+                    opt[pos].litlen = litlen + pos;
-                 }
-                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
--                    U32 const offcode = matches[matchNb].off;
-+                    U32 const offBase = matches[matchNb].off;
-                     U32 const end = matches[matchNb].len;
-                     for ( ; pos <= end ; pos++ ) {
--                        U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
--                        U32 const sequencePrice = literalsPrice + matchPrice;
-+                        int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
-+                        int const sequencePrice = opt[0].price + matchPrice;
-                         DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
-                                     pos, ZSTD_fCost(sequencePrice));
-                         opt[pos].mlen = pos;
--                        opt[pos].off = offcode;
--                        opt[pos].litlen = litlen;
--                        opt[pos].price = (int)sequencePrice;
--                }   }
-+                        opt[pos].off = offBase;
-+                        opt[pos].litlen = 0; /* end of match */
-+                        opt[pos].price = sequencePrice + LL_PRICE(0);
-+                    }
-+                }
-                 last_pos = pos-1;
-+                opt[pos].price = ZSTD_MAX_PRICE;
-             }
-         }
- 
-         /* check further positions */
-         for (cur = 1; cur <= last_pos; cur++) {
-             const BYTE* const inr = ip + cur;
--            assert(cur < ZSTD_OPT_NUM);
--            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
-+            assert(cur <= ZSTD_OPT_NUM);
-+            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
- 
-             /* Fix current position with one literal if cheaper */
--            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
-+            {   U32 const litlen = opt[cur-1].litlen + 1;
-                 int const price = opt[cur-1].price
--                                + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
--                                + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
--                                - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
-+                                + LIT_PRICE(ip+cur-1)
-+                                + LL_INCPRICE(litlen);
-                 assert(price < 1000000000); /* overflow check */
-                 if (price <= opt[cur].price) {
-+                    ZSTD_optimal_t const prevMatch = opt[cur];
-                     DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
-                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
-                                 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
--                    opt[cur].mlen = 0;
--                    opt[cur].off = 0;
-+                    opt[cur] = opt[cur-1];
-                     opt[cur].litlen = litlen;
-                     opt[cur].price = price;
-+                    if ( (optLevel >= 1) /* additional check only for higher modes */
-+                      && (prevMatch.litlen == 0) /* replace a match */
-+                      && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
-+                      && LIKELY(ip + cur < iend)
-+                    ) {
-+                        /* check next position, in case it would be cheaper */
-+                        int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
-+                        int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
-+                        DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
-+                                cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
-+                        if ( (with1literal < withMoreLiterals)
-+                          && (with1literal < opt[cur+1].price) ) {
-+                            /* update offset history - before it disappears */
-+                            U32 const prev = cur - prevMatch.mlen;
-+                            repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
-+                            assert(cur >= prevMatch.mlen);
-+                            DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
-+                                        ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
-+                                        newReps.rep[0], newReps.rep[1], newReps.rep[2] );
-+                            opt[cur+1] = prevMatch;  /* mlen & offbase */
-+                            ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
-+                            opt[cur+1].litlen = 1;
-+                            opt[cur+1].price = with1literal;
-+                            if (last_pos < cur+1) last_pos = cur+1;
-+                        }
-+                    }
-                 } else {
--                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
--                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
--                                opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
-+                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
-+                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
-                 }
-             }
- 
--            /* Set the repcodes of the current position. We must do it here
--             * because we rely on the repcodes of the 2nd to last sequence being
--             * correct to set the next chunks repcodes during the backward
--             * traversal.
-+            /* Offset history is not updated during match comparison.
-+             * Do it here, now that the match is selected and confirmed.
-              */
-             ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
-             assert(cur >= opt[cur].mlen);
--            if (opt[cur].mlen != 0) {
-+            if (opt[cur].litlen == 0) {
-+                /* just finished a match => alter offset history */
-                 U32 const prev = cur - opt[cur].mlen;
--                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
-+                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
-                 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
--            } else {
--                ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
-             }
- 
-             /* last match must start at a minimum distance of 8 from oend */
-@@ -1188,15 +1265,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
- 
-             if ( (optLevel==0) /*static_test*/
-               && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
--                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
-+                DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
-                 continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
-             }
- 
-             assert(opt[cur].price >= 0);
--            {   U32 const ll0 = (opt[cur].mlen != 0);
--                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
--                U32 const previousPrice = (U32)opt[cur].price;
--                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
-+            {   U32 const ll0 = (opt[cur].litlen == 0);
-+                int const previousPrice = opt[cur].price;
-+                int const basePrice = previousPrice + LL_PRICE(0);
-                 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
-                 U32 matchNb;
- 
-@@ -1208,18 +1284,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
-                     continue;
-                 }
- 
--                {   U32 const maxML = matches[nbMatches-1].len;
--                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
--                                inr-istart, cur, nbMatches, maxML);
--
--                    if ( (maxML > sufficient_len)
--                      || (cur + maxML >= ZSTD_OPT_NUM) ) {
--                        lastSequence.mlen = maxML;
--                        lastSequence.off = matches[nbMatches-1].off;
--                        lastSequence.litlen = litlen;
--                        cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;  /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
--                        last_pos = cur + ZSTD_totalLen(lastSequence);
--                        if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
-+                {   U32 const longestML = matches[nbMatches-1].len;
-+                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
-+                                inr-istart, cur, nbMatches, longestML);
-+
-+                    if ( (longestML > sufficient_len)
-+                      || (cur + longestML >= ZSTD_OPT_NUM)
-+                      || (ip + cur + longestML >= iend) ) {
-+                        lastStretch.mlen = longestML;
-+                        lastStretch.off = matches[nbMatches-1].off;
-+                        lastStretch.litlen = 0;
-+                        last_pos = cur + longestML;
-                         goto _shortestPath;
-                 }   }
- 
-@@ -1230,20 +1305,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
-                     U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
-                     U32 mlen;
- 
--                    DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
--                                matchNb, matches[matchNb].off, lastML, litlen);
-+                    DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
-+                                matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
- 
-                     for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
-                         U32 const pos = cur + mlen;
--                        int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
-+                        int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
- 
-                         if ((pos > last_pos) || (price < opt[pos].price)) {
-                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
-                                         pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
--                            while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
-+                            while (last_pos < pos) {
-+                                /* fill empty positions, for future comparisons */
-+                                last_pos++;
-+                                opt[last_pos].price = ZSTD_MAX_PRICE;
-+                                opt[last_pos].litlen = !0;  /* just needs to be != 0, to mean "not an end of match" */
-+                            }
-                             opt[pos].mlen = mlen;
-                             opt[pos].off = offset;
--                            opt[pos].litlen = litlen;
-+                            opt[pos].litlen = 0;
-                             opt[pos].price = price;
-                         } else {
-                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
-@@ -1251,52 +1331,86 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
-                             if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
-                         }
-             }   }   }
-+            opt[last_pos+1].price = ZSTD_MAX_PRICE;
-         }  /* for (cur = 1; cur <= last_pos; cur++) */
- 
--        lastSequence = opt[last_pos];
--        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
--        assert(cur < ZSTD_OPT_NUM);  /* control overflow*/
-+        lastStretch = opt[last_pos];
-+        assert(cur >= lastStretch.mlen);
-+        cur = last_pos - lastStretch.mlen;
- 
- _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
-         assert(opt[0].mlen == 0);
-+        assert(last_pos >= lastStretch.mlen);
-+        assert(cur == last_pos - lastStretch.mlen);
- 
--        /* Set the next chunk's repcodes based on the repcodes of the beginning
--         * of the last match, and the last sequence. This avoids us having to
--         * update them while traversing the sequences.
--         */
--        if (lastSequence.mlen != 0) {
--            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
--            ZSTD_memcpy(rep, &reps, sizeof(reps));
-+        if (lastStretch.mlen==0) {
-+            /* no solution : all matches have been converted into literals */
-+            assert(lastStretch.litlen == (ip - anchor) + last_pos);
-+            ip += last_pos;
-+            continue;
-+        }
-+        assert(lastStretch.off > 0);
-+
-+        /* Update offset history */
-+        if (lastStretch.litlen == 0) {
-+            /* finishing on a match : update offset history */
-+            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
-+            ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
-         } else {
--            ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
-+            ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
-+            assert(cur >= lastStretch.litlen);
-+            cur -= lastStretch.litlen;
-         }
- 
--        {   U32 const storeEnd = cur + 1;
-+        /* Let's write the shortest path solution.
-+         * It is stored in @opt in reverse order,
-+         * starting from @storeEnd (==cur+2),
-+         * effectively partially @opt overwriting.
-+         * Content is changed too:
-+         * - So far, @opt stored stretches, aka a match followed by literals
-+         * - Now, it will store sequences, aka literals followed by a match
-+         */
-+        {   U32 const storeEnd = cur + 2;
-             U32 storeStart = storeEnd;
--            U32 seqPos = cur;
-+            U32 stretchPos = cur;
- 
-             DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
-                         last_pos, cur); (void)last_pos;
--            assert(storeEnd < ZSTD_OPT_NUM);
--            DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
--                        storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
--            opt[storeEnd] = lastSequence;
--            while (seqPos > 0) {
--                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
-+            assert(storeEnd < ZSTD_OPT_SIZE);
-+            DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
-+                        storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
-+            if (lastStretch.litlen > 0) {
-+                /* last "sequence" is unfinished: just a bunch of literals */
-+                opt[storeEnd].litlen = lastStretch.litlen;
-+                opt[storeEnd].mlen = 0;
-+                storeStart = storeEnd-1;
-+                opt[storeStart] = lastStretch;
-+            } {
-+                opt[storeEnd] = lastStretch;  /* note: litlen will be fixed */
-+                storeStart = storeEnd;
-+            }
-+            while (1) {
-+                ZSTD_optimal_t nextStretch = opt[stretchPos];
-+                opt[storeStart].litlen = nextStretch.litlen;
-+                DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
-+                            opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
-+                if (nextStretch.mlen == 0) {
-+                    /* reaching beginning of segment */
-+                    break;
-+                }
-                 storeStart--;
--                DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
--                            seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
--                opt[storeStart] = opt[seqPos];
--                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
-+                opt[storeStart] = nextStretch; /* note: litlen will be fixed */
-+                assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
-+                stretchPos -= nextStretch.litlen + nextStretch.mlen;
-             }
- 
-             /* save sequences */
--            DEBUGLOG(6, "sending selected sequences into seqStore")
-+            DEBUGLOG(6, "sending selected sequences into seqStore");
-             {   U32 storePos;
-                 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
-                     U32 const llen = opt[storePos].litlen;
-                     U32 const mlen = opt[storePos].mlen;
--                    U32 const offCode = opt[storePos].off;
-+                    U32 const offBase = opt[storePos].off;
-                     U32 const advance = llen + mlen;
-                     DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
-                                 anchor - istart, (unsigned)llen, (unsigned)mlen);
-@@ -1308,11 +1422,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
-                     }
- 
-                     assert(anchor + llen <= iend);
--                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
--                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
-+                    ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
-+                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
-                     anchor += advance;
-                     ip = anchor;
-             }   }
-+            DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
-+
-+            /* update all costs */
-             ZSTD_setBasePrices(optStatePtr, optLevel);
-         }
-     }   /* while (ip < ilimit) */
-@@ -1320,21 +1437,27 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
-     /* Return the last literals size */
-     return (size_t)(iend - anchor);
- }
-+#endif /* build exclusions */
- 
-+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
- static size_t ZSTD_compressBlock_opt0(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
- {
-     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
- }
-+#endif
- 
-+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
- static size_t ZSTD_compressBlock_opt2(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
- {
-     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
- }
-+#endif
- 
-+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
- size_t ZSTD_compressBlock_btopt(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         const void* src, size_t srcSize)
-@@ -1342,20 +1465,23 @@ size_t ZSTD_compressBlock_btopt(
-     DEBUGLOG(5, "ZSTD_compressBlock_btopt");
-     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
- }
-+#endif
- 
- 
- 
- 
-+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
- /* ZSTD_initStats_ultra():
-  * make a first compression pass, just to seed stats with more accurate starting values.
-  * only works on first block, with no dictionary and no ldm.
-- * this function cannot error, hence its contract must be respected.
-+ * this function cannot error out, its narrow contract must be respected.
-  */
--static void
--ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
--                     seqStore_t* seqStore,
--                     U32 rep[ZSTD_REP_NUM],
--               const void* src, size_t srcSize)
-+static
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
-+                          seqStore_t* seqStore,
-+                          U32 rep[ZSTD_REP_NUM],
-+                    const void* src, size_t srcSize)
- {
-     U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
-     ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
-@@ -1368,7 +1494,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
- 
-     ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt*/
- 
--    /* invalidate first scan from history */
-+    /* invalidate first scan from history, only keep entropy stats */
-     ZSTD_resetSeqStore(seqStore);
-     ms->window.base -= srcSize;
-     ms->window.dictLimit += (U32)srcSize;
-@@ -1392,10 +1518,10 @@ size_t ZSTD_compressBlock_btultra2(
-     U32 const curr = (U32)((const BYTE*)src - ms->window.base);
-     DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
- 
--    /* 2-pass strategy:
-+    /* 2-passes strategy:
-      * this strategy makes a first pass over first block to collect statistics
--     * and seed next round's statistics with it.
--     * After 1st pass, function forgets everything, and starts a new block.
-+     * in order to seed next round's statistics with it.
-+     * After 1st pass, function forgets history, and starts a new block.
-      * Consequently, this can only work if no data has been previously loaded in tables,
-      * aka, no dictionary, no prefix, no ldm preprocessing.
-      * The compression ratio gain is generally small (~0.5% on first block),
-@@ -1404,15 +1530,17 @@ size_t ZSTD_compressBlock_btultra2(
-     if ( (ms->opt.litLengthSum==0)   /* first block */
-       && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
-       && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
--      && (curr == ms->window.dictLimit)   /* start of frame, nothing already loaded nor skipped */
--      && (srcSize > ZSTD_PREDEF_THRESHOLD)
-+      && (curr == ms->window.dictLimit)    /* start of frame, nothing already loaded nor skipped */
-+      && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
-       ) {
-         ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
-     }
- 
-     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
- }
-+#endif
- 
-+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
- size_t ZSTD_compressBlock_btopt_dictMatchState(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         const void* src, size_t srcSize)
-@@ -1420,18 +1548,20 @@ size_t ZSTD_compressBlock_btopt_dictMatchState(
-     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
- }
- 
--size_t ZSTD_compressBlock_btultra_dictMatchState(
-+size_t ZSTD_compressBlock_btopt_extDict(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         const void* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
-+    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
- }
-+#endif
- 
--size_t ZSTD_compressBlock_btopt_extDict(
-+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
-+size_t ZSTD_compressBlock_btultra_dictMatchState(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         const void* src, size_t srcSize)
- {
--    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
-+    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
- }
- 
- size_t ZSTD_compressBlock_btultra_extDict(
-@@ -1440,6 +1570,7 @@ size_t ZSTD_compressBlock_btultra_extDict(
- {
-     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
- }
-+#endif
- 
- /* note : no btultra2 variant for extDict nor dictMatchState,
-  * because btultra2 is not meant to work with dictionaries
-diff --git a/lib/zstd/compress/zstd_opt.h b/lib/zstd/compress/zstd_opt.h
-index 22b862858ba7..ac1b743d27cd 100644
---- a/lib/zstd/compress/zstd_opt.h
-+++ b/lib/zstd/compress/zstd_opt.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -14,30 +15,40 @@
- 
- #include "zstd_compress_internal.h"
- 
-+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
-+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
- /* used in ZSTD_loadDictionaryContent() */
- void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
-+#endif
- 
-+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
- size_t ZSTD_compressBlock_btopt(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_btultra(
-+size_t ZSTD_compressBlock_btopt_dictMatchState(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--size_t ZSTD_compressBlock_btultra2(
-+size_t ZSTD_compressBlock_btopt_extDict(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
- 
-+#define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt
-+#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE ZSTD_compressBlock_btopt_dictMatchState
-+#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT ZSTD_compressBlock_btopt_extDict
-+#else
-+#define ZSTD_COMPRESSBLOCK_BTOPT NULL
-+#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE NULL
-+#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT NULL
-+#endif
- 
--size_t ZSTD_compressBlock_btopt_dictMatchState(
-+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
-+size_t ZSTD_compressBlock_btultra(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
- size_t ZSTD_compressBlock_btultra_dictMatchState(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
--
--size_t ZSTD_compressBlock_btopt_extDict(
--        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
--        void const* src, size_t srcSize);
- size_t ZSTD_compressBlock_btultra_extDict(
-         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-         void const* src, size_t srcSize);
-@@ -45,6 +56,20 @@ size_t ZSTD_compressBlock_btultra_extDict(
-         /* note : no btultra2 variant for extDict nor dictMatchState,
-          * because btultra2 is not meant to work with dictionaries
-          * and is only specific for the first block (no prefix) */
-+size_t ZSTD_compressBlock_btultra2(
-+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-+        void const* src, size_t srcSize);
-+
-+#define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra
-+#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE ZSTD_compressBlock_btultra_dictMatchState
-+#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT ZSTD_compressBlock_btultra_extDict
-+#define ZSTD_COMPRESSBLOCK_BTULTRA2 ZSTD_compressBlock_btultra2
-+#else
-+#define ZSTD_COMPRESSBLOCK_BTULTRA NULL
-+#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE NULL
-+#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT NULL
-+#define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL
-+#endif
- 
- 
- #endif /* ZSTD_OPT_H */
-diff --git a/lib/zstd/decompress/huf_decompress.c b/lib/zstd/decompress/huf_decompress.c
-index 60958afebc41..ac8b87f48f84 100644
---- a/lib/zstd/decompress/huf_decompress.c
-+++ b/lib/zstd/decompress/huf_decompress.c
-@@ -1,7 +1,8 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /* ******************************************************************
-  * huff0 huffman decoder,
-  * part of Finite State Entropy library
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  *
-  *  You can contact the author at :
-  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
-@@ -19,10 +20,10 @@
- #include "../common/compiler.h"
- #include "../common/bitstream.h"  /* BIT_* */
- #include "../common/fse.h"        /* to compress headers */
--#define HUF_STATIC_LINKING_ONLY
- #include "../common/huf.h"
- #include "../common/error_private.h"
- #include "../common/zstd_internal.h"
-+#include "../common/bits.h"       /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */
- 
- /* **************************************************************
- *  Constants
-@@ -34,6 +35,12 @@
- *  Macros
- ****************************************************************/
- 
-+#ifdef HUF_DISABLE_FAST_DECODE
-+# define HUF_ENABLE_FAST_DECODE 0
-+#else
-+# define HUF_ENABLE_FAST_DECODE 1
-+#endif
-+
- /* These two optional macros force the use one way or another of the two
-  * Huffman decompression implementations. You can't force in both directions
-  * at the same time.
-@@ -43,27 +50,25 @@
- #error "Cannot force the use of the X1 and X2 decoders at the same time!"
- #endif
- 
--#if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
--# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
-+/* When DYNAMIC_BMI2 is enabled, fast decoders are only called when bmi2 is
-+ * supported at runtime, so we can add the BMI2 target attribute.
-+ * When it is disabled, we will still get BMI2 if it is enabled statically.
-+ */
-+#if DYNAMIC_BMI2
-+# define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
- #else
--# define HUF_ASM_X86_64_BMI2_ATTRS
-+# define HUF_FAST_BMI2_ATTRS
- #endif
- 
- #define HUF_EXTERN_C
- #define HUF_ASM_DECL HUF_EXTERN_C
- 
--#if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
-+#if DYNAMIC_BMI2
- # define HUF_NEED_BMI2_FUNCTION 1
- #else
- # define HUF_NEED_BMI2_FUNCTION 0
- #endif
- 
--#if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
--# define HUF_NEED_DEFAULT_FUNCTION 1
--#else
--# define HUF_NEED_DEFAULT_FUNCTION 0
--#endif
--
- /* **************************************************************
- *  Error Management
- ****************************************************************/
-@@ -80,6 +85,11 @@
- /* **************************************************************
- *  BMI2 Variant Wrappers
- ****************************************************************/
-+typedef size_t (*HUF_DecompressUsingDTableFn)(void *dst, size_t dstSize,
-+                                              const void *cSrc,
-+                                              size_t cSrcSize,
-+                                              const HUF_DTable *DTable);
-+
- #if DYNAMIC_BMI2
- 
- #define HUF_DGEN(fn)                                                        \
-@@ -101,9 +111,9 @@
-     }                                                                       \
-                                                                             \
-     static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
--                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
-+                     size_t cSrcSize, HUF_DTable const* DTable, int flags)  \
-     {                                                                       \
--        if (bmi2) {                                                         \
-+        if (flags & HUF_flags_bmi2) {                                       \
-             return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
-         }                                                                   \
-         return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
-@@ -113,9 +123,9 @@
- 
- #define HUF_DGEN(fn)                                                        \
-     static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
--                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
-+                     size_t cSrcSize, HUF_DTable const* DTable, int flags)  \
-     {                                                                       \
--        (void)bmi2;                                                         \
-+        (void)flags;                                                        \
-         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
-     }
- 
-@@ -134,43 +144,66 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
-     return dtd;
- }
- 
--#if ZSTD_ENABLE_ASM_X86_64_BMI2
--
--static size_t HUF_initDStream(BYTE const* ip) {
-+static size_t HUF_initFastDStream(BYTE const* ip) {
-     BYTE const lastByte = ip[7];
--    size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
-+    size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
-     size_t const value = MEM_readLEST(ip) | 1;
-     assert(bitsConsumed <= 8);
-+    assert(sizeof(size_t) == 8);
-     return value << bitsConsumed;
- }
-+
-+
-+/*
-+ * The input/output arguments to the Huffman fast decoding loop:
-+ *
-+ * ip [in/out] - The input pointers, must be updated to reflect what is consumed.
-+ * op [in/out] - The output pointers, must be updated to reflect what is written.
-+ * bits [in/out] - The bitstream containers, must be updated to reflect the current state.
-+ * dt [in] - The decoding table.
-+ * ilowest [in] - The beginning of the valid range of the input. Decoders may read
-+ *                down to this pointer. It may be below iend[0].
-+ * oend [in] - The end of the output stream. op[3] must not cross oend.
-+ * iend [in] - The end of each input stream. ip[i] may cross iend[i],
-+ *             as long as it is above ilowest, but that indicates corruption.
-+ */
- typedef struct {
-     BYTE const* ip[4];
-     BYTE* op[4];
-     U64 bits[4];
-     void const* dt;
--    BYTE const* ilimit;
-+    BYTE const* ilowest;
-     BYTE* oend;
-     BYTE const* iend[4];
--} HUF_DecompressAsmArgs;
-+} HUF_DecompressFastArgs;
-+
-+typedef void (*HUF_DecompressFastLoopFn)(HUF_DecompressFastArgs*);
- 
- /*
-- * Initializes args for the asm decoding loop.
-- * @returns 0 on success
-- *          1 if the fallback implementation should be used.
-+ * Initializes args for the fast decoding loop.
-+ * @returns 1 on success
-+ *          0 if the fallback implementation should be used.
-  *          Or an error code on failure.
-  */
--static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
-+static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
- {
-     void const* dt = DTable + 1;
-     U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
- 
--    const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
-+    const BYTE* const istart = (const BYTE*)src;
- 
--    BYTE* const oend = (BYTE*)dst + dstSize;
-+    BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
- 
--    /* The following condition is false on x32 platform,
--     * but HUF_asm is not compatible with this ABI */
--    if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1;
-+    /* The fast decoding loop assumes 64-bit little-endian.
-+     * This condition is false on x32.
-+     */
-+    if (!MEM_isLittleEndian() || MEM_32bits())
-+        return 0;
-+
-+    /* Avoid nullptr addition */
-+    if (dstSize == 0)
-+        return 0;
-+    assert(dst != NULL);
- 
-     /* strict minimum : jump table + 1 byte per stream */
-     if (srcSize < 10)
-@@ -181,11 +214,10 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
-      * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
-      */
-     if (dtLog != HUF_DECODER_FAST_TABLELOG)
--        return 1;
-+        return 0;
- 
-     /* Read the jump table. */
-     {
--        const BYTE* const istart = (const BYTE*)src;
-         size_t const length1 = MEM_readLE16(istart);
-         size_t const length2 = MEM_readLE16(istart+2);
-         size_t const length3 = MEM_readLE16(istart+4);
-@@ -195,13 +227,11 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
-         args->iend[2] = args->iend[1] + length2;
-         args->iend[3] = args->iend[2] + length3;
- 
--        /* HUF_initDStream() requires this, and this small of an input
-+        /* HUF_initFastDStream() requires this, and this small of an input
-          * won't benefit from the ASM loop anyways.
--         * length1 must be >= 16 so that ip[0] >= ilimit before the loop
--         * starts.
-          */
--        if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
--            return 1;
-+        if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8)
-+            return 0;
-         if (length4 > srcSize) return ERROR(corruption_detected);   /* overflow */
-     }
-     /* ip[] contains the position that is currently loaded into bits[]. */
-@@ -218,7 +248,7 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
- 
-     /* No point to call the ASM loop for tiny outputs. */
-     if (args->op[3] >= oend)
--        return 1;
-+        return 0;
- 
-     /* bits[] is the bit container.
-         * It is read from the MSB down to the LSB.
-@@ -227,24 +257,25 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
-         * set, so that CountTrailingZeros(bits[]) can be used
-         * to count how many bits we've consumed.
-         */
--    args->bits[0] = HUF_initDStream(args->ip[0]);
--    args->bits[1] = HUF_initDStream(args->ip[1]);
--    args->bits[2] = HUF_initDStream(args->ip[2]);
--    args->bits[3] = HUF_initDStream(args->ip[3]);
--
--    /* If ip[] >= ilimit, it is guaranteed to be safe to
--        * reload bits[]. It may be beyond its section, but is
--        * guaranteed to be valid (>= istart).
--        */
--    args->ilimit = ilimit;
-+    args->bits[0] = HUF_initFastDStream(args->ip[0]);
-+    args->bits[1] = HUF_initFastDStream(args->ip[1]);
-+    args->bits[2] = HUF_initFastDStream(args->ip[2]);
-+    args->bits[3] = HUF_initFastDStream(args->ip[3]);
-+
-+    /* The decoders must be sure to never read beyond ilowest.
-+     * This is lower than iend[0], but allowing decoders to read
-+     * down to ilowest can allow an extra iteration or two in the
-+     * fast loop.
-+     */
-+    args->ilowest = istart;
- 
-     args->oend = oend;
-     args->dt = dt;
- 
--    return 0;
-+    return 1;
- }
- 
--static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd)
-+static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArgs const* args, int stream, BYTE* segmentEnd)
- {
-     /* Validate that we haven't overwritten. */
-     if (args->op[stream] > segmentEnd)
-@@ -258,15 +289,33 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs
-         return ERROR(corruption_detected);
- 
-     /* Construct the BIT_DStream_t. */
--    bit->bitContainer = MEM_readLE64(args->ip[stream]);
--    bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]);
--    bit->start = (const char*)args->iend[0];
-+    assert(sizeof(size_t) == 8);
-+    bit->bitContainer = MEM_readLEST(args->ip[stream]);
-+    bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
-+    bit->start = (const char*)args->ilowest;
-     bit->limitPtr = bit->start + sizeof(size_t);
-     bit->ptr = (const char*)args->ip[stream];
- 
-     return 0;
- }
--#endif
-+
-+/* Calls X(N) for each stream 0, 1, 2, 3. */
-+#define HUF_4X_FOR_EACH_STREAM(X) \
-+    do {                          \
-+        X(0);                     \
-+        X(1);                     \
-+        X(2);                     \
-+        X(3);                     \
-+    } while (0)
-+
-+/* Calls X(N, var) for each stream 0, 1, 2, 3. */
-+#define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \
-+    do {                                        \
-+        X(0, (var));                            \
-+        X(1, (var));                            \
-+        X(2, (var));                            \
-+        X(3, (var));                            \
-+    } while (0)
- 
- 
- #ifndef HUF_FORCE_DECOMPRESS_X2
-@@ -283,10 +332,11 @@ typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1;   /* single-symbol decodi
- static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
-     U64 D4;
-     if (MEM_isLittleEndian()) {
--        D4 = (symbol << 8) + nbBits;
-+        D4 = (U64)((symbol << 8) + nbBits);
-     } else {
--        D4 = symbol + (nbBits << 8);
-+        D4 = (U64)(symbol + (nbBits << 8));
-     }
-+    assert(D4 < (1U << 16));
-     D4 *= 0x0001000100010001ULL;
-     return D4;
- }
-@@ -329,13 +379,7 @@ typedef struct {
-         BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
- } HUF_ReadDTableX1_Workspace;
- 
--
--size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
--{
--    return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
--}
--
--size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
-+size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags)
- {
-     U32 tableLog = 0;
-     U32 nbSymbols = 0;
-@@ -350,7 +394,7 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
-     DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
-     /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
- 
--    iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
-+    iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags);
-     if (HUF_isError(iSize)) return iSize;
- 
- 
-@@ -377,9 +421,8 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
-      * rankStart[0] is not filled because there are no entries in the table for
-      * weight 0.
-      */
--    {
--        int n;
--        int nextRankStart = 0;
-+    {   int n;
-+        U32 nextRankStart = 0;
-         int const unroll = 4;
-         int const nLimit = (int)nbSymbols - unroll + 1;
-         for (n=0; n<(int)tableLog+1; n++) {
-@@ -406,10 +449,9 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
-      * We can switch based on the length to a different inner loop which is
-      * optimized for that particular case.
-      */
--    {
--        U32 w;
--        int symbol=wksp->rankVal[0];
--        int rankStart=0;
-+    {   U32 w;
-+        int symbol = wksp->rankVal[0];
-+        int rankStart = 0;
-         for (w=1; w<tableLog+1; ++w) {
-             int const symbolCount = wksp->rankVal[w];
-             int const length = (1 << w) >> 1;
-@@ -483,15 +525,19 @@ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog
- }
- 
- #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
--    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
-+    do { *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); } while (0)
- 
--#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
--    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
--        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
-+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)      \
-+    do {                                            \
-+        if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
-+            HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
-+    } while (0)
- 
--#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
--    if (MEM_64bits()) \
--        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
-+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr)      \
-+    do {                                            \
-+        if (MEM_64bits())                           \
-+            HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
-+    } while (0)
- 
- HINT_INLINE size_t
- HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
-@@ -519,7 +565,7 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons
-     while (p < pEnd)
-         HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
- 
--    return pEnd-pStart;
-+    return (size_t)(pEnd-pStart);
- }
- 
- FORCE_INLINE_TEMPLATE size_t
-@@ -529,7 +575,7 @@ HUF_decompress1X1_usingDTable_internal_body(
-     const HUF_DTable* DTable)
- {
-     BYTE* op = (BYTE*)dst;
--    BYTE* const oend = op + dstSize;
-+    BYTE* const oend = ZSTD_maybeNullPtrAdd(op, dstSize);
-     const void* dtPtr = DTable + 1;
-     const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
-     BIT_DStream_t bitD;
-@@ -545,6 +591,10 @@ HUF_decompress1X1_usingDTable_internal_body(
-     return dstSize;
- }
- 
-+/* HUF_decompress4X1_usingDTable_internal_body():
-+ * Conditions :
-+ * @dstSize >= 6
-+ */
- FORCE_INLINE_TEMPLATE size_t
- HUF_decompress4X1_usingDTable_internal_body(
-           void* dst,  size_t dstSize,
-@@ -553,6 +603,7 @@ HUF_decompress4X1_usingDTable_internal_body(
- {
-     /* Check */
-     if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
-+    if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
- 
-     {   const BYTE* const istart = (const BYTE*) cSrc;
-         BYTE* const ostart = (BYTE*) dst;
-@@ -588,6 +639,7 @@ HUF_decompress4X1_usingDTable_internal_body(
- 
-         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
-         if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
-+        assert(dstSize >= 6); /* validated above */
-         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
-         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
-         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
-@@ -650,52 +702,173 @@ size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, vo
- }
- #endif
- 
--#if HUF_NEED_DEFAULT_FUNCTION
- static
- size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
-                     size_t cSrcSize, HUF_DTable const* DTable) {
-     return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
- }
--#endif
- 
- #if ZSTD_ENABLE_ASM_X86_64_BMI2
- 
--HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
-+HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
-+
-+#endif
-+
-+static HUF_FAST_BMI2_ATTRS
-+void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
-+{
-+    U64 bits[4];
-+    BYTE const* ip[4];
-+    BYTE* op[4];
-+    U16 const* const dtable = (U16 const*)args->dt;
-+    BYTE* const oend = args->oend;
-+    BYTE const* const ilowest = args->ilowest;
-+
-+    /* Copy the arguments to local variables */
-+    ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
-+    ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
-+    ZSTD_memcpy(&op, &args->op, sizeof(op));
-+
-+    assert(MEM_isLittleEndian());
-+    assert(!MEM_32bits());
-+
-+    for (;;) {
-+        BYTE* olimit;
-+        int stream;
-+
-+        /* Assert loop preconditions */
-+#ifndef NDEBUG
-+        for (stream = 0; stream < 4; ++stream) {
-+            assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
-+            assert(ip[stream] >= ilowest);
-+        }
-+#endif
-+        /* Compute olimit */
-+        {
-+            /* Each iteration produces 5 output symbols per stream */
-+            size_t const oiters = (size_t)(oend - op[3]) / 5;
-+            /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
-+             * per stream.
-+             */
-+            size_t const iiters = (size_t)(ip[0] - ilowest) / 7;
-+            /* We can safely run iters iterations before running bounds checks */
-+            size_t const iters = MIN(oiters, iiters);
-+            size_t const symbols = iters * 5;
-+
-+            /* We can simply check that op[3] < olimit, instead of checking all
-+             * of our bounds, since we can't hit the other bounds until we've run
-+             * iters iterations, which only happens when op[3] == olimit.
-+             */
-+            olimit = op[3] + symbols;
-+
-+            /* Exit fast decoding loop once we reach the end. */
-+            if (op[3] == olimit)
-+                break;
-+
-+            /* Exit the decoding loop if any input pointer has crossed the
-+             * previous one. This indicates corruption, and a precondition
-+             * to our loop is that ip[i] >= ip[0].
-+             */
-+            for (stream = 1; stream < 4; ++stream) {
-+                if (ip[stream] < ip[stream - 1])
-+                    goto _out;
-+            }
-+        }
-+
-+#ifndef NDEBUG
-+        for (stream = 1; stream < 4; ++stream) {
-+            assert(ip[stream] >= ip[stream - 1]);
-+        }
-+#endif
-+
-+#define HUF_4X1_DECODE_SYMBOL(_stream, _symbol)                 \
-+    do {                                                        \
-+        int const index = (int)(bits[(_stream)] >> 53);         \
-+        int const entry = (int)dtable[index];                   \
-+        bits[(_stream)] <<= (entry & 0x3F);                     \
-+        op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \
-+    } while (0)
-+
-+#define HUF_4X1_RELOAD_STREAM(_stream)                              \
-+    do {                                                            \
-+        int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
-+        int const nbBits = ctz & 7;                                 \
-+        int const nbBytes = ctz >> 3;                               \
-+        op[(_stream)] += 5;                                         \
-+        ip[(_stream)] -= nbBytes;                                   \
-+        bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1;            \
-+        bits[(_stream)] <<= nbBits;                                 \
-+    } while (0)
-+
-+        /* Manually unroll the loop because compilers don't consistently
-+         * unroll the inner loops, which destroys performance.
-+         */
-+        do {
-+            /* Decode 5 symbols in each of the 4 streams */
-+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0);
-+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1);
-+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2);
-+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3);
-+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4);
-+
-+            /* Reload each of the 4 the bitstreams */
-+            HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM);
-+        } while (op[3] < olimit);
-+
-+#undef HUF_4X1_DECODE_SYMBOL
-+#undef HUF_4X1_RELOAD_STREAM
-+    }
- 
--static HUF_ASM_X86_64_BMI2_ATTRS
-+_out:
-+
-+    /* Save the final values of each of the state variables back to args. */
-+    ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
-+    ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
-+    ZSTD_memcpy(&args->op, &op, sizeof(op));
-+}
-+
-+/*
-+ * @returns @p dstSize on success (>= 6)
-+ *          0 if the fallback implementation should be used
-+ *          An error if an error occurred
-+ */
-+static HUF_FAST_BMI2_ATTRS
- size_t
--HUF_decompress4X1_usingDTable_internal_bmi2_asm(
-+HUF_decompress4X1_usingDTable_internal_fast(
-           void* dst,  size_t dstSize,
-     const void* cSrc, size_t cSrcSize,
--    const HUF_DTable* DTable)
-+    const HUF_DTable* DTable,
-+    HUF_DecompressFastLoopFn loopFn)
- {
-     void const* dt = DTable + 1;
--    const BYTE* const iend = (const BYTE*)cSrc + 6;
--    BYTE* const oend = (BYTE*)dst + dstSize;
--    HUF_DecompressAsmArgs args;
--    {
--        size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
--        FORWARD_IF_ERROR(ret, "Failed to init asm args");
--        if (ret != 0)
--            return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
-+    BYTE const* const ilowest = (BYTE const*)cSrc;
-+    BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
-+    HUF_DecompressFastArgs args;
-+    {   size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
-+        FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
-+        if (ret == 0)
-+            return 0;
-     }
- 
--    assert(args.ip[0] >= args.ilimit);
--    HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args);
-+    assert(args.ip[0] >= args.ilowest);
-+    loopFn(&args);
- 
--    /* Our loop guarantees that ip[] >= ilimit and that we haven't
-+    /* Our loop guarantees that ip[] >= ilowest and that we haven't
-     * overwritten any op[].
-     */
--    assert(args.ip[0] >= iend);
--    assert(args.ip[1] >= iend);
--    assert(args.ip[2] >= iend);
--    assert(args.ip[3] >= iend);
-+    assert(args.ip[0] >= ilowest);
-+    assert(args.ip[0] >= ilowest);
-+    assert(args.ip[1] >= ilowest);
-+    assert(args.ip[2] >= ilowest);
-+    assert(args.ip[3] >= ilowest);
-     assert(args.op[3] <= oend);
--    (void)iend;
-+
-+    assert(ilowest == args.ilowest);
-+    assert(ilowest + 6 == args.iend[0]);
-+    (void)ilowest;
- 
-     /* finish bit streams one by one. */
--    {
--        size_t const segmentSize = (dstSize+3) / 4;
-+    {   size_t const segmentSize = (dstSize+3) / 4;
-         BYTE* segmentEnd = (BYTE*)dst;
-         int i;
-         for (i = 0; i < 4; ++i) {
-@@ -712,97 +885,59 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm(
-     }
- 
-     /* decoded size */
-+    assert(dstSize != 0);
-     return dstSize;
- }
--#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
--
--typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
--                                               const void *cSrc,
--                                               size_t cSrcSize,
--                                               const HUF_DTable *DTable);
- 
- HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
- 
- static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
--                    size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
-+                    size_t cSrcSize, HUF_DTable const* DTable, int flags)
- {
-+    HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X1_usingDTable_internal_default;
-+    HUF_DecompressFastLoopFn loopFn = HUF_decompress4X1_usingDTable_internal_fast_c_loop;
-+
- #if DYNAMIC_BMI2
--    if (bmi2) {
-+    if (flags & HUF_flags_bmi2) {
-+        fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2;
- # if ZSTD_ENABLE_ASM_X86_64_BMI2
--        return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
--# else
--        return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
-+        if (!(flags & HUF_flags_disableAsm)) {
-+            loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
-+        }
- # endif
-+    } else {
-+        return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
-     }
--#else
--    (void)bmi2;
- #endif
- 
- #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
--    return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
--#else
--    return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
-+    if (!(flags & HUF_flags_disableAsm)) {
-+        loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
-+    }
- #endif
--}
--
--
--size_t HUF_decompress1X1_usingDTable(
--          void* dst,  size_t dstSize,
--    const void* cSrc, size_t cSrcSize,
--    const HUF_DTable* DTable)
--{
--    DTableDesc dtd = HUF_getDTableDesc(DTable);
--    if (dtd.tableType != 0) return ERROR(GENERIC);
--    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
--}
- 
--size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
--                                   const void* cSrc, size_t cSrcSize,
--                                   void* workSpace, size_t wkspSize)
--{
--    const BYTE* ip = (const BYTE*) cSrc;
--
--    size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
--    if (HUF_isError(hSize)) return hSize;
--    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
--    ip += hSize; cSrcSize -= hSize;
--
--    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
--}
--
--
--size_t HUF_decompress4X1_usingDTable(
--          void* dst,  size_t dstSize,
--    const void* cSrc, size_t cSrcSize,
--    const HUF_DTable* DTable)
--{
--    DTableDesc dtd = HUF_getDTableDesc(DTable);
--    if (dtd.tableType != 0) return ERROR(GENERIC);
--    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
-+    if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
-+        size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
-+        if (ret != 0)
-+            return ret;
-+    }
-+    return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
- }
- 
--static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
-+static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
-                                    const void* cSrc, size_t cSrcSize,
--                                   void* workSpace, size_t wkspSize, int bmi2)
-+                                   void* workSpace, size_t wkspSize, int flags)
- {
-     const BYTE* ip = (const BYTE*) cSrc;
- 
--    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
-+    size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
-     if (HUF_isError(hSize)) return hSize;
-     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
-     ip += hSize; cSrcSize -= hSize;
- 
--    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
--}
--
--size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
--                                   const void* cSrc, size_t cSrcSize,
--                                   void* workSpace, size_t wkspSize)
--{
--    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
-+    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
- }
- 
--
- #endif /* HUF_FORCE_DECOMPRESS_X2 */
- 
- 
-@@ -985,7 +1120,7 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32
- 
- static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
-                            const sortedSymbol_t* sortedList,
--                           const U32* rankStart, rankValCol_t *rankValOrigin, const U32 maxWeight,
-+                           const U32* rankStart, rankValCol_t* rankValOrigin, const U32 maxWeight,
-                            const U32 nbBitsBaseline)
- {
-     U32* const rankVal = rankValOrigin[0];
-@@ -1040,14 +1175,7 @@ typedef struct {
- 
- size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
-                        const void* src, size_t srcSize,
--                             void* workSpace, size_t wkspSize)
--{
--    return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
--}
--
--size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
--                       const void* src, size_t srcSize,
--                             void* workSpace, size_t wkspSize, int bmi2)
-+                             void* workSpace, size_t wkspSize, int flags)
- {
-     U32 tableLog, maxW, nbSymbols;
-     DTableDesc dtd = HUF_getDTableDesc(DTable);
-@@ -1069,7 +1197,7 @@ size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
-     if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
-     /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
- 
--    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2);
-+    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), flags);
-     if (HUF_isError(iSize)) return iSize;
- 
-     /* check result */
-@@ -1159,15 +1287,19 @@ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, c
- }
- 
- #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
--    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
-+    do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0)
- 
--#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
--    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
--        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
-+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)                     \
-+    do {                                                           \
-+        if (MEM_64bits() || (HUF_TABLELOG_MAX<=12))                \
-+            ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
-+    } while (0)
- 
--#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
--    if (MEM_64bits()) \
--        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
-+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr)                     \
-+    do {                                                           \
-+        if (MEM_64bits())                                          \
-+            ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
-+    } while (0)
- 
- HINT_INLINE size_t
- HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
-@@ -1227,7 +1359,7 @@ HUF_decompress1X2_usingDTable_internal_body(
- 
-     /* decode */
-     {   BYTE* const ostart = (BYTE*) dst;
--        BYTE* const oend = ostart + dstSize;
-+        BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, dstSize);
-         const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
-         const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
-         DTableDesc const dtd = HUF_getDTableDesc(DTable);
-@@ -1240,6 +1372,11 @@ HUF_decompress1X2_usingDTable_internal_body(
-     /* decoded size */
-     return dstSize;
- }
-+
-+/* HUF_decompress4X2_usingDTable_internal_body():
-+ * Conditions:
-+ * @dstSize >= 6
-+ */
- FORCE_INLINE_TEMPLATE size_t
- HUF_decompress4X2_usingDTable_internal_body(
-           void* dst,  size_t dstSize,
-@@ -1247,6 +1384,7 @@ HUF_decompress4X2_usingDTable_internal_body(
-     const HUF_DTable* DTable)
- {
-     if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
-+    if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
- 
-     {   const BYTE* const istart = (const BYTE*) cSrc;
-         BYTE* const ostart = (BYTE*) dst;
-@@ -1280,8 +1418,9 @@ HUF_decompress4X2_usingDTable_internal_body(
-         DTableDesc const dtd = HUF_getDTableDesc(DTable);
-         U32 const dtLog = dtd.tableLog;
- 
--        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
--        if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
-+        if (length4 > cSrcSize) return ERROR(corruption_detected);  /* overflow */
-+        if (opStart4 > oend) return ERROR(corruption_detected);     /* overflow */
-+        assert(dstSize >= 6 /* validated above */);
-         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
-         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
-         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
-@@ -1366,44 +1505,191 @@ size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, vo
- }
- #endif
- 
--#if HUF_NEED_DEFAULT_FUNCTION
- static
- size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
-                     size_t cSrcSize, HUF_DTable const* DTable) {
-     return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
- }
--#endif
- 
- #if ZSTD_ENABLE_ASM_X86_64_BMI2
- 
--HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
-+HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
-+
-+#endif
-+
-+static HUF_FAST_BMI2_ATTRS
-+void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
-+{
-+    U64 bits[4];
-+    BYTE const* ip[4];
-+    BYTE* op[4];
-+    BYTE* oend[4];
-+    HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
-+    BYTE const* const ilowest = args->ilowest;
-+
-+    /* Copy the arguments to local registers. */
-+    ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
-+    ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
-+    ZSTD_memcpy(&op, &args->op, sizeof(op));
-+
-+    oend[0] = op[1];
-+    oend[1] = op[2];
-+    oend[2] = op[3];
-+    oend[3] = args->oend;
-+
-+    assert(MEM_isLittleEndian());
-+    assert(!MEM_32bits());
-+
-+    for (;;) {
-+        BYTE* olimit;
-+        int stream;
-+
-+        /* Assert loop preconditions */
-+#ifndef NDEBUG
-+        for (stream = 0; stream < 4; ++stream) {
-+            assert(op[stream] <= oend[stream]);
-+            assert(ip[stream] >= ilowest);
-+        }
-+#endif
-+        /* Compute olimit */
-+        {
-+            /* Each loop does 5 table lookups for each of the 4 streams.
-+             * Each table lookup consumes up to 11 bits of input, and produces
-+             * up to 2 bytes of output.
-+             */
-+            /* We can consume up to 7 bytes of input per iteration per stream.
-+             * We also know that each input pointer is >= ip[0]. So we can run
-+             * iters loops before running out of input.
-+             */
-+            size_t iters = (size_t)(ip[0] - ilowest) / 7;
-+            /* Each iteration can produce up to 10 bytes of output per stream.
-+             * Each output stream my advance at different rates. So take the
-+             * minimum number of safe iterations among all the output streams.
-+             */
-+            for (stream = 0; stream < 4; ++stream) {
-+                size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
-+                iters = MIN(iters, oiters);
-+            }
-+
-+            /* Each iteration produces at least 5 output symbols. So until
-+             * op[3] crosses olimit, we know we haven't executed iters
-+             * iterations yet. This saves us maintaining an iters counter,
-+             * at the expense of computing the remaining # of iterations
-+             * more frequently.
-+             */
-+            olimit = op[3] + (iters * 5);
-+
-+            /* Exit the fast decoding loop once we reach the end. */
-+            if (op[3] == olimit)
-+                break;
-+
-+            /* Exit the decoding loop if any input pointer has crossed the
-+             * previous one. This indicates corruption, and a precondition
-+             * to our loop is that ip[i] >= ip[0].
-+             */
-+            for (stream = 1; stream < 4; ++stream) {
-+                if (ip[stream] < ip[stream - 1])
-+                    goto _out;
-+            }
-+        }
-+
-+#ifndef NDEBUG
-+        for (stream = 1; stream < 4; ++stream) {
-+            assert(ip[stream] >= ip[stream - 1]);
-+        }
-+#endif
- 
--static HUF_ASM_X86_64_BMI2_ATTRS size_t
--HUF_decompress4X2_usingDTable_internal_bmi2_asm(
-+#define HUF_4X2_DECODE_SYMBOL(_stream, _decode3)                      \
-+    do {                                                              \
-+        if ((_decode3) || (_stream) != 3) {                           \
-+            int const index = (int)(bits[(_stream)] >> 53);           \
-+            HUF_DEltX2 const entry = dtable[index];                   \
-+            MEM_write16(op[(_stream)], entry.sequence); \
-+            bits[(_stream)] <<= (entry.nbBits) & 0x3F;                \
-+            op[(_stream)] += (entry.length);                          \
-+        }                                                             \
-+    } while (0)
-+
-+#define HUF_4X2_RELOAD_STREAM(_stream)                                  \
-+    do {                                                                \
-+        HUF_4X2_DECODE_SYMBOL(3, 1);                                    \
-+        {                                                               \
-+            int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
-+            int const nbBits = ctz & 7;                                 \
-+            int const nbBytes = ctz >> 3;                               \
-+            ip[(_stream)] -= nbBytes;                                   \
-+            bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1;            \
-+            bits[(_stream)] <<= nbBits;                                 \
-+        }                                                               \
-+    } while (0)
-+
-+        /* Manually unroll the loop because compilers don't consistently
-+         * unroll the inner loops, which destroys performance.
-+         */
-+        do {
-+            /* Decode 5 symbols from each of the first 3 streams.
-+             * The final stream will be decoded during the reload phase
-+             * to reduce register pressure.
-+             */
-+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
-+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
-+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
-+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
-+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
-+
-+            /* Decode one symbol from the final stream */
-+            HUF_4X2_DECODE_SYMBOL(3, 1);
-+
-+            /* Decode 4 symbols from the final stream & reload bitstreams.
-+             * The final stream is reloaded last, meaning that all 5 symbols
-+             * are decoded from the final stream before it is reloaded.
-+             */
-+            HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM);
-+        } while (op[3] < olimit);
-+    }
-+
-+#undef HUF_4X2_DECODE_SYMBOL
-+#undef HUF_4X2_RELOAD_STREAM
-+
-+_out:
-+
-+    /* Save the final values of each of the state variables back to args. */
-+    ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
-+    ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
-+    ZSTD_memcpy(&args->op, &op, sizeof(op));
-+}
-+
-+
-+static HUF_FAST_BMI2_ATTRS size_t
-+HUF_decompress4X2_usingDTable_internal_fast(
-           void* dst,  size_t dstSize,
-     const void* cSrc, size_t cSrcSize,
--    const HUF_DTable* DTable) {
-+    const HUF_DTable* DTable,
-+    HUF_DecompressFastLoopFn loopFn) {
-     void const* dt = DTable + 1;
--    const BYTE* const iend = (const BYTE*)cSrc + 6;
--    BYTE* const oend = (BYTE*)dst + dstSize;
--    HUF_DecompressAsmArgs args;
-+    const BYTE* const ilowest = (const BYTE*)cSrc;
-+    BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
-+    HUF_DecompressFastArgs args;
-     {
--        size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
-+        size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
-         FORWARD_IF_ERROR(ret, "Failed to init asm args");
--        if (ret != 0)
--            return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
-+        if (ret == 0)
-+            return 0;
-     }
- 
--    assert(args.ip[0] >= args.ilimit);
--    HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args);
-+    assert(args.ip[0] >= args.ilowest);
-+    loopFn(&args);
- 
-     /* note : op4 already verified within main loop */
--    assert(args.ip[0] >= iend);
--    assert(args.ip[1] >= iend);
--    assert(args.ip[2] >= iend);
--    assert(args.ip[3] >= iend);
-+    assert(args.ip[0] >= ilowest);
-+    assert(args.ip[1] >= ilowest);
-+    assert(args.ip[2] >= ilowest);
-+    assert(args.ip[3] >= ilowest);
-     assert(args.op[3] <= oend);
--    (void)iend;
-+
-+    assert(ilowest == args.ilowest);
-+    assert(ilowest + 6 == args.iend[0]);
-+    (void)ilowest;
- 
-     /* finish bitStreams one by one */
-     {
-@@ -1426,91 +1712,72 @@ HUF_decompress4X2_usingDTable_internal_bmi2_asm(
-     /* decoded size */
-     return dstSize;
- }
--#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
- 
- static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
--                    size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
-+                    size_t cSrcSize, HUF_DTable const* DTable, int flags)
- {
-+    HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X2_usingDTable_internal_default;
-+    HUF_DecompressFastLoopFn loopFn = HUF_decompress4X2_usingDTable_internal_fast_c_loop;
-+
- #if DYNAMIC_BMI2
--    if (bmi2) {
-+    if (flags & HUF_flags_bmi2) {
-+        fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2;
- # if ZSTD_ENABLE_ASM_X86_64_BMI2
--        return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
--# else
--        return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
-+        if (!(flags & HUF_flags_disableAsm)) {
-+            loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
-+        }
- # endif
-+    } else {
-+        return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
-     }
--#else
--    (void)bmi2;
- #endif
- 
- #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
--    return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
--#else
--    return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
-+    if (!(flags & HUF_flags_disableAsm)) {
-+        loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
-+    }
- #endif
-+
-+    if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
-+        size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
-+        if (ret != 0)
-+            return ret;
-+    }
-+    return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
- }
- 
- HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
- 
--size_t HUF_decompress1X2_usingDTable(
--          void* dst,  size_t dstSize,
--    const void* cSrc, size_t cSrcSize,
--    const HUF_DTable* DTable)
--{
--    DTableDesc dtd = HUF_getDTableDesc(DTable);
--    if (dtd.tableType != 1) return ERROR(GENERIC);
--    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
--}
--
- size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
-                                    const void* cSrc, size_t cSrcSize,
--                                   void* workSpace, size_t wkspSize)
-+                                   void* workSpace, size_t wkspSize, int flags)
- {
-     const BYTE* ip = (const BYTE*) cSrc;
- 
-     size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
--                                               workSpace, wkspSize);
-+                                               workSpace, wkspSize, flags);
-     if (HUF_isError(hSize)) return hSize;
-     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
-     ip += hSize; cSrcSize -= hSize;
- 
--    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
-+    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags);
- }
- 
--
--size_t HUF_decompress4X2_usingDTable(
--          void* dst,  size_t dstSize,
--    const void* cSrc, size_t cSrcSize,
--    const HUF_DTable* DTable)
--{
--    DTableDesc dtd = HUF_getDTableDesc(DTable);
--    if (dtd.tableType != 1) return ERROR(GENERIC);
--    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
--}
--
--static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
-+static size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
-                                    const void* cSrc, size_t cSrcSize,
--                                   void* workSpace, size_t wkspSize, int bmi2)
-+                                   void* workSpace, size_t wkspSize, int flags)
- {
-     const BYTE* ip = (const BYTE*) cSrc;
- 
-     size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
--                                         workSpace, wkspSize);
-+                                         workSpace, wkspSize, flags);
-     if (HUF_isError(hSize)) return hSize;
-     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
-     ip += hSize; cSrcSize -= hSize;
- 
--    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
-+    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
- }
- 
--size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
--                                   const void* cSrc, size_t cSrcSize,
--                                   void* workSpace, size_t wkspSize)
--{
--    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
--}
--
--
- #endif /* HUF_FORCE_DECOMPRESS_X1 */
- 
- 
-@@ -1518,44 +1785,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
- /* Universal decompression selectors */
- /* ***********************************/
- 
--size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
--                                    const void* cSrc, size_t cSrcSize,
--                                    const HUF_DTable* DTable)
--{
--    DTableDesc const dtd = HUF_getDTableDesc(DTable);
--#if defined(HUF_FORCE_DECOMPRESS_X1)
--    (void)dtd;
--    assert(dtd.tableType == 0);
--    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
--#elif defined(HUF_FORCE_DECOMPRESS_X2)
--    (void)dtd;
--    assert(dtd.tableType == 1);
--    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
--#else
--    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
--                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
--#endif
--}
--
--size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
--                                    const void* cSrc, size_t cSrcSize,
--                                    const HUF_DTable* DTable)
--{
--    DTableDesc const dtd = HUF_getDTableDesc(DTable);
--#if defined(HUF_FORCE_DECOMPRESS_X1)
--    (void)dtd;
--    assert(dtd.tableType == 0);
--    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
--#elif defined(HUF_FORCE_DECOMPRESS_X2)
--    (void)dtd;
--    assert(dtd.tableType == 1);
--    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
--#else
--    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
--                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
--#endif
--}
--
- 
- #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
- typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
-@@ -1610,36 +1839,9 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
- #endif
- }
- 
--
--size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
--                                     size_t dstSize, const void* cSrc,
--                                     size_t cSrcSize, void* workSpace,
--                                     size_t wkspSize)
--{
--    /* validation checks */
--    if (dstSize == 0) return ERROR(dstSize_tooSmall);
--    if (cSrcSize == 0) return ERROR(corruption_detected);
--
--    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
--#if defined(HUF_FORCE_DECOMPRESS_X1)
--        (void)algoNb;
--        assert(algoNb == 0);
--        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
--#elif defined(HUF_FORCE_DECOMPRESS_X2)
--        (void)algoNb;
--        assert(algoNb == 1);
--        return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
--#else
--        return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
--                            cSrcSize, workSpace, wkspSize):
--                        HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
--#endif
--    }
--}
--
- size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
-                                   const void* cSrc, size_t cSrcSize,
--                                  void* workSpace, size_t wkspSize)
-+                                  void* workSpace, size_t wkspSize, int flags)
- {
-     /* validation checks */
-     if (dstSize == 0) return ERROR(dstSize_tooSmall);
-@@ -1652,71 +1854,71 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
-         (void)algoNb;
-         assert(algoNb == 0);
-         return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
--                                cSrcSize, workSpace, wkspSize);
-+                                cSrcSize, workSpace, wkspSize, flags);
- #elif defined(HUF_FORCE_DECOMPRESS_X2)
-         (void)algoNb;
-         assert(algoNb == 1);
-         return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
--                                cSrcSize, workSpace, wkspSize);
-+                                cSrcSize, workSpace, wkspSize, flags);
- #else
-         return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
--                                cSrcSize, workSpace, wkspSize):
-+                                cSrcSize, workSpace, wkspSize, flags):
-                         HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
--                                cSrcSize, workSpace, wkspSize);
-+                                cSrcSize, workSpace, wkspSize, flags);
- #endif
-     }
- }
- 
- 
--size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
-+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
- {
-     DTableDesc const dtd = HUF_getDTableDesc(DTable);
- #if defined(HUF_FORCE_DECOMPRESS_X1)
-     (void)dtd;
-     assert(dtd.tableType == 0);
--    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
-+    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
- #elif defined(HUF_FORCE_DECOMPRESS_X2)
-     (void)dtd;
-     assert(dtd.tableType == 1);
--    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
-+    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
- #else
--    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
--                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
-+    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
-+                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
- #endif
- }
- 
- #ifndef HUF_FORCE_DECOMPRESS_X2
--size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
-+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
- {
-     const BYTE* ip = (const BYTE*) cSrc;
- 
--    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
-+    size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
-     if (HUF_isError(hSize)) return hSize;
-     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
-     ip += hSize; cSrcSize -= hSize;
- 
--    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
-+    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
- }
- #endif
- 
--size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
-+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
- {
-     DTableDesc const dtd = HUF_getDTableDesc(DTable);
- #if defined(HUF_FORCE_DECOMPRESS_X1)
-     (void)dtd;
-     assert(dtd.tableType == 0);
--    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
-+    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
- #elif defined(HUF_FORCE_DECOMPRESS_X2)
-     (void)dtd;
-     assert(dtd.tableType == 1);
--    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
-+    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
- #else
--    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
--                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
-+    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
-+                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
- #endif
- }
- 
--size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
-+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
- {
-     /* validation checks */
-     if (dstSize == 0) return ERROR(dstSize_tooSmall);
-@@ -1726,15 +1928,14 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
- #if defined(HUF_FORCE_DECOMPRESS_X1)
-         (void)algoNb;
-         assert(algoNb == 0);
--        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
-+        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
- #elif defined(HUF_FORCE_DECOMPRESS_X2)
-         (void)algoNb;
-         assert(algoNb == 1);
--        return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
-+        return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
- #else
--        return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
--                        HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
-+        return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags) :
-+                        HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
- #endif
-     }
- }
--
-diff --git a/lib/zstd/decompress/zstd_ddict.c b/lib/zstd/decompress/zstd_ddict.c
-index dbbc7919de53..30ef65e1ab5c 100644
---- a/lib/zstd/decompress/zstd_ddict.c
-+++ b/lib/zstd/decompress/zstd_ddict.c
-@@ -1,5 +1,6 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -14,12 +15,12 @@
- /*-*******************************************************
- *  Dependencies
- *********************************************************/
-+#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customFree */
- #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
- #include "../common/cpu.h"         /* bmi2 */
- #include "../common/mem.h"         /* low level memory routines */
- #define FSE_STATIC_LINKING_ONLY
- #include "../common/fse.h"
--#define HUF_STATIC_LINKING_ONLY
- #include "../common/huf.h"
- #include "zstd_decompress_internal.h"
- #include "zstd_ddict.h"
-@@ -131,7 +132,7 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
-         ZSTD_memcpy(internalBuffer, dict, dictSize);
-     }
-     ddict->dictSize = dictSize;
--    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
-+    ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001);  /* cover both little and big endian */
- 
-     /* parse dictionary content */
-     FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
-@@ -237,5 +238,5 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
- unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
- {
-     if (ddict==NULL) return 0;
--    return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
-+    return ddict->dictID;
- }
-diff --git a/lib/zstd/decompress/zstd_ddict.h b/lib/zstd/decompress/zstd_ddict.h
-index 8c1a79d666f8..de459a0dacd1 100644
---- a/lib/zstd/decompress/zstd_ddict.h
-+++ b/lib/zstd/decompress/zstd_ddict.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c
-index 6b3177c94711..c9cbc45f6ed9 100644
---- a/lib/zstd/decompress/zstd_decompress.c
-+++ b/lib/zstd/decompress/zstd_decompress.c
-@@ -1,5 +1,6 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -53,13 +54,15 @@
- *  Dependencies
- *********************************************************/
- #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
-+#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
-+#include "../common/error_private.h"
-+#include "../common/zstd_internal.h"  /* blockProperties_t */
- #include "../common/mem.h"         /* low level memory routines */
-+#include "../common/bits.h"  /* ZSTD_highbit32 */
- #define FSE_STATIC_LINKING_ONLY
- #include "../common/fse.h"
--#define HUF_STATIC_LINKING_ONLY
- #include "../common/huf.h"
- #include <linux/xxhash.h> /* xxh64_reset, xxh64_update, xxh64_digest, XXH64 */
--#include "../common/zstd_internal.h"  /* blockProperties_t */
- #include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
- #include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
- #include "zstd_decompress_block.h"   /* ZSTD_decompressBlock_internal */
-@@ -72,11 +75,11 @@
-  *************************************/
- 
- #define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
--#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3   /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
--                                                     * Currently, that means a 0.75 load factor.
--                                                     * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
--                                                     * the load factor of the ddict hash set.
--                                                     */
-+#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3  /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
-+                                                    * Currently, that means a 0.75 load factor.
-+                                                    * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
-+                                                    * the load factor of the ddict hash set.
-+                                                    */
- 
- #define DDICT_HASHSET_TABLE_BASE_SIZE 64
- #define DDICT_HASHSET_RESIZE_FACTOR 2
-@@ -237,6 +240,8 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
-     dctx->outBufferMode = ZSTD_bm_buffered;
-     dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
-     dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
-+    dctx->disableHufAsm = 0;
-+    dctx->maxBlockSizeParam = 0;
- }
- 
- static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
-@@ -253,6 +258,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
-     dctx->streamStage = zdss_init;
-     dctx->noForwardProgress = 0;
-     dctx->oversizedDuration = 0;
-+    dctx->isFrameDecompression = 1;
- #if DYNAMIC_BMI2
-     dctx->bmi2 = ZSTD_cpuSupportsBmi2();
- #endif
-@@ -421,16 +427,40 @@ size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
-  *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
-  * @return : 0, `zfhPtr` is correctly filled,
-  *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
-- *           or an error code, which can be tested using ZSTD_isError() */
-+**           or an error code, which can be tested using ZSTD_isError() */
- size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
- {
-     const BYTE* ip = (const BYTE*)src;
-     size_t const minInputSize = ZSTD_startingInputLength(format);
- 
--    ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));   /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
--    if (srcSize < minInputSize) return minInputSize;
--    RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
-+    DEBUGLOG(5, "ZSTD_getFrameHeader_advanced: minInputSize = %zu, srcSize = %zu", minInputSize, srcSize);
-+
-+    if (srcSize > 0) {
-+        /* note : technically could be considered an assert(), since it's an invalid entry */
-+        RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter : src==NULL, but srcSize>0");
-+    }
-+    if (srcSize < minInputSize) {
-+        if (srcSize > 0 && format != ZSTD_f_zstd1_magicless) {
-+            /* when receiving less than @minInputSize bytes,
-+             * control these bytes at least correspond to a supported magic number
-+             * in order to error out early if they don't.
-+            **/
-+            size_t const toCopy = MIN(4, srcSize);
-+            unsigned char hbuf[4]; MEM_writeLE32(hbuf, ZSTD_MAGICNUMBER);
-+            assert(src != NULL);
-+            ZSTD_memcpy(hbuf, src, toCopy);
-+            if ( MEM_readLE32(hbuf) != ZSTD_MAGICNUMBER ) {
-+                /* not a zstd frame : let's check if it's a skippable frame */
-+                MEM_writeLE32(hbuf, ZSTD_MAGIC_SKIPPABLE_START);
-+                ZSTD_memcpy(hbuf, src, toCopy);
-+                if ((MEM_readLE32(hbuf) & ZSTD_MAGIC_SKIPPABLE_MASK) != ZSTD_MAGIC_SKIPPABLE_START) {
-+                    RETURN_ERROR(prefix_unknown,
-+                                "first bytes don't correspond to any supported magic number");
-+        }   }   }
-+        return minInputSize;
-+    }
- 
-+    ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));   /* not strictly necessary, but static analyzers may not understand that zfhPtr will be read only if return value is zero, since they are 2 different signals */
-     if ( (format != ZSTD_f_zstd1_magicless)
-       && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
-         if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
-@@ -540,61 +570,62 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
-     sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
-     RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
-                     frameParameter_unsupported, "");
--    {
--        size_t const skippableSize = skippableHeaderSize + sizeU32;
-+    {   size_t const skippableSize = skippableHeaderSize + sizeU32;
-         RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
-         return skippableSize;
-     }
- }
- 
- /*! ZSTD_readSkippableFrame() :
-- * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.
-+ * Retrieves content of a skippable frame, and writes it to dst buffer.
-  *
-  * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
-  * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START.  This can be NULL if the caller is not interested
-  * in the magicVariant.
-  *
-- * Returns an error if destination buffer is not large enough, or if the frame is not skippable.
-+ * Returns an error if destination buffer is not large enough, or if this is not a valid skippable frame.
-  *
-  * @return : number of bytes written or a ZSTD error.
-  */
--ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
--                                            const void* src, size_t srcSize)
-+size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity,
-+                               unsigned* magicVariant,  /* optional, can be NULL */
-+                         const void* src, size_t srcSize)
- {
--    U32 const magicNumber = MEM_readLE32(src);
--    size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
--    size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
--
--    /* check input validity */
--    RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
--    RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
--    RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
-+    RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
- 
--    /* deliver payload */
--    if (skippableContentSize > 0  && dst != NULL)
--        ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
--    if (magicVariant != NULL)
--        *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
--    return skippableContentSize;
-+    {   U32 const magicNumber = MEM_readLE32(src);
-+        size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
-+        size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
-+
-+        /* check input validity */
-+        RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
-+        RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
-+        RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
-+
-+        /* deliver payload */
-+        if (skippableContentSize > 0  && dst != NULL)
-+            ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
-+        if (magicVariant != NULL)
-+            *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
-+        return skippableContentSize;
-+    }
- }
- 
- /* ZSTD_findDecompressedSize() :
-- *  compatible with legacy mode
-  *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
-  *      skippable frames
-- *  @return : decompressed size of the frames contained */
-+ *  note: compatible with legacy mode
-+ * @return : decompressed size of the frames contained */
- unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
- {
--    unsigned long long totalDstSize = 0;
-+    U64 totalDstSize = 0;
- 
-     while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
-         U32 const magicNumber = MEM_readLE32(src);
- 
-         if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
-             size_t const skippableSize = readSkippableFrameSize(src, srcSize);
--            if (ZSTD_isError(skippableSize)) {
--                return ZSTD_CONTENTSIZE_ERROR;
--            }
-+            if (ZSTD_isError(skippableSize)) return ZSTD_CONTENTSIZE_ERROR;
-             assert(skippableSize <= srcSize);
- 
-             src = (const BYTE *)src + skippableSize;
-@@ -602,17 +633,17 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
-             continue;
-         }
- 
--        {   unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
--            if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
-+        {   unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize);
-+            if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs;
- 
--            /* check for overflow */
--            if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
--            totalDstSize += ret;
-+            if (U64_MAX - totalDstSize < fcs)
-+                return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */
-+            totalDstSize += fcs;
-         }
-+        /* skip to next frame */
-         {   size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
--            if (ZSTD_isError(frameSrcSize)) {
--                return ZSTD_CONTENTSIZE_ERROR;
--            }
-+            if (ZSTD_isError(frameSrcSize)) return ZSTD_CONTENTSIZE_ERROR;
-+            assert(frameSrcSize <= srcSize);
- 
-             src = (const BYTE *)src + frameSrcSize;
-             srcSize -= frameSrcSize;
-@@ -676,13 +707,13 @@ static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
-     return frameSizeInfo;
- }
- 
--static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
-+static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize, ZSTD_format_e format)
- {
-     ZSTD_frameSizeInfo frameSizeInfo;
-     ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
- 
- 
--    if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
-+    if (format == ZSTD_f_zstd1 && (srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
-         && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
-         frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
-         assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
-@@ -696,7 +727,7 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
-         ZSTD_frameHeader zfh;
- 
-         /* Extract Frame Header */
--        {   size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
-+        {   size_t const ret = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize, format);
-             if (ZSTD_isError(ret))
-                 return ZSTD_errorFrameSizeInfo(ret);
-             if (ret > 0)
-@@ -730,23 +761,26 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
-             ip += 4;
-         }
- 
-+        frameSizeInfo.nbBlocks = nbBlocks;
-         frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
-         frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
-                                         ? zfh.frameContentSize
--                                        : nbBlocks * zfh.blockSizeMax;
-+                                        : (unsigned long long)nbBlocks * zfh.blockSizeMax;
-         return frameSizeInfo;
-     }
- }
- 
-+static size_t ZSTD_findFrameCompressedSize_advanced(const void *src, size_t srcSize, ZSTD_format_e format) {
-+    ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, format);
-+    return frameSizeInfo.compressedSize;
-+}
-+
- /* ZSTD_findFrameCompressedSize() :
-- *  compatible with legacy mode
-- *  `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
-- *  `srcSize` must be at least as large as the frame contained
-- *  @return : the compressed size of the frame starting at `src` */
-+ * See docs in zstd.h
-+ * Note: compatible with legacy mode */
- size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
- {
--    ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
--    return frameSizeInfo.compressedSize;
-+    return ZSTD_findFrameCompressedSize_advanced(src, srcSize, ZSTD_f_zstd1);
- }
- 
- /* ZSTD_decompressBound() :
-@@ -760,7 +794,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
-     unsigned long long bound = 0;
-     /* Iterate over each frame */
-     while (srcSize > 0) {
--        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
-+        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);
-         size_t const compressedSize = frameSizeInfo.compressedSize;
-         unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
-         if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
-@@ -773,6 +807,48 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
-     return bound;
- }
- 
-+size_t ZSTD_decompressionMargin(void const* src, size_t srcSize)
-+{
-+    size_t margin = 0;
-+    unsigned maxBlockSize = 0;
-+
-+    /* Iterate over each frame */
-+    while (srcSize > 0) {
-+        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);
-+        size_t const compressedSize = frameSizeInfo.compressedSize;
-+        unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
-+        ZSTD_frameHeader zfh;
-+
-+        FORWARD_IF_ERROR(ZSTD_getFrameHeader(&zfh, src, srcSize), "");
-+        if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
-+            return ERROR(corruption_detected);
-+
-+        if (zfh.frameType == ZSTD_frame) {
-+            /* Add the frame header to our margin */
-+            margin += zfh.headerSize;
-+            /* Add the checksum to our margin */
-+            margin += zfh.checksumFlag ? 4 : 0;
-+            /* Add 3 bytes per block */
-+            margin += 3 * frameSizeInfo.nbBlocks;
-+
-+            /* Compute the max block size */
-+            maxBlockSize = MAX(maxBlockSize, zfh.blockSizeMax);
-+        } else {
-+            assert(zfh.frameType == ZSTD_skippableFrame);
-+            /* Add the entire skippable frame size to our margin. */
-+            margin += compressedSize;
-+        }
-+
-+        assert(srcSize >= compressedSize);
-+        src = (const BYTE*)src + compressedSize;
-+        srcSize -= compressedSize;
-+    }
-+
-+    /* Add the max block size back to the margin. */
-+    margin += maxBlockSize;
-+
-+    return margin;
-+}
- 
- /*-*************************************************************
-  *   Frame decoding
-@@ -856,6 +932,10 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
-         ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
-     }
- 
-+    /* Shrink the blockSizeMax if enabled */
-+    if (dctx->maxBlockSizeParam != 0)
-+        dctx->fParams.blockSizeMax = MIN(dctx->fParams.blockSizeMax, (unsigned)dctx->maxBlockSizeParam);
-+
-     /* Loop on each block */
-     while (1) {
-         BYTE* oBlockEnd = oend;
-@@ -888,7 +968,8 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
-         switch(blockProperties.blockType)
-         {
-         case bt_compressed:
--            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming);
-+            assert(dctx->isFrameDecompression == 1);
-+            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, not_streaming);
-             break;
-         case bt_raw :
-             /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. */
-@@ -901,12 +982,14 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
-         default:
-             RETURN_ERROR(corruption_detected, "invalid block type");
-         }
--
--        if (ZSTD_isError(decodedSize)) return decodedSize;
--        if (dctx->validateChecksum)
-+        FORWARD_IF_ERROR(decodedSize, "Block decompression failure");
-+        DEBUGLOG(5, "Decompressed block of dSize = %u", (unsigned)decodedSize);
-+        if (dctx->validateChecksum) {
-             xxh64_update(&dctx->xxhState, op, decodedSize);
--        if (decodedSize != 0)
-+        }
-+        if (decodedSize) /* support dst = NULL,0 */ {
-             op += decodedSize;
-+        }
-         assert(ip != NULL);
-         ip += cBlockSize;
-         remainingSrcSize -= cBlockSize;
-@@ -930,12 +1013,15 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
-     }
-     ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
-     /* Allow caller to get size read */
-+    DEBUGLOG(4, "ZSTD_decompressFrame: decompressed frame of size %zi, consuming %zi bytes of input", op-ostart, ip - (const BYTE*)*srcPtr);
-     *srcPtr = ip;
-     *srcSizePtr = remainingSrcSize;
-     return (size_t)(op-ostart);
- }
- 
--static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
-+static
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-+size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
-                                         void* dst, size_t dstCapacity,
-                                   const void* src, size_t srcSize,
-                                   const void* dict, size_t dictSize,
-@@ -955,17 +1041,18 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
-     while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
- 
- 
--        {   U32 const magicNumber = MEM_readLE32(src);
--            DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
--                        (unsigned)magicNumber, ZSTD_MAGICNUMBER);
-+        if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) {
-+            U32 const magicNumber = MEM_readLE32(src);
-+            DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
-             if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
-+                /* skippable frame detected : skip it */
-                 size_t const skippableSize = readSkippableFrameSize(src, srcSize);
--                FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
-+                FORWARD_IF_ERROR(skippableSize, "invalid skippable frame");
-                 assert(skippableSize <= srcSize);
- 
-                 src = (const BYTE *)src + skippableSize;
-                 srcSize -= skippableSize;
--                continue;
-+                continue; /* check next frame */
-         }   }
- 
-         if (ddict) {
-@@ -1061,8 +1148,8 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
- size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
- 
- /*
-- * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed,
-- * we allow taking a partial block as the input. Currently only raw uncompressed blocks can
-+ * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, we
-+ * allow taking a partial block as the input. Currently only raw uncompressed blocks can
-  * be streamed.
-  *
-  * For blocks that can be streamed, this allows us to reduce the latency until we produce
-@@ -1181,7 +1268,8 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
-             {
-             case bt_compressed:
-                 DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
--                rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming);
-+                assert(dctx->isFrameDecompression == 1);
-+                rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, is_streaming);
-                 dctx->expected = 0;  /* Streaming not supported */
-                 break;
-             case bt_raw :
-@@ -1250,6 +1338,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
-     case ZSTDds_decodeSkippableHeader:
-         assert(src != NULL);
-         assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
-+        assert(dctx->format != ZSTD_f_zstd1_magicless);
-         ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize);   /* complete skippable header */
-         dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE);   /* note : dctx->expected can grow seriously large, beyond local buffer size */
-         dctx->stage = ZSTDds_skipFrame;
-@@ -1262,7 +1351,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
- 
-     default:
-         assert(0);   /* impossible */
--        RETURN_ERROR(GENERIC, "impossible to reach");   /* some compiler require default to do something */
-+        RETURN_ERROR(GENERIC, "impossible to reach");   /* some compilers require default to do something */
-     }
- }
- 
-@@ -1303,11 +1392,11 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
-         /* in minimal huffman, we always use X1 variants */
-         size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
-                                                 dictPtr, dictEnd - dictPtr,
--                                                workspace, workspaceSize);
-+                                                workspace, workspaceSize, /* flags */ 0);
- #else
-         size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
-                                                 dictPtr, (size_t)(dictEnd - dictPtr),
--                                                workspace, workspaceSize);
-+                                                workspace, workspaceSize, /* flags */ 0);
- #endif
-         RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
-         dictPtr += hSize;
-@@ -1403,10 +1492,11 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
-     dctx->prefixStart = NULL;
-     dctx->virtualStart = NULL;
-     dctx->dictEnd = NULL;
--    dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
-+    dctx->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001);  /* cover both little and big endian */
-     dctx->litEntropy = dctx->fseEntropy = 0;
-     dctx->dictID = 0;
-     dctx->bType = bt_reserved;
-+    dctx->isFrameDecompression = 1;
-     ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
-     ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue));  /* initial repcodes */
-     dctx->LLTptr = dctx->entropy.LLTable;
-@@ -1465,7 +1555,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
-  *  This could for one of the following reasons :
-  *  - The frame does not require a dictionary (most common case).
-  *  - The frame was built with dictID intentionally removed.
-- *    Needed dictionary is a hidden information.
-+ *    Needed dictionary is a hidden piece of information.
-  *    Note : this use case also happens when using a non-conformant dictionary.
-  *  - `srcSize` is too small, and as a result, frame header could not be decoded.
-  *    Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
-@@ -1474,7 +1564,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
-  *  ZSTD_getFrameHeader(), which will provide a more precise error code. */
- unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
- {
--    ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 };
-+    ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0 };
-     size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
-     if (ZSTD_isError(hError)) return 0;
-     return zfp.dictID;
-@@ -1581,7 +1671,9 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di
- size_t ZSTD_initDStream(ZSTD_DStream* zds)
- {
-     DEBUGLOG(4, "ZSTD_initDStream");
--    return ZSTD_initDStream_usingDDict(zds, NULL);
-+    FORWARD_IF_ERROR(ZSTD_DCtx_reset(zds, ZSTD_reset_session_only), "");
-+    FORWARD_IF_ERROR(ZSTD_DCtx_refDDict(zds, NULL), "");
-+    return ZSTD_startingInputLength(zds->format);
- }
- 
- /* ZSTD_initDStream_usingDDict() :
-@@ -1589,6 +1681,7 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds)
-  * this function cannot fail */
- size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
- {
-+    DEBUGLOG(4, "ZSTD_initDStream_usingDDict");
-     FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , "");
-     FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , "");
-     return ZSTD_startingInputLength(dctx->format);
-@@ -1599,6 +1692,7 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
-  * this function cannot fail */
- size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
- {
-+    DEBUGLOG(4, "ZSTD_resetDStream");
-     FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), "");
-     return ZSTD_startingInputLength(dctx->format);
- }
-@@ -1670,6 +1764,15 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
-             bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
-             bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
-             return bounds;
-+        case ZSTD_d_disableHuffmanAssembly:
-+            bounds.lowerBound = 0;
-+            bounds.upperBound = 1;
-+            return bounds;
-+        case ZSTD_d_maxBlockSize:
-+            bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN;
-+            bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
-+            return bounds;
-+
-         default:;
-     }
-     bounds.error = ERROR(parameter_unsupported);
-@@ -1710,6 +1813,12 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value
-         case ZSTD_d_refMultipleDDicts:
-             *value = (int)dctx->refMultipleDDicts;
-             return 0;
-+        case ZSTD_d_disableHuffmanAssembly:
-+            *value = (int)dctx->disableHufAsm;
-+            return 0;
-+        case ZSTD_d_maxBlockSize:
-+            *value = dctx->maxBlockSizeParam;
-+            return 0;
-         default:;
-     }
-     RETURN_ERROR(parameter_unsupported, "");
-@@ -1743,6 +1852,14 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
-             }
-             dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
-             return 0;
-+        case ZSTD_d_disableHuffmanAssembly:
-+            CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value);
-+            dctx->disableHufAsm = value != 0;
-+            return 0;
-+        case ZSTD_d_maxBlockSize:
-+            if (value != 0) CHECK_DBOUNDS(ZSTD_d_maxBlockSize, value);
-+            dctx->maxBlockSizeParam = value;
-+            return 0;
-         default:;
-     }
-     RETURN_ERROR(parameter_unsupported, "");
-@@ -1754,6 +1871,7 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
-       || (reset == ZSTD_reset_session_and_parameters) ) {
-         dctx->streamStage = zdss_init;
-         dctx->noForwardProgress = 0;
-+        dctx->isFrameDecompression = 1;
-     }
-     if ( (reset == ZSTD_reset_parameters)
-       || (reset == ZSTD_reset_session_and_parameters) ) {
-@@ -1770,11 +1888,17 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
-     return ZSTD_sizeof_DCtx(dctx);
- }
- 
--size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
-+static size_t ZSTD_decodingBufferSize_internal(unsigned long long windowSize, unsigned long long frameContentSize, size_t blockSizeMax)
- {
--    size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
--    /* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/
--    unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2);
-+    size_t const blockSize = MIN((size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX), blockSizeMax);
-+    /* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block
-+     * ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing
-+     * the block at the beginning of the output buffer, and maintain a full window.
-+     *
-+     * We need another blockSize worth of buffer so that we can store split
-+     * literals at the end of the block without overwriting the extDict window.
-+     */
-+    unsigned long long const neededRBSize = windowSize + (blockSize * 2) + (WILDCOPY_OVERLENGTH * 2);
-     unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
-     size_t const minRBSize = (size_t) neededSize;
-     RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
-@@ -1782,6 +1906,11 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
-     return minRBSize;
- }
- 
-+size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
-+{
-+    return ZSTD_decodingBufferSize_internal(windowSize, frameContentSize, ZSTD_BLOCKSIZE_MAX);
-+}
-+
- size_t ZSTD_estimateDStreamSize(size_t windowSize)
- {
-     size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
-@@ -1918,7 +2047,6 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
-                 if (zds->refMultipleDDicts && zds->ddictSet) {
-                     ZSTD_DCtx_selectFrameDDict(zds);
-                 }
--                DEBUGLOG(5, "header size : %u", (U32)hSize);
-                 if (ZSTD_isError(hSize)) {
-                     return hSize;   /* error */
-                 }
-@@ -1932,6 +2060,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
-                             zds->lhSize += remainingInput;
-                         }
-                         input->pos = input->size;
-+                        /* check first few bytes */
-+                        FORWARD_IF_ERROR(
-+                            ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format),
-+                            "First few bytes detected incorrect" );
-+                        /* return hint input size */
-                         return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
-                     }
-                     assert(ip != NULL);
-@@ -1943,14 +2076,15 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
-             if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
-                 && zds->fParams.frameType != ZSTD_skippableFrame
-                 && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
--                size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart));
-+                size_t const cSize = ZSTD_findFrameCompressedSize_advanced(istart, (size_t)(iend-istart), zds->format);
-                 if (cSize <= (size_t)(iend-istart)) {
-                     /* shortcut : using single-pass mode */
-                     size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
-                     if (ZSTD_isError(decompressedSize)) return decompressedSize;
--                    DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
-+                    DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()");
-+                    assert(istart != NULL);
-                     ip = istart + cSize;
--                    op += decompressedSize;
-+                    op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */
-                     zds->expected = 0;
-                     zds->streamStage = zdss_init;
-                     someMoreWork = 0;
-@@ -1969,7 +2103,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
-             DEBUGLOG(4, "Consume header");
-             FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
- 
--            if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
-+            if (zds->format == ZSTD_f_zstd1
-+                && (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
-                 zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
-                 zds->stage = ZSTDds_skipFrame;
-             } else {
-@@ -1985,11 +2120,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
-             zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
-             RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
-                             frameParameter_windowTooLarge, "");
-+            if (zds->maxBlockSizeParam != 0)
-+                zds->fParams.blockSizeMax = MIN(zds->fParams.blockSizeMax, (unsigned)zds->maxBlockSizeParam);
- 
-             /* Adapt buffer sizes to frame header instructions */
-             {   size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
-                 size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
--                        ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
-+                        ? ZSTD_decodingBufferSize_internal(zds->fParams.windowSize, zds->fParams.frameContentSize, zds->fParams.blockSizeMax)
-                         : 0;
- 
-                 ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);
-@@ -2034,6 +2171,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
-                 }
-                 if ((size_t)(iend-ip) >= neededInSize) {  /* decode directly from src */
-                     FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), "");
-+                    assert(ip != NULL);
-                     ip += neededInSize;
-                     /* Function modifies the stage so we must break */
-                     break;
-@@ -2048,7 +2186,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
-                 int const isSkipFrame = ZSTD_isSkipFrame(zds);
-                 size_t loadedSize;
-                 /* At this point we shouldn't be decompressing a block that we can stream. */
--                assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip));
-+                assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)));
-                 if (isSkipFrame) {
-                     loadedSize = MIN(toLoad, (size_t)(iend-ip));
-                 } else {
-@@ -2057,8 +2195,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
-                                     "should never happen");
-                     loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip));
-                 }
--                ip += loadedSize;
--                zds->inPos += loadedSize;
-+                if (loadedSize != 0) {
-+                    /* ip may be NULL */
-+                    ip += loadedSize;
-+                    zds->inPos += loadedSize;
-+                }
-                 if (loadedSize < toLoad) { someMoreWork = 0; break; }   /* not enough input, wait for more */
- 
-                 /* decode loaded input */
-@@ -2068,14 +2209,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
-                 break;
-             }
-         case zdss_flush:
--            {   size_t const toFlushSize = zds->outEnd - zds->outStart;
-+            {
-+                size_t const toFlushSize = zds->outEnd - zds->outStart;
-                 size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize);
--                op += flushedSize;
-+
-+                op = op ? op + flushedSize : op;
-+
-                 zds->outStart += flushedSize;
-                 if (flushedSize == toFlushSize) {  /* flush completed */
-                     zds->streamStage = zdss_read;
-                     if ( (zds->outBuffSize < zds->fParams.frameContentSize)
--                      && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
-+                        && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
-                         DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
-                                 (int)(zds->outBuffSize - zds->outStart),
-                                 (U32)zds->fParams.blockSizeMax);
-@@ -2089,7 +2233,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
- 
-         default:
-             assert(0);    /* impossible */
--            RETURN_ERROR(GENERIC, "impossible to reach");   /* some compiler require default to do something */
-+            RETURN_ERROR(GENERIC, "impossible to reach");   /* some compilers require default to do something */
-     }   }
- 
-     /* result */
-@@ -2102,8 +2246,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
-     if ((ip==istart) && (op==ostart)) {  /* no forward progress */
-         zds->noForwardProgress ++;
-         if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
--            RETURN_ERROR_IF(op==oend, dstSize_tooSmall, "");
--            RETURN_ERROR_IF(ip==iend, srcSize_wrong, "");
-+            RETURN_ERROR_IF(op==oend, noForwardProgress_destFull, "");
-+            RETURN_ERROR_IF(ip==iend, noForwardProgress_inputEmpty, "");
-             assert(0);
-         }
-     } else {
-@@ -2140,11 +2284,17 @@ size_t ZSTD_decompressStream_simpleArgs (
-                             void* dst, size_t dstCapacity, size_t* dstPos,
-                       const void* src, size_t srcSize, size_t* srcPos)
- {
--    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
--    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
--    /* ZSTD_compress_generic() will check validity of dstPos and srcPos */
--    size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
--    *dstPos = output.pos;
--    *srcPos = input.pos;
--    return cErr;
-+    ZSTD_outBuffer output;
-+    ZSTD_inBuffer  input;
-+    output.dst = dst;
-+    output.size = dstCapacity;
-+    output.pos = *dstPos;
-+    input.src = src;
-+    input.size = srcSize;
-+    input.pos = *srcPos;
-+    {   size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
-+        *dstPos = output.pos;
-+        *srcPos = input.pos;
-+        return cErr;
-+    }
- }
-diff --git a/lib/zstd/decompress/zstd_decompress_block.c b/lib/zstd/decompress/zstd_decompress_block.c
-index c1913b8e7c89..9fe9a12c8a2c 100644
---- a/lib/zstd/decompress/zstd_decompress_block.c
-+++ b/lib/zstd/decompress/zstd_decompress_block.c
-@@ -1,5 +1,6 @@
-+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -20,12 +21,12 @@
- #include "../common/mem.h"         /* low level memory routines */
- #define FSE_STATIC_LINKING_ONLY
- #include "../common/fse.h"
--#define HUF_STATIC_LINKING_ONLY
- #include "../common/huf.h"
- #include "../common/zstd_internal.h"
- #include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
- #include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
- #include "zstd_decompress_block.h"
-+#include "../common/bits.h"  /* ZSTD_highbit32 */
- 
- /*_*******************************************************
- *  Macros
-@@ -51,6 +52,13 @@ static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
-  *   Block decoding
-  ***************************************************************/
- 
-+static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx)
-+{
-+    size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX;
-+    assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX);
-+    return blockSizeMax;
-+}
-+
- /*! ZSTD_getcBlockSize() :
-  *  Provides the size of compressed block from block header `src` */
- size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
-@@ -73,41 +81,49 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
- static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
-     const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
- {
--    if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
--    {
--        /* room for litbuffer to fit without read faulting */
--        dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
-+    size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
-+    assert(litSize <= blockSizeMax);
-+    assert(dctx->isFrameDecompression || streaming == not_streaming);
-+    assert(expectedWriteSize <= blockSizeMax);
-+    if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) {
-+        /* If we aren't streaming, we can just put the literals after the output
-+         * of the current block. We don't need to worry about overwriting the
-+         * extDict of our window, because it doesn't exist.
-+         * So if we have space after the end of the block, just put it there.
-+         */
-+        dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH;
-         dctx->litBufferEnd = dctx->litBuffer + litSize;
-         dctx->litBufferLocation = ZSTD_in_dst;
--    }
--    else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
--    {
--        /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
-+    } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) {
-+        /* Literals fit entirely within the extra buffer, put them there to avoid
-+         * having to split the literals.
-+         */
-+        dctx->litBuffer = dctx->litExtraBuffer;
-+        dctx->litBufferEnd = dctx->litBuffer + litSize;
-+        dctx->litBufferLocation = ZSTD_not_in_dst;
-+    } else {
-+        assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE);
-+        /* Literals must be split between the output block and the extra lit
-+         * buffer. We fill the extra lit buffer with the tail of the literals,
-+         * and put the rest of the literals at the end of the block, with
-+         * WILDCOPY_OVERLENGTH of buffer room to allow for overreads.
-+         * This MUST not write more than our maxBlockSize beyond dst, because in
-+         * streaming mode, that could overwrite part of our extDict window.
-+         */
-         if (splitImmediately) {
-             /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
-             dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
-             dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
--        }
--        else {
--            /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */
-+        } else {
-+            /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
-             dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
-             dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
-         }
-         dctx->litBufferLocation = ZSTD_split;
--    }
--    else
--    {
--        /* fits entirely within litExtraBuffer, so no split is necessary */
--        dctx->litBuffer = dctx->litExtraBuffer;
--        dctx->litBufferEnd = dctx->litBuffer + litSize;
--        dctx->litBufferLocation = ZSTD_not_in_dst;
-+        assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize);
-     }
- }
- 
--/* Hidden declaration for fullbench */
--size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
--                          const void* src, size_t srcSize,
--                          void* dst, size_t dstCapacity, const streaming_operation streaming);
- /*! ZSTD_decodeLiteralsBlock() :
-  * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
-  * in the dstBuffer.  If there is room to do so, it will be stored in full in the excess dst space after where the current
-@@ -116,7 +132,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-  *
-  * @return : nb of bytes read from src (< srcSize )
-  *  note : symbol not declared but exposed for fullbench */
--size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-+static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-                           const void* src, size_t srcSize,   /* note : srcSize < BLOCKSIZE */
-                           void* dst, size_t dstCapacity, const streaming_operation streaming)
- {
-@@ -125,6 +141,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
- 
-     {   const BYTE* const istart = (const BYTE*) src;
-         symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
-+        size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
- 
-         switch(litEncType)
-         {
-@@ -134,13 +151,16 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-             ZSTD_FALLTHROUGH;
- 
-         case set_compressed:
--            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
-+            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
-             {   size_t lhSize, litSize, litCSize;
-                 U32 singleStream=0;
-                 U32 const lhlCode = (istart[0] >> 2) & 3;
-                 U32 const lhc = MEM_readLE32(istart);
-                 size_t hufSuccess;
--                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
-+                size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
-+                int const flags = 0
-+                    | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
-+                    | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
-                 switch(lhlCode)
-                 {
-                 case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
-@@ -164,7 +184,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-                     break;
-                 }
-                 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
--                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
-+                RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
-+                if (!singleStream)
-+                    RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
-+                        "Not enough literals (%zu) for the 4-streams mode (min %u)",
-+                        litSize, MIN_LITERALS_FOR_4_STREAMS);
-                 RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
-                 RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
-                 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
-@@ -176,13 +200,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
- 
-                 if (litEncType==set_repeat) {
-                     if (singleStream) {
--                        hufSuccess = HUF_decompress1X_usingDTable_bmi2(
-+                        hufSuccess = HUF_decompress1X_usingDTable(
-                             dctx->litBuffer, litSize, istart+lhSize, litCSize,
--                            dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
-+                            dctx->HUFptr, flags);
-                     } else {
--                        hufSuccess = HUF_decompress4X_usingDTable_bmi2(
-+                        assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
-+                        hufSuccess = HUF_decompress4X_usingDTable(
-                             dctx->litBuffer, litSize, istart+lhSize, litCSize,
--                            dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
-+                            dctx->HUFptr, flags);
-                     }
-                 } else {
-                     if (singleStream) {
-@@ -190,26 +215,28 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-                         hufSuccess = HUF_decompress1X_DCtx_wksp(
-                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
-                             istart+lhSize, litCSize, dctx->workspace,
--                            sizeof(dctx->workspace));
-+                            sizeof(dctx->workspace), flags);
- #else
--                        hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
-+                        hufSuccess = HUF_decompress1X1_DCtx_wksp(
-                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
-                             istart+lhSize, litCSize, dctx->workspace,
--                            sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
-+                            sizeof(dctx->workspace), flags);
- #endif
-                     } else {
--                        hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
-+                        hufSuccess = HUF_decompress4X_hufOnly_wksp(
-                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
-                             istart+lhSize, litCSize, dctx->workspace,
--                            sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
-+                            sizeof(dctx->workspace), flags);
-                     }
-                 }
-                 if (dctx->litBufferLocation == ZSTD_split)
-                 {
-+                    assert(litSize > ZSTD_LITBUFFEREXTRASIZE);
-                     ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
-                     ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
-                     dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
-                     dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
-+                    assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax);
-                 }
- 
-                 RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
-@@ -224,7 +251,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-         case set_basic:
-             {   size_t litSize, lhSize;
-                 U32 const lhlCode = ((istart[0]) >> 2) & 3;
--                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
-+                size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
-                 switch(lhlCode)
-                 {
-                 case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
-@@ -237,11 +264,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-                     break;
-                 case 3:
-                     lhSize = 3;
-+                    RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
-                     litSize = MEM_readLE24(istart) >> 4;
-                     break;
-                 }
- 
-                 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
-+                RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
-                 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
-                 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
-                 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
-@@ -270,7 +299,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-         case set_rle:
-             {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
-                 size_t litSize, lhSize;
--                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
-+                size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
-                 switch(lhlCode)
-                 {
-                 case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
-@@ -279,16 +308,17 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-                     break;
-                 case 1:
-                     lhSize = 2;
-+                    RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
-                     litSize = MEM_readLE16(istart) >> 4;
-                     break;
-                 case 3:
-                     lhSize = 3;
-+                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
-                     litSize = MEM_readLE24(istart) >> 4;
--                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
-                     break;
-                 }
-                 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
--                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
-+                RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
-                 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
-                 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
-                 if (dctx->litBufferLocation == ZSTD_split)
-@@ -310,6 +340,18 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-     }
- }
- 
-+/* Hidden declaration for fullbench */
-+size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
-+                          const void* src, size_t srcSize,
-+                          void* dst, size_t dstCapacity);
-+size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
-+                          const void* src, size_t srcSize,
-+                          void* dst, size_t dstCapacity)
-+{
-+    dctx->isFrameDecompression = 0;
-+    return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming);
-+}
-+
- /* Default FSE distribution tables.
-  * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
-  * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
-@@ -506,14 +548,15 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
-                 for (i = 8; i < n; i += 8) {
-                     MEM_write64(spread + pos + i, sv);
-                 }
--                pos += n;
-+                assert(n>=0);
-+                pos += (size_t)n;
-             }
-         }
-         /* Now we spread those positions across the table.
--         * The benefit of doing it in two stages is that we avoid the the
-+         * The benefit of doing it in two stages is that we avoid the
-          * variable size inner loop, which caused lots of branch misses.
-          * Now we can run through all the positions without any branch misses.
--         * We unroll the loop twice, since that is what emperically worked best.
-+         * We unroll the loop twice, since that is what empirically worked best.
-          */
-         {
-             size_t position = 0;
-@@ -540,7 +583,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
-             for (i=0; i<n; i++) {
-                 tableDecode[position].baseValue = s;
-                 position = (position + step) & tableMask;
--                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
-+                while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask;   /* lowprob area */
-         }   }
-         assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
-     }
-@@ -551,7 +594,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
-         for (u=0; u<tableSize; u++) {
-             U32 const symbol = tableDecode[u].baseValue;
-             U32 const nextState = symbolNext[symbol]++;
--            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
-+            tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
-             tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
-             assert(nbAdditionalBits[symbol] < 255);
-             tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
-@@ -664,11 +707,6 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
- 
-     /* SeqHead */
-     nbSeq = *ip++;
--    if (!nbSeq) {
--        *nbSeqPtr=0;
--        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
--        return 1;
--    }
-     if (nbSeq > 0x7F) {
-         if (nbSeq == 0xFF) {
-             RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
-@@ -681,8 +719,16 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
-     }
-     *nbSeqPtr = nbSeq;
- 
-+    if (nbSeq == 0) {
-+        /* No sequence : section ends immediately */
-+        RETURN_ERROR_IF(ip != iend, corruption_detected,
-+            "extraneous data present in the Sequences section");
-+        return (size_t)(ip - istart);
-+    }
-+
-     /* FSE table descriptors */
-     RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
-+    RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */
-     {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
-         symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
-         symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
-@@ -829,7 +875,7 @@ static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, pt
- /* ZSTD_safecopyDstBeforeSrc():
-  * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
-  * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
--static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
-+static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) {
-     ptrdiff_t const diff = op - ip;
-     BYTE* const oend = op + length;
- 
-@@ -858,6 +904,7 @@ static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length
-  * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
-  */
- FORCE_NOINLINE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
- size_t ZSTD_execSequenceEnd(BYTE* op,
-     BYTE* const oend, seq_t sequence,
-     const BYTE** litPtr, const BYTE* const litLimit,
-@@ -905,6 +952,7 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
-  * This version is intended to be used during instances where the litBuffer is still split.  It is kept separate to avoid performance impact for the good case.
-  */
- FORCE_NOINLINE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
- size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
-     BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
-     const BYTE** litPtr, const BYTE* const litLimit,
-@@ -950,6 +998,7 @@ size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
- }
- 
- HINT_INLINE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
- size_t ZSTD_execSequence(BYTE* op,
-     BYTE* const oend, seq_t sequence,
-     const BYTE** litPtr, const BYTE* const litLimit,
-@@ -964,6 +1013,11 @@ size_t ZSTD_execSequence(BYTE* op,
- 
-     assert(op != NULL /* Precondition */);
-     assert(oend_w < oend /* No underflow */);
-+
-+#if defined(__aarch64__)
-+    /* prefetch sequence starting from match that will be used for copy later */
-+    PREFETCH_L1(match);
-+#endif
-     /* Handle edge cases in a slow path:
-      *   - Read beyond end of literals
-      *   - Match end is within WILDCOPY_OVERLIMIT of oend
-@@ -1043,6 +1097,7 @@ size_t ZSTD_execSequence(BYTE* op,
- }
- 
- HINT_INLINE
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
- size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
-     BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
-     const BYTE** litPtr, const BYTE* const litLimit,
-@@ -1154,7 +1209,7 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16
- }
- 
- /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
-- * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
-+ * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
-  * bits before reloading. This value is the maximum number of bytes we read
-  * after reloading when we are decoding long offsets.
-  */
-@@ -1165,13 +1220,37 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16
- 
- typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
- 
-+/*
-+ * ZSTD_decodeSequence():
-+ * @p longOffsets : tells the decoder to reload more bit while decoding large offsets
-+ *                  only used in 32-bit mode
-+ * @return : Sequence (litL + matchL + offset)
-+ */
- FORCE_INLINE_TEMPLATE seq_t
--ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
-+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq)
- {
-     seq_t seq;
-+    /*
-+     * ZSTD_seqSymbol is a 64 bits wide structure.
-+     * It can be loaded in one operation
-+     * and its fields extracted by simply shifting or bit-extracting on aarch64.
-+     * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
-+     * operations that cause performance drop. This can be avoided by using this
-+     * ZSTD_memcpy hack.
-+     */
-+#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
-+    ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
-+    ZSTD_seqSymbol* const llDInfo = &llDInfoS;
-+    ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
-+    ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
-+    ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
-+    ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
-+    ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
-+#else
-     const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
-     const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
-     const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
-+#endif
-     seq.matchLength = mlDInfo->baseValue;
-     seq.litLength = llDInfo->baseValue;
-     {   U32 const ofBase = ofDInfo->baseValue;
-@@ -1186,28 +1265,31 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
-         U32 const llnbBits = llDInfo->nbBits;
-         U32 const mlnbBits = mlDInfo->nbBits;
-         U32 const ofnbBits = ofDInfo->nbBits;
-+
-+        assert(llBits <= MaxLLBits);
-+        assert(mlBits <= MaxMLBits);
-+        assert(ofBits <= MaxOff);
-         /*
-          * As gcc has better branch and block analyzers, sometimes it is only
--         * valuable to mark likelyness for clang, it gives around 3-4% of
-+         * valuable to mark likeliness for clang, it gives around 3-4% of
-          * performance.
-          */
- 
-         /* sequence */
-         {   size_t offset;
--    #if defined(__clang__)
--            if (LIKELY(ofBits > 1)) {
--    #else
-             if (ofBits > 1) {
--    #endif
-                 ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
-                 ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
--                assert(ofBits <= MaxOff);
-+                ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
-+                ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
-                 if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
--                    U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
-+                    /* Always read extra bits, this keeps the logic simple,
-+                     * avoids branches, and avoids accidentally reading 0 bits.
-+                     */
-+                    U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
-                     offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
-                     BIT_reloadDStream(&seqState->DStream);
--                    if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
--                    assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
-+                    offset += BIT_readBitsFast(&seqState->DStream, extraBits);
-                 } else {
-                     offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
-                     if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
-@@ -1224,7 +1306,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
-                 } else {
-                     offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
-                     {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
--                        temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
-+                        temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */
-                         if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
-                         seqState->prevOffset[1] = seqState->prevOffset[0];
-                         seqState->prevOffset[0] = offset = temp;
-@@ -1232,11 +1314,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
-             seq.offset = offset;
-         }
- 
--    #if defined(__clang__)
--        if (UNLIKELY(mlBits > 0))
--    #else
-         if (mlBits > 0)
--    #endif
-             seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
- 
-         if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
-@@ -1246,11 +1324,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
-         /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
-         ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
- 
--    #if defined(__clang__)
--        if (UNLIKELY(llBits > 0))
--    #else
-         if (llBits > 0)
--    #endif
-             seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
- 
-         if (MEM_32bits())
-@@ -1259,17 +1333,22 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
-         DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
-                     (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
- 
--        ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */
--        ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */
--        if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
--        ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */
-+        if (!isLastSeq) {
-+            /* don't update FSE state for last Sequence */
-+            ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */
-+            ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */
-+            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
-+            ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */
-+            BIT_reloadDStream(&seqState->DStream);
-+        }
-     }
- 
-     return seq;
- }
- 
--#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
--MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
-+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-+#if DEBUGLEVEL >= 1
-+static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
- {
-     size_t const windowSize = dctx->fParams.windowSize;
-     /* No dictionary used. */
-@@ -1283,30 +1362,33 @@ MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefix
-     /* Dictionary is active. */
-     return 1;
- }
-+#endif
- 
--MEM_STATIC void ZSTD_assertValidSequence(
-+static void ZSTD_assertValidSequence(
-         ZSTD_DCtx const* dctx,
-         BYTE const* op, BYTE const* oend,
-         seq_t const seq,
-         BYTE const* prefixStart, BYTE const* virtualStart)
- {
- #if DEBUGLEVEL >= 1
--    size_t const windowSize = dctx->fParams.windowSize;
--    size_t const sequenceSize = seq.litLength + seq.matchLength;
--    BYTE const* const oLitEnd = op + seq.litLength;
--    DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
--            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
--    assert(op <= oend);
--    assert((size_t)(oend - op) >= sequenceSize);
--    assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
--    if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
--        size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
--        /* Offset must be within the dictionary. */
--        assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
--        assert(seq.offset <= windowSize + dictSize);
--    } else {
--        /* Offset must be within our window. */
--        assert(seq.offset <= windowSize);
-+    if (dctx->isFrameDecompression) {
-+        size_t const windowSize = dctx->fParams.windowSize;
-+        size_t const sequenceSize = seq.litLength + seq.matchLength;
-+        BYTE const* const oLitEnd = op + seq.litLength;
-+        DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
-+                (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
-+        assert(op <= oend);
-+        assert((size_t)(oend - op) >= sequenceSize);
-+        assert(sequenceSize <= ZSTD_blockSizeMax(dctx));
-+        if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
-+            size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
-+            /* Offset must be within the dictionary. */
-+            assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
-+            assert(seq.offset <= windowSize + dictSize);
-+        } else {
-+            /* Offset must be within our window. */
-+            assert(seq.offset <= windowSize);
-+        }
-     }
- #else
-     (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
-@@ -1322,23 +1404,21 @@ DONT_VECTORIZE
- ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
-                                void* dst, size_t maxDstSize,
-                          const void* seqStart, size_t seqSize, int nbSeq,
--                         const ZSTD_longOffset_e isLongOffset,
--                         const int frame)
-+                         const ZSTD_longOffset_e isLongOffset)
- {
-     const BYTE* ip = (const BYTE*)seqStart;
-     const BYTE* const iend = ip + seqSize;
-     BYTE* const ostart = (BYTE*)dst;
--    BYTE* const oend = ostart + maxDstSize;
-+    BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
-     BYTE* op = ostart;
-     const BYTE* litPtr = dctx->litPtr;
-     const BYTE* litBufferEnd = dctx->litBufferEnd;
-     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
-     const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
-     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
--    DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
--    (void)frame;
-+    DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq);
- 
--    /* Regen sequences */
-+    /* Literals are split between internal buffer & output buffer */
-     if (nbSeq) {
-         seqState_t seqState;
-         dctx->fseEntropy = 1;
-@@ -1357,8 +1437,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
-                 BIT_DStream_completed < BIT_DStream_overflow);
- 
-         /* decompress without overrunning litPtr begins */
--        {
--            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
-+        {   seq_t sequence = {0,0,0};  /* some static analyzer believe that @sequence is not initialized (it necessarily is, since for(;;) loop as at least one iteration) */
-             /* Align the decompression loop to 32 + 16 bytes.
-                 *
-                 * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
-@@ -1420,27 +1499,26 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
- #endif
- 
-             /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
--            for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
--                size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
-+            for ( ; nbSeq; nbSeq--) {
-+                sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
-+                if (litPtr + sequence.litLength > dctx->litBufferEnd) break;
-+                {   size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
- #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
--                assert(!ZSTD_isError(oneSeqSize));
--                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
-+                    assert(!ZSTD_isError(oneSeqSize));
-+                    ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
- #endif
--                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
--                    return oneSeqSize;
--                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
--                op += oneSeqSize;
--                if (UNLIKELY(!--nbSeq))
--                    break;
--                BIT_reloadDStream(&(seqState.DStream));
--                sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
--            }
-+                    if (UNLIKELY(ZSTD_isError(oneSeqSize)))
-+                        return oneSeqSize;
-+                    DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
-+                    op += oneSeqSize;
-+            }   }
-+            DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)");
- 
-             /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
-             if (nbSeq > 0) {
-                 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
--                if (leftoverLit)
--                {
-+                DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength);
-+                if (leftoverLit) {
-                     RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
-                     ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
-                     sequence.litLength -= leftoverLit;
-@@ -1449,24 +1527,22 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
-                 litPtr = dctx->litExtraBuffer;
-                 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
-                 dctx->litBufferLocation = ZSTD_not_in_dst;
--                {
--                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
-+                {   size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
- #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-                     assert(!ZSTD_isError(oneSeqSize));
--                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
-+                    ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
- #endif
-                     if (UNLIKELY(ZSTD_isError(oneSeqSize)))
-                         return oneSeqSize;
-                     DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
-                     op += oneSeqSize;
--                    if (--nbSeq)
--                        BIT_reloadDStream(&(seqState.DStream));
-                 }
-+                nbSeq--;
-             }
-         }
- 
--        if (nbSeq > 0) /* there is remaining lit from extra buffer */
--        {
-+        if (nbSeq > 0) {
-+            /* there is remaining lit from extra buffer */
- 
- #if defined(__x86_64__)
-             __asm__(".p2align 6");
-@@ -1485,35 +1561,34 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
- #  endif
- #endif
- 
--            for (; ; ) {
--                seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
-+            for ( ; nbSeq ; nbSeq--) {
-+                seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
-                 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
- #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-                 assert(!ZSTD_isError(oneSeqSize));
--                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
-+                ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
- #endif
-                 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
-                     return oneSeqSize;
-                 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
-                 op += oneSeqSize;
--                if (UNLIKELY(!--nbSeq))
--                    break;
--                BIT_reloadDStream(&(seqState.DStream));
-             }
-         }
- 
-         /* check if reached exact end */
-         DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
-         RETURN_ERROR_IF(nbSeq, corruption_detected, "");
--        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
-+        DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed);
-+        RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
-         /* save reps for next block */
-         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
-     }
- 
-     /* last literal segment */
--    if (dctx->litBufferLocation == ZSTD_split)  /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
--    {
--        size_t const lastLLSize = litBufferEnd - litPtr;
-+    if (dctx->litBufferLocation == ZSTD_split) {
-+        /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
-+        size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
-+        DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize);
-         RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
-         if (op != NULL) {
-             ZSTD_memmove(op, litPtr, lastLLSize);
-@@ -1523,15 +1598,17 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
-         litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
-         dctx->litBufferLocation = ZSTD_not_in_dst;
-     }
--    {   size_t const lastLLSize = litBufferEnd - litPtr;
-+    /* copy last literals from internal buffer */
-+    {   size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
-+        DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize);
-         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
-         if (op != NULL) {
-             ZSTD_memcpy(op, litPtr, lastLLSize);
-             op += lastLLSize;
--        }
--    }
-+    }   }
- 
--    return op-ostart;
-+    DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
-+    return (size_t)(op - ostart);
- }
- 
- FORCE_INLINE_TEMPLATE size_t
-@@ -1539,21 +1616,19 @@ DONT_VECTORIZE
- ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
-     void* dst, size_t maxDstSize,
-     const void* seqStart, size_t seqSize, int nbSeq,
--    const ZSTD_longOffset_e isLongOffset,
--    const int frame)
-+    const ZSTD_longOffset_e isLongOffset)
- {
-     const BYTE* ip = (const BYTE*)seqStart;
-     const BYTE* const iend = ip + seqSize;
-     BYTE* const ostart = (BYTE*)dst;
--    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
-+    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer;
-     BYTE* op = ostart;
-     const BYTE* litPtr = dctx->litPtr;
-     const BYTE* const litEnd = litPtr + dctx->litSize;
-     const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
-     const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
-     const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
--    DEBUGLOG(5, "ZSTD_decompressSequences_body");
--    (void)frame;
-+    DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
- 
-     /* Regen sequences */
-     if (nbSeq) {
-@@ -1568,11 +1643,6 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
-         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
-         assert(dst != NULL);
- 
--        ZSTD_STATIC_ASSERT(
--            BIT_DStream_unfinished < BIT_DStream_completed &&
--            BIT_DStream_endOfBuffer < BIT_DStream_completed &&
--            BIT_DStream_completed < BIT_DStream_overflow);
--
- #if defined(__x86_64__)
-             __asm__(".p2align 6");
-             __asm__("nop");
-@@ -1587,73 +1657,70 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
- #  endif
- #endif
- 
--        for ( ; ; ) {
--            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
-+        for ( ; nbSeq ; nbSeq--) {
-+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
-             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
- #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-             assert(!ZSTD_isError(oneSeqSize));
--            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
-+            ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
- #endif
-             if (UNLIKELY(ZSTD_isError(oneSeqSize)))
-                 return oneSeqSize;
-             DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
-             op += oneSeqSize;
--            if (UNLIKELY(!--nbSeq))
--                break;
--            BIT_reloadDStream(&(seqState.DStream));
-         }
- 
-         /* check if reached exact end */
--        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
--        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
--        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
-+        assert(nbSeq == 0);
-+        RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
-         /* save reps for next block */
-         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
-     }
- 
-     /* last literal segment */
--    {   size_t const lastLLSize = litEnd - litPtr;
-+    {   size_t const lastLLSize = (size_t)(litEnd - litPtr);
-+        DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize);
-         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
-         if (op != NULL) {
-             ZSTD_memcpy(op, litPtr, lastLLSize);
-             op += lastLLSize;
--        }
--    }
-+    }   }
- 
--    return op-ostart;
-+    DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
-+    return (size_t)(op - ostart);
- }
- 
- static size_t
- ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
-                                  void* dst, size_t maxDstSize,
-                            const void* seqStart, size_t seqSize, int nbSeq,
--                           const ZSTD_longOffset_e isLongOffset,
--                           const int frame)
-+                           const ZSTD_longOffset_e isLongOffset)
- {
--    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
- }
- 
- static size_t
- ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
-                                                void* dst, size_t maxDstSize,
-                                          const void* seqStart, size_t seqSize, int nbSeq,
--                                         const ZSTD_longOffset_e isLongOffset,
--                                         const int frame)
-+                                         const ZSTD_longOffset_e isLongOffset)
- {
--    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-+    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
- }
- #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
- 
- #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
- 
--FORCE_INLINE_TEMPLATE size_t
--ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
-+FORCE_INLINE_TEMPLATE
-+
-+size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
-                    const BYTE* const prefixStart, const BYTE* const dictEnd)
- {
-     prefetchPos += sequence.litLength;
-     {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
--        const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
--                                                                              * No consequence though : memory address is only used for prefetching, not for dereferencing */
-+        /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
-+         * No consequence though : memory address is only used for prefetching, not for dereferencing */
-+        const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset);
-         PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
-     }
-     return prefetchPos + sequence.matchLength;
-@@ -1668,20 +1735,18 @@ ZSTD_decompressSequencesLong_body(
-                                ZSTD_DCtx* dctx,
-                                void* dst, size_t maxDstSize,
-                          const void* seqStart, size_t seqSize, int nbSeq,
--                         const ZSTD_longOffset_e isLongOffset,
--                         const int frame)
-+                         const ZSTD_longOffset_e isLongOffset)
- {
-     const BYTE* ip = (const BYTE*)seqStart;
-     const BYTE* const iend = ip + seqSize;
-     BYTE* const ostart = (BYTE*)dst;
--    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
-+    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
-     BYTE* op = ostart;
-     const BYTE* litPtr = dctx->litPtr;
-     const BYTE* litBufferEnd = dctx->litBufferEnd;
-     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
-     const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
-     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
--    (void)frame;
- 
-     /* Regen sequences */
-     if (nbSeq) {
-@@ -1706,20 +1771,17 @@ ZSTD_decompressSequencesLong_body(
-         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
- 
-         /* prepare in advance */
--        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
--            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
-+        for (seqNb=0; seqNb<seqAdvance; seqNb++) {
-+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
-             prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
-             sequences[seqNb] = sequence;
-         }
--        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
- 
-         /* decompress without stomping litBuffer */
--        for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
--            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
--            size_t oneSeqSize;
-+        for (; seqNb < nbSeq; seqNb++) {
-+            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
- 
--            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
--            {
-+            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) {
-                 /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
-                 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
-                 if (leftoverLit)
-@@ -1732,26 +1794,26 @@ ZSTD_decompressSequencesLong_body(
-                 litPtr = dctx->litExtraBuffer;
-                 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
-                 dctx->litBufferLocation = ZSTD_not_in_dst;
--                oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
-+                {   size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
- #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
--                assert(!ZSTD_isError(oneSeqSize));
--                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
-+                    assert(!ZSTD_isError(oneSeqSize));
-+                    ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
- #endif
--                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
-+                    if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
- 
--                prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
--                sequences[seqNb & STORED_SEQS_MASK] = sequence;
--                op += oneSeqSize;
--            }
-+                    prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
-+                    sequences[seqNb & STORED_SEQS_MASK] = sequence;
-+                    op += oneSeqSize;
-+            }   }
-             else
-             {
-                 /* lit buffer is either wholly contained in first or second split, or not split at all*/
--                oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
-+                size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
-                     ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
-                     ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
- #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-                 assert(!ZSTD_isError(oneSeqSize));
--                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
-+                ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
- #endif
-                 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
- 
-@@ -1760,17 +1822,15 @@ ZSTD_decompressSequencesLong_body(
-                 op += oneSeqSize;
-             }
-         }
--        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
-+        RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
- 
-         /* finish queue */
-         seqNb -= seqAdvance;
-         for ( ; seqNb<nbSeq ; seqNb++) {
-             seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
--            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
--            {
-+            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) {
-                 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
--                if (leftoverLit)
--                {
-+                if (leftoverLit) {
-                     RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
-                     ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
-                     sequence->litLength -= leftoverLit;
-@@ -1779,11 +1839,10 @@ ZSTD_decompressSequencesLong_body(
-                 litPtr = dctx->litExtraBuffer;
-                 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
-                 dctx->litBufferLocation = ZSTD_not_in_dst;
--                {
--                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
-+                {   size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
- #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-                     assert(!ZSTD_isError(oneSeqSize));
--                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
-+                    ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
- #endif
-                     if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
-                     op += oneSeqSize;
-@@ -1796,7 +1855,7 @@ ZSTD_decompressSequencesLong_body(
-                     ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
- #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-                 assert(!ZSTD_isError(oneSeqSize));
--                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
-+                ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
- #endif
-                 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
-                 op += oneSeqSize;
-@@ -1808,8 +1867,7 @@ ZSTD_decompressSequencesLong_body(
-     }
- 
-     /* last literal segment */
--    if (dctx->litBufferLocation == ZSTD_split)  /* first deplete literal buffer in dst, then copy litExtraBuffer */
--    {
-+    if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */
-         size_t const lastLLSize = litBufferEnd - litPtr;
-         RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
-         if (op != NULL) {
-@@ -1827,17 +1885,16 @@ ZSTD_decompressSequencesLong_body(
-         }
-     }
- 
--    return op-ostart;
-+    return (size_t)(op - ostart);
- }
- 
- static size_t
- ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
-                                  void* dst, size_t maxDstSize,
-                            const void* seqStart, size_t seqSize, int nbSeq,
--                           const ZSTD_longOffset_e isLongOffset,
--                           const int frame)
-+                           const ZSTD_longOffset_e isLongOffset)
- {
--    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
- }
- #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
- 
-@@ -1851,20 +1908,18 @@ DONT_VECTORIZE
- ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
-                                  void* dst, size_t maxDstSize,
-                            const void* seqStart, size_t seqSize, int nbSeq,
--                           const ZSTD_longOffset_e isLongOffset,
--                           const int frame)
-+                           const ZSTD_longOffset_e isLongOffset)
- {
--    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
- }
- static BMI2_TARGET_ATTRIBUTE size_t
- DONT_VECTORIZE
- ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
-                                  void* dst, size_t maxDstSize,
-                            const void* seqStart, size_t seqSize, int nbSeq,
--                           const ZSTD_longOffset_e isLongOffset,
--                           const int frame)
-+                           const ZSTD_longOffset_e isLongOffset)
- {
--    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-+    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
- }
- #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
- 
-@@ -1873,10 +1928,9 @@ static BMI2_TARGET_ATTRIBUTE size_t
- ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
-                                  void* dst, size_t maxDstSize,
-                            const void* seqStart, size_t seqSize, int nbSeq,
--                           const ZSTD_longOffset_e isLongOffset,
--                           const int frame)
-+                           const ZSTD_longOffset_e isLongOffset)
- {
--    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
- }
- #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
- 
-@@ -1886,37 +1940,34 @@ typedef size_t (*ZSTD_decompressSequences_t)(
-                             ZSTD_DCtx* dctx,
-                             void* dst, size_t maxDstSize,
-                             const void* seqStart, size_t seqSize, int nbSeq,
--                            const ZSTD_longOffset_e isLongOffset,
--                            const int frame);
-+                            const ZSTD_longOffset_e isLongOffset);
- 
- #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
- static size_t
- ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
-                    const void* seqStart, size_t seqSize, int nbSeq,
--                   const ZSTD_longOffset_e isLongOffset,
--                   const int frame)
-+                   const ZSTD_longOffset_e isLongOffset)
- {
-     DEBUGLOG(5, "ZSTD_decompressSequences");
- #if DYNAMIC_BMI2
-     if (ZSTD_DCtx_get_bmi2(dctx)) {
--        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-+        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
-     }
- #endif
--    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-+    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
- }
- static size_t
- ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
-                                  const void* seqStart, size_t seqSize, int nbSeq,
--                                 const ZSTD_longOffset_e isLongOffset,
--                                 const int frame)
-+                                 const ZSTD_longOffset_e isLongOffset)
- {
-     DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
- #if DYNAMIC_BMI2
-     if (ZSTD_DCtx_get_bmi2(dctx)) {
--        return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-+        return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
-     }
- #endif
--    return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-+    return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
- }
- #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
- 
-@@ -1931,69 +1982,114 @@ static size_t
- ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
-                              void* dst, size_t maxDstSize,
-                              const void* seqStart, size_t seqSize, int nbSeq,
--                             const ZSTD_longOffset_e isLongOffset,
--                             const int frame)
-+                             const ZSTD_longOffset_e isLongOffset)
- {
-     DEBUGLOG(5, "ZSTD_decompressSequencesLong");
- #if DYNAMIC_BMI2
-     if (ZSTD_DCtx_get_bmi2(dctx)) {
--        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-+        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
-     }
- #endif
--  return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-+  return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
- }
- #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
- 
- 
-+/*
-+ * @returns The total size of the history referenceable by zstd, including
-+ * both the prefix and the extDict. At @p op any offset larger than this
-+ * is invalid.
-+ */
-+static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
-+{
-+    return (size_t)(op - virtualStart);
-+}
-+
-+typedef struct {
-+    unsigned longOffsetShare;
-+    unsigned maxNbAdditionalBits;
-+} ZSTD_OffsetInfo;
- 
--#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
--    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
--/* ZSTD_getLongOffsetsShare() :
-+/* ZSTD_getOffsetInfo() :
-  * condition : offTable must be valid
-  * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
-- *           compared to maximum possible of (1<<OffFSELog) */
--static unsigned
--ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
-+ *           compared to maximum possible of (1<<OffFSELog),
-+ *           as well as the maximum number additional bits required.
-+ */
-+static ZSTD_OffsetInfo
-+ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
- {
--    const void* ptr = offTable;
--    U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
--    const ZSTD_seqSymbol* table = offTable + 1;
--    U32 const max = 1 << tableLog;
--    U32 u, total = 0;
--    DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
--
--    assert(max <= (1 << OffFSELog));  /* max not too large */
--    for (u=0; u<max; u++) {
--        if (table[u].nbAdditionalBits > 22) total += 1;
-+    ZSTD_OffsetInfo info = {0, 0};
-+    /* If nbSeq == 0, then the offTable is uninitialized, but we have
-+     * no sequences, so both values should be 0.
-+     */
-+    if (nbSeq != 0) {
-+        const void* ptr = offTable;
-+        U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
-+        const ZSTD_seqSymbol* table = offTable + 1;
-+        U32 const max = 1 << tableLog;
-+        U32 u;
-+        DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
-+
-+        assert(max <= (1 << OffFSELog));  /* max not too large */
-+        for (u=0; u<max; u++) {
-+            info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
-+            if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
-+        }
-+
-+        assert(tableLog <= OffFSELog);
-+        info.longOffsetShare <<= (OffFSELog - tableLog);  /* scale to OffFSELog */
-     }
- 
--    assert(tableLog <= OffFSELog);
--    total <<= (OffFSELog - tableLog);  /* scale to OffFSELog */
-+    return info;
-+}
- 
--    return total;
-+/*
-+ * @returns The maximum offset we can decode in one read of our bitstream, without
-+ * reloading more bits in the middle of the offset bits read. Any offsets larger
-+ * than this must use the long offset decoder.
-+ */
-+static size_t ZSTD_maxShortOffset(void)
-+{
-+    if (MEM_64bits()) {
-+        /* We can decode any offset without reloading bits.
-+         * This might change if the max window size grows.
-+         */
-+        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
-+        return (size_t)-1;
-+    } else {
-+        /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
-+         * This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
-+         * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
-+         */
-+        size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
-+        size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
-+        assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
-+        return maxOffset;
-+    }
- }
--#endif
- 
- size_t
- ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
-                               void* dst, size_t dstCapacity,
--                        const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
-+                        const void* src, size_t srcSize, const streaming_operation streaming)
- {   /* blockType == blockCompressed */
-     const BYTE* ip = (const BYTE*)src;
--    /* isLongOffset must be true if there are long offsets.
--     * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
--     * We don't expect that to be the case in 64-bit mode.
--     * In block mode, window size is not known, so we have to be conservative.
--     * (note: but it could be evaluated from current-lowLimit)
--     */
--    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
--    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
--
--    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
-+    DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize);
-+
-+    /* Note : the wording of the specification
-+     * allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx).
-+     * This generally does not happen, as it makes little sense,
-+     * since an uncompressed block would feature same size and have no decompression cost.
-+     * Also, note that decoder from reference libzstd before < v1.5.4
-+     * would consider this edge case as an error.
-+     * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx)
-+     * for broader compatibility with the deployed ecosystem of zstd decoders */
-+    RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, "");
- 
-     /* Decode literals section */
-     {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
--        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
-+        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
-         if (ZSTD_isError(litCSize)) return litCSize;
-         ip += litCSize;
-         srcSize -= litCSize;
-@@ -2001,6 +2097,23 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
- 
-     /* Build Decoding Tables */
-     {
-+        /* Compute the maximum block size, which must also work when !frame and fParams are unset.
-+         * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
-+         */
-+        size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx));
-+        size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart);
-+        /* isLongOffset must be true if there are long offsets.
-+         * Offsets are long if they are larger than ZSTD_maxShortOffset().
-+         * We don't expect that to be the case in 64-bit mode.
-+         *
-+         * We check here to see if our history is large enough to allow long offsets.
-+         * If it isn't, then we can't possible have (valid) long offsets. If the offset
-+         * is invalid, then it is okay to read it incorrectly.
-+         *
-+         * If isLongOffsets is true, then we will later check our decoding table to see
-+         * if it is even possible to generate long offsets.
-+         */
-+        ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
-         /* These macros control at build-time which decompressor implementation
-          * we use. If neither is defined, we do some inspection and dispatch at
-          * runtime.
-@@ -2008,6 +2121,11 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
- #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
-     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
-         int usePrefetchDecoder = dctx->ddictIsCold;
-+#else
-+        /* Set to 1 to avoid computing offset info if we don't need to.
-+         * Otherwise this value is ignored.
-+         */
-+        int usePrefetchDecoder = 1;
- #endif
-         int nbSeq;
-         size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
-@@ -2015,40 +2133,55 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
-         ip += seqHSize;
-         srcSize -= seqHSize;
- 
--        RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
-+        RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
-+        RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
-+                "invalid dst");
- 
--#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
--    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
--        if ( !usePrefetchDecoder
--          && (!frame || (dctx->fParams.windowSize > (1<<24)))
--          && (nbSeq>ADVANCED_SEQS) ) {  /* could probably use a larger nbSeq limit */
--            U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
--            U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
--            usePrefetchDecoder = (shareLongOffsets >= minShare);
-+        /* If we could potentially have long offsets, or we might want to use the prefetch decoder,
-+         * compute information about the share of long offsets, and the maximum nbAdditionalBits.
-+         * NOTE: could probably use a larger nbSeq limit
-+         */
-+        if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
-+            ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
-+            if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
-+                /* If isLongOffset, but the maximum number of additional bits that we see in our table is small
-+                 * enough, then we know it is impossible to have too long an offset in this block, so we can
-+                 * use the regular offset decoder.
-+                 */
-+                isLongOffset = ZSTD_lo_isRegularOffset;
-+            }
-+            if (!usePrefetchDecoder) {
-+                U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
-+                usePrefetchDecoder = (info.longOffsetShare >= minShare);
-+            }
-         }
--#endif
- 
-         dctx->ddictIsCold = 0;
- 
- #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
-     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
--        if (usePrefetchDecoder)
-+        if (usePrefetchDecoder) {
-+#else
-+        (void)usePrefetchDecoder;
-+        {
- #endif
- #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
--            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
-+            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
- #endif
-+        }
- 
- #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
-         /* else */
-         if (dctx->litBufferLocation == ZSTD_split)
--            return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
-+            return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
-         else
--            return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
-+            return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
- #endif
-     }
- }
- 
- 
-+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
- void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
- {
-     if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
-@@ -2060,13 +2193,24 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
- }
- 
- 
--size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
--                            void* dst, size_t dstCapacity,
--                      const void* src, size_t srcSize)
-+size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
-+                                       void* dst, size_t dstCapacity,
-+                                 const void* src, size_t srcSize)
- {
-     size_t dSize;
-+    dctx->isFrameDecompression = 0;
-     ZSTD_checkContinuity(dctx, dst, dstCapacity);
--    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
-+    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming);
-+    FORWARD_IF_ERROR(dSize, "");
-     dctx->previousDstEnd = (char*)dst + dSize;
-     return dSize;
- }
-+
-+
-+/* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
-+size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
-+                            void* dst, size_t dstCapacity,
-+                      const void* src, size_t srcSize)
-+{
-+    return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
-+}
-diff --git a/lib/zstd/decompress/zstd_decompress_block.h b/lib/zstd/decompress/zstd_decompress_block.h
-index 3d2d57a5d25a..becffbd89364 100644
---- a/lib/zstd/decompress/zstd_decompress_block.h
-+++ b/lib/zstd/decompress/zstd_decompress_block.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -47,7 +48,7 @@ typedef enum {
-  */
- size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
-                                void* dst, size_t dstCapacity,
--                         const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
-+                         const void* src, size_t srcSize, const streaming_operation streaming);
- 
- /* ZSTD_buildFSETable() :
-  * generate FSE decoding table for one symbol (ll, ml or off)
-@@ -64,5 +65,10 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
-                    unsigned tableLog, void* wksp, size_t wkspSize,
-                    int bmi2);
- 
-+/* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
-+size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
-+                            void* dst, size_t dstCapacity,
-+                      const void* src, size_t srcSize);
-+
- 
- #endif /* ZSTD_DEC_BLOCK_H */
-diff --git a/lib/zstd/decompress/zstd_decompress_internal.h b/lib/zstd/decompress/zstd_decompress_internal.h
-index 98102edb6a83..0f02526be774 100644
---- a/lib/zstd/decompress/zstd_decompress_internal.h
-+++ b/lib/zstd/decompress/zstd_decompress_internal.h
-@@ -1,5 +1,6 @@
-+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Yann Collet, Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -75,12 +76,13 @@ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
- 
- #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
- #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
-+#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
- 
- typedef struct {
-     ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];    /* Note : Space reserved for FSE Tables */
-     ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];   /* is also used as temporary workspace while building hufTable during DDict creation */
-     ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];    /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
--    HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
-+    HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)];  /* can accommodate HUF_decompress4X */
-     U32 rep[ZSTD_REP_NUM];
-     U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
- } ZSTD_entropyDTables_t;
-@@ -152,6 +154,7 @@ struct ZSTD_DCtx_s
-     size_t litSize;
-     size_t rleSize;
-     size_t staticSize;
-+    int isFrameDecompression;
- #if DYNAMIC_BMI2 != 0
-     int bmi2;                     /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
- #endif
-@@ -164,6 +167,8 @@ struct ZSTD_DCtx_s
-     ZSTD_dictUses_e dictUses;
-     ZSTD_DDictHashSet* ddictSet;                    /* Hash set for multiple ddicts */
-     ZSTD_refMultipleDDicts_e refMultipleDDicts;     /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
-+    int disableHufAsm;
-+    int maxBlockSizeParam;
- 
-     /* streaming */
-     ZSTD_dStreamStage streamStage;
-diff --git a/lib/zstd/decompress_sources.h b/lib/zstd/decompress_sources.h
-index a06ca187aab5..8a47eb2a4514 100644
---- a/lib/zstd/decompress_sources.h
-+++ b/lib/zstd/decompress_sources.h
-@@ -1,6 +1,6 @@
- /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
- /*
-- * Copyright (c) Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-diff --git a/lib/zstd/zstd_common_module.c b/lib/zstd/zstd_common_module.c
-index 22686e367e6f..466828e35752 100644
---- a/lib/zstd/zstd_common_module.c
-+++ b/lib/zstd/zstd_common_module.c
-@@ -1,6 +1,6 @@
- // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -24,9 +24,6 @@ EXPORT_SYMBOL_GPL(HUF_readStats_wksp);
- EXPORT_SYMBOL_GPL(ZSTD_isError);
- EXPORT_SYMBOL_GPL(ZSTD_getErrorName);
- EXPORT_SYMBOL_GPL(ZSTD_getErrorCode);
--EXPORT_SYMBOL_GPL(ZSTD_customMalloc);
--EXPORT_SYMBOL_GPL(ZSTD_customCalloc);
--EXPORT_SYMBOL_GPL(ZSTD_customFree);
- 
- MODULE_LICENSE("Dual BSD/GPL");
- MODULE_DESCRIPTION("Zstd Common");
-diff --git a/lib/zstd/zstd_compress_module.c b/lib/zstd/zstd_compress_module.c
-index 04e1b5c01d9b..8ecf43226af2 100644
---- a/lib/zstd/zstd_compress_module.c
-+++ b/lib/zstd/zstd_compress_module.c
-@@ -1,6 +1,6 @@
- // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-diff --git a/lib/zstd/zstd_decompress_module.c b/lib/zstd/zstd_decompress_module.c
-index f4ed952ed485..7d31518e9d5a 100644
---- a/lib/zstd/zstd_decompress_module.c
-+++ b/lib/zstd/zstd_decompress_module.c
-@@ -1,6 +1,6 @@
- // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
- /*
-- * Copyright (c) Facebook, Inc.
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-  * All rights reserved.
-  *
-  * This source code is licensed under both the BSD-style license (found in the
-@@ -77,7 +77,7 @@ EXPORT_SYMBOL(zstd_init_dstream);
- 
- size_t zstd_reset_dstream(zstd_dstream *dstream)
- {
--	return ZSTD_resetDStream(dstream);
-+	return ZSTD_DCtx_reset(dstream, ZSTD_reset_session_only);
- }
- EXPORT_SYMBOL(zstd_reset_dstream);
- 
--- 
-2.46.0.rc1
diff --git a/patches/cachyos/0003-nvidia.patch b/patches/cachyos/0003-nvidia.patch
deleted file mode 100644
index a06229e..0000000
--- a/patches/cachyos/0003-nvidia.patch
+++ /dev/null
@@ -1,761 +0,0 @@
-From eb7e13baaf58cdede50c060633bdb14bf9603a54 Mon Sep 17 00:00:00 2001
-From: Peter Jung <admin@ptr1337.dev>
-Date: Mon, 3 Jun 2024 15:33:26 +0200
-Subject: [PATCH] Fix 6.10 NVIDIA
-
-Co Authord by Laio Oriel Seman <laioseman@gmail.com>
-
-Signed-off-by: Peter Jung <admin@ptr1337.dev>
----
- include/linux/mm.h |  4 ++++
- mm/memory.c        | 37 ++++++++++++++++++++++++++++++++++++-
- mm/nommu.c         | 21 +++++++++++++++++++++
- 3 files changed, 61 insertions(+), 1 deletion(-)
-
-diff --git a/include/linux/mm.h b/include/linux/mm.h
-index 9849dfda44d43..adc5a252da02e 100644
---- a/include/linux/mm.h
-+++ b/include/linux/mm.h
-@@ -2438,6 +2438,10 @@ int
- copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
- int follow_pte(struct vm_area_struct *vma, unsigned long address,
- 	       pte_t **ptepp, spinlock_t **ptlp);
-+int follow_pfn(struct vm_area_struct *vma, unsigned long address,
-+	unsigned long *pfn);
-+//int follow_phys(struct vm_area_struct *vma, unsigned long address,
-+//		unsigned int flags, unsigned long *prot, resource_size_t *phys);
- int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
- 			void *buf, int len, int write);
- 
-diff --git a/mm/memory.c b/mm/memory.c
-index 0f47a533014e4..0401d10b3d824 100644
---- a/mm/memory.c
-+++ b/mm/memory.c
-@@ -5962,7 +5962,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
-  * Only IO mappings and raw PFN mappings are allowed.  The mmap semaphore
-  * should be taken for read.
-  *
-- * This function must not be used to modify PTE content.
-+ * KVM uses this function.  While it is arguably less bad than ``follow_pfn``,
-+ * it is not a good general-purpose API.
-  *
-  * Return: zero on success, -ve otherwise.
-  */
-@@ -6012,6 +6013,40 @@ int follow_pte(struct vm_area_struct *vma, unsigned long address,
- }
- EXPORT_SYMBOL_GPL(follow_pte);
- 
-+/**
-+ * follow_pfn - look up PFN at a user virtual address
-+ * @vma: memory mapping
-+ * @address: user virtual address
-+ * @pfn: location to store found PFN
-+ *
-+ * Only IO mappings and raw PFN mappings are allowed.
-+ *
-+ * This function does not allow the caller to read the permissions
-+ * of the PTE.  Do not use it.
-+ *
-+ * Return: zero and the pfn at @pfn on success, -ve otherwise.
-+ */
-+int follow_pfn(struct vm_area_struct *vma, unsigned long address,
-+	unsigned long *pfn)
-+{
-+	int ret = -EINVAL;
-+	spinlock_t *ptl;
-+	pte_t *ptep;
-+
-+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
-+		return ret;
-+
-+	//ret = follow_pte(vma->vm_mm, address, &ptep, &ptl);
-+	ret = follow_pte(vma, address, &ptep, &ptl);
-+	
-+	if (ret)
-+		return ret;
-+	*pfn = pte_pfn(ptep_get(ptep));
-+	pte_unmap_unlock(ptep, ptl);
-+	return 0;
-+}
-+EXPORT_SYMBOL(follow_pfn);
-+
- #ifdef CONFIG_HAVE_IOREMAP_PROT
- /**
-  * generic_access_phys - generic implementation for iomem mmap access
-diff --git a/mm/nommu.c b/mm/nommu.c
-index 7296e775e04e2..8e0deb733bfef 100644
---- a/mm/nommu.c
-+++ b/mm/nommu.c
-@@ -110,6 +110,27 @@ unsigned int kobjsize(const void *objp)
- 	return page_size(page);
- }
- 
-+/**
-+ * follow_pfn - look up PFN at a user virtual address
-+ * @vma: memory mapping
-+ * @address: user virtual address
-+ * @pfn: location to store found PFN
-+ *
-+ * Only IO mappings and raw PFN mappings are allowed.
-+ *
-+ * Returns zero and the pfn at @pfn on success, -ve otherwise.
-+ */
-+int follow_pfn(struct vm_area_struct *vma, unsigned long address,
-+	unsigned long *pfn)
-+{
-+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
-+		return -EINVAL;
-+
-+	*pfn = address >> PAGE_SHIFT;
-+	return 0;
-+}
-+EXPORT_SYMBOL(follow_pfn);
-+
- void vfree(const void *addr)
- {
- 	kfree(addr);
--- 
-2.45.1
-
---- a/kernel/nvidia-drm/nvidia-drm-drv.c
-+++ b/kernel/nvidia-drm/nvidia-drm-drv.c
-@@ -480,6 +480,22 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
-         return -ENODEV;
-     }
- 
-+#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
-+    /*
-+     * If fbdev is enabled, take modeset ownership now before other DRM clients
-+     * can take master (and thus NVKMS ownership).
-+     */
-+    if (nv_drm_fbdev_module_param) {
-+        if (!nvKms->grabOwnership(pDevice)) {
-+            nvKms->freeDevice(pDevice);
-+            NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to grab NVKMS modeset ownership");
-+            return -EBUSY;
-+        }
-+
-+        nv_dev->hasFramebufferConsole = NV_TRUE;
-+    }
-+#endif
-+
-     mutex_lock(&nv_dev->lock);
- 
-     /* Set NvKmsKapiDevice */
-@@ -590,6 +606,15 @@ static void __nv_drm_unload(struct drm_device *dev)
-         return;
-     }
- 
-+    /* Release modeset ownership if fbdev is enabled */
-+
-+#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
-+    if (nv_dev->hasFramebufferConsole) {
-+        drm_atomic_helper_shutdown(dev);
-+        nvKms->releaseOwnership(nv_dev->pDevice);
-+    }
-+#endif
-+
-     cancel_delayed_work_sync(&nv_dev->hotplug_event_work);
-     mutex_lock(&nv_dev->lock);
- 
-@@ -1768,14 +1793,7 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
-     }
- 
- #if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
--    if (nv_drm_fbdev_module_param &&
--        drm_core_check_feature(dev, DRIVER_MODESET)) {
--
--        if (!nvKms->grabOwnership(nv_dev->pDevice)) {
--            NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to grab NVKMS modeset ownership");
--            goto failed_grab_ownership;
--        }
--
-+    if (nv_dev->hasFramebufferConsole) {
-         if (bus_is_pci) {
-             struct pci_dev *pdev = to_pci_dev(device);
- 
-@@ -1786,8 +1804,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
- #endif
-         }
-         drm_fbdev_generic_setup(dev, 32);
--
--        nv_dev->hasFramebufferConsole = NV_TRUE;
-     }
- #endif /* defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) */
- 
-@@ -1798,12 +1814,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
- 
-     return; /* Success */
- 
--#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
--failed_grab_ownership:
--
--    drm_dev_unregister(dev);
--#endif
--
- failed_drm_register:
- 
-     nv_drm_dev_free(dev);
-@@ -1870,12 +1880,6 @@ void nv_drm_remove_devices(void)
-         struct nv_drm_device *next = dev_list->next;
-         struct drm_device *dev = dev_list->dev;
- 
--#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
--        if (dev_list->hasFramebufferConsole) {
--            drm_atomic_helper_shutdown(dev);
--            nvKms->releaseOwnership(dev_list->pDevice);
--        }
--#endif
-         drm_dev_unregister(dev);
-         nv_drm_dev_free(dev);
-
-  From 612740b11c9645e0f0240b3ca5908ef225763bc8 Mon Sep 17 00:00:00 2001
-From: Peter Jung <admin@ptr1337.dev>
-Date: Thu, 27 Jun 2024 19:46:51 +0200
-Subject: [PATCH] gsp-stutter-fix
-
-We've been having reports of stutter issues in 555 releases related to GSP enablement. On the proprietary driver, NVreg_EnableGpuFirmware=0 makes them go away; on the open driver that's not an option.
-
-So far, we've identified two possible causes here. One is fixed by commit 674c009 below. The other we can't fix/workaround in the kernel modules and requires usermode changes, but commit 8c1c49b should tell us if that path is actually being hit or not.
-
-I've also augmented the logs captured by nvidia-bug-report.sh with some of the info that we found severely lacking in the bug reports so far.
-
-My hope is that folks that have experienced these stutter issues can take these patches, try to reproduce the issue and report back with their findings (and their nvidia-bug-report logs). Many thanks in advance to anyone willing to go the extra mile(s) for us here!
-
-We've unfortunately missed beta2 / 555.52 with this stuff (security fixes can't wait), but here it is early so we don't have to wait on the next release.
----
- kernel-open/nvidia/nv.c                      |  10 +
- src/nvidia/arch/nvalloc/unix/include/osapi.h |   6 -
- src/nvidia/arch/nvalloc/unix/src/escape.c    |  46 ----
- src/nvidia/arch/nvalloc/unix/src/osapi.c     | 230 ++++++++-----------
- src/nvidia/exports_link_command.txt          |   1 -
- src/nvidia/src/kernel/disp/disp_sw.c         |  23 ++
- 6 files changed, 132 insertions(+), 184 deletions(-)
-
-diff --git a/kernel-open/nvidia/nv.c b/kernel-open/nvidia/nv.c
-index 99792de9..ccef3f29 100644
---- a/kernel-open/nvidia/nv.c
-+++ b/kernel-open/nvidia/nv.c
-@@ -4042,6 +4042,16 @@ int NV_API_CALL nv_get_event(
-     nvidia_event_t *nvet;
-     unsigned long eflags;
- 
-+    //
-+    // Note that the head read/write is not atomic when done outside of the
-+    // spinlock, so this might not be a valid pointer at all. But if we read
-+    // NULL here that means that the value indeed was NULL and we can bail
-+    // early since there's no events. Otherwise, we have to do a proper read
-+    // under a spinlock.
-+    //
-+    if (nvlfp->event_data_head == NULL)
-+        return NV_ERR_GENERIC;
-+
-     NV_SPIN_LOCK_IRQSAVE(&nvlfp->fp_lock, eflags);
- 
-     nvet = nvlfp->event_data_head;
-diff --git a/src/nvidia/arch/nvalloc/unix/include/osapi.h b/src/nvidia/arch/nvalloc/unix/include/osapi.h
-index f91e3aa5..640155e9 100644
---- a/src/nvidia/arch/nvalloc/unix/include/osapi.h
-+++ b/src/nvidia/arch/nvalloc/unix/include/osapi.h
-@@ -121,9 +121,6 @@ NvBool     RmGpuHasIOSpaceEnabled (nv_state_t *);
- void       RmFreeUnusedClients    (nv_state_t *, nv_file_private_t *);
- NV_STATUS  RmIoctl                (nv_state_t *, nv_file_private_t *, NvU32, void *, NvU32);
- 
--NV_STATUS  RmAllocOsEvent         (NvHandle, nv_file_private_t *, NvU32);
--NV_STATUS  RmFreeOsEvent          (NvHandle, NvU32);
--
- void       RmI2cAddGpuPorts(nv_state_t *);
- 
- NV_STATUS  RmInitX86EmuState(OBJGPU *);
-@@ -141,9 +138,6 @@ int        amd_msr_c0011022_incompatible(OBJOS *);
- 
- NV_STATUS  rm_get_adapter_status    (nv_state_t *, NvU32 *);
- 
--NV_STATUS  rm_alloc_os_event        (NvHandle, nv_file_private_t *, NvU32);
--NV_STATUS  rm_free_os_event         (NvHandle, NvU32);
--NV_STATUS  rm_get_event_data        (nv_file_private_t *, NvP64, NvU32 *);
- void       rm_client_free_os_events (NvHandle);
- 
- NV_STATUS  rm_create_mmap_context   (NvHandle, NvHandle, NvHandle, NvP64, NvU64, NvU64, NvU32, NvU32);
-diff --git a/src/nvidia/arch/nvalloc/unix/src/escape.c b/src/nvidia/arch/nvalloc/unix/src/escape.c
-index de099513..1046b19f 100644
---- a/src/nvidia/arch/nvalloc/unix/src/escape.c
-+++ b/src/nvidia/arch/nvalloc/unix/src/escape.c
-@@ -677,52 +677,6 @@ NV_STATUS RmIoctl(
-             break;
-         }
- 
--        case NV_ESC_ALLOC_OS_EVENT:
--        {
--            nv_ioctl_alloc_os_event_t *pApi = data;
--
--            if (dataSize != sizeof(nv_ioctl_alloc_os_event_t))
--            {
--                rmStatus = NV_ERR_INVALID_ARGUMENT;
--                goto done;
--            }
--
--            pApi->Status = rm_alloc_os_event(pApi->hClient,
--                                             nvfp,
--                                             pApi->fd);
--            break;
--        }
--
--        case NV_ESC_FREE_OS_EVENT:
--        {
--            nv_ioctl_free_os_event_t *pApi = data;
--
--            if (dataSize != sizeof(nv_ioctl_free_os_event_t))
--            {
--                rmStatus = NV_ERR_INVALID_ARGUMENT;
--                goto done;
--            }
--
--            pApi->Status = rm_free_os_event(pApi->hClient, pApi->fd);
--            break;
--        }
--
--        case NV_ESC_RM_GET_EVENT_DATA:
--        {
--            NVOS41_PARAMETERS *pApi = data;
--
--            if (dataSize != sizeof(NVOS41_PARAMETERS))
--            {
--                rmStatus = NV_ERR_INVALID_ARGUMENT;
--                goto done;
--            }
--
--            pApi->status = rm_get_event_data(nvfp,
--                                             pApi->pEvent,
--                                             &pApi->MoreEvents);
--            break;
--        }
--
-         case NV_ESC_STATUS_CODE:
-         {
-             nv_state_t *pNv;
-diff --git a/src/nvidia/arch/nvalloc/unix/src/osapi.c b/src/nvidia/arch/nvalloc/unix/src/osapi.c
-index fd312466..51249750 100644
---- a/src/nvidia/arch/nvalloc/unix/src/osapi.c
-+++ b/src/nvidia/arch/nvalloc/unix/src/osapi.c
-@@ -25,6 +25,7 @@
- 
- #include <nv_ref.h>
- #include <nv.h>
-+#include <nv_escape.h>
- #include <nv-priv.h>
- #include <os/os.h>
- #include <osapi.h>
-@@ -406,6 +407,39 @@ static void free_os_events(
-     portSyncSpinlockRelease(nv->event_spinlock);
- }
- 
-+static NV_STATUS get_os_event_data(
-+    nv_file_private_t  *nvfp,
-+    NvP64               pEvent,
-+    NvU32              *MoreEvents
-+)
-+{
-+    nv_event_t        nv_event;
-+    NvUnixEvent      *nv_unix_event;
-+    NV_STATUS         status;
-+
-+    status = os_alloc_mem((void**)&nv_unix_event, sizeof(NvUnixEvent));
-+    if (status != NV_OK)
-+        return status;
-+
-+    status = nv_get_event(nvfp, &nv_event, MoreEvents);
-+    if (status != NV_OK)
-+    {
-+        status = NV_ERR_OPERATING_SYSTEM;
-+        goto done;
-+    }
-+
-+    os_mem_set(nv_unix_event, 0, sizeof(NvUnixEvent));
-+    nv_unix_event->hObject     = nv_event.hObject;
-+    nv_unix_event->NotifyIndex = nv_event.index;
-+    nv_unix_event->info32      = nv_event.info32;
-+    nv_unix_event->info16      = nv_event.info16;
-+
-+    status = os_memcpy_to_user(NvP64_VALUE(pEvent), nv_unix_event, sizeof(NvUnixEvent));
-+done:
-+    os_free_mem(nv_unix_event);
-+    return status;
-+}
-+
- void rm_client_free_os_events(
-     NvHandle client
- )
-@@ -482,6 +516,12 @@ static NV_STATUS allocate_os_event(
-         goto done;
-     }
- 
-+    new_event->hParent  = hParent;
-+    new_event->nvfp     = nvfp;
-+    new_event->fd       = fd;
-+    new_event->active   = NV_TRUE;
-+    new_event->refcount = 0;
-+
-     portSyncSpinlockAcquire(nv->event_spinlock);
-     for (event = nv->event_list; event; event = event->next)
-     {
-@@ -496,45 +536,26 @@ static NV_STATUS allocate_os_event(
- 
-     new_event->next = nv->event_list;
-     nv->event_list = new_event;
-+    nvfp->bCleanupRmapi = NV_TRUE;
-     portSyncSpinlockRelease(nv->event_spinlock);
- 
- done:
-     if (status == NV_OK)
-     {
--        new_event->hParent  = hParent;
--        new_event->nvfp     = nvfp;
--        new_event->fd       = fd;
--        new_event->active   = NV_TRUE;
--        new_event->refcount = 0;
--
--        nvfp->bCleanupRmapi = NV_TRUE;
--
-         NV_PRINTF(LEVEL_INFO, "allocated OS event:\n");
-         NV_PRINTF(LEVEL_INFO, "   hParent: 0x%x\n", hParent);
-         NV_PRINTF(LEVEL_INFO, "   fd: %d\n", fd);
-     }
-     else
-     {
-+        NV_PRINTF(LEVEL_ERROR, "failed to allocate OS event: 0x%08x\n", status);
-+        status = NV_ERR_INSUFFICIENT_RESOURCES;
-         portMemFree(new_event);
-     }
- 
-     return status;
- }
- 
--NV_STATUS RmAllocOsEvent(
--    NvHandle            hParent,
--    nv_file_private_t  *nvfp,
--    NvU32               fd
--)
--{
--    if (NV_OK != allocate_os_event(hParent, nvfp, fd))
--    {
--        NV_PRINTF(LEVEL_ERROR, "failed to allocate OS event\n");
--        return NV_ERR_INSUFFICIENT_RESOURCES;
--    }
--    return NV_OK;
--}
--
- static NV_STATUS free_os_event(
-     NvHandle    hParent,
-     NvU32       fd
-@@ -585,18 +606,6 @@ static NV_STATUS free_os_event(
-     return result;
- }
- 
--NV_STATUS RmFreeOsEvent(
--    NvHandle    hParent,
--    NvU32       fd
--)
--{
--    if (NV_OK != free_os_event(hParent, fd))
--    {
--        return NV_ERR_INVALID_EVENT;
--    }
--    return NV_OK;
--}
--
- static void RmExecuteWorkItem(
-     void *pWorkItem
- )
-@@ -656,40 +665,6 @@ done:
-     portMemFree((void *)pWi);
- }
- 
--static NV_STATUS RmGetEventData(
--    nv_file_private_t *nvfp,
--    NvP64 pEvent,
--    NvU32 *MoreEvents,
--    NvBool bUserModeArgs
--)
--{
--    NV_STATUS         RmStatus;
--    NvUnixEvent      *pKernelEvent = NULL;
--    nv_event_t        nv_event;
--    RMAPI_PARAM_COPY  paramCopy;
--
--    RmStatus = nv_get_event(nvfp, &nv_event, MoreEvents);
--    if (RmStatus != NV_OK)
--        return NV_ERR_OPERATING_SYSTEM;
--
--    // setup for access to client's parameters
--    RMAPI_PARAM_COPY_INIT(paramCopy, pKernelEvent, pEvent, 1, sizeof(NvUnixEvent));
--    RmStatus = rmapiParamsAcquire(&paramCopy, bUserModeArgs);
--    if (RmStatus != NV_OK)
--        return NV_ERR_OPERATING_SYSTEM;
--
--    pKernelEvent->hObject     = nv_event.hObject;
--    pKernelEvent->NotifyIndex = nv_event.index;
--    pKernelEvent->info32      = nv_event.info32;
--    pKernelEvent->info16      = nv_event.info16;
--
--    // release client buffer access, with copyout as needed
--    if (rmapiParamsRelease(&paramCopy) != NV_OK)
--        return NV_ERR_OPERATING_SYSTEM;
--
--    return NV_OK;
--}
--
- static NV_STATUS RmAccessRegistry(
-     NvHandle   hClient,
-     NvHandle   hObject,
-@@ -2738,16 +2713,68 @@ NV_STATUS NV_API_CALL rm_ioctl(
-     NvU32               dataSize
- )
- {
--    NV_STATUS rmStatus;
-+    NV_STATUS rmStatus = NV_OK;
-     THREAD_STATE_NODE threadState;
-     void *fp;
- 
-     NV_ENTER_RM_RUNTIME(sp,fp);
--    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
- 
--    rmStatus = RmIoctl(pNv, nvfp, Command, pData, dataSize);
-+    //
-+    // Some ioctls are handled entirely inside the OS layer and don't need to
-+    // suffer the overhead of calling into RM core.
-+    //
-+    switch (Command)
-+    {
-+        case NV_ESC_ALLOC_OS_EVENT:
-+        {
-+            nv_ioctl_alloc_os_event_t *pApi = pData;
-+
-+            if (dataSize != sizeof(nv_ioctl_alloc_os_event_t))
-+            {
-+                rmStatus = NV_ERR_INVALID_ARGUMENT;
-+                break;
-+            }
-+
-+            pApi->Status = allocate_os_event(pApi->hClient, nvfp, pApi->fd);
-+            break;
-+        }
-+        case NV_ESC_FREE_OS_EVENT:
-+        {
-+            nv_ioctl_free_os_event_t *pApi = pData;
-+
-+            if (dataSize != sizeof(nv_ioctl_free_os_event_t))
-+            {
-+                rmStatus = NV_ERR_INVALID_ARGUMENT;
-+                break;
-+            }
-+
-+            pApi->Status = free_os_event(pApi->hClient, pApi->fd);
-+            break;
-+        }
-+        case NV_ESC_RM_GET_EVENT_DATA:
-+        {
-+            NVOS41_PARAMETERS *pApi = pData;
-+
-+            if (dataSize != sizeof(NVOS41_PARAMETERS))
-+            {
-+                rmStatus = NV_ERR_INVALID_ARGUMENT;
-+                break;
-+            }
-+
-+            pApi->status = get_os_event_data(nvfp,
-+                                             pApi->pEvent,
-+                                             &pApi->MoreEvents);
-+            break;
-+        }
-+        default:
-+        {
-+            threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
-+            rmStatus = RmIoctl(pNv, nvfp, Command, pData, dataSize);
-+            threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
-+            break;
-+        }
-+    }
- 
--    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
-     NV_EXIT_RM_RUNTIME(sp,fp);
- 
-     return rmStatus;
-@@ -2882,65 +2909,6 @@ void NV_API_CALL rm_unbind_lock(
-     NV_EXIT_RM_RUNTIME(sp,fp);
- }
- 
--NV_STATUS rm_alloc_os_event(
--    NvHandle            hClient,
--    nv_file_private_t  *nvfp,
--    NvU32               fd
--)
--{
--    NV_STATUS RmStatus;
--
--    // LOCK: acquire API lock
--    if ((RmStatus = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_EVENT)) == NV_OK)
--    {
--        RmStatus = RmAllocOsEvent(hClient, nvfp, fd);
--
--        // UNLOCK: release API lock
--        rmapiLockRelease();
--    }
--
--    return RmStatus;
--}
--
--NV_STATUS rm_free_os_event(
--    NvHandle    hClient,
--    NvU32       fd
--)
--{
--    NV_STATUS RmStatus;
--
--    // LOCK: acquire API lock
--    if ((RmStatus = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_EVENT)) == NV_OK)
--    {
--        RmStatus = RmFreeOsEvent(hClient, fd);
--
--        // UNLOCK: release API lock
--        rmapiLockRelease();
--    }
--
--    return RmStatus;
--}
--
--NV_STATUS rm_get_event_data(
--    nv_file_private_t  *nvfp,
--    NvP64               pEvent,
--    NvU32              *MoreEvents
--)
--{
--    NV_STATUS RmStatus;
--
--    // LOCK: acquire API lock
--    if ((RmStatus = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_EVENT)) == NV_OK)
--    {
--        RmStatus = RmGetEventData(nvfp, pEvent, MoreEvents, NV_TRUE);
--
--        // UNLOCK: release API lock
--        rmapiLockRelease();
--    }
--
--    return RmStatus;
--}
--
- NV_STATUS NV_API_CALL rm_read_registry_dword(
-     nvidia_stack_t *sp,
-     nv_state_t *nv,
-diff --git a/src/nvidia/exports_link_command.txt b/src/nvidia/exports_link_command.txt
-index de3cf86d..b92185de 100644
---- a/src/nvidia/exports_link_command.txt
-+++ b/src/nvidia/exports_link_command.txt
-@@ -1,6 +1,5 @@
- --undefined=rm_disable_adapter
- --undefined=rm_execute_work_item
----undefined=rm_free_os_event
- --undefined=rm_free_private_state
- --undefined=rm_cleanup_file_private
- --undefined=rm_unbind_lock
-diff --git a/src/nvidia/src/kernel/disp/disp_sw.c b/src/nvidia/src/kernel/disp/disp_sw.c
-index 03ce58f7..bb7396b6 100644
---- a/src/nvidia/src/kernel/disp/disp_sw.c
-+++ b/src/nvidia/src/kernel/disp/disp_sw.c
-@@ -141,8 +141,15 @@ NV_STATUS dispswReleaseSemaphoreAndNotifierFill
-     NvBool     bFound = NV_FALSE;
-     NV_STATUS  status;
- 
-+#define PRINT_INTERVAL 3600 // At 60Hz, this will emit about once per minute.
-+
-     if (flags & F_SEMAPHORE_ADDR_VALID)
-     {
-+        static NvU64 counter;
-+        if ((++counter % PRINT_INTERVAL) == 0) {
-+            NV_PRINTF(LEVEL_ERROR, "XXXMT: NVRM debugging - F_SEMAPHORE_ADDR_VALID = %llu\n", counter);
-+        }
-+
-         bFound = CliGetDmaMappingInfo(RES_GET_CLIENT(pDevice),
-                                       RES_GET_HANDLE(pDevice),
-                                       vaSpace,
-@@ -154,6 +161,11 @@ NV_STATUS dispswReleaseSemaphoreAndNotifierFill
-     }
-     else if (flags & F_SEMAPHORE_RELEASE)
-     {
-+        static NvU64 counter;
-+        if ((++counter % PRINT_INTERVAL) == 0) {
-+            NV_PRINTF(LEVEL_ERROR, "XXXMT: NVRM debugging - F_SEMAPHORE_RELEASE = %llu\n", counter);
-+        }
-+
-         status =  semaphoreFillGPUVA(pGpu,
-                                      pDevice,
-                                      vaSpace,
-@@ -165,6 +177,11 @@ NV_STATUS dispswReleaseSemaphoreAndNotifierFill
-     }
-     else if (flags & F_NOTIFIER_FILL)
-     {
-+        static NvU64 counter;
-+        if ((++counter % PRINT_INTERVAL) == 0) {
-+            NV_PRINTF(LEVEL_ERROR, "XXXMT: NVRM debugging - F_NOTIFIER_FILL = %llu\n", counter);
-+        }
-+
-         status = notifyFillNotifierGPUVA(pGpu,
-                                          pDevice,
-                                          vaSpace,
-@@ -175,5 +192,11 @@ NV_STATUS dispswReleaseSemaphoreAndNotifierFill
-                                          NV9072_NOTIFIERS_NOTIFY_ON_VBLANK /* Index */);
-         return status;
-     }
-+    else {
-+        static NvU64 counter;
-+        if ((++counter % PRINT_INTERVAL) == 0) {
-+            NV_PRINTF(LEVEL_ERROR, "XXXMT: NVRM debugging - ??? 0x%08x = %llu\n", flags, counter);
-+        }
-+    }
-     return NV9072_NOTIFICATION_STATUS_DONE_SUCCESS;
- }
--- 
-2.45.2
-
-       --- a/nvidia-drm/nvidia-drm-linux.c
-+++ b/nvidia-drm/nvidia-drm-linux.c
-@@ -31,13 +31,13 @@
- 
- MODULE_PARM_DESC(
-     modeset,
--    "Enable atomic kernel modesetting (1 = enable, 0 = disable (default))");
-+    "Enable atomic kernel modesetting (1 = enable (default), 0 = disable)");
- module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400);
- 
- #if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
- MODULE_PARM_DESC(
-     fbdev,
--    "Create a framebuffer device (1 = enable, 0 = disable (default)) (EXPERIMENTAL)");
-+    "Create a framebuffer device (1 = enable (default), 0 = disable) (EXPERIMENTAL)");
- module_param_named(fbdev, nv_drm_fbdev_module_param, bool, 0400);
- #endif
- 
---- a/nvidia-drm/nvidia-drm-os-interface.c
-+++ b/nvidia-drm/nvidia-drm-os-interface.c
-@@ -41,8 +41,8 @@
- #include <drm/drmP.h>
- #endif
- 
--bool nv_drm_modeset_module_param = false;
--bool nv_drm_fbdev_module_param = false;
-+bool nv_drm_modeset_module_param = true;
-+bool nv_drm_fbdev_module_param = true;
- 
- void *nv_drm_calloc(size_t nmemb, size_t size)
- {
-
---- a/src/nvidia-modeset/Makefile
-+++ b/src/nvidia-modeset/Makefile
-@@ -142,6 +142,7 @@ ifeq ($(TARGET_ARCH),x86_64)
-   CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -fno-jump-tables)
-   CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mindirect-branch=thunk-extern)
-   CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mindirect-branch-register)
-+  CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mharden-sls=all)
- endif
- 
- CFLAGS += $(CONDITIONAL_CFLAGS)
\ No newline at end of file
diff --git a/patches/nobara/0001-Allow-to-set-custom-USB-pollrate-for-specific-device.patch b/patches/nobara/0001-Allow-to-set-custom-USB-pollrate-for-specific-device.patch
deleted file mode 100644
index ecd0304..0000000
--- a/patches/nobara/0001-Allow-to-set-custom-USB-pollrate-for-specific-device.patch
+++ /dev/null
@@ -1,258 +0,0 @@
-From 498e88ae626be4f523063c8a7027b4b02eca31d2 Mon Sep 17 00:00:00 2001
-From: GloriousEggroll <gloriouseggroll@gmail.com>
-Date: Tue, 17 Jan 2023 12:08:46 -0700
-Subject: [PATCH] Allow to set custom USB pollrate for specific devices like
- so: usbcore.interrupt_interval_override=045e:00db:16,1bcf:0005:1
-
----
- .../admin-guide/kernel-parameters.txt         |   8 +
- drivers/usb/core/config.c                     | 170 +++++++++++++++++-
- drivers/usb/core/usb.c                        |   1 +
- drivers/usb/core/usb.h                        |   1 +
- 4 files changed, 179 insertions(+), 1 deletion(-)
-
-diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
-index dbd26fde4..c9b8b80af 100644
---- a/Documentation/admin-guide/kernel-parameters.txt
-+++ b/Documentation/admin-guide/kernel-parameters.txt
-@@ -6552,6 +6552,14 @@
- 					delay after resetting its port);
- 			Example: quirks=0781:5580:bk,0a5c:5834:gij
- 
-+	usbcore.interrupt_interval_override=
-+			[USB] A list of USB devices for which a different polling
-+			interval than the default shall be used on all interrupt-type
-+			endpoints. The format is VendorID:ProductID:interval, with
-+			the vendor and product ids specified hexadecimally, and the
-+			interval decimally in milliseconds.
-+			Example: interrupt_interval_override=045e:00db:16,1bcf:0005:2
-+
- 	usbhid.mousepoll=
- 			[USBHID] The interval which mice are to be polled at.
- 
-diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
-index 48bc8a481..84bd550ad 100644
---- a/drivers/usb/core/config.c
-+++ b/drivers/usb/core/config.c
-@@ -19,6 +19,149 @@
- #define USB_MAXCONFIG			8	/* Arbitrary limit */
- 
- 
-+/* A struct associated with the interrupt_interval_override module parameter, representing
-+   an user's choice to force a specific interrupt interval upon all interrupt endpoints of
-+   a certain device. */
-+struct interrupt_interval_override {
-+	/* The vendor ID of the device of which the interrupt interval shall be overridden */
-+	u16 vendor;
-+	/* The product ID of the device of which the interrupt interval shall be overridden */
-+	u16 product;
-+	/* The new interval measured in milliseconds that shall be given to all endpoints of type interrupt on said device */
-+	unsigned int interval;
-+};
-+
-+static DEFINE_MUTEX(interrupt_interval_override_mutex);
-+static char interrupt_interval_override_param[128];
-+static struct interrupt_interval_override *interrupt_interval_override_list = NULL;
-+static size_t interrupt_interval_override_count = 0;
-+
-+static int interrupt_interval_override_param_set(const char *value, const struct kernel_param *kp)
-+{
-+	const char *p;
-+	unsigned short vendor, product;
-+	unsigned int interval;
-+	struct interrupt_interval_override* list;
-+	struct interrupt_interval_override param;
-+	size_t count, max_count, i, len;
-+	int err, res;
-+
-+	mutex_lock(&interrupt_interval_override_mutex);
-+
-+	if (!value || !*value) {
-+		/* Unset the current variable. */
-+		kfree(interrupt_interval_override_list);
-+		interrupt_interval_override_list = NULL;
-+		interrupt_interval_override_count = 0;
-+		param_set_copystring(value, kp);  /* Does not fail: the empty string is short enough to fit. */
-+		mutex_unlock(&interrupt_interval_override_mutex);
-+		return 0;
-+	}
-+
-+	/* Compute an upper bound on the amount of entries we need. */
-+	for (max_count = 1, i = 0; value[i]; i++) {
-+		if (value[i] == ',')
-+			max_count++;
-+	}
-+
-+	/* Ensure we can allocate enough memory before overwriting the global variables. */
-+	list = kcalloc(max_count,
-+		sizeof(struct interrupt_interval_override),
-+		GFP_KERNEL);
-+
-+	if (!list) {
-+		mutex_unlock(&interrupt_interval_override_mutex);
-+		return -ENOMEM;
-+	}
-+
-+	err = param_set_copystring(value, kp);
-+	if (err) {
-+		kfree(list);
-+		mutex_unlock(&interrupt_interval_override_mutex);
-+		return err;
-+	}
-+
-+	/* Parse the parameter. Example of a valid parameter: 045e:00db:16,1bcf:0005:2 */
-+	for (count = 0, p = (const char*)value; p && *p;) {
-+		res = sscanf(p, "%hx:%hx:%d%zn", &vendor, &product, &interval, &len);
-+
-+		/* Check whether all variables (vendor, product, interval, len) were assigned.
-+		   %zn does not increase the assignment count, so we need to check for value 3 instead of 4.
-+		   %zn does not consume input either, so setting len shouldn't fail if interval has been properly set. */
-+		if (res != 3) {
-+			pr_warn("Error while parsing USB interrupt interval override parameter %s.\n", value);
-+			break;
-+		}
-+
-+		param.vendor = (u16)vendor;
-+		param.product = (u16)product;
-+		param.interval = interval;
-+		list[count++] = param;
-+
-+		p += len;
-+		if (*p == ',' && *(p+1) != '\0') {
-+			p++;
-+			continue;
-+		} else if(*p == '\0' || (*p == '\n' && *(p+1) == '\0')) {
-+			break;
-+		} else {
-+			pr_warn("Error while parsing USB interrupt interval override parameter %s.\n", value);
-+			break;
-+		}
-+	}
-+
-+	/* Overwrite the global variables with the local ones. */
-+	kfree(interrupt_interval_override_list);
-+	interrupt_interval_override_list = list;
-+	interrupt_interval_override_count = count;
-+	mutex_unlock(&interrupt_interval_override_mutex);
-+	return 0;
-+}
-+
-+static const struct kernel_param_ops interrupt_interval_override_param_ops = {
-+	.set = interrupt_interval_override_param_set,
-+	.get = param_get_string,
-+};
-+
-+static struct kparam_string interrupt_interval_override_param_string = {
-+	.maxlen = sizeof(interrupt_interval_override_param),
-+	.string = interrupt_interval_override_param,
-+};
-+
-+device_param_cb(interrupt_interval_override,
-+	&interrupt_interval_override_param_ops,
-+	&interrupt_interval_override_param_string,
-+	0644);
-+MODULE_PARM_DESC(interrupt_interval_override,
-+	"Override the polling interval of all interrupt-type endpoints of a specific USB"
-+	" device by specifying interrupt_interval_override=vendorID:productID:interval.");
-+
-+/* Given an USB device, this checks whether the user has specified they want to override the interrupt
-+   polling interval on all interrupt-type endpoints of said device.
-+
-+   This function returns the user-desired amount of milliseconds between interrupts on said endpoint.
-+   If this function returns zero, the device-requested interrupt interval should be used. */
-+static unsigned int usb_check_interrupt_interval_override(struct usb_device* udev)
-+{
-+	size_t i;
-+	unsigned int res;
-+	u16 vendor = le16_to_cpu(udev->descriptor.idVendor);
-+	u16 product = le16_to_cpu(udev->descriptor.idProduct);
-+
-+	mutex_lock(&interrupt_interval_override_mutex);
-+	for (i = 0; i < interrupt_interval_override_count; i++) {
-+		if (interrupt_interval_override_list[i].vendor == vendor
-+				&& interrupt_interval_override_list[i].product == product) {
-+
-+			res = interrupt_interval_override_list[i].interval;
-+			mutex_unlock(&interrupt_interval_override_mutex);
-+			return res;
-+		}
-+	}
-+	mutex_unlock(&interrupt_interval_override_mutex);
-+	return 0;
-+}
-+
- static inline const char *plural(int n)
- {
- 	return (n == 1 ? "" : "s");
-@@ -261,7 +404,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
- 	struct usb_endpoint_descriptor *d;
- 	struct usb_host_endpoint *endpoint;
- 	int n, i, j, retval;
--	unsigned int maxp;
-+	unsigned int maxp, ival;
- 	const unsigned short *maxpacket_maxes;
- 
- 	d = (struct usb_endpoint_descriptor *) buffer;
-@@ -386,6 +529,23 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
- 		endpoint->desc.bInterval = n;
- 	}
- 
-+	/* Override the interrupt polling interval if a module parameter tells us to do so. */
-+	if (usb_endpoint_xfer_int(d)) {
-+		ival = usb_check_interrupt_interval_override(udev);
-+		if (ival > 0) {
-+			switch (udev->speed) {
-+			case USB_SPEED_SUPER_PLUS:
-+			case USB_SPEED_SUPER:
-+			case USB_SPEED_HIGH:
-+				endpoint->desc.bInterval = fls(ival) + 3;
-+				break;
-+			default:  /* USB_SPEED_FULL or _LOW */
-+				endpoint->desc.bInterval = ival;
-+				break;
-+			}
-+		}
-+	}
-+
- 	/* Some buggy low-speed devices have Bulk endpoints, which is
- 	 * explicitly forbidden by the USB spec.  In an attempt to make
- 	 * them usable, we will try treating them as Interrupt endpoints.
-@@ -1092,3 +1252,11 @@ int usb_get_bos_descriptor(struct usb_device *dev)
- 	usb_release_bos_descriptor(dev);
- 	return ret;
- }
-+
-+void usb_release_interrupt_interval_override_list(void)
-+{
-+	mutex_lock(&interrupt_interval_override_mutex);
-+	kfree(interrupt_interval_override_list);
-+	interrupt_interval_override_list = NULL;
-+	mutex_unlock(&interrupt_interval_override_mutex);
-+}
-diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
-index 11b15d7b3..ec52c6322 100644
---- a/drivers/usb/core/usb.c
-+++ b/drivers/usb/core/usb.c
-@@ -1066,6 +1066,7 @@ static void __exit usb_exit(void)
- 		return;
- 
- 	usb_release_quirk_list();
-+	usb_release_interrupt_interval_override_list();
- 	usb_deregister_device_driver(&usb_generic_driver);
- 	usb_major_cleanup();
- 	usb_deregister(&usbfs_driver);
-diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
-index 82538daac..b6faa897c 100644
---- a/drivers/usb/core/usb.h
-+++ b/drivers/usb/core/usb.h
-@@ -37,6 +37,7 @@ extern void usb_authorize_interface(struct usb_interface *);
- extern void usb_detect_quirks(struct usb_device *udev);
- extern void usb_detect_interface_quirks(struct usb_device *udev);
- extern void usb_release_quirk_list(void);
-+extern void usb_release_interrupt_interval_override_list(void);
- extern bool usb_endpoint_is_ignored(struct usb_device *udev,
- 		struct usb_host_interface *intf,
- 		struct usb_endpoint_descriptor *epd);
--- 
-2.39.0
-
diff --git a/patches/nobara/0001-Revert-PCI-Add-a-REBAR-size-quirk-for-Sapphire-RX-56.patch b/patches/nobara/0001-Revert-PCI-Add-a-REBAR-size-quirk-for-Sapphire-RX-56.patch
deleted file mode 100644
index dd8f961..0000000
--- a/patches/nobara/0001-Revert-PCI-Add-a-REBAR-size-quirk-for-Sapphire-RX-56.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From 4b4ce124699c160925e5fdeb147a78f79d38351f Mon Sep 17 00:00:00 2001
-From: Simon May <simon.may@protonmail.ch>
-Date: Sun, 19 Sep 2021 23:45:59 +0200
-Subject: [PATCH] Revert "PCI: Add a REBAR size quirk for Sapphire RX 5600 XT
- Pulse"
-
-This reverts commit 907830b0fc9e374d00f3c83de5e426157b482c01.
----
- drivers/pci/pci.c | 9 +--------
- 1 file changed, 1 insertion(+), 8 deletions(-)
-
-diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
-index a607f277c..3174fa871 100644
---- a/drivers/pci/pci.c
-+++ b/drivers/pci/pci.c
-@@ -3755,14 +3755,8 @@ u32 pci_rebar_get_possible_sizes(struct pci_dev *pdev, int bar)
- 		return 0;
-
- 	pci_read_config_dword(pdev, pos + PCI_REBAR_CAP, &cap);
--	cap = FIELD_GET(PCI_REBAR_CAP_SIZES, cap);
-
--	/* Sapphire RX 5600 XT Pulse has an invalid cap dword for BAR 0 */
--	if (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->device == 0x731f &&
--	    bar == 0 && cap == 0x700)
--		return 0x3f00;
--
--	return cap;
-+	return (cap & PCI_REBAR_CAP_SIZES) >> 4;
- }
- EXPORT_SYMBOL(pci_rebar_get_possible_sizes);
-
--- 
-2.30.2
-
diff --git a/patches/nobara/0001-Revert-nvme-pci-drop-redundant-pci_enable_pcie_error.patch b/patches/nobara/0001-Revert-nvme-pci-drop-redundant-pci_enable_pcie_error.patch
deleted file mode 100644
index 6eac856..0000000
--- a/patches/nobara/0001-Revert-nvme-pci-drop-redundant-pci_enable_pcie_error.patch
+++ /dev/null
@@ -1,108 +0,0 @@
-From 7d86ca8db51f6b75b5c1470d6294c6f24221f560 Mon Sep 17 00:00:00 2001
-From: GloriousEggroll <gloriouseggroll@gmail.com>
-Date: Mon, 30 Oct 2023 22:36:19 -0600
-Subject: [PATCH] Revert "nvme-pci: drop redundant
- pci_enable_pcie_error_reporting()"
-
-This reverts commits:
-1ad11eafc63ac16e667853bee4273879226d2d1b
-7ec4b34be4234599cf1241ef807cdb7c3636f6fe
-69b264df8a412820e98867dbab871c6526c5e5aa
-
----
- drivers/nvme/host/pci.c | 6 +++++-
- 1 file changed, 5 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
-index 3f0c9ee09a12..bc11bfe6f87a 100644
---- a/drivers/nvme/host/pci.c
-+++ b/drivers/nvme/host/pci.c
-@@ -5,6 +5,7 @@
-  */
- 
- #include <linux/acpi.h>
-+#include <linux/aer.h>
- #include <linux/async.h>
- #include <linux/blkdev.h>
- #include <linux/blk-mq.h>
-@@ -2537,6 +2538,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
- 
- 	nvme_map_cmb(dev);
- 
-+	pci_enable_pcie_error_reporting(pdev);
- 	pci_save_state(pdev);
- 
- 	result = nvme_pci_configure_admin_queue(dev);
-@@ -2601,8 +2603,10 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
- 	nvme_suspend_io_queues(dev);
- 	nvme_suspend_queue(dev, 0);
- 	pci_free_irq_vectors(pdev);
--	if (pci_is_enabled(pdev))
-+	if (pci_is_enabled(pdev)) {
-+		pci_disable_pcie_error_reporting(pdev);
- 		pci_disable_device(pdev);
-+	}
- 	nvme_reap_pending_cqes(dev);
- 
- 	nvme_cancel_tagset(&dev->ctrl);
--- 
-2.41.0
-diff --git a/include/linux/aer.h b/include/linux/aer.h
-index 29cc10220..94ce49a5f 100644
---- a/include/linux/aer.h
-+++ b/include/linux/aer.h
-@@ -41,9 +41,20 @@ struct aer_capability_regs {
- };
-
- #if defined(CONFIG_PCIEAER)
-+/* PCIe port driver needs this function to enable AER */
-+int pci_enable_pcie_error_reporting(struct pci_dev *dev);
-+int pci_disable_pcie_error_reporting(struct pci_dev *dev);
- int pci_aer_clear_nonfatal_status(struct pci_dev *dev);
- int pcie_aer_is_native(struct pci_dev *dev);
- #else
-+static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev)
-+{
-+	return -EINVAL;
-+}
-+static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev)
-+{
-+	return -EINVAL;
-+}
- static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
- {
- 	return -EINVAL;
-
-diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
-index 9c8fd69ae..0dc7be481 100644
---- a/drivers/pci/pcie/aer.c
-+++ b/drivers/pci/pcie/aer.c
-@@ -231,7 +231,7 @@ int pcie_aer_is_native(struct pci_dev *dev)
- }
- EXPORT_SYMBOL_NS_GPL(pcie_aer_is_native, CXL);
-
--static int pci_enable_pcie_error_reporting(struct pci_dev *dev)
-+int pci_enable_pcie_error_reporting(struct pci_dev *dev)
- {
- 	int rc;
-
-@@ -241,6 +241,19 @@ static int pci_enable_pcie_error_reporting(struct pci_dev *dev)
- 	rc = pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
- 	return pcibios_err_to_errno(rc);
- }
-+EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
-+
-+int pci_disable_pcie_error_reporting(struct pci_dev *dev)
-+{
-+	int rc;
-+
-+	if (!pcie_aer_is_native(dev))
-+		return -EIO;
-+
-+	rc = pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
-+	return pcibios_err_to_errno(rc);
-+}
-+EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
-
- int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
- {
diff --git a/patches/nobara/0001-Set-amdgpu.ppfeaturemask-0xffffffff-as-default.patch b/patches/nobara/0001-Set-amdgpu.ppfeaturemask-0xffffffff-as-default.patch
deleted file mode 100644
index 26e3ab7..0000000
--- a/patches/nobara/0001-Set-amdgpu.ppfeaturemask-0xffffffff-as-default.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From 9179080ffaaf1d438db6e0a5a37bdf8dafe233a6 Mon Sep 17 00:00:00 2001
-From: Thomas Crider <gloriouseggroll@gmail.com>
-Date: Mon, 27 Nov 2023 16:13:13 -0500
-Subject: [PATCH] Set amdgpu.ppfeaturemask=0xffffffff as default
-
----
- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
-index e06009966..4e791eb8f 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
-@@ -158,7 +158,7 @@ bool enforce_isolation;
-  * OverDrive(bit 14) disabled by default
-  * GFX DCS(bit 19) disabled by default
-  */
--uint amdgpu_pp_feature_mask = 0xfff7bfff;
-+uint amdgpu_pp_feature_mask = 0xffffffff;
- uint amdgpu_force_long_training;
- int amdgpu_lbpw = -1;
- int amdgpu_compute_multipipe = -1;
--- 
-2.43.0
-
diff --git a/patches/nobara/0001-acpi-proc-idle-skip-dummy-wait.patch b/patches/nobara/0001-acpi-proc-idle-skip-dummy-wait.patch
deleted file mode 100644
index eb3cf54..0000000
--- a/patches/nobara/0001-acpi-proc-idle-skip-dummy-wait.patch
+++ /dev/null
@@ -1,125 +0,0 @@
-Processors based on the Zen microarchitecture support IOPORT based deeper
-C-states. The idle driver reads the acpi_gbl_FADT.xpm_timer_block.address
-in the IOPORT based C-state exit path which is claimed to be a
-"Dummy wait op" and has been around since ACPI introduction to Linux
-dating back to Andy Grover's Mar 14, 2002 posting [1].
-The comment above the dummy operation was elaborated by Andreas Mohr back
-in 2006 in commit b488f02156d3d ("ACPI: restore comment justifying 'extra'
-P_LVLx access") [2] where the commit log claims:
-"this dummy read was about: STPCLK# doesn't get asserted in time on
-(some) chipsets, which is why we need to have a dummy I/O read to delay
-further instruction processing until the CPU is fully stopped."
-
-However, sampling certain workloads with IBS on AMD Zen3 system shows
-that a significant amount of time is spent in the dummy op, which
-incorrectly gets accounted as C-State residency. A large C-State
-residency value can prime the cpuidle governor to recommend a deeper
-C-State during the subsequent idle instances, starting a vicious cycle,
-leading to performance degradation on workloads that rapidly switch
-between busy and idle phases.
-
-One such workload is tbench where a massive performance degradation can
-be observed during certain runs. Following are some statistics gathered
-by running tbench with 128 clients, on a dual socket (2 x 64C/128T) Zen3
-system with the baseline kernel, baseline kernel keeping C2 disabled,
-and baseline kernel with this patch applied keeping C2 enabled:
-
-baseline kernel was tip:sched/core at
-commit f3dd3f674555 ("sched: Remove the limitation of WF_ON_CPU on
-wakelist if wakee cpu is idle")
-
-Kernel        : baseline      baseline + C2 disabled   baseline + patch
-
-Min (MB/s)    : 2215.06       33072.10 (+1393.05%)     33016.10 (+1390.52%)
-Max (MB/s)    : 32938.80      34399.10                 34774.50
-Median (MB/s) : 32191.80      33476.60                 33805.70
-AMean (MB/s)  : 22448.55      33649.27 (+49.89%)       33865.43 (+50.85%)
-AMean Stddev  : 17526.70      680.14                   880.72
-AMean CoefVar : 78.07%        2.02%                    2.60%
-
-The data shows there are edge cases that can cause massive regressions
-in case of tbench. Profiling the bad runs with IBS shows a significant
-amount of time being spent in acpi_idle_do_entry method:
-
-Overhead  Command          Shared Object             Symbol
-  74.76%  swapper          [kernel.kallsyms]         [k] acpi_idle_do_entry
-   0.71%  tbench           [kernel.kallsyms]         [k] update_sd_lb_stats.constprop.0
-   0.69%  tbench_srv       [kernel.kallsyms]         [k] update_sd_lb_stats.constprop.0
-   0.49%  swapper          [kernel.kallsyms]         [k] psi_group_change
-   ...
-
-Annotation of acpi_idle_do_entry method reveals almost all the time in
-acpi_idle_do_entry is spent on the port I/O in wait_for_freeze():
-
-  0.14 │      in     (%dx),%al       # <------ First "in" corresponding to inb(cx->address)
-  0.51 │      mov    0x144d64d(%rip),%rax
-  0.00 │      test   $0x80000000,%eax
-       │    ↓ jne    62 	     # <------ Skip if running in guest
-  0.00 │      mov    0x19800c3(%rip),%rdx
- 99.33 │      in     (%dx),%eax      # <------ Second "in" corresponding to inl(acpi_gbl_FADT.xpm_timer_block.address)
-  0.00 │62:   mov    -0x8(%rbp),%r12
-  0.00 │      leave
-  0.00 │    ← ret
-
-This overhead is reflected in the C2 residency on the test system where
-C2 is an IOPORT based C-State. The total C-state residency reported by
-"cpupower idle-info" on CPU0 for good and bad case over the 80s tbench
-run is as follows (all numbers are in microseconds):
-
-			    Good Run 		Bad Run
-			   (Baseline)
-
-POLL: 			       43338		   6231  (-85.62%)
-C1 (MWAIT Based): 	    23576156 		 363861  (-98.45%)
-C2 (IOPORT Based): 	    10781218 	       77027280  (+614.45%)
-
-The larger residency value in bad case leads to the system recommending
-C2 state again for subsequent idle instances. The pattern lasts till the
-end of the tbench run. Following is the breakdown of "entry_method"
-passed to acpi_idle_do_entry during good run and bad run:
-
-                                        			Good Run    Bad Run
-							       (Baseline)
-
-Number of times acpi_idle_do_entry was called:             	6149573     6149050  (-0.01%)
- |-> Number of times entry_method was "ACPI_CSTATE_FFH":        6141494       88144  (-98.56%)
- |-> Number of times entry_method was "ACPI_CSTATE_HALT":             0           0  (+0.00%)
- |-> Number of times entry_method was "ACPI_CSTATE_SYSTEMIO":      8079     6060906  (+74920.49%)
-
-For processors based on the Zen microarchitecture, this dummy wait op is
-unnecessary and can be skipped when choosing IOPORT based C-States to
-avoid polluting the C-state residency information.
-
-Link: https://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux-fullhistory.git/commit/?id=972c16130d9dc182cedcdd408408d9eacc7d6a2d [1]
-Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b488f02156d3deb08f5ad7816d565c370a8cc6f1 [2]
-
-Suggested-by: Calvin Ong <calvin.ong@amd.com>
-Cc: stable@vger.kernel.org
-Cc: regressions@lists.linux.dev
-Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
----
- drivers/acpi/processor_idle.c | 7 +++++--
- 1 file changed, 5 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
-index 16a1663d02d4..18850aa2b79b 100644
---- a/drivers/acpi/processor_idle.c
-+++ b/drivers/acpi/processor_idle.c
-@@ -529,9 +529,11 @@ static __cpuidle void io_idle(unsigned long addr)
- 	inb(addr);
- 
- #ifdef	CONFIG_X86
--	/* No delay is needed if we are in guest */
--	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
--		return;
-+	/*
-+	 * No delay is needed if we are in guest or on a processor
-+	 * based on the Zen microarchitecture.
-+	 */
-+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR) || boot_cpu_has(X86_FEATURE_ZEN))
- 	/*
- 	 * Modern (>=Nehalem) Intel systems use ACPI via intel_idle,
- 	 * not this code.  Assume that any Intel systems using this
-
--- 
-2.25.1
diff --git a/patches/nobara/0001-add-acpi_call.patch b/patches/nobara/0001-add-acpi_call.patch
deleted file mode 100644
index b0a185a..0000000
--- a/patches/nobara/0001-add-acpi_call.patch
+++ /dev/null
@@ -1,506 +0,0 @@
-From 3f14226e2e90dba5d72c106da29e1876eb7b88ff Mon Sep 17 00:00:00 2001
-From: Denis <benato.denis96@gmail.com>
-Date: Thu, 28 Sep 2023 03:40:53 +0200
-Subject: [PATCH] add acpi_call
-
----
- drivers/platform/x86/Kconfig     |   5 +
- drivers/platform/x86/Makefile    |   4 +
- drivers/platform/x86/acpi_call.c | 449 +++++++++++++++++++++++++++++++
- 3 files changed, 458 insertions(+)
- create mode 100644 drivers/platform/x86/acpi_call.c
-
-diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
-index 49c2c4cd8d00..fde791e51261 100644
---- a/drivers/platform/x86/Kconfig
-+++ b/drivers/platform/x86/Kconfig
-@@ -170,6 +170,11 @@ config ACER_WIRELESS
-           If you choose to compile this driver as a module the module will be
-           called acer-wireless.
- 
-+config ACPI_CALL
-+	tristate "acpi_call module"
-+	help
-+	  This embeds acpi_call module into the kernel
-+
- config ACER_WMI
- 	tristate "Acer WMI Laptop Extras"
- 	depends on BACKLIGHT_CLASS_DEVICE
-diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
-index 52dfdf574ac2..1e434fcb8273 100644
---- a/drivers/platform/x86/Makefile
-+++ b/drivers/platform/x86/Makefile
-@@ -4,10 +4,14 @@
- # x86 Platform-Specific Drivers
- #
- 
-+# ACPI calls
-+
- # Windows Management Interface
- obj-$(CONFIG_ACPI_WMI)		+= wmi.o
- obj-$(CONFIG_WMI_BMOF)		+= wmi-bmof.o
- 
-+obj-$(CONFIG_ACPI_CALL)		+= acpi_call.o
-+
- # WMI drivers
- obj-$(CONFIG_HUAWEI_WMI)		+= huawei-wmi.o
- obj-$(CONFIG_MXM_WMI)			+= mxm-wmi.o
-diff --git a/drivers/platform/x86/acpi_call.c b/drivers/platform/x86/acpi_call.c
-new file mode 100644
-index 000000000000..d7bc238e16da
---- /dev/null
-+++ b/drivers/platform/x86/acpi_call.c
-@@ -0,0 +1,449 @@
-+/* Copyright (c) 2010: Michal Kottman */
-+
-+#define BUILDING_ACPICA
-+
-+#include <linux/module.h>
-+#include <linux/kernel.h>
-+#include <linux/version.h>
-+#include <linux/proc_fs.h>
-+#include <linux/slab.h>
-+#include <linux/uaccess.h>
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
-+#include <asm/uaccess.h>
-+#endif
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
-+#include <linux/acpi.h>
-+#else
-+#include <acpi/acpi.h>
-+#endif
-+
-+MODULE_LICENSE("GPL");
-+
-+/* Uncomment the following line to enable debug messages */
-+/*
-+#define DEBUG
-+*/
-+
-+#define BUFFER_SIZE 4096
-+#define INPUT_BUFFER_SIZE (2 * BUFFER_SIZE)
-+#define MAX_ACPI_ARGS 16
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
-+#define HAVE_PROC_CREATE
-+#endif
-+
-+extern struct proc_dir_entry *acpi_root_dir;
-+
-+static char input_buffer[INPUT_BUFFER_SIZE];
-+static char result_buffer[BUFFER_SIZE];
-+static char not_called_message[11] = "not called";
-+
-+static u8 temporary_buffer[BUFFER_SIZE];
-+
-+static size_t get_avail_bytes(void) {
-+    return BUFFER_SIZE - strlen(result_buffer);
-+}
-+static char *get_buffer_end(void) {
-+    return result_buffer + strlen(result_buffer);
-+}
-+
-+/** Appends the contents of an acpi_object to the result buffer
-+@param result   An acpi object holding result data
-+@returns        0 if the result could fully be saved, a higher value otherwise
-+*/
-+static int acpi_result_to_string(union acpi_object *result) {
-+    if (result->type == ACPI_TYPE_INTEGER) {
-+        snprintf(get_buffer_end(), get_avail_bytes(),
-+            "0x%x", (int)result->integer.value);
-+    } else if (result->type == ACPI_TYPE_STRING) {
-+        snprintf(get_buffer_end(), get_avail_bytes(),
-+            "\"%*s\"", result->string.length, result->string.pointer);
-+    } else if (result->type == ACPI_TYPE_BUFFER) {
-+        int i;
-+        // do not store more than data if it does not fit. The first element is
-+        // just 4 chars, but there is also two bytes from the curly brackets
-+        int show_values = min((size_t)result->buffer.length, get_avail_bytes() / 6);
-+
-+        snprintf(get_buffer_end(), get_avail_bytes(), "{");
-+        for (i = 0; i < show_values; i++)
-+            sprintf(get_buffer_end(),
-+                i == 0 ? "0x%02x" : ", 0x%02x", result->buffer.pointer[i]);
-+
-+        if (result->buffer.length > show_values) {
-+            // if data was truncated, show a trailing comma if there is space
-+            snprintf(get_buffer_end(), get_avail_bytes(), ",");
-+            return 1;
-+        } else {
-+            // in case show_values == 0, but the buffer is too small to hold
-+            // more values (i.e. the buffer cannot have anything more than "{")
-+            snprintf(get_buffer_end(), get_avail_bytes(), "}");
-+        }
-+    } else if (result->type == ACPI_TYPE_PACKAGE) {
-+        int i;
-+        snprintf(get_buffer_end(), get_avail_bytes(), "[");
-+        for (i=0; i<result->package.count; i++) {
-+            if (i > 0)
-+                snprintf(get_buffer_end(), get_avail_bytes(), ", ");
-+
-+            // abort if there is no more space available
-+            if (!get_avail_bytes() || acpi_result_to_string(&result->package.elements[i]))
-+                return 1;
-+        }
-+        snprintf(get_buffer_end(), get_avail_bytes(), "]");
-+    } else {
-+        snprintf(get_buffer_end(), get_avail_bytes(),
-+            "Object type 0x%x\n", result->type);
-+    }
-+
-+    // return 0 if there are still bytes available, 1 otherwise
-+    return !get_avail_bytes();
-+}
-+
-+/**
-+@param method   The full name of ACPI method to call
-+@param argc     The number of parameters
-+@param argv     A pre-allocated array of arguments of type acpi_object
-+*/
-+static void do_acpi_call(const char * method, int argc, union acpi_object *argv)
-+{
-+    acpi_status status;
-+    acpi_handle handle;
-+    struct acpi_object_list arg;
-+    struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-+
-+#ifdef DEBUG
-+    printk(KERN_INFO "acpi_call: Calling %s\n", method);
-+#endif
-+
-+    // get the handle of the method, must be a fully qualified path
-+    status = acpi_get_handle(NULL, (acpi_string) method, &handle);
-+
-+    if (ACPI_FAILURE(status))
-+    {
-+        snprintf(result_buffer, BUFFER_SIZE, "Error: %s", acpi_format_exception(status));
-+        printk(KERN_ERR "acpi_call: Cannot get handle: %s\n", result_buffer);
-+        return;
-+    }
-+
-+    // prepare parameters
-+    arg.count = argc;
-+    arg.pointer = argv;
-+
-+    // call the method
-+    status = acpi_evaluate_object(handle, NULL, &arg, &buffer);
-+    if (ACPI_FAILURE(status))
-+    {
-+        snprintf(result_buffer, BUFFER_SIZE, "Error: %s", acpi_format_exception(status));
-+        printk(KERN_ERR "acpi_call: Method call failed: %s\n", result_buffer);
-+        return;
-+    }
-+
-+    // reset the result buffer
-+    *result_buffer = '\0';
-+    acpi_result_to_string(buffer.pointer);
-+    kfree(buffer.pointer);
-+
-+#ifdef DEBUG
-+    printk(KERN_INFO "acpi_call: Call successful: %s\n", result_buffer);
-+#endif
-+}
-+
-+/** Decodes 2 hex characters to an u8 int
-+*/
-+u8 decodeHex(char *hex) {
-+    char buf[3] = { hex[0], hex[1], 0};
-+    return (u8) simple_strtoul(buf, NULL, 16);
-+}
-+
-+/** Parses method name and arguments
-+@param input Input string to be parsed. Modified in the process.
-+@param nargs Set to number of arguments parsed (output)
-+@param args
-+*/
-+static char *parse_acpi_args(char *input, int *nargs, union acpi_object **args)
-+{
-+    char *s = input;
-+    int i;
-+
-+    *nargs = 0;
-+    *args = NULL;
-+
-+    // the method name is separated from the arguments by a space
-+    while (*s && *s != ' ')
-+        s++;
-+    // if no space is found, return 0 arguments
-+    if (*s == 0)
-+        return input;
-+
-+    *args = (union acpi_object *) kmalloc(MAX_ACPI_ARGS * sizeof(union acpi_object), GFP_KERNEL);
-+    if (!*args) {
-+        printk(KERN_ERR "acpi_call: unable to allocate buffer\n");
-+        return NULL;
-+    }
-+
-+    while (*s) {
-+        if (*s == ' ') {
-+            if (*nargs == 0)
-+                *s = 0; // change first space to nul
-+            ++ *nargs;
-+            ++ s;
-+        } else {
-+            union acpi_object *arg = (*args) + (*nargs - 1);
-+            if (*s == '"') {
-+                // decode string
-+                arg->type = ACPI_TYPE_STRING;
-+                arg->string.pointer = ++s;
-+                arg->string.length = 0;
-+                while (*s && *s++ != '"')
-+                    arg->string.length ++;
-+                // skip the last "
-+                if (*s == '"')
-+                    ++s;
-+            } else if (*s == 'b') {
-+                // decode buffer - bXXXX
-+                char *p = ++s;
-+                int len = 0, i;
-+                u8 *buf = NULL;
-+
-+                while (*p && *p!=' ')
-+                    p++;
-+
-+                len = p - s;
-+                if (len % 2 == 1) {
-+                    printk(KERN_ERR "acpi_call: buffer arg%d is not multiple of 8 bits\n", *nargs);
-+                    --*nargs;
-+                    goto err;
-+                }
-+                len /= 2;
-+
-+                buf = (u8*) kmalloc(len, GFP_KERNEL);
-+                if (!buf) {
-+                    printk(KERN_ERR "acpi_call: unable to allocate buffer\n");
-+                    --*nargs;
-+                    goto err;
-+                }
-+                for (i=0; i<len; i++) {
-+                    buf[i] = decodeHex(s + i*2);
-+                }
-+                s = p;
-+
-+                arg->type = ACPI_TYPE_BUFFER;
-+                arg->buffer.pointer = buf;
-+                arg->buffer.length = len;
-+            } else if (*s == '{') {
-+                // decode buffer - { b1, b2 ...}
-+                u8 *buf = temporary_buffer;
-+                arg->type = ACPI_TYPE_BUFFER;
-+                arg->buffer.pointer = buf;
-+                arg->buffer.length = 0;
-+                while (*s && *s++ != '}') {
-+                    if (buf >= temporary_buffer + sizeof(temporary_buffer)) {
-+                        printk(KERN_ERR "acpi_call: buffer arg%d is truncated because the buffer is full\n", *nargs);
-+                        // clear remaining arguments
-+                        while (*s && *s != '}')
-+                            ++s;
-+                        break;
-+                    }
-+                    else if (*s >= '0' && *s <= '9') {
-+                        // decode integer into buffer
-+                        arg->buffer.length ++;
-+                        if (s[0] == '0' && s[1] == 'x')
-+                            *buf++ = simple_strtol(s+2, 0, 16);
-+                        else
-+                            *buf++ = simple_strtol(s, 0, 10);
-+                    }
-+                    // skip until space or comma or '}'
-+                    while (*s && *s != ' ' && *s != ',' && *s != '}')
-+                        ++s;
-+                }
-+                // store the result in new allocated buffer
-+                buf = (u8*) kmalloc(arg->buffer.length, GFP_KERNEL);
-+                if (!buf) {
-+                    printk(KERN_ERR "acpi_call: unable to allocate buffer\n");
-+                    --*nargs;
-+                    goto err;
-+                }
-+                memcpy(buf, temporary_buffer, arg->buffer.length);
-+                arg->buffer.pointer = buf;
-+            } else {
-+                // decode integer, N or 0xN
-+                arg->type = ACPI_TYPE_INTEGER;
-+                if (s[0] == '0' && s[1] == 'x') {
-+                    arg->integer.value = simple_strtol(s+2, 0, 16);
-+                } else {
-+                    arg->integer.value = simple_strtol(s, 0, 10);
-+                }
-+                while (*s && *s != ' ') {
-+                    ++s;
-+                }
-+            }
-+        }
-+    }
-+
-+    return input;
-+
-+err:
-+    for (i=0; i<*nargs; i++)
-+        if ((*args)[i].type == ACPI_TYPE_BUFFER && (*args)[i].buffer.pointer)
-+            kfree((*args)[i].buffer.pointer);
-+    kfree(*args);
-+    return NULL;
-+}
-+
-+/** procfs write callback. Called when writing into /proc/acpi/call.
-+*/
-+#ifdef HAVE_PROC_CREATE
-+static ssize_t acpi_proc_write( struct file *filp, const char __user *buff,
-+    size_t len, loff_t *data )
-+#else
-+static int acpi_proc_write( struct file *filp, const char __user *buff,
-+    unsigned long len, void *data )
-+#endif
-+{
-+    union acpi_object *args;
-+    int nargs, i;
-+    char *method;
-+
-+    memset(input_buffer, 0, INPUT_BUFFER_SIZE);
-+    if (len > sizeof(input_buffer) - 1) {
-+#ifdef HAVE_PROC_CREATE
-+        printk(KERN_ERR "acpi_call: Input too long! (%zu)\n", len);
-+#else
-+        printk(KERN_ERR "acpi_call: Input too long! (%lu)\n", len);
-+#endif
-+        return -ENOSPC;
-+    }
-+
-+    if (copy_from_user( input_buffer, buff, len )) {
-+        return -EFAULT;
-+    }
-+    input_buffer[len] = '\0';
-+    if (input_buffer[len-1] == '\n')
-+        input_buffer[len-1] = '\0';
-+
-+    method = parse_acpi_args(input_buffer, &nargs, &args);
-+    if (method) {
-+        do_acpi_call(method, nargs, args);
-+        if (args) {
-+            for (i=0; i<nargs; i++)
-+                if (args[i].type == ACPI_TYPE_BUFFER)
-+                    kfree(args[i].buffer.pointer);
-+        }
-+    }
-+    if (args)
-+        kfree(args);
-+
-+    return len;
-+}
-+
-+/** procfs 'call' read callback. Called when reading the content of /proc/acpi/call.
-+Returns the last call status:
-+- "not called" when no call was previously issued
-+- "failed" if the call failed
-+- "ok" if the call succeeded
-+*/
-+#ifdef HAVE_PROC_CREATE
-+static ssize_t acpi_proc_read( struct file *filp, char __user *buff,
-+            size_t count, loff_t *off )
-+{
-+    ssize_t ret;
-+    int len = strlen(result_buffer);
-+
-+    if(len == 0) {
-+        ret = simple_read_from_buffer(buff, count, off, not_called_message, strlen(not_called_message) + 1);
-+    } else if(len + 1 > count) {
-+        // user buffer is too small
-+        ret = 0;
-+    } else if(*off == len + 1) {
-+        // we're done
-+        ret = 0;
-+        result_buffer[0] = '\0';
-+    } else {
-+        // output the current result buffer
-+        ret = simple_read_from_buffer(buff, count, off, result_buffer, len + 1);
-+        *off = ret;
-+    }
-+
-+    return ret;
-+}
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
-+static struct proc_ops proc_acpi_operations = {
-+	.proc_read = acpi_proc_read,
-+	.proc_write = acpi_proc_write,
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 13, 0)
-+	.proc_lseek = default_llseek,
-+#endif
-+};
-+#else
-+static struct file_operations proc_acpi_operations = {
-+        .owner    = THIS_MODULE,
-+        .read     = acpi_proc_read,
-+        .write    = acpi_proc_write,
-+};
-+#endif
-+
-+#else
-+static int acpi_proc_read(char *page, char **start, off_t off,
-+    int count, int *eof, void *data)
-+{
-+    int len = 0;
-+
-+    if (off > 0) {
-+        *eof = 1;
-+        return 0;
-+    }
-+
-+    // output the current result buffer
-+    len = strlen(result_buffer);
-+    memcpy(page, result_buffer, len + 1);
-+
-+    // initialize the result buffer for later
-+    strcpy(result_buffer, "not called");
-+
-+    return len;
-+}
-+#endif
-+
-+/** module initialization function */
-+static int __init init_acpi_call(void)
-+{
-+#ifdef HAVE_PROC_CREATE
-+    struct proc_dir_entry *acpi_entry = proc_create("call",
-+                                                    0660,
-+                                                    acpi_root_dir,
-+                                                    &proc_acpi_operations);
-+#else
-+    struct proc_dir_entry *acpi_entry = create_proc_entry("call", 0660, acpi_root_dir);
-+#endif
-+
-+    strcpy(result_buffer, "not called");
-+
-+    if (acpi_entry == NULL) {
-+      printk(KERN_ERR "acpi_call: Couldn't create proc entry\n");
-+      return -ENOMEM;
-+    }
-+
-+#ifndef HAVE_PROC_CREATE
-+    acpi_entry->write_proc = acpi_proc_write;
-+    acpi_entry->read_proc = acpi_proc_read;
-+#endif
-+
-+#ifdef DEBUG
-+    printk(KERN_INFO "acpi_call: Module loaded successfully\n");
-+#endif
-+
-+    return 0;
-+}
-+
-+static void __exit unload_acpi_call(void)
-+{
-+    remove_proc_entry("call", acpi_root_dir);
-+
-+#ifdef DEBUG
-+    printk(KERN_INFO "acpi_call: Module unloaded successfully\n");
-+#endif
-+}
-+
-+module_init(init_acpi_call);
-+module_exit(unload_acpi_call);
-\ No newline at end of file
--- 
-2.42.0
-
diff --git a/patches/nobara/0001-amd-hdr.patch b/patches/nobara/0001-amd-hdr.patch
deleted file mode 100644
index 030317f..0000000
--- a/patches/nobara/0001-amd-hdr.patch
+++ /dev/null
@@ -1,2042 +0,0 @@
-From af60f9afa522f5f337d9b4e24eef1fdcd0ab6c05 Mon Sep 17 00:00:00 2001
-From: Peter Jung <admin@ptr1337.dev>
-Date: Mon, 11 Sep 2023 14:31:43 +0200
-Subject: [PATCH 1/7] amd-hdr
-
-Signed-off-by: Peter Jung <admin@ptr1337.dev>
----
- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h      |  71 ++
- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  34 +-
- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 100 +++
- .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 805 ++++++++++++++++--
- .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c    |  72 ++
- .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 224 ++++-
- .../amd/display/dc/dcn10/dcn10_cm_common.c    |  95 ++-
- .../drm/amd/display/dc/dcn30/dcn30_hwseq.c    |  37 +
- .../drm/amd/display/dc/dcn30/dcn30_hwseq.h    |   3 +
- .../drm/amd/display/dc/dcn301/dcn301_init.c   |   2 +-
- .../gpu/drm/amd/display/include/fixed31_32.h  |  12 +
- drivers/gpu/drm/arm/malidp_crtc.c             |   2 +-
- drivers/gpu/drm/drm_atomic.c                  |   1 +
- drivers/gpu/drm/drm_atomic_state_helper.c     |   1 +
- drivers/gpu/drm/drm_property.c                |  49 ++
- include/drm/drm_mode_object.h                 |   2 +-
- include/drm/drm_plane.h                       |   7 +
- include/drm/drm_property.h                    |   6 +
- include/uapi/drm/drm_mode.h                   |   8 +
- 19 files changed, 1441 insertions(+), 90 deletions(-)
-
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
-index 32fe05c810c6..84bf501b02f4 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
-@@ -343,6 +343,77 @@ struct amdgpu_mode_info {
- 	int			disp_priority;
- 	const struct amdgpu_display_funcs *funcs;
- 	const enum drm_plane_type *plane_type;
-+
-+	/* Driver-private color mgmt props */
-+
-+	/* @plane_degamma_lut_property: Plane property to set a degamma LUT to
-+	 * convert input space before blending.
-+	 */
-+	struct drm_property *plane_degamma_lut_property;
-+	/* @plane_degamma_lut_size_property: Plane property to define the max
-+	 * size of degamma LUT as supported by the driver (read-only).
-+	 */
-+	struct drm_property *plane_degamma_lut_size_property;
-+	/**
-+	 * @plane_degamma_tf_property: Plane pre-defined transfer function to
-+	 * to go from scanout/encoded values to linear values.
-+	 */
-+	struct drm_property *plane_degamma_tf_property;
-+	/**
-+	 * @plane_hdr_mult_property:
-+	 */
-+	struct drm_property *plane_hdr_mult_property;
-+
-+	struct drm_property *plane_ctm_property;
-+	/**
-+	 * @shaper_lut_property: Plane property to set pre-blending shaper LUT
-+	 * that converts color content before 3D LUT.
-+	 */
-+	struct drm_property *plane_shaper_lut_property;
-+	/**
-+	 * @shaper_lut_size_property: Plane property for the size of
-+	 * pre-blending shaper LUT as supported by the driver (read-only).
-+	 */
-+	struct drm_property *plane_shaper_lut_size_property;
-+	/**
-+	 * @plane_shaper_tf_property: Plane property to set a predefined
-+	 * transfer function for pre-blending shaper (before applying 3D LUT)
-+	 * with or without LUT.
-+	 */
-+	struct drm_property *plane_shaper_tf_property;
-+	/**
-+	 * @plane_lut3d_property: Plane property for gamma correction using a
-+	 * 3D LUT (pre-blending).
-+	 */
-+	struct drm_property *plane_lut3d_property;
-+	/**
-+	 * @plane_degamma_lut_size_property: Plane property to define the max
-+	 * size of 3D LUT as supported by the driver (read-only).
-+	 */
-+	struct drm_property *plane_lut3d_size_property;
-+	/**
-+	 * @plane_blend_lut_property: Plane property for output gamma before
-+	 * blending. Userspace set a blend LUT to convert colors after 3D LUT
-+	 * conversion. It works as a post-3D LUT 1D LUT, with shaper LUT, they
-+	 * are sandwiching 3D LUT with two 1D LUT.
-+	 */
-+	struct drm_property *plane_blend_lut_property;
-+	/**
-+	 * @plane_blend_lut_size_property: Plane property to define the max
-+	 * size of blend LUT as supported by the driver (read-only).
-+	 */
-+	struct drm_property *plane_blend_lut_size_property;
-+	/**
-+	 * @plane_blend_tf_property: Plane property to set a predefined
-+	 * transfer function for pre-blending blend (before applying 3D LUT)
-+	 * with or without LUT.
-+	 */
-+	struct drm_property *plane_blend_tf_property;
-+	/* @regamma_tf_property: Transfer function for CRTC regamma
-+	 * (post-blending). Possible values are defined by `enum
-+	 * amdgpu_transfer_function`.
-+	 */
-+	struct drm_property *regamma_tf_property;
- };
-
- #define AMDGPU_MAX_BL_LEVEL 0xFF
-diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
-index 34f011cedd06..fb3400eff0b6 100644
---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
-+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
-@@ -4021,6 +4021,11 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
- 		return r;
- 	}
-
-+#ifdef AMD_PRIVATE_COLOR
-+	if (amdgpu_dm_create_color_properties(adev))
-+		return -ENOMEM;
-+#endif
-+
- 	r = amdgpu_dm_audio_init(adev);
- 	if (r) {
- 		dc_release_state(state->context);
-@@ -5093,7 +5098,9 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
- 	 * Always set input transfer function, since plane state is refreshed
- 	 * every time.
- 	 */
--	ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state);
-+	ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state,
-+						plane_state,
-+						dc_plane_state);
- 	if (ret)
- 		return ret;
-
-@@ -8113,6 +8120,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
- 			bundle->surface_updates[planes_count].gamma = dc_plane->gamma_correction;
- 			bundle->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func;
- 			bundle->surface_updates[planes_count].gamut_remap_matrix = &dc_plane->gamut_remap_matrix;
-+			bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult;
-+			bundle->surface_updates[planes_count].func_shaper = dc_plane->in_shaper_func;
-+			bundle->surface_updates[planes_count].lut3d_func = dc_plane->lut3d_func;
-+			bundle->surface_updates[planes_count].blend_tf = dc_plane->blend_tf;
- 		}
-
- 		amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
-@@ -8324,6 +8335,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
- 				&acrtc_state->stream->csc_color_matrix;
- 			bundle->stream_update.out_transfer_func =
- 				acrtc_state->stream->out_transfer_func;
-+			bundle->stream_update.lut3d_func =
-+				(struct dc_3dlut *) acrtc_state->stream->lut3d_func;
-+			bundle->stream_update.func_shaper =
-+				(struct dc_transfer_func *) acrtc_state->stream->func_shaper;
- 		}
-
- 		acrtc_state->stream->abm_level = acrtc_state->abm_level;
-@@ -9512,6 +9527,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
- 	 * when a modeset is needed, to ensure it gets reprogrammed.
- 	 */
- 	if (dm_new_crtc_state->base.color_mgmt_changed ||
-+	    dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf ||
- 	    drm_atomic_crtc_needs_modeset(new_crtc_state)) {
- 		ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state);
- 		if (ret)
-@@ -9579,6 +9595,10 @@ static bool should_reset_plane(struct drm_atomic_state *state,
- 	 */
- 	for_each_oldnew_plane_in_state(state, other, old_other_state, new_other_state, i) {
- 		struct amdgpu_framebuffer *old_afb, *new_afb;
-+		struct dm_plane_state *dm_new_other_state, *dm_old_other_state;
-+
-+		dm_new_other_state = to_dm_plane_state(new_other_state);
-+		dm_old_other_state = to_dm_plane_state(old_other_state);
-
- 		if (other->type == DRM_PLANE_TYPE_CURSOR)
- 			continue;
-@@ -9615,6 +9635,18 @@ static bool should_reset_plane(struct drm_atomic_state *state,
- 		    old_other_state->color_encoding != new_other_state->color_encoding)
- 			return true;
-
-+		/* HDR/Transfer Function changes. */
-+		if (dm_old_other_state->degamma_tf != dm_new_other_state->degamma_tf ||
-+		    dm_old_other_state->degamma_lut != dm_new_other_state->degamma_lut ||
-+		    dm_old_other_state->hdr_mult != dm_new_other_state->hdr_mult ||
-+		    dm_old_other_state->ctm != dm_new_other_state->ctm ||
-+		    dm_old_other_state->shaper_lut != dm_new_other_state->shaper_lut ||
-+		    dm_old_other_state->shaper_tf != dm_new_other_state->shaper_tf ||
-+		    dm_old_other_state->lut3d != dm_new_other_state->lut3d ||
-+		    dm_old_other_state->blend_lut != dm_new_other_state->blend_lut ||
-+		    dm_old_other_state->blend_tf != dm_new_other_state->blend_tf)
-+			return true;
-+
- 		/* Framebuffer checks fall at the end. */
- 		if (!old_other_state->fb || !new_other_state->fb)
- 			continue;
-diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
-index 9e4cc5eeda76..24c87f425afb 100644
---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
-+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
-@@ -33,6 +33,8 @@
- #include <drm/drm_plane.h>
- #include "link_service_types.h"
-
-+#define AMDGPU_HDR_MULT_DEFAULT (0x100000000LL)
-+
- /*
-  * This file contains the definition for amdgpu_display_manager
-  * and its API for amdgpu driver's use.
-@@ -716,9 +718,91 @@ static inline void amdgpu_dm_set_mst_status(uint8_t *status,
-
- extern const struct amdgpu_ip_block_version dm_ip_block;
-
-+enum amdgpu_transfer_function {
-+	AMDGPU_TRANSFER_FUNCTION_DEFAULT,
-+	AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF,
-+	AMDGPU_TRANSFER_FUNCTION_BT709_EOTF,
-+	AMDGPU_TRANSFER_FUNCTION_PQ_EOTF,
-+	AMDGPU_TRANSFER_FUNCTION_LINEAR,
-+	AMDGPU_TRANSFER_FUNCTION_UNITY,
-+	AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF,
-+	AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF,
-+	AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF,
-+	AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF,
-+	AMDGPU_TRANSFER_FUNCTION_BT709_INV_EOTF,
-+	AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF,
-+	AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF,
-+	AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF,
-+	AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF,
-+        AMDGPU_TRANSFER_FUNCTION_COUNT
-+};
-+
- struct dm_plane_state {
- 	struct drm_plane_state base;
- 	struct dc_plane_state *dc_state;
-+
-+	/* Plane color mgmt */
-+	/**
-+	 * @degamma_lut:
-+	 *
-+	 * 1D LUT for mapping framebuffer/plane pixel data before sampling or
-+	 * blending operations. It's usually applied to linearize input space.
-+	 * The blob (if not NULL) is an array of &struct drm_color_lut.
-+	 */
-+	struct drm_property_blob *degamma_lut;
-+	/**
-+	 * @degamma_tf:
-+	 *
-+	 * Predefined transfer function to tell DC driver the input space to
-+	 * linearize.
-+	 */
-+	enum amdgpu_transfer_function degamma_tf;
-+	/**
-+	 * @hdr_mult:
-+	 *
-+	 * Multiplier to 'gain' the plane.  When PQ is decoded using the fixed
-+	 * func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on
-+	 * AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously.
-+	 * Therefore, 1.0 multiplier = 80 nits for SDR content.  So if you
-+	 * want, 203 nits for SDR content, pass in (203.0 / 80.0).  Format is
-+	 * S31.32 sign-magnitude.
-+	 */
-+	__u64 hdr_mult;
-+	/**
-+	 * @ctm:
-+	 *
-+	 * Color transformation matrix. See drm_crtc_enable_color_mgmt(). The
-+	 * blob (if not NULL) is a &struct drm_color_ctm.
-+	 */
-+	struct drm_property_blob *ctm;
-+	/**
-+	 * @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an
-+	 * array of &struct drm_color_lut.
-+	 */
-+	struct drm_property_blob *shaper_lut;
-+	/**
-+	 * @shaper_tf:
-+	 *
-+	 * Predefined transfer function to delinearize color space.
-+	 */
-+	enum amdgpu_transfer_function shaper_tf;
-+	/**
-+	 * @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of
-+	 * &struct drm_color_lut.
-+	 */
-+	struct drm_property_blob *lut3d;
-+	/**
-+	 * @blend_lut: blend lut lookup table blob. The blob (if not NULL) is an
-+	 * array of &struct drm_color_lut.
-+	 */
-+	struct drm_property_blob *blend_lut;
-+	/**
-+	 * @blend_tf:
-+	 *
-+	 * Pre-defined transfer function for converting plane pixel data before
-+	 * applying blend LUT.
-+	 */
-+	enum amdgpu_transfer_function blend_tf;
- };
-
- struct dm_crtc_state {
-@@ -743,6 +827,14 @@ struct dm_crtc_state {
- 	struct dc_info_packet vrr_infopacket;
-
- 	int abm_level;
-+
-+        /**
-+	 * @regamma_tf:
-+	 *
-+	 * Pre-defined transfer function for converting internal FB -> wire
-+	 * encoding.
-+	 */
-+	enum amdgpu_transfer_function regamma_tf;
- };
-
- #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base)
-@@ -804,14 +896,22 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
-
- void amdgpu_dm_trigger_timing_sync(struct drm_device *dev);
-
-+/* 3D LUT max size is 17x17x17 */
-+#define MAX_COLOR_3DLUT_ENTRIES 4913
-+#define MAX_COLOR_3DLUT_BITDEPTH 12
-+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
-+				struct drm_plane_state *plane_state);
-+/* 1D LUT size */
- #define MAX_COLOR_LUT_ENTRIES 4096
- /* Legacy gamm LUT users such as X doesn't like large LUT sizes */
- #define MAX_COLOR_LEGACY_LUT_ENTRIES 256
-
- void amdgpu_dm_init_color_mod(void);
-+int amdgpu_dm_create_color_properties(struct amdgpu_device *adev);
- int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
- int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
- int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
-+				      struct drm_plane_state *plane_state,
- 				      struct dc_plane_state *dc_plane_state);
-
- void amdgpu_dm_update_connector_after_detect(
-diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
-index a4cb23d059bd..0442eeaa9763 100644
---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
-+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
-@@ -72,6 +72,7 @@
-  */
-
- #define MAX_DRM_LUT_VALUE 0xFFFF
-+#define SDR_WHITE_LEVEL_INIT_VALUE 80
-
- /**
-  * amdgpu_dm_init_color_mod - Initialize the color module.
-@@ -84,6 +85,213 @@ void amdgpu_dm_init_color_mod(void)
- 	setup_x_points_distribution();
- }
-
-+#ifdef AMD_PRIVATE_COLOR
-+/* Pre-defined Transfer Functions (TF)
-+ *
-+ * AMD driver supports pre-defined mathematical functions for transferring
-+ * between encoded values and optical/linear space. Depending on HW color caps,
-+ * ROMs and curves built by the AMD color module support these transforms.
-+ *
-+ * The driver-specific color implementation exposes properties for pre-blending
-+ * degamma TF, shaper TF (before 3D LUT), and blend(dpp.ogam) TF and
-+ * post-blending regamma (mpc.ogam) TF. However, only pre-blending degamma
-+ * supports ROM curves. AMD color module uses pre-defined coefficients to build
-+ * curves for the other blocks. What can be done by each color block is
-+ * described by struct dpp_color_capsand struct mpc_color_caps.
-+ *
-+ * AMD driver-specific color API exposes the following pre-defined transfer
-+ * functions:
-+ *
-+ * - Linear/Unity: linear/identity relationship between pixel value and
-+ *   luminance value;
-+ * - Gamma 2.2, Gamma 2.4, Gamma 2.6: pure gamma functions;
-+ * - sRGB: 2.4 gamma with small initial linear section as standardized by IEC
-+ *   61966-2-1:1999;
-+ * - BT.709 (BT.1886): 2.4 gamma with differences in the dark end of the scale.
-+ *   Used in HD-TV and standardized by ITU-R BT.1886;
-+ * - PQ (Perceptual Quantizer): used for HDR display, allows luminance range
-+ *   capability of 0 to 10,000 nits; standardized by SMPTE ST 2084.
-+ *
-+ * In the driver-specific API, color block names attached to TF properties
-+ * suggest the intention regarding non-linear encoding pixel's luminance
-+ * values. As some newer encodings don't use gamma curve, we make encoding and
-+ * decoding explicit by defining an enum list of transfer functions supported
-+ * in terms of EOTF and inverse EOTF, where:
-+ *
-+ * - EOTF (electro-optical transfer function): is the transfer function to go
-+ *   from the encoded value to an optical (linear) value. De-gamma functions
-+ *   traditionally do this.
-+ * - Inverse EOTF (simply the inverse of the EOTF): is usually intended to go
-+ *   from an optical/linear space (which might have been used for blending)
-+ *   back to the encoded values. Gamma functions traditionally do this.
-+ */
-+static const char * const
-+amdgpu_transfer_function_names[] = {
-+	[AMDGPU_TRANSFER_FUNCTION_DEFAULT]		= "Default",
-+	[AMDGPU_TRANSFER_FUNCTION_LINEAR]		= "Linear",
-+	[AMDGPU_TRANSFER_FUNCTION_UNITY]		= "Unity",
-+	[AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF]		= "sRGB EOTF",
-+	[AMDGPU_TRANSFER_FUNCTION_BT709_EOTF]		= "BT.709 EOTF",
-+	[AMDGPU_TRANSFER_FUNCTION_PQ_EOTF]		= "PQ EOTF",
-+	[AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF]		= "Gamma 2.2 EOTF",
-+	[AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF]		= "Gamma 2.4 EOTF",
-+	[AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF]		= "Gamma 2.6 EOTF",
-+	[AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF]	= "sRGB inv_EOTF",
-+	[AMDGPU_TRANSFER_FUNCTION_BT709_INV_EOTF]	= "BT.709 inv_EOTF",
-+	[AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF]		= "PQ inv_EOTF",
-+	[AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF]	= "Gamma 2.2 inv_EOTF",
-+	[AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF]	= "Gamma 2.4 inv_EOTF",
-+	[AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF]	= "Gamma 2.6 inv_EOTF",
-+};
-+
-+static const u32 amdgpu_eotf =
-+	BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF) |
-+	BIT(AMDGPU_TRANSFER_FUNCTION_BT709_EOTF) |
-+	BIT(AMDGPU_TRANSFER_FUNCTION_PQ_EOTF) |
-+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF) |
-+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) |
-+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF);
-+
-+static const u32 amdgpu_inv_eotf =
-+	BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF) |
-+	BIT(AMDGPU_TRANSFER_FUNCTION_BT709_INV_EOTF) |
-+	BIT(AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF) |
-+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF) |
-+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF) |
-+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF);
-+
-+static struct drm_property *
-+amdgpu_create_tf_property(struct drm_device *dev,
-+			  const char *name,
-+			  u32 supported_tf)
-+{
-+	u32 transfer_functions = supported_tf |
-+				 BIT(AMDGPU_TRANSFER_FUNCTION_DEFAULT) |
-+				 BIT(AMDGPU_TRANSFER_FUNCTION_LINEAR) |
-+				 BIT(AMDGPU_TRANSFER_FUNCTION_UNITY);
-+	struct drm_prop_enum_list enum_list[AMDGPU_TRANSFER_FUNCTION_COUNT];
-+	int i, len;
-+
-+	len = 0;
-+	for (i = 0; i < AMDGPU_TRANSFER_FUNCTION_COUNT; i++) {
-+		if ((transfer_functions & BIT(i)) == 0)
-+			continue;
-+
-+		enum_list[len].type = i;
-+		enum_list[len].name = amdgpu_transfer_function_names[i];
-+		len++;
-+	}
-+
-+	return drm_property_create_enum(dev, DRM_MODE_PROP_ENUM,
-+					name, enum_list, len);
-+}
-+
-+int
-+amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
-+{
-+	struct drm_property *prop;
-+
-+	prop = drm_property_create(adev_to_drm(adev),
-+				   DRM_MODE_PROP_BLOB,
-+				   "AMD_PLANE_DEGAMMA_LUT", 0);
-+	if (!prop)
-+		return -ENOMEM;
-+	adev->mode_info.plane_degamma_lut_property = prop;
-+
-+	prop = drm_property_create_range(adev_to_drm(adev),
-+					 DRM_MODE_PROP_IMMUTABLE,
-+					 "AMD_PLANE_DEGAMMA_LUT_SIZE", 0, UINT_MAX);
-+	if (!prop)
-+		return -ENOMEM;
-+	adev->mode_info.plane_degamma_lut_size_property = prop;
-+
-+	prop = amdgpu_create_tf_property(adev_to_drm(adev),
-+					 "AMD_PLANE_DEGAMMA_TF",
-+					 amdgpu_eotf);
-+	if (!prop)
-+		return -ENOMEM;
-+	adev->mode_info.plane_degamma_tf_property = prop;
-+
-+	prop = drm_property_create_range(adev_to_drm(adev),
-+					 0, "AMD_PLANE_HDR_MULT", 0, U64_MAX);
-+	if (!prop)
-+		return -ENOMEM;
-+	adev->mode_info.plane_hdr_mult_property = prop;
-+
-+	prop = drm_property_create(adev_to_drm(adev),
-+				   DRM_MODE_PROP_BLOB,
-+				   "AMD_PLANE_CTM", 0);
-+	if (!prop)
-+		return -ENOMEM;
-+	adev->mode_info.plane_ctm_property = prop;
-+
-+	prop = drm_property_create(adev_to_drm(adev),
-+				   DRM_MODE_PROP_BLOB,
-+				   "AMD_PLANE_SHAPER_LUT", 0);
-+	if (!prop)
-+		return -ENOMEM;
-+	adev->mode_info.plane_shaper_lut_property = prop;
-+
-+	prop = drm_property_create_range(adev_to_drm(adev),
-+					 DRM_MODE_PROP_IMMUTABLE,
-+					 "AMD_PLANE_SHAPER_LUT_SIZE", 0, UINT_MAX);
-+	if (!prop)
-+		return -ENOMEM;
-+	adev->mode_info.plane_shaper_lut_size_property = prop;
-+
-+	prop = amdgpu_create_tf_property(adev_to_drm(adev),
-+					 "AMD_PLANE_SHAPER_TF",
-+					 amdgpu_inv_eotf);
-+	if (!prop)
-+		return -ENOMEM;
-+	adev->mode_info.plane_shaper_tf_property = prop;
-+
-+	prop = drm_property_create(adev_to_drm(adev),
-+				   DRM_MODE_PROP_BLOB,
-+				   "AMD_PLANE_LUT3D", 0);
-+	if (!prop)
-+		return -ENOMEM;
-+	adev->mode_info.plane_lut3d_property = prop;
-+
-+	prop = drm_property_create_range(adev_to_drm(adev),
-+					 DRM_MODE_PROP_IMMUTABLE,
-+					 "AMD_PLANE_LUT3D_SIZE", 0, UINT_MAX);
-+	if (!prop)
-+		return -ENOMEM;
-+	adev->mode_info.plane_lut3d_size_property = prop;
-+
-+	prop = drm_property_create(adev_to_drm(adev),
-+				   DRM_MODE_PROP_BLOB,
-+				   "AMD_PLANE_BLEND_LUT", 0);
-+	if (!prop)
-+		return -ENOMEM;
-+	adev->mode_info.plane_blend_lut_property = prop;
-+
-+	prop = drm_property_create_range(adev_to_drm(adev),
-+					 DRM_MODE_PROP_IMMUTABLE,
-+					 "AMD_PLANE_BLEND_LUT_SIZE", 0, UINT_MAX);
-+	if (!prop)
-+		return -ENOMEM;
-+	adev->mode_info.plane_blend_lut_size_property = prop;
-+
-+	prop = amdgpu_create_tf_property(adev_to_drm(adev),
-+					 "AMD_PLANE_BLEND_TF",
-+					 amdgpu_eotf);
-+	if (!prop)
-+		return -ENOMEM;
-+	adev->mode_info.plane_blend_tf_property = prop;
-+
-+	prop = amdgpu_create_tf_property(adev_to_drm(adev),
-+					 "AMD_CRTC_REGAMMA_TF",
-+					 amdgpu_inv_eotf);
-+	if (!prop)
-+		return -ENOMEM;
-+	adev->mode_info.regamma_tf_property = prop;
-+
-+	return 0;
-+}
-+#endif
-+
- /**
-  * __extract_blob_lut - Extracts the DRM lut and lut size from a blob.
-  * @blob: DRM color mgmt property blob
-@@ -182,7 +390,6 @@ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut,
- static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
- 				   struct fixed31_32 *matrix)
- {
--	int64_t val;
- 	int i;
-
- 	/*
-@@ -201,12 +408,33 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
- 		}
-
- 		/* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
--		val = ctm->matrix[i - (i / 4)];
--		/* If negative, convert to 2's complement. */
--		if (val & (1ULL << 63))
--			val = -(val & ~(1ULL << 63));
-+		matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i - (i / 4)]);
-+	}
-+}
-
--		matrix[i].value = val;
-+/**
-+ * __drm_ctm2_to_dc_matrix - converts a DRM CTM2 to a DC CSC float matrix
-+ * @ctm: DRM color transformation matrix
-+ * @matrix: DC CSC float matrix
-+ *
-+ * The matrix needs to be a 3x4 (12 entry) matrix.
-+ */
-+static void __drm_ctm2_to_dc_matrix(const struct drm_color_ctm2 *ctm,
-+				   struct fixed31_32 *matrix)
-+{
-+	int i;
-+
-+	/*
-+	 * DRM gives a 3x3 matrix, but DC wants 3x4. Assuming we're operating
-+	 * with homogeneous coordinates, augment the matrix with 0's.
-+	 *
-+	 * The format provided is S31.32, using signed-magnitude representation.
-+	 * Our fixed31_32 is also S31.32, but is using 2's complement. We have
-+	 * to convert from signed-magnitude to 2's complement.
-+	 */
-+	for (i = 0; i < 12; i++) {
-+		/* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
-+		matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i]);
- 	}
- }
-
-@@ -268,16 +496,18 @@ static int __set_output_tf(struct dc_transfer_func *func,
- 	struct calculate_buffer cal_buffer = {0};
- 	bool res;
-
--	ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES);
--
- 	cal_buffer.buffer_index = -1;
-
--	gamma = dc_create_gamma();
--	if (!gamma)
--		return -ENOMEM;
-+	if (lut_size) {
-+		ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES);
-
--	gamma->num_entries = lut_size;
--	__drm_lut_to_dc_gamma(lut, gamma, false);
-+		gamma = dc_create_gamma();
-+		if (!gamma)
-+			return -ENOMEM;
-+
-+		gamma->num_entries = lut_size;
-+		__drm_lut_to_dc_gamma(lut, gamma, false);
-+	}
-
- 	if (func->tf == TRANSFER_FUNCTION_LINEAR) {
- 		/*
-@@ -285,27 +515,63 @@ static int __set_output_tf(struct dc_transfer_func *func,
- 		 * on top of a linear input. But degamma params can be used
- 		 * instead to simulate this.
- 		 */
--		gamma->type = GAMMA_CUSTOM;
-+		if (gamma)
-+			gamma->type = GAMMA_CUSTOM;
- 		res = mod_color_calculate_degamma_params(NULL, func,
--							gamma, true);
-+							 gamma, gamma != NULL);
- 	} else {
- 		/*
- 		 * Assume sRGB. The actual mapping will depend on whether the
- 		 * input was legacy or not.
- 		 */
--		gamma->type = GAMMA_CS_TFM_1D;
--		res = mod_color_calculate_regamma_params(func, gamma, false,
-+		if (gamma)
-+			gamma->type = GAMMA_CS_TFM_1D;
-+		res = mod_color_calculate_regamma_params(func, gamma, gamma != NULL,
- 							 has_rom, NULL, &cal_buffer);
- 	}
-
--	dc_gamma_release(&gamma);
-+	if (gamma)
-+		dc_gamma_release(&gamma);
-
- 	return res ? 0 : -ENOMEM;
- }
-
-+static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream,
-+					const struct drm_color_lut *regamma_lut,
-+					uint32_t regamma_size, bool has_rom,
-+					enum dc_transfer_func_predefined tf)
-+{
-+	struct dc_transfer_func *out_tf = stream->out_transfer_func;
-+	int ret = 0;
-+
-+	if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) {
-+		/* CRTC RGM goes into RGM LUT.
-+		 *
-+		 * Note: there is no implicit sRGB regamma here. We are using
-+		 * degamma calculation from color module to calculate the curve
-+		 * from a linear base.
-+		 */
-+		out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
-+		out_tf->tf = tf;
-+		out_tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
-+
-+		ret = __set_output_tf(out_tf, regamma_lut, regamma_size, has_rom);
-+	} else {
-+		/*
-+		 * No CRTC RGM means we can just put the block into bypass
-+		 * since we don't have any plane level adjustments using it.
-+		 */
-+		out_tf->type = TF_TYPE_BYPASS;
-+		out_tf->tf = TRANSFER_FUNCTION_LINEAR;
-+	}
-+
-+	return ret;
-+}
-+
- /**
-  * __set_input_tf - calculates the input transfer function based on expected
-  * input space.
-+ * @caps: dc color capabilities
-  * @func: transfer function
-  * @lut: lookup table that defines the color space
-  * @lut_size: size of respective lut.
-@@ -313,27 +579,249 @@ static int __set_output_tf(struct dc_transfer_func *func,
-  * Returns:
-  * 0 in case of success. -ENOMEM if fails.
-  */
--static int __set_input_tf(struct dc_transfer_func *func,
-+static int __set_input_tf(struct dc_color_caps *caps, struct dc_transfer_func *func,
- 			  const struct drm_color_lut *lut, uint32_t lut_size)
- {
- 	struct dc_gamma *gamma = NULL;
- 	bool res;
-
--	gamma = dc_create_gamma();
--	if (!gamma)
--		return -ENOMEM;
-+	if (lut_size) {
-+		gamma = dc_create_gamma();
-+		if (!gamma)
-+			return -ENOMEM;
-
--	gamma->type = GAMMA_CUSTOM;
--	gamma->num_entries = lut_size;
-+		gamma->type = GAMMA_CUSTOM;
-+		gamma->num_entries = lut_size;
-
--	__drm_lut_to_dc_gamma(lut, gamma, false);
-+		__drm_lut_to_dc_gamma(lut, gamma, false);
-+	}
-
--	res = mod_color_calculate_degamma_params(NULL, func, gamma, true);
--	dc_gamma_release(&gamma);
-+	res = mod_color_calculate_degamma_params(caps, func, gamma, gamma != NULL);
-+
-+	if (gamma)
-+		dc_gamma_release(&gamma);
-
- 	return res ? 0 : -ENOMEM;
- }
-
-+static enum dc_transfer_func_predefined
-+amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
-+{
-+	switch (tf)
-+	{
-+	default:
-+	case AMDGPU_TRANSFER_FUNCTION_DEFAULT:
-+	case AMDGPU_TRANSFER_FUNCTION_LINEAR:
-+		return TRANSFER_FUNCTION_LINEAR;
-+	case AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF:
-+	case AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF:
-+		return TRANSFER_FUNCTION_SRGB;
-+	case AMDGPU_TRANSFER_FUNCTION_BT709_EOTF:
-+	case AMDGPU_TRANSFER_FUNCTION_BT709_INV_EOTF:
-+		return TRANSFER_FUNCTION_BT709;
-+	case AMDGPU_TRANSFER_FUNCTION_PQ_EOTF:
-+	case AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF:
-+		return TRANSFER_FUNCTION_PQ;
-+	case AMDGPU_TRANSFER_FUNCTION_UNITY:
-+		return TRANSFER_FUNCTION_UNITY;
-+	case AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF:
-+	case AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF:
-+		return TRANSFER_FUNCTION_GAMMA22;
-+	case AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF:
-+	case AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF:
-+		return TRANSFER_FUNCTION_GAMMA24;
-+	case AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF:
-+	case AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF:
-+		return TRANSFER_FUNCTION_GAMMA26;
-+	}
-+}
-+
-+static void __to_dc_lut3d_color(struct dc_rgb *rgb,
-+				const struct drm_color_lut lut,
-+				int bit_precision)
-+{
-+	rgb->red = drm_color_lut_extract(lut.red, bit_precision);
-+	rgb->green = drm_color_lut_extract(lut.green, bit_precision);
-+	rgb->blue  = drm_color_lut_extract(lut.blue, bit_precision);
-+}
-+
-+static void __drm_3dlut_to_dc_3dlut(const struct drm_color_lut *lut,
-+				    uint32_t lut3d_size,
-+				    struct tetrahedral_params *params,
-+				    bool use_tetrahedral_9,
-+				    int bit_depth)
-+{
-+	struct dc_rgb *lut0;
-+	struct dc_rgb *lut1;
-+	struct dc_rgb *lut2;
-+	struct dc_rgb *lut3;
-+	int lut_i, i;
-+
-+
-+	if (use_tetrahedral_9) {
-+		lut0 = params->tetrahedral_9.lut0;
-+		lut1 = params->tetrahedral_9.lut1;
-+		lut2 = params->tetrahedral_9.lut2;
-+		lut3 = params->tetrahedral_9.lut3;
-+	} else {
-+		lut0 = params->tetrahedral_17.lut0;
-+		lut1 = params->tetrahedral_17.lut1;
-+		lut2 = params->tetrahedral_17.lut2;
-+		lut3 = params->tetrahedral_17.lut3;
-+	}
-+
-+	for (lut_i = 0, i = 0; i < lut3d_size - 4; lut_i++, i += 4) {
-+		/* We should consider the 3dlut RGB values are distributed
-+		 * along four arrays lut0-3 where the first sizes 1229 and the
-+		 * other 1228. The bit depth supported for 3dlut channel is
-+		 * 12-bit, but DC also supports 10-bit.
-+		 *
-+		 * TODO: improve color pipeline API to enable the userspace set
-+		 * bit depth and 3D LUT size/stride, as specified by VA-API.
-+		 */
-+		__to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth);
-+		__to_dc_lut3d_color(&lut1[lut_i], lut[i + 1], bit_depth);
-+		__to_dc_lut3d_color(&lut2[lut_i], lut[i + 2], bit_depth);
-+		__to_dc_lut3d_color(&lut3[lut_i], lut[i + 3], bit_depth);
-+	}
-+	/* lut0 has 1229 points (lut_size/4 + 1) */
-+	__to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth);
-+}
-+
-+/* amdgpu_dm_atomic_lut3d - set DRM 3D LUT to DC stream
-+ * @drm_lut3d: DRM CRTC (user) 3D LUT
-+ * @drm_lut3d_size: size of 3D LUT
-+ * @lut3d: DC 3D LUT
-+ *
-+ * Map DRM CRTC 3D LUT to DC 3D LUT and all necessary bits to program it
-+ * on DCN MPC accordingly.
-+ */
-+static void amdgpu_dm_atomic_lut3d(const struct drm_color_lut *drm_lut,
-+				   uint32_t drm_lut3d_size,
-+				   struct dc_3dlut *lut)
-+{
-+	if (!drm_lut3d_size) {
-+		lut->state.bits.initialized = 0;
-+	} else {
-+		/* Stride and bit depth are not programmable by API yet.
-+		 * Therefore, only supports 17x17x17 3D LUT (12-bit).
-+		 */
-+		lut->lut_3d.use_tetrahedral_9 = false;
-+		lut->lut_3d.use_12bits = true;
-+		lut->state.bits.initialized = 1;
-+		__drm_3dlut_to_dc_3dlut(drm_lut, drm_lut3d_size, &lut->lut_3d,
-+					lut->lut_3d.use_tetrahedral_9,
-+					MAX_COLOR_3DLUT_BITDEPTH);
-+	}
-+}
-+
-+static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut,
-+				       bool has_rom,
-+				       enum dc_transfer_func_predefined tf,
-+				       uint32_t shaper_size,
-+				       struct dc_transfer_func *func_shaper)
-+{
-+	int ret = 0;
-+
-+	if (shaper_size || tf != TRANSFER_FUNCTION_LINEAR) {
-+		/* If DRM shaper LUT is set, we assume a linear color space
-+		 * (linearized by DRM degamma 1D LUT or not)
-+		 */
-+		func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS;
-+		func_shaper->tf = tf;
-+		func_shaper->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
-+
-+		ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, has_rom);
-+	} else {
-+		func_shaper->type = TF_TYPE_BYPASS;
-+		func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
-+	}
-+
-+	return ret;
-+}
-+
-+static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut,
-+				       bool has_rom,
-+				       enum dc_transfer_func_predefined tf,
-+				       uint32_t blend_size,
-+				       struct dc_transfer_func *func_blend)
-+{
-+	int ret = 0;
-+
-+	if (blend_size || tf != TRANSFER_FUNCTION_LINEAR) {
-+		/* DRM plane gamma LUT or TF means we are linearizing color
-+		 * space before blending (similar to degamma programming). As
-+		 * we don't have hardcoded curve support, or we use AMD color
-+		 * module to fill the parameters that will be translated to HW
-+		 * points.
-+		 */
-+		func_blend->type = TF_TYPE_DISTRIBUTED_POINTS;
-+		func_blend->tf = tf;
-+		func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
-+
-+		ret = __set_input_tf(NULL, func_blend, blend_lut, blend_size);
-+	} else {
-+		func_blend->type = TF_TYPE_BYPASS;
-+		func_blend->tf = TRANSFER_FUNCTION_LINEAR;
-+	}
-+
-+	return ret;
-+}
-+
-+/* amdgpu_dm_lut3d_size - get expected size according to hw color caps
-+ * @adev: amdgpu device
-+ * @lut_size: default size
-+ *
-+ * Return:
-+ * lut_size if DC 3D LUT is supported, zero otherwise.
-+ */
-+static uint32_t amdgpu_dm_get_lut3d_size(struct amdgpu_device *adev,
-+					 uint32_t lut_size)
-+{
-+	return adev->dm.dc->caps.color.dpp.hw_3d_lut ? lut_size : 0;
-+}
-+
-+/**
-+ * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if DRM 3D
-+ * LUT matches the hw supported size
-+ * @adev: amdgpu device
-+ * @crtc_state: the DRM CRTC state
-+ *
-+ * Verifies if post-blending (MPC) 3D LUT is supported by the HW (DCN 3.0 or
-+ * newer) and if the DRM 3D LUT matches the supported size.
-+ *
-+ * Returns:
-+ * 0 on success. -EINVAL if lut size are invalid.
-+ */
-+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
-+				struct drm_plane_state *plane_state)
-+{
-+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
-+	const struct drm_color_lut *shaper = NULL, *lut3d = NULL;
-+	uint32_t exp_size, size;
-+
-+	/* shaper LUT is only available if 3D LUT color caps*/
-+	exp_size = amdgpu_dm_get_lut3d_size(adev, MAX_COLOR_LUT_ENTRIES);
-+	shaper = __extract_blob_lut(dm_plane_state->shaper_lut, &size);
-+
-+	if (shaper && size != exp_size) {
-+		drm_dbg(&adev->ddev,
-+			"Invalid Shaper LUT size. Should be %u but got %u.\n",
-+			exp_size, size);
-+	}
-+
-+	exp_size = amdgpu_dm_get_lut3d_size(adev, MAX_COLOR_3DLUT_ENTRIES);
-+	lut3d = __extract_blob_lut(dm_plane_state->lut3d, &size);
-+
-+	if (lut3d && size != exp_size) {
-+		drm_dbg(&adev->ddev, "Invalid 3D LUT size. Should be %u but got %u.\n",
-+			exp_size, size);
-+		return -EINVAL;
-+	}
-+
-+	return 0;
-+}
-+
- /**
-  * amdgpu_dm_verify_lut_sizes - verifies if DRM luts match the hw supported sizes
-  * @crtc_state: the DRM CRTC state
-@@ -401,9 +889,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
- 	const struct drm_color_lut *degamma_lut, *regamma_lut;
- 	uint32_t degamma_size, regamma_size;
- 	bool has_regamma, has_degamma;
-+	enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_LINEAR;
- 	bool is_legacy;
- 	int r;
-
-+	tf = amdgpu_tf_to_dc_tf(crtc->regamma_tf);
-+
- 	r = amdgpu_dm_verify_lut_sizes(&crtc->base);
- 	if (r)
- 		return r;
-@@ -440,26 +931,22 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
- 		stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
- 		stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
-
-+		/* Note: although we pass has_rom as parameter here, we never
-+		 * actually use ROM because the color module only takes the ROM
-+		 * path if transfer_func->type == PREDEFINED.
-+		 *
-+		 * See more in mod_color_calculate_regamma_params()
-+		 */
- 		r = __set_legacy_tf(stream->out_transfer_func, regamma_lut,
- 				    regamma_size, has_rom);
- 		if (r)
- 			return r;
--	} else if (has_regamma) {
--		/* If atomic regamma, CRTC RGM goes into RGM LUT. */
--		stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
--		stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
--
--		r = __set_output_tf(stream->out_transfer_func, regamma_lut,
--				    regamma_size, has_rom);
-+	} else {
-+		regamma_size = has_regamma ? regamma_size : 0;
-+		r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut,
-+						 regamma_size, has_rom, tf);
- 		if (r)
- 			return r;
--	} else {
--		/*
--		 * No CRTC RGM means we can just put the block into bypass
--		 * since we don't have any plane level adjustments using it.
--		 */
--		stream->out_transfer_func->type = TF_TYPE_BYPASS;
--		stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
- 	}
-
- 	/*
-@@ -495,20 +982,10 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
- 	return 0;
- }
-
--/**
-- * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
-- * @crtc: amdgpu_dm crtc state
-- * @dc_plane_state: target DC surface
-- *
-- * Update the underlying dc_stream_state's input transfer function (ITF) in
-- * preparation for hardware commit. The transfer function used depends on
-- * the preparation done on the stream for color management.
-- *
-- * Returns:
-- * 0 on success. -ENOMEM if mem allocation fails.
-- */
--int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
--				      struct dc_plane_state *dc_plane_state)
-+static int
-+map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
-+			     struct dc_plane_state *dc_plane_state,
-+			     struct dc_color_caps *caps)
- {
- 	const struct drm_color_lut *degamma_lut;
- 	enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
-@@ -531,8 +1008,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
- 						 &degamma_size);
- 		ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
-
--		dc_plane_state->in_transfer_func->type =
--			TF_TYPE_DISTRIBUTED_POINTS;
-+		dc_plane_state->in_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
-
- 		/*
- 		 * This case isn't fully correct, but also fairly
-@@ -564,11 +1040,11 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
- 			dc_plane_state->in_transfer_func->tf =
- 				TRANSFER_FUNCTION_LINEAR;
-
--		r = __set_input_tf(dc_plane_state->in_transfer_func,
-+		r = __set_input_tf(caps, dc_plane_state->in_transfer_func,
- 				   degamma_lut, degamma_size);
- 		if (r)
- 			return r;
--	} else if (crtc->cm_is_degamma_srgb) {
-+	} else {
- 		/*
- 		 * For legacy gamma support we need the regamma input
- 		 * in linear space. Assume that the input is sRGB.
-@@ -577,14 +1053,213 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
- 		dc_plane_state->in_transfer_func->tf = tf;
-
- 		if (tf != TRANSFER_FUNCTION_SRGB &&
--		    !mod_color_calculate_degamma_params(NULL,
--			    dc_plane_state->in_transfer_func, NULL, false))
-+		    !mod_color_calculate_degamma_params(caps,
-+							dc_plane_state->in_transfer_func,
-+							NULL, false))
-+			return -ENOMEM;
-+	}
-+
-+	return 0;
-+}
-+
-+static int
-+__set_dm_plane_degamma(struct drm_plane_state *plane_state,
-+		       struct dc_plane_state *dc_plane_state,
-+		       struct dc_color_caps *color_caps)
-+{
-+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
-+	const struct drm_color_lut *degamma_lut;
-+	enum amdgpu_transfer_function tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
-+	uint32_t degamma_size;
-+	bool has_degamma_lut;
-+	int ret;
-+
-+	degamma_lut = __extract_blob_lut(dm_plane_state->degamma_lut,
-+					 &degamma_size);
-+
-+	has_degamma_lut = degamma_lut &&
-+			  !__is_lut_linear(degamma_lut, degamma_size);
-+
-+	tf = dm_plane_state->degamma_tf;
-+
-+	/* If we don't have plane degamma LUT nor TF to set on DC, we have
-+	 * nothing to do here, return.
-+	 */
-+	if (!has_degamma_lut && tf == AMDGPU_TRANSFER_FUNCTION_DEFAULT)
-+		return -EINVAL;
-+
-+	dc_plane_state->in_transfer_func->tf = amdgpu_tf_to_dc_tf(tf);
-+
-+	if (has_degamma_lut) {
-+		ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
-+
-+		dc_plane_state->in_transfer_func->type =
-+			TF_TYPE_DISTRIBUTED_POINTS;
-+
-+		ret = __set_input_tf(color_caps, dc_plane_state->in_transfer_func,
-+				     degamma_lut, degamma_size);
-+		if (ret)
-+			return ret;
-+       } else {
-+		dc_plane_state->in_transfer_func->type =
-+			TF_TYPE_PREDEFINED;
-+
-+		if (!mod_color_calculate_degamma_params(color_caps,
-+		    dc_plane_state->in_transfer_func, NULL, false))
- 			return -ENOMEM;
--	} else {
--		/* ...Otherwise we can just bypass the DGM block. */
--		dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
--		dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
-+	}
-+	return 0;
-+}
-+
-+static int
-+amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state,
-+				     struct dc_plane_state *dc_plane_state,
-+				     struct dc_color_caps *color_caps)
-+{
-+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
-+	enum amdgpu_transfer_function shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
-+	enum amdgpu_transfer_function blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
-+	const struct drm_color_lut *shaper_lut, *lut3d, *blend_lut;
-+	uint32_t shaper_size, lut3d_size, blend_size;
-+	int ret;
-+
-+	/* We have nothing to do here, return */
-+	if (!plane_state->color_mgmt_changed)
-+		return 0;
-+
-+	dc_plane_state->hdr_mult = dc_fixpt_from_s3132(dm_plane_state->hdr_mult);
-+
-+	shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, &shaper_size);
-+	shaper_size = shaper_lut != NULL ? shaper_size : 0;
-+	shaper_tf = dm_plane_state->shaper_tf;
-+	lut3d = __extract_blob_lut(dm_plane_state->lut3d, &lut3d_size);
-+	lut3d_size = lut3d != NULL ? lut3d_size : 0;
-+
-+	amdgpu_dm_atomic_lut3d(lut3d, lut3d_size, dc_plane_state->lut3d_func);
-+	ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, false,
-+					  amdgpu_tf_to_dc_tf(shaper_tf),
-+					  shaper_size,
-+					  dc_plane_state->in_shaper_func);
-+	if (ret) {
-+		drm_dbg_kms(plane_state->plane->dev,
-+			    "setting plane %d shaper LUT failed.\n",
-+			    plane_state->plane->index);
-+
-+		return ret;
-+	}
-+
-+	blend_tf = dm_plane_state->blend_tf;
-+	blend_lut = __extract_blob_lut(dm_plane_state->blend_lut, &blend_size);
-+	blend_size = blend_lut != NULL ? blend_size : 0;
-+
-+	ret = amdgpu_dm_atomic_blend_lut(blend_lut, false,
-+					 amdgpu_tf_to_dc_tf(blend_tf),
-+					 blend_size, dc_plane_state->blend_tf);
-+	if (ret) {
-+		drm_dbg_kms(plane_state->plane->dev,
-+			    "setting plane %d gamma lut failed.\n",
-+			    plane_state->plane->index);
-+
-+		return ret;
- 	}
-
- 	return 0;
- }
-+
-+/**
-+ * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
-+ * @crtc: amdgpu_dm crtc state
-+ * @plane_state: DRM plane state
-+ * @dc_plane_state: target DC surface
-+ *
-+ * Update the underlying dc_stream_state's input transfer function (ITF) in
-+ * preparation for hardware commit. The transfer function used depends on
-+ * the preparation done on the stream for color management.
-+ *
-+ * Returns:
-+ * 0 on success. -ENOMEM if mem allocation fails.
-+ */
-+int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
-+				      struct drm_plane_state *plane_state,
-+				      struct dc_plane_state *dc_plane_state)
-+{
-+	struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
-+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
-+	struct drm_color_ctm2 *ctm = NULL;
-+	struct dc_color_caps *color_caps = NULL;
-+	bool has_crtc_cm_degamma;
-+	int ret;
-+
-+	ret = amdgpu_dm_verify_lut3d_size(adev, plane_state);
-+	if (ret) {
-+		drm_dbg_driver(&adev->ddev, "amdgpu_dm_verify_lut3d_size() failed\n");
-+		return ret;
-+	}
-+
-+	if (dc_plane_state->ctx && dc_plane_state->ctx->dc)
-+		color_caps = &dc_plane_state->ctx->dc->caps.color;
-+
-+	/* Initially, we can just bypass the DGM block. */
-+	dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
-+	dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
-+
-+	/* After, we start to update values according to color props */
-+	has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb);
-+
-+	ret = __set_dm_plane_degamma(plane_state, dc_plane_state, color_caps);
-+	if (ret == -ENOMEM)
-+		return ret;
-+
-+	/* We only have one degamma block available (pre-blending) for the
-+	 * whole color correction pipeline, so that we can't actually perform
-+	 * plane and CRTC degamma at the same time. Explicitly reject atomic
-+	 * updates when userspace sets both plane and CRTC degamma properties.
-+	 */
-+	if (has_crtc_cm_degamma && ret != -EINVAL){
-+		drm_dbg_kms(crtc->base.crtc->dev,
-+			    "doesn't support plane and CRTC degamma at the same time\n");
-+			return -EINVAL;
-+	}
-+
-+	/* If we are here, it means we don't have plane degamma settings, check
-+	 * if we have CRTC degamma waiting for mapping to pre-blending degamma
-+	 * block
-+	 */
-+	if (has_crtc_cm_degamma) {
-+		/* AMD HW doesn't have post-blending degamma caps. When DRM
-+		 * CRTC atomic degamma is set, we maps it to DPP degamma block
-+		 * (pre-blending) or, on legacy gamma, we use DPP degamma to
-+		 * linearize (implicit degamma) from sRGB/BT709 according to
-+		 * the input space.
-+		 */
-+		ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state, color_caps);
-+		if (ret)
-+			return ret;
-+	}
-+
-+	/* Setup CRTC CTM. */
-+	if (dm_plane_state->ctm) {
-+		ctm = (struct drm_color_ctm2 *)dm_plane_state->ctm->data;
-+
-+		/*
-+		 * So far, if we have both plane and CRTC CTM, plane CTM takes
-+		 * the priority and we discard data for CRTC CTM, as
-+		 * implemented in dcn10_program_gamut_remap().  However, we
-+		 * have MPC gamut_remap_matrix from DCN3 family, therefore we
-+		 * can remap MPC programing of the matrix to MPC block and
-+		 * provide support for both DPP and MPC matrix at the same
-+		 * time.
-+		 */
-+		__drm_ctm2_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix);
-+
-+		dc_plane_state->gamut_remap_matrix.enable_remap = true;
-+		dc_plane_state->input_csc_color_matrix.enable_adjustment = false;
-+	} else {
-+		/* Bypass CTM. */
-+		dc_plane_state->gamut_remap_matrix.enable_remap = false;
-+		dc_plane_state->input_csc_color_matrix.enable_adjustment = false;
-+	}
-+
-+	return amdgpu_dm_plane_set_color_properties(plane_state,
-+						    dc_plane_state, color_caps);
-+}
-diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
-index 97b7a0b8a1c2..a05c210754d4 100644
---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
-+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
-@@ -260,6 +260,7 @@ static struct drm_crtc_state *dm_crtc_duplicate_state(struct drm_crtc *crtc)
- 	state->freesync_config = cur->freesync_config;
- 	state->cm_has_degamma = cur->cm_has_degamma;
- 	state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb;
-+	state->regamma_tf = cur->regamma_tf;
- 	state->crc_skip_count = cur->crc_skip_count;
- 	state->mpo_requested = cur->mpo_requested;
- 	/* TODO Duplicate dc_stream after objects are stream object is flattened */
-@@ -296,6 +297,70 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc)
- }
- #endif
-
-+#ifdef AMD_PRIVATE_COLOR
-+/**
-+ * drm_crtc_additional_color_mgmt - enable additional color properties
-+ * @crtc: DRM CRTC
-+ *
-+ * This function lets the driver enable post-blending CRTC regamma transfer
-+ * function property in addition to DRM CRTC gamma LUT. Default value means
-+ * linear transfer function, which is the default CRTC gamma LUT behaviour
-+ * without this property.
-+ */
-+static void
-+dm_crtc_additional_color_mgmt(struct drm_crtc *crtc)
-+{
-+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
-+
-+	if(adev->dm.dc->caps.color.mpc.ogam_ram)
-+		drm_object_attach_property(&crtc->base,
-+					   adev->mode_info.regamma_tf_property,
-+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
-+}
-+
-+static int
-+amdgpu_dm_atomic_crtc_set_property(struct drm_crtc *crtc,
-+				   struct drm_crtc_state *state,
-+				   struct drm_property *property,
-+				   uint64_t val)
-+{
-+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
-+	struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state);
-+
-+	if (property == adev->mode_info.regamma_tf_property) {
-+		if (acrtc_state->regamma_tf != val) {
-+			acrtc_state->regamma_tf = val;
-+			acrtc_state->base.color_mgmt_changed |= 1;
-+		}
-+	} else {
-+		drm_dbg_atomic(crtc->dev,
-+			       "[CRTC:%d:%s] unknown property [PROP:%d:%s]]\n",
-+			       crtc->base.id, crtc->name,
-+			       property->base.id, property->name);
-+		return -EINVAL;
-+	}
-+
-+	return 0;
-+}
-+
-+static int
-+amdgpu_dm_atomic_crtc_get_property(struct drm_crtc *crtc,
-+				   const struct drm_crtc_state *state,
-+				   struct drm_property *property,
-+				   uint64_t *val)
-+{
-+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
-+	struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state);
-+
-+	if (property == adev->mode_info.regamma_tf_property)
-+		*val = acrtc_state->regamma_tf;
-+	else
-+		return -EINVAL;
-+
-+	return 0;
-+}
-+#endif
-+
- /* Implemented only the options currently available for the driver */
- static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
- 	.reset = dm_crtc_reset_state,
-@@ -314,6 +379,10 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
- #if defined(CONFIG_DEBUG_FS)
- 	.late_register = amdgpu_dm_crtc_late_register,
- #endif
-+#ifdef AMD_PRIVATE_COLOR
-+	.atomic_set_property = amdgpu_dm_atomic_crtc_set_property,
-+	.atomic_get_property = amdgpu_dm_atomic_crtc_get_property,
-+#endif
- };
-
- static void dm_crtc_helper_disable(struct drm_crtc *crtc)
-@@ -489,6 +558,9 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
-
- 	drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES);
-
-+#ifdef AMD_PRIVATE_COLOR
-+	dm_crtc_additional_color_mgmt(&acrtc->base);
-+#endif
- 	return 0;
-
- fail:
-diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
-index cc74dd69acf2..17719e15cbe5 100644
---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
-+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
-@@ -1333,8 +1333,14 @@ static void dm_drm_plane_reset(struct drm_plane *plane)
- 	amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL);
- 	WARN_ON(amdgpu_state == NULL);
-
--	if (amdgpu_state)
--		__drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
-+	if (!amdgpu_state)
-+		return;
-+
-+	__drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
-+	amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
-+	amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT;
-+	amdgpu_state->shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
-+	amdgpu_state->blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
- }
-
- static struct drm_plane_state *
-@@ -1354,6 +1360,22 @@ dm_drm_plane_duplicate_state(struct drm_plane *plane)
- 		dc_plane_state_retain(dm_plane_state->dc_state);
- 	}
-
-+	if (dm_plane_state->degamma_lut)
-+		drm_property_blob_get(dm_plane_state->degamma_lut);
-+	if (dm_plane_state->ctm)
-+		drm_property_blob_get(dm_plane_state->ctm);
-+	if (dm_plane_state->shaper_lut)
-+		drm_property_blob_get(dm_plane_state->shaper_lut);
-+	if (dm_plane_state->lut3d)
-+		drm_property_blob_get(dm_plane_state->lut3d);
-+	if (dm_plane_state->blend_lut)
-+		drm_property_blob_get(dm_plane_state->blend_lut);
-+
-+	dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf;
-+	dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult;
-+	dm_plane_state->shaper_tf = old_dm_plane_state->shaper_tf;
-+	dm_plane_state->blend_tf = old_dm_plane_state->blend_tf;
-+
- 	return &dm_plane_state->base;
- }
-
-@@ -1421,12 +1443,203 @@ static void dm_drm_plane_destroy_state(struct drm_plane *plane,
- {
- 	struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
-
-+	if (dm_plane_state->degamma_lut)
-+		drm_property_blob_put(dm_plane_state->degamma_lut);
-+	if (dm_plane_state->ctm)
-+		drm_property_blob_put(dm_plane_state->ctm);
-+	if (dm_plane_state->lut3d)
-+		drm_property_blob_put(dm_plane_state->lut3d);
-+	if (dm_plane_state->shaper_lut)
-+		drm_property_blob_put(dm_plane_state->shaper_lut);
-+	if (dm_plane_state->blend_lut)
-+		drm_property_blob_put(dm_plane_state->blend_lut);
-+
- 	if (dm_plane_state->dc_state)
- 		dc_plane_state_release(dm_plane_state->dc_state);
-
- 	drm_atomic_helper_plane_destroy_state(plane, state);
- }
-
-+#ifdef AMD_PRIVATE_COLOR
-+static void
-+dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
-+					     struct drm_plane *plane)
-+{
-+	struct amdgpu_mode_info mode_info = dm->adev->mode_info;
-+	struct dpp_color_caps dpp_color_caps = dm->dc->caps.color.dpp;
-+
-+	/* Check HW color pipeline capabilities for DPP (pre-blending) before expose*/
-+	if (dpp_color_caps.dgam_ram || dpp_color_caps.gamma_corr) {
-+		drm_object_attach_property(&plane->base,
-+					   mode_info.plane_degamma_lut_property, 0);
-+		drm_object_attach_property(&plane->base,
-+					   mode_info.plane_degamma_lut_size_property,
-+					   MAX_COLOR_LUT_ENTRIES);
-+		drm_object_attach_property(&plane->base,
-+					   dm->adev->mode_info.plane_degamma_tf_property,
-+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
-+	}
-+	/* HDR MULT is always available */
-+	drm_object_attach_property(&plane->base,
-+				   dm->adev->mode_info.plane_hdr_mult_property,
-+				   AMDGPU_HDR_MULT_DEFAULT);
-+
-+	/* Only enable plane CTM if both DPP and MPC gamut remap is available. */
-+	if (dm->dc->caps.color.mpc.gamut_remap)
-+		drm_object_attach_property(&plane->base,
-+					   dm->adev->mode_info.plane_ctm_property, 0);
-+
-+	if (dpp_color_caps.hw_3d_lut) {
-+		drm_object_attach_property(&plane->base,
-+					   mode_info.plane_shaper_lut_property, 0);
-+		drm_object_attach_property(&plane->base,
-+					   mode_info.plane_shaper_lut_size_property,
-+					   MAX_COLOR_LUT_ENTRIES);
-+		drm_object_attach_property(&plane->base,
-+					   mode_info.plane_shaper_tf_property,
-+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
-+		drm_object_attach_property(&plane->base,
-+					   mode_info.plane_lut3d_property, 0);
-+		drm_object_attach_property(&plane->base,
-+					   mode_info.plane_lut3d_size_property,
-+					   MAX_COLOR_3DLUT_ENTRIES);
-+	}
-+
-+	if (dpp_color_caps.ogam_ram) {
-+		drm_object_attach_property(&plane->base,
-+					   mode_info.plane_blend_lut_property, 0);
-+		drm_object_attach_property(&plane->base,
-+					   mode_info.plane_blend_lut_size_property,
-+					   MAX_COLOR_LUT_ENTRIES);
-+		drm_object_attach_property(&plane->base,
-+					   mode_info.plane_blend_tf_property,
-+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
-+	}
-+}
-+
-+static int
-+dm_atomic_plane_set_property(struct drm_plane *plane,
-+			     struct drm_plane_state *state,
-+			     struct drm_property *property,
-+			     uint64_t val)
-+{
-+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
-+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
-+	bool replaced = false;
-+	int ret;
-+
-+	if (property == adev->mode_info.plane_degamma_lut_property) {
-+		ret = drm_property_replace_blob_from_id(plane->dev,
-+							&dm_plane_state->degamma_lut,
-+							val,
-+							-1, sizeof(struct drm_color_lut),
-+							&replaced);
-+		dm_plane_state->base.color_mgmt_changed |= replaced;
-+		return ret;
-+	} else if (property == adev->mode_info.plane_degamma_tf_property) {
-+		if (dm_plane_state->degamma_tf != val) {
-+			dm_plane_state->degamma_tf = val;
-+			dm_plane_state->base.color_mgmt_changed = 1;
-+		}
-+	} else if (property == adev->mode_info.plane_hdr_mult_property) {
-+		if (dm_plane_state->hdr_mult != val) {
-+			dm_plane_state->hdr_mult = val;
-+			dm_plane_state->base.color_mgmt_changed = 1;
-+		}
-+	} else if (property == adev->mode_info.plane_ctm_property) {
-+		ret = drm_property_replace_blob_from_id(plane->dev,
-+							&dm_plane_state->ctm,
-+							val,
-+							sizeof(struct drm_color_ctm2), -1,
-+							&replaced);
-+		dm_plane_state->base.color_mgmt_changed |= replaced;
-+		return ret;
-+	} else if (property == adev->mode_info.plane_shaper_lut_property) {
-+		ret = drm_property_replace_blob_from_id(plane->dev,
-+							&dm_plane_state->shaper_lut,
-+							val, -1,
-+							sizeof(struct drm_color_lut),
-+							&replaced);
-+		dm_plane_state->base.color_mgmt_changed |= replaced;
-+		return ret;
-+	} else if (property == adev->mode_info.plane_shaper_tf_property) {
-+		if (dm_plane_state->shaper_tf != val) {
-+			dm_plane_state->shaper_tf = val;
-+			dm_plane_state->base.color_mgmt_changed = 1;
-+		}
-+	} else if (property == adev->mode_info.plane_lut3d_property) {
-+		ret = drm_property_replace_blob_from_id(plane->dev,
-+							&dm_plane_state->lut3d,
-+							val, -1,
-+							sizeof(struct drm_color_lut),
-+							&replaced);
-+		dm_plane_state->base.color_mgmt_changed |= replaced;
-+		return ret;
-+	} else if (property == adev->mode_info.plane_blend_lut_property) {
-+		ret = drm_property_replace_blob_from_id(plane->dev,
-+							&dm_plane_state->blend_lut,
-+							val, -1,
-+							sizeof(struct drm_color_lut),
-+							&replaced);
-+		dm_plane_state->base.color_mgmt_changed |= replaced;
-+		return ret;
-+	} else if (property == adev->mode_info.plane_blend_tf_property) {
-+		if (dm_plane_state->blend_tf != val) {
-+			dm_plane_state->blend_tf = val;
-+			dm_plane_state->base.color_mgmt_changed = 1;
-+		}
-+	} else {
-+		drm_dbg_atomic(plane->dev,
-+			       "[PLANE:%d:%s] unknown property [PROP:%d:%s]]\n",
-+			       plane->base.id, plane->name,
-+			       property->base.id, property->name);
-+		return -EINVAL;
-+	}
-+
-+	return 0;
-+}
-+
-+static int
-+dm_atomic_plane_get_property(struct drm_plane *plane,
-+			     const struct drm_plane_state *state,
-+			     struct drm_property *property,
-+			     uint64_t *val)
-+{
-+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
-+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
-+
-+	if (property == adev->mode_info.plane_degamma_lut_property) {
-+		*val = (dm_plane_state->degamma_lut) ?
-+			dm_plane_state->degamma_lut->base.id : 0;
-+	} else if (property == adev->mode_info.plane_degamma_tf_property) {
-+		*val = dm_plane_state->degamma_tf;
-+	} else if (property == adev->mode_info.plane_hdr_mult_property) {
-+		*val = dm_plane_state->hdr_mult;
-+	} else if (property == adev->mode_info.plane_ctm_property) {
-+		*val = (dm_plane_state->ctm) ?
-+			dm_plane_state->ctm->base.id : 0;
-+	} else 	if (property == adev->mode_info.plane_shaper_lut_property) {
-+		*val = (dm_plane_state->shaper_lut) ?
-+			dm_plane_state->shaper_lut->base.id : 0;
-+	} else if (property == adev->mode_info.plane_shaper_tf_property) {
-+		*val = dm_plane_state->shaper_tf;
-+	} else 	if (property == adev->mode_info.plane_lut3d_property) {
-+		*val = (dm_plane_state->lut3d) ?
-+			dm_plane_state->lut3d->base.id : 0;
-+	} else 	if (property == adev->mode_info.plane_blend_lut_property) {
-+		*val = (dm_plane_state->blend_lut) ?
-+			dm_plane_state->blend_lut->base.id : 0;
-+	} else if (property == adev->mode_info.plane_blend_tf_property) {
-+		*val = dm_plane_state->blend_tf;
-+
-+	} else {
-+		return -EINVAL;
-+	}
-+
-+	return 0;
-+}
-+#endif
-+
- static const struct drm_plane_funcs dm_plane_funcs = {
- 	.update_plane	= drm_atomic_helper_update_plane,
- 	.disable_plane	= drm_atomic_helper_disable_plane,
-@@ -1435,6 +1648,10 @@ static const struct drm_plane_funcs dm_plane_funcs = {
- 	.atomic_duplicate_state = dm_drm_plane_duplicate_state,
- 	.atomic_destroy_state = dm_drm_plane_destroy_state,
- 	.format_mod_supported = dm_plane_format_mod_supported,
-+#ifdef AMD_PRIVATE_COLOR
-+	.atomic_set_property = dm_atomic_plane_set_property,
-+	.atomic_get_property = dm_atomic_plane_get_property,
-+#endif
- };
-
- int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
-@@ -1514,6 +1731,9 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
-
- 	drm_plane_helper_add(plane, &dm_plane_helper_funcs);
-
-+#ifdef AMD_PRIVATE_COLOR
-+	dm_atomic_plane_attach_color_mgmt_properties(dm, plane);
-+#endif
- 	/* Create (reset) the plane state */
- 	if (plane->funcs->reset)
- 		plane->funcs->reset(plane);
-diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
-index 3538973bd0c6..04b2e04b68f3 100644
---- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
-+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
-@@ -349,20 +349,37 @@ bool cm_helper_translate_curve_to_hw_format(struct dc_context *ctx,
- 		 * segment is from 2^-10 to 2^1
- 		 * There are less than 256 points, for optimization
- 		 */
--		seg_distr[0] = 3;
--		seg_distr[1] = 4;
--		seg_distr[2] = 4;
--		seg_distr[3] = 4;
--		seg_distr[4] = 4;
--		seg_distr[5] = 4;
--		seg_distr[6] = 4;
--		seg_distr[7] = 4;
--		seg_distr[8] = 4;
--		seg_distr[9] = 4;
--		seg_distr[10] = 1;
--
--		region_start = -10;
--		region_end = 1;
-+		if (output_tf->tf == TRANSFER_FUNCTION_LINEAR) {
-+			seg_distr[0] = 0; /* 2 */
-+			seg_distr[1] = 1; /* 4 */
-+			seg_distr[2] = 2; /* 4 */
-+			seg_distr[3] = 3; /* 8 */
-+			seg_distr[4] = 4; /* 16 */
-+			seg_distr[5] = 5; /* 32 */
-+			seg_distr[6] = 6; /* 64 */
-+			seg_distr[7] = 7; /* 128 */
-+
-+			region_start = -8;
-+			region_end = 1;
-+		} else {
-+			seg_distr[0] = 3; /* 8 */
-+			seg_distr[1] = 4; /* 16 */
-+			seg_distr[2] = 4;
-+			seg_distr[3] = 4;
-+			seg_distr[4] = 4;
-+			seg_distr[5] = 4;
-+			seg_distr[6] = 4;
-+			seg_distr[7] = 4;
-+			seg_distr[8] = 4;
-+			seg_distr[9] = 4;
-+			seg_distr[10] = 1; /* 2 */
-+			/* total = 8*16 + 8 + 64 + 2 = */
-+
-+			region_start = -10;
-+			region_end = 1;
-+		}
-+
-+
- 	}
-
- 	for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++)
-@@ -375,16 +392,56 @@ bool cm_helper_translate_curve_to_hw_format(struct dc_context *ctx,
-
- 	j = 0;
- 	for (k = 0; k < (region_end - region_start); k++) {
--		increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]);
-+		/*
-+		 * We're using an ugly-ish hack here. Our HW allows for
-+		 * 256 segments per region but SW_SEGMENTS is 16.
-+		 * SW_SEGMENTS has some undocumented relationship to
-+		 * the number of points in the tf_pts struct, which
-+		 * is 512, unlike what's suggested TRANSFER_FUNC_POINTS.
-+		 *
-+		 * In order to work past this dilemma we'll scale our
-+		 * increment by (1 << 4) and then do the inverse (1 >> 4)
-+		 * when accessing the elements in tf_pts.
-+		 *
-+		 * TODO: find a better way using SW_SEGMENTS and
-+		 *       TRANSFER_FUNC_POINTS definitions
-+		 */
-+		increment = (NUMBER_SW_SEGMENTS << 4) / (1 << seg_distr[k]);
- 		start_index = (region_start + k + MAX_LOW_POINT) *
- 				NUMBER_SW_SEGMENTS;
--		for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS;
-+		for (i = (start_index << 4); i < (start_index << 4) + (NUMBER_SW_SEGMENTS << 4);
- 				i += increment) {
-+			struct fixed31_32 in_plus_one, in;
-+			struct fixed31_32 value, red_value, green_value, blue_value;
-+			uint32_t t = i & 0xf;
-+
- 			if (j == hw_points - 1)
- 				break;
--			rgb_resulted[j].red = output_tf->tf_pts.red[i];
--			rgb_resulted[j].green = output_tf->tf_pts.green[i];
--			rgb_resulted[j].blue = output_tf->tf_pts.blue[i];
-+
-+			in_plus_one = output_tf->tf_pts.red[(i >> 4) + 1];
-+			in = output_tf->tf_pts.red[i >> 4];
-+			value = dc_fixpt_sub(in_plus_one, in);
-+			value = dc_fixpt_shr(dc_fixpt_mul_int(value, t),  4);
-+			value = dc_fixpt_add(in, value);
-+			red_value = value;
-+
-+			in_plus_one = output_tf->tf_pts.green[(i >> 4) + 1];
-+			in = output_tf->tf_pts.green[i >> 4];
-+			value = dc_fixpt_sub(in_plus_one, in);
-+			value = dc_fixpt_shr(dc_fixpt_mul_int(value, t),  4);
-+			value = dc_fixpt_add(in, value);
-+			green_value = value;
-+
-+			in_plus_one = output_tf->tf_pts.blue[(i >> 4) + 1];
-+			in = output_tf->tf_pts.blue[i >> 4];
-+			value = dc_fixpt_sub(in_plus_one, in);
-+			value = dc_fixpt_shr(dc_fixpt_mul_int(value, t),  4);
-+			value = dc_fixpt_add(in, value);
-+			blue_value = value;
-+
-+			rgb_resulted[j].red = red_value;
-+			rgb_resulted[j].green = green_value;
-+			rgb_resulted[j].blue = blue_value;
- 			j++;
- 		}
- 	}
-diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
-index 255713ec29bb..fce9b33c0f88 100644
---- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
-+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
-@@ -186,6 +186,43 @@ bool dcn30_set_input_transfer_func(struct dc *dc,
- 	return result;
- }
-
-+void dcn30_program_gamut_remap(struct pipe_ctx *pipe_ctx)
-+{
-+	int i = 0;
-+	struct dpp_grph_csc_adjustment dpp_adjust;
-+	struct mpc_grph_gamut_adjustment mpc_adjust;
-+	int mpcc_id = pipe_ctx->plane_res.hubp->inst;
-+	struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
-+
-+	memset(&dpp_adjust, 0, sizeof(dpp_adjust));
-+	dpp_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
-+
-+	if (pipe_ctx->plane_state &&
-+	    pipe_ctx->plane_state->gamut_remap_matrix.enable_remap == true) {
-+		dpp_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
-+		for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
-+			dpp_adjust.temperature_matrix[i] =
-+				pipe_ctx->plane_state->gamut_remap_matrix.matrix[i];
-+	}
-+
-+	pipe_ctx->plane_res.dpp->funcs->dpp_set_gamut_remap(pipe_ctx->plane_res.dpp,
-+							    &dpp_adjust);
-+
-+	memset(&mpc_adjust, 0, sizeof(mpc_adjust));
-+	mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
-+
-+	if (pipe_ctx->top_pipe == NULL) {
-+		if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) {
-+			mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
-+			for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
-+				mpc_adjust.temperature_matrix[i] =
-+					pipe_ctx->stream->gamut_remap_matrix.matrix[i];
-+		}
-+	}
-+
-+	mpc->funcs->set_gamut_remap(mpc, mpcc_id, &mpc_adjust);
-+}
-+
- bool dcn30_set_output_transfer_func(struct dc *dc,
- 				struct pipe_ctx *pipe_ctx,
- 				const struct dc_stream_state *stream)
-diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
-index ce19c54097f8..e557e2b98618 100644
---- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
-+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
-@@ -58,6 +58,9 @@ bool dcn30_set_blend_lut(struct pipe_ctx *pipe_ctx,
- bool dcn30_set_input_transfer_func(struct dc *dc,
- 				struct pipe_ctx *pipe_ctx,
- 				const struct dc_plane_state *plane_state);
-+
-+void dcn30_program_gamut_remap(struct pipe_ctx *pipe_ctx);
-+
- bool dcn30_set_output_transfer_func(struct dc *dc,
- 				struct pipe_ctx *pipe_ctx,
- 				const struct dc_stream_state *stream);
-diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
-index 61205cdbe2d5..fdbe3d42cd7b 100644
---- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
-+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
-@@ -33,7 +33,7 @@
- #include "dcn301_init.h"
-
- static const struct hw_sequencer_funcs dcn301_funcs = {
--	.program_gamut_remap = dcn10_program_gamut_remap,
-+	.program_gamut_remap = dcn30_program_gamut_remap,
- 	.init_hw = dcn10_init_hw,
- 	.power_down_on_boot = dcn10_power_down_on_boot,
- 	.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
-diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h
-index d4cf7ead1d87..84da1dd34efd 100644
---- a/drivers/gpu/drm/amd/display/include/fixed31_32.h
-+++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h
-@@ -69,6 +69,18 @@ static const struct fixed31_32 dc_fixpt_epsilon = { 1LL };
- static const struct fixed31_32 dc_fixpt_half = { 0x80000000LL };
- static const struct fixed31_32 dc_fixpt_one = { 0x100000000LL };
-
-+static inline struct fixed31_32 dc_fixpt_from_s3132(__u64 x)
-+{
-+	struct fixed31_32 val;
-+
-+	/* If negative, convert to 2's complement. */
-+	if (x & (1ULL << 63))
-+		x = -(x & ~(1ULL << 63));
-+
-+	val.value = x;
-+	return val;
-+}
-+
- /*
-  * @brief
-  * Initialization routines
-diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c
-index dc01c43f6193..d72c22dcf685 100644
---- a/drivers/gpu/drm/arm/malidp_crtc.c
-+++ b/drivers/gpu/drm/arm/malidp_crtc.c
-@@ -221,7 +221,7 @@ static int malidp_crtc_atomic_check_ctm(struct drm_crtc *crtc,
-
- 	/*
- 	 * The size of the ctm is checked in
--	 * drm_atomic_replace_property_blob_from_id.
-+	 * drm_property_replace_blob_from_id.
- 	 */
- 	ctm = (struct drm_color_ctm *)state->ctm->data;
- 	for (i = 0; i < ARRAY_SIZE(ctm->matrix); ++i) {
-diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
-index c277b198fa3f..c3df45f90145 100644
---- a/drivers/gpu/drm/drm_atomic.c
-+++ b/drivers/gpu/drm/drm_atomic.c
-@@ -733,6 +733,7 @@ static void drm_atomic_plane_print_state(struct drm_printer *p,
- 		   drm_get_color_encoding_name(state->color_encoding));
- 	drm_printf(p, "\tcolor-range=%s\n",
- 		   drm_get_color_range_name(state->color_range));
-+	drm_printf(p, "\tcolor_mgmt_changed=%d\n", state->color_mgmt_changed);
-
- 	if (plane->funcs->atomic_print_state)
- 		plane->funcs->atomic_print_state(p, state);
-diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c
-index 784e63d70a42..25bb0859fda7 100644
---- a/drivers/gpu/drm/drm_atomic_state_helper.c
-+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
-@@ -338,6 +338,7 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane,
- 	state->fence = NULL;
- 	state->commit = NULL;
- 	state->fb_damage_clips = NULL;
-+	state->color_mgmt_changed = false;
- }
- EXPORT_SYMBOL(__drm_atomic_helper_plane_duplicate_state);
-
-diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c
-index dfec479830e4..f72ef6493340 100644
---- a/drivers/gpu/drm/drm_property.c
-+++ b/drivers/gpu/drm/drm_property.c
-@@ -751,6 +751,55 @@ bool drm_property_replace_blob(struct drm_property_blob **blob,
- }
- EXPORT_SYMBOL(drm_property_replace_blob);
-
-+/**
-+ * drm_property_replace_blob_from_id - replace a blob property taking a reference
-+ * @dev: DRM device
-+ * @blob: a pointer to the member blob to be replaced
-+ * @blob_id: the id of the new blob to replace with
-+ * @expected_size: expected size of the blob property
-+ * @expected_elem_size: expected size of an element in the blob property
-+ * @replaced: if the blob was in fact replaced
-+ *
-+ * Look up the new blob from id, take its reference, check expected sizes of
-+ * the blob and its element and replace the old blob by the new one. Advertise
-+ * if the replacement operation was successful.
-+ *
-+ * Return: true if the blob was in fact replaced. -EINVAL if the new blob was
-+ * not found or sizes don't match.
-+ */
-+int drm_property_replace_blob_from_id(struct drm_device *dev,
-+					 struct drm_property_blob **blob,
-+					 uint64_t blob_id,
-+					 ssize_t expected_size,
-+					 ssize_t expected_elem_size,
-+					 bool *replaced)
-+{
-+	struct drm_property_blob *new_blob = NULL;
-+
-+	if (blob_id != 0) {
-+		new_blob = drm_property_lookup_blob(dev, blob_id);
-+		if (new_blob == NULL)
-+			return -EINVAL;
-+
-+		if (expected_size > 0 &&
-+		    new_blob->length != expected_size) {
-+			drm_property_blob_put(new_blob);
-+			return -EINVAL;
-+		}
-+		if (expected_elem_size > 0 &&
-+		    new_blob->length % expected_elem_size != 0) {
-+			drm_property_blob_put(new_blob);
-+			return -EINVAL;
-+		}
-+	}
-+
-+	*replaced |= drm_property_replace_blob(blob, new_blob);
-+	drm_property_blob_put(new_blob);
-+
-+	return 0;
-+}
-+EXPORT_SYMBOL(drm_property_replace_blob_from_id);
-+
- int drm_mode_getblob_ioctl(struct drm_device *dev,
- 			   void *data, struct drm_file *file_priv)
- {
-diff --git a/include/drm/drm_mode_object.h b/include/drm/drm_mode_object.h
-index 912f1e415685..08d7a7f0188f 100644
---- a/include/drm/drm_mode_object.h
-+++ b/include/drm/drm_mode_object.h
-@@ -60,7 +60,7 @@ struct drm_mode_object {
- 	void (*free_cb)(struct kref *kref);
- };
-
--#define DRM_OBJECT_MAX_PROPERTY 24
-+#define DRM_OBJECT_MAX_PROPERTY 64
- /**
-  * struct drm_object_properties - property tracking for &drm_mode_object
-  */
-diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
-index 79d62856defb..4f87803b3ea1 100644
---- a/include/drm/drm_plane.h
-+++ b/include/drm/drm_plane.h
-@@ -237,6 +237,13 @@ struct drm_plane_state {
-
- 	/** @state: backpointer to global drm_atomic_state */
- 	struct drm_atomic_state *state;
-+
-+	/**
-+	 * @color_mgmt_changed: Color management properties have changed. Used
-+	 * by the atomic helpers and drivers to steer the atomic commit control
-+	 * flow.
-+	 */
-+	bool color_mgmt_changed : 1;
- };
-
- static inline struct drm_rect
-diff --git a/include/drm/drm_property.h b/include/drm/drm_property.h
-index 65bc9710a470..082f29156b3e 100644
---- a/include/drm/drm_property.h
-+++ b/include/drm/drm_property.h
-@@ -279,6 +279,12 @@ struct drm_property_blob *drm_property_create_blob(struct drm_device *dev,
- 						   const void *data);
- struct drm_property_blob *drm_property_lookup_blob(struct drm_device *dev,
- 						   uint32_t id);
-+int drm_property_replace_blob_from_id(struct drm_device *dev,
-+				      struct drm_property_blob **blob,
-+				      uint64_t blob_id,
-+				      ssize_t expected_size,
-+				      ssize_t expected_elem_size,
-+				      bool *replaced);
- int drm_property_replace_global_blob(struct drm_device *dev,
- 				     struct drm_property_blob **replace,
- 				     size_t length,
-diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
-index ea1b639bcb28..cea5653e4020 100644
---- a/include/uapi/drm/drm_mode.h
-+++ b/include/uapi/drm/drm_mode.h
-@@ -846,6 +846,14 @@ struct drm_color_ctm {
- 	__u64 matrix[9];
- };
-
-+struct drm_color_ctm2 {
-+	/*
-+	 * Conversion matrix in S31.32 sign-magnitude
-+	 * (not two's complement!) format.
-+	 */
-+	__u64 matrix[12];
-+};
-+
- struct drm_color_lut {
- 	/*
- 	 * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and
---
-2.43.0.rc2
-
diff --git a/patches/nobara/0001-drm-i915-quirks-disable-async-flipping-on-specific-d.patch b/patches/nobara/0001-drm-i915-quirks-disable-async-flipping-on-specific-d.patch
deleted file mode 100644
index 757f777..0000000
--- a/patches/nobara/0001-drm-i915-quirks-disable-async-flipping-on-specific-d.patch
+++ /dev/null
@@ -1,48 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Jan200101 <sentrycraft123@gmail.com>
-Date: Mon, 14 Nov 2022 20:13:53 +0100
-Subject: [PATCH] drm/i915/quirks: disable async flipping on specific devices
-
-Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
----
- drivers/gpu/drm/i915/display/intel_quirks.c | 20 ++++++++++++++++++++
- 1 file changed, 20 insertions(+)
-
-diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c
-index a280448df771..1596114dd9ae 100644
---- a/drivers/gpu/drm/i915/display/intel_quirks.c
-+++ b/drivers/gpu/drm/i915/display/intel_quirks.c
-@@ -14,6 +14,12 @@ static void intel_set_quirk(struct drm_i915_private *i915, enum intel_quirk_id q
- 	i915->display.quirks.mask |= BIT(quirk);
- }
- 
-+static void quirk_async_page_flips_force_disable(struct drm_i915_private *i915)
-+{
-+	i915->drm.mode_config.async_page_flip = false;
-+	drm_info(&i915->drm, "applying async flip disable quirk\n");
-+}
-+
- /*
-  * Some machines (Lenovo U160) do not work with SSC on LVDS for some reason
-  */
-@@ -136,6 +142,20 @@ static const struct intel_dmi_quirk intel_dmi_quirks[] = {
- 		},
- 		.hook = quirk_no_pps_backlight_power_hook,
- 	},
-+	{
-+		.dmi_id_list = &(const struct dmi_system_id[]) {
-+			{
-+				.callback = NULL,
-+				.ident = "ASUS TUF DASH F15",
-+				.matches = {
-+					DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-+					DMI_MATCH(DMI_PRODUCT_NAME, "ASUS TUF Dash F15 FX516PC_FX516PC"),
-+				},
-+			},
-+			{ }
-+		},
-+		.hook = quirk_async_page_flips_force_disable,
-+	},
- };
- 
- static struct intel_quirk intel_quirks[] = {
diff --git a/patches/nobara/0001-hid-asus-nero-patches-rogue.patch b/patches/nobara/0001-hid-asus-nero-patches-rogue.patch
deleted file mode 100644
index 2ca98bd..0000000
--- a/patches/nobara/0001-hid-asus-nero-patches-rogue.patch
+++ /dev/null
@@ -1,972 +0,0 @@
-diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
-index fd61dba88..3220d96fc 100644
---- a/drivers/hid/hid-asus.c
-+++ b/drivers/hid/hid-asus.c
-@@ -26,7 +26,9 @@
- #include <linux/dmi.h>
- #include <linux/hid.h>
- #include <linux/module.h>
-+#include <linux/sysfs.h>
- #include <linux/platform_data/x86/asus-wmi.h>
-+#include <linux/platform_device.h>
- #include <linux/input/mt.h>
- #include <linux/usb.h> /* For to_usb_interface for T100 touchpad intf check */
- #include <linux/power_supply.h>
-@@ -94,6 +96,435 @@ MODULE_DESCRIPTION("Asus HID Keyboard and TouchPad");
-
- #define TRKID_SGN       ((TRKID_MAX + 1) >> 1)
-
-+/*
-+ * USB buffers to be used in a control transfer to make the joystick change buttons mode and scancodes
-+ * 0 is default (game_mode with back buttons sending F17 and F18 instead of F15 for both as when unconfigured)
-+ * 1 is mouse mode: back buttons still are F17 and F18
-+ * 2 is macro mode
-+ */
-+static const u8 rc71l_mode_switch_commands[][23][64] = {
-+	{
-+		{
-+			0x5A, 0xD1, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x01, 0x2C, 0x01, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x05, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x0A, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x03, 0x8C, 0x88, 0x76, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x02, 0x2C, 0x01, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x23, 0x00, 0x00, 0x00, 0x01, 0x0C, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x0D, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x03, 0x2C, 0x01, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x04, 0x2C, 0x01, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x06, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x05, 0x2C, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x05, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x31, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x06, 0x2C, 0x01, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x4D, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x07, 0x2C, 0x01, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x08, 0x2C, 0x02, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x30, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x09, 0x2C, 0x01, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x0E, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0F, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x06, 0x02, 0x64, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x04, 0x04, 0x00, 0x64, 0x00, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x05, 0x04, 0x00, 0x64, 0x00, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		}
-+	},
-+	{
-+		{
-+			0x5A, 0xD1, 0x01, 0x01, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x01, 0x2C, 0x02, 0x00, 0x98, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x05, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x99, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x03, 0x8C, 0x88, 0x76, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x02, 0x2C, 0x02, 0x00, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x23, 0x00, 0x00, 0x00, 0x02, 0x00, 0x9B, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x0D, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x03, 0x2C, 0x02, 0x00, 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x04, 0x2C, 0x02, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x05, 0x2C, 0x02, 0x00, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x05, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x31, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x06, 0x2C, 0x02, 0x00, 0x97, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x4D, 0x00, 0x00, 0x00, 0x02, 0x00, 0x96, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x07, 0x2C, 0x01, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x08, 0x2C, 0x02, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x30, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x09, 0x2C, 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x88, 0x0D, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0F, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x06, 0x02, 0x64, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x04, 0x04, 0x00, 0x64, 0x00, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x05, 0x04, 0x00, 0x64, 0x00, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		}
-+	},
-+	{
-+		{
-+			0x5A, 0xD1, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x01, 0x2C, 0x01, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x05, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x0A, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x03, 0x8C, 0x88, 0x76, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x02, 0x2C, 0x01, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x23, 0x00, 0x00, 0x00, 0x01, 0x0C, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x0D, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x03, 0x2C, 0x01, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x04, 0x2C, 0x01, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x06, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x05, 0x2C, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x05, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x31, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x06, 0x2C, 0x01, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x82, 0x4D, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x07, 0x2C, 0x01, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x08, 0x2C, 0x02, 0x00, 0x8E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x02, 0x00, 0x8E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x8F, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x8F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x02, 0x09, 0x2C, 0x01, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x0E, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x0F, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x06, 0x02, 0x64, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x04, 0x04, 0x00, 0x64, 0x00, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		},
-+		{
-+			0x5A, 0xD1, 0x05, 0x04, 0x00, 0x64, 0x00, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-+		}
-+	}
-+};
-+
- struct asus_kbd_leds {
- 	struct led_classdev cdev;
- 	struct hid_device *hdev;
-@@ -103,6 +534,25 @@ struct asus_kbd_leds {
- 	bool removed;
- };
-
-+enum rc71l_controller_mode {
-+	rc71l_gamepad_mode,
-+	rc71l_mouse_mode,
-+	rc71l_macro_mode,
-+};
-+
-+struct asus_rc71l {
-+	unsigned int usb_pipe;
-+
-+	struct platform_device *mcu_dev;
-+
-+	struct mutex mutex; /* Mutex that protects everything below it */
-+
-+	enum rc71l_controller_mode mode;
-+
-+	u8 usb_in_buf[32];
-+	u8 usb_out_buf[64]; /* A temporary buffer to hold data that gets sent over USB (must be accessed upon locking the appropriate mutex) */
-+};
-+
- struct asus_touchpad_info {
- 	int max_x;
- 	int max_y;
-@@ -127,6 +577,7 @@ struct asus_drvdata {
- 	int battery_stat;
- 	bool battery_in_query;
- 	unsigned long battery_next_query;
-+	struct asus_rc71l *rc71l_data;
- };
-
- static int asus_report_battery(struct asus_drvdata *, u8 *, int);
-@@ -189,6 +640,245 @@ static const struct asus_touchpad_info medion_e1239t_tp = {
- 	.report_size = 32 /* 2 byte header + 5 * 5 + 5 byte footer */,
- };
-
-+/**
-+ * This function reads data over the USB device on the ROG Ally.
-+ * Unlike outgoing traffic the inbound always performs 32-bytes transfers.
-+ *
-+ * PRE:
-+ *     - rc71l internal mutex MUST be locked
-+ */
-+static int rc71l_usb_read(struct hid_device * hdev) {
-+	struct asus_drvdata *drvdata = (struct asus_drvdata*)hid_get_drvdata(hdev);
-+	if (drvdata == NULL) {
-+		return -EINVAL;
-+	}
-+
-+	struct asus_rc71l *rc71l_drvdata = drvdata->rc71l_data;
-+	if (rc71l_drvdata == NULL) {
-+		return -EINVAL;
-+	}
-+
-+	struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
-+	struct usb_device *dev = interface_to_usbdev(intf);
-+
-+	const int retval = usb_control_msg_recv(dev, 0x80, 0x01, 0xa1, 0x035A, 0x0002, (void*)&rc71l_drvdata->usb_in_buf[0], 32, 250, GFP_KERNEL);
-+
-+	if (retval < 0) {
-+		hid_err(hdev, "Ally read failed performing control read, error %d\n", retval);
-+		goto rc71l_usb_read_err;
-+	}
-+
-+	const char* b = (const u8*)&rc71l_drvdata->usb_in_buf[0];
-+	hid_info(hdev, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
-+		b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9],
-+		b[10], b[11], b[12], b[13], b[14], b[15], b[16], b[17], b[18], b[19],
-+		b[20], b[21], b[22], b[23], b[24], b[25], b[26], b[27], b[28], b[29],
-+		b[30], b[31]
-+	);
-+
-+rc71l_usb_read_err:
-+	return retval;
-+}
-+
-+/**
-+ * This function writes a command over the USB device on the ROG Ally.
-+ * The ROG Ally accepts 64-bytes long messages as commands: as such at most 64-bytes will be sent
-+ * and unused bytes will be zeroed out.
-+ *
-+ * PRE:
-+ *     - rc71l internal mutex MUST be locked
-+ */
-+static int rc71l_usb_write(struct hid_device * hdev, const void* buf, size_t buf_sz) {
-+	struct asus_drvdata *drvdata = (struct asus_drvdata*)hid_get_drvdata(hdev);
-+
-+	if (drvdata == NULL) {
-+		return -EINVAL;
-+	}
-+
-+	struct asus_rc71l *rc71l_drvdata = drvdata->rc71l_data;
-+	if (rc71l_drvdata == NULL) {
-+		return -EINVAL;
-+	}
-+
-+	struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
-+	struct usb_device *dev = interface_to_usbdev(intf);
-+
-+	if (buf_sz > 64) {
-+		hid_err(hdev, "Bug in the kernel: cannot write more than 64-bytes\n");
-+
-+		return -EINVAL;
-+	}
-+
-+	// make sure bytes in excess will be zeroes and copy the user-provided buffer
-+	memset((void*)&rc71l_drvdata->usb_out_buf[0], 0, 64);
-+	memcpy((void*)&rc71l_drvdata->usb_out_buf[0], buf, buf_sz);
-+
-+	/* send the data out the bulk port */
-+	const int retval = usb_control_msg(dev, rc71l_drvdata->usb_pipe, 0x09, 0x21, 0x035A, 0x0002, (void*)&rc71l_drvdata->usb_out_buf[0], 64, 250);
-+	if (retval < 0) {
-+		hid_err(hdev,
-+			"Failed submitting control write error %d\n", retval);
-+
-+		goto rc71l_usb_write_err;
-+	}
-+
-+rc71l_usb_write_err:
-+	return retval < 0 ? retval : 0;
-+}
-+
-+static int rc71l_mode_change(struct hid_device * hdev, enum rc71l_controller_mode new_mode) {
-+	struct asus_drvdata *drvdata = (struct asus_drvdata*)hid_get_drvdata(hdev);
-+	if (drvdata == NULL) {
-+		return -EINVAL;
-+	}
-+
-+	struct asus_rc71l *rc71l_drvdata = drvdata->rc71l_data;
-+	if (rc71l_drvdata == NULL) {
-+		return -EINVAL;
-+	}
-+
-+	int ret = 0;
-+
-+	size_t packets_group = 0;
-+	switch (new_mode) {
-+		case rc71l_gamepad_mode:
-+			packets_group = 0;
-+			break;
-+
-+		case rc71l_mouse_mode:
-+			packets_group = 1;
-+			break;
-+
-+		case rc71l_macro_mode:
-+			packets_group = 2;
-+			break;
-+
-+		default:
-+			return -EINVAL;
-+	}
-+
-+	for (int i = 0; (i < 23) && (ret == 0); ++i) {
-+		ret = rc71l_usb_write(hdev, (const void*)&rc71l_mode_switch_commands[packets_group][i][0], 64);
-+		if (ret > 0) {
-+			hid_err(hdev, "Ally controller mode switch %d/23 error %d\n", i, ret);
-+			goto rc71l_mode_change_err;
-+		}
-+	}
-+
-+	// controller mode has been switched successfully: change that in driver data
-+	if (ret == 0) {
-+		hid_info(hdev, "ROG Ally [RC71L] controller mode switch succeeded\n");
-+		rc71l_drvdata->mode = new_mode;
-+	}
-+
-+rc71l_mode_change_err:
-+	return ret;
-+}
-+
-+static ssize_t __maybe_unused mode_show(struct device *raw_dev, struct device_attribute *attr, char *buf) {
-+	struct platform_device *const pdev = to_platform_device(raw_dev);
-+	struct hid_device *const hdev = platform_get_drvdata(pdev);
-+	if (hdev == NULL) {
-+		return -EINVAL;
-+	}
-+
-+	struct asus_drvdata *const drvdata = (struct asus_drvdata*)hid_get_drvdata(hdev);
-+ 	if (drvdata == NULL) {
-+ 		return -EINVAL;
-+ 	}
-+
-+	struct asus_rc71l *const rc71l_drvdata = drvdata->rc71l_data;
-+	if (rc71l_drvdata == NULL) {
-+		return -EINVAL;
-+	}
-+
-+	mutex_lock(&rc71l_drvdata->mutex);
-+	int current_mode = 0;
-+	switch (rc71l_drvdata->mode) {
-+		case rc71l_gamepad_mode:
-+			current_mode = 0;
-+			break;
-+
-+		case rc71l_mouse_mode:
-+			current_mode = 1;
-+			break;
-+
-+		case rc71l_macro_mode:
-+			current_mode = 2;
-+			break;
-+
-+		default:
-+			mutex_unlock(&rc71l_drvdata->mutex);
-+			return -EINVAL;
-+	}
-+	mutex_unlock(&rc71l_drvdata->mutex);
-+
-+	return sysfs_emit(buf, "%d\n", (int)current_mode);
-+}
-+
-+static ssize_t __maybe_unused mode_store(struct device *raw_dev, struct device_attribute *attr, const char *buf, size_t count) {
-+	struct platform_device *const pdev = to_platform_device(raw_dev);
-+	struct hid_device *const hdev = platform_get_drvdata(pdev);
-+	if (hdev == NULL) {
-+		return -EINVAL;
-+	}
-+
-+	struct asus_drvdata *const drvdata = (struct asus_drvdata*)hid_get_drvdata(hdev);
-+	if (drvdata == NULL) {
-+		return -EINVAL;
-+	}
-+
-+	struct asus_rc71l *const rc71l_drvdata = drvdata->rc71l_data;
-+	if (rc71l_drvdata == NULL) {
-+		return -EINVAL;
-+	}
-+
-+	int res = -EINVAL;
-+	int val = -EINVAL;
-+	res = kstrtoint(buf, 0, &val);
-+	if (res)
-+		return res;
-+
-+	switch (val) {
-+		case 0:
-+			mutex_lock(&rc71l_drvdata->mutex);
-+			res = rc71l_mode_change(hdev, rc71l_gamepad_mode);
-+			mutex_unlock(&rc71l_drvdata->mutex);
-+			break;
-+
-+		case 1:
-+			mutex_lock(&rc71l_drvdata->mutex);
-+			res = rc71l_mode_change(hdev, rc71l_mouse_mode);
-+			mutex_unlock(&rc71l_drvdata->mutex);
-+			break;
-+
-+		case 2:
-+			mutex_lock(&rc71l_drvdata->mutex);
-+			res = rc71l_mode_change(hdev, rc71l_macro_mode);
-+			mutex_unlock(&rc71l_drvdata->mutex);
-+			break;
-+
-+		default:
-+			return -EINVAL;
-+	}
-+
-+	hid_err(hdev, "Ally controller mode switch to %d mode op result: %d\n", val, res);
-+
-+	return count;
-+}
-+
-+DEVICE_ATTR_RW(mode);
-+
-+static struct attribute *rc71l_input_attrs[] = {
-+	&dev_attr_mode.attr,
-+	NULL
-+};
-+
-+static const struct attribute_group mcu_attr_group = {
-+	.name = "input",
-+	.attrs = rc71l_input_attrs,
-+};
-+
- static void asus_report_contact_down(struct asus_drvdata *drvdat,
- 		int toolType, u8 *data)
- {
-@@ -386,7 +1076,7 @@ static int asus_kbd_set_report(struct hid_device *hdev, u8 *buf, size_t buf_size
- 	unsigned char *dmabuf;
- 	int ret;
-
--	dmabuf = kmemdup(buf, buf_size, GFP_KERNEL);
-+	dmabuf = kmemdup((const void*)buf, buf_size, GFP_KERNEL);
- 	if (!dmabuf)
- 		return -ENOMEM;
-
-@@ -897,6 +1587,10 @@ static int asus_input_mapping(struct hid_device *hdev,
- 		case 0xb3: asus_map_key_clear(KEY_PROG3);	break; /* Fn+Left next aura */
- 		case 0x6a: asus_map_key_clear(KEY_F13);		break; /* Screenpad toggle */
- 		case 0x4b: asus_map_key_clear(KEY_F14);		break; /* Arrows/Pg-Up/Dn toggle */
-+		case 0xa5: asus_map_key_clear(KEY_F15);		break; /* ROG Ally left back */
-+		case 0xa6: asus_map_key_clear(KEY_F16);		break; /* ROG Ally QAM button */
-+		case 0xa7: asus_map_key_clear(KEY_F17);		break; /* ROG Ally ROG long-press */
-+		case 0xa8: asus_map_key_clear(KEY_F18);		break; /* ROG Ally ROG long-press-release */
-
-
- 		default:
-@@ -1000,16 +1694,108 @@ static int asus_start_multitouch(struct hid_device *hdev)
- 	return 0;
- }
-
-+#ifdef CONFIG_PM
- static int __maybe_unused asus_reset_resume(struct hid_device *hdev)
- {
-+	int ret = 0;
-+
- 	struct asus_drvdata *drvdata = hid_get_drvdata(hdev);
-+	if (drvdata != NULL) {
-+		return -EINVAL;
-+	}
-
- 	if (drvdata->tp)
- 		return asus_start_multitouch(hdev);
-
--	return 0;
-+	return ret;
- }
-
-+static int __maybe_unused asus_resume(struct hid_device *hdev)
-+{
-+	int ret = 0;
-+	struct asus_drvdata *drvdata = hid_get_drvdata(hdev);
-+/*
-+	// Controller mode is kept on device sleep
-+	if (dmi_match(DMI_PRODUCT_NAME, "ROG Ally RC71L_RC71L"))
-+	{
-+		// Apply the joystick mode switch
-+		ret = rog_ally_controller_mode_change(hdev, game_mode);
-+
-+		hid_err(hdev, "Asus wake, restore controller %d\n", ret);
-+	}
-+*/
-+
-+	struct asus_rc71l *rc71l_drvdata = drvdata->rc71l_data;
-+	if (rc71l_drvdata != NULL) {
-+		mutex_lock(&rc71l_drvdata->mutex);
-+		ret = rc71l_mode_change(hdev, rc71l_drvdata->mode);
-+		mutex_unlock(&rc71l_drvdata->mutex);
-+
-+ 		if (ret < 0) {
-+			hid_err(hdev, "ROG Ally [RC71L] failed to reset controller mode: %d\n", ret);
-+ 			goto asus_resume_err;
-+ 		}
-+ 	}
-+
-+
-+	/*
-+		 * On some devices such as the Asus RC71L leds are reset to default after sleep and sysfs attribute will report
-+		 * something that won't be true: resetting the user-provided value is necessary to maintain coherency and avoid
-+		 * flashing full brightness leds in face of the user.
-+	*/
-+	if (drvdata->kbd_backlight) {
-+		const u8 buf[] = { FEATURE_KBD_REPORT_ID, 0xba, 0xc5, 0xc4, drvdata->kbd_backlight->cdev.brightness };
-+		ret = asus_kbd_set_report(hdev, buf, sizeof(buf));
-+			if (ret < 0) {
-+				hid_err(hdev, "Asus failed to set keyboard backlight: %d\n", ret);
-+				goto asus_resume_err;
-+			}
-+
-+		hid_err(hdev, "Asus ROG Ally asus_reset_resume, leds reset: %d at brightness %d\n", ret, (int)drvdata->kbd_backlight->cdev.brightness);
-+	}
-+
-+	asus_resume_err:
-+ 	return ret;
-+}
-+
-+static int __maybe_unused asus_suspend(struct hid_device *hdev, struct pm_message)
-+ {
-+ 	struct asus_drvdata *drvdata = hid_get_drvdata(hdev);
-+
-+	if (drvdata == NULL) {
-+		return 0;
-+ 	}
-+
-+	struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
-+	struct usb_device *dev = interface_to_usbdev(intf);
-+
-+	int ret = 0;
-+
-+	if (dmi_match(DMI_PRODUCT_NAME, "ROG Ally RC71L_RC71L")) {
-+		// Send the USB ABORT_PIPE command
-+		int result = usb_control_msg(
-+			dev, usb_sndctrlpipe(dev, 0), USB_REQ_SET_FEATURE,
-+			USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_ENDPOINT,
-+			USB_ENDPOINT_HALT, 0x02, NULL, 0, 1000);
-+
-+		if (result < 0) {
-+			printk("USB ABORT_PIPE failed: %d\n", result);
-+		} else {
-+			printk("USB ABORT_PIPE succeeded\n");
-+		}
-+	}
-+
-+	struct asus_rc71l *rc71l_drvdata = drvdata->rc71l_data;
-+	if (rc71l_drvdata != NULL) {
-+		mutex_lock(&rc71l_drvdata->mutex);
-+		// TODO: send ABORT_PIPE here
-+		mutex_unlock(&rc71l_drvdata->mutex);
-+	}
-+
-+	return ret;
-+}
-+#endif
-+
- static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id)
- {
- 	int ret;
-@@ -1021,6 +1807,8 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id)
- 		return -ENOMEM;
- 	}
-
-+	drvdata->rc71l_data = NULL;
-+
- 	hid_set_drvdata(hdev, drvdata);
-
- 	drvdata->quirks = id->driver_data;
-@@ -1109,6 +1897,51 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id)
- 		goto err_stop_hw;
- 	}
-
-+	if ((dmi_match(DMI_PRODUCT_NAME, "ROG Ally RC71L_RC71L")) && (hdev->rsize > 9) && (hdev->rdesc[7] == 0x85) && (hdev->rdesc[8] == 0x5a))
-+	{
-+		drvdata->rc71l_data = devm_kzalloc(&hdev->dev, sizeof(*drvdata->rc71l_data), GFP_KERNEL);
-+		if (drvdata->rc71l_data == NULL) {
-+			hid_err(hdev, "Can't alloc Asus ROG Ally [RC71L] descriptor\n");
-+			ret = -ENOMEM;
-+			goto err_stop_hw;
-+		}
-+
-+		mutex_init(&drvdata->rc71l_data->mutex);
-+
-+		struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
-+		struct usb_device *dev = interface_to_usbdev(intf);
-+
-+		// default controller mode
-+		drvdata->rc71l_data->mode = rc71l_gamepad_mode;
-+
-+		// usb_device and endpoint
-+		drvdata->rc71l_data->usb_pipe = usb_sndctrlpipe(dev, 0);
-+
-+		// apply the default controller mode
-+		mutex_lock(&drvdata->rc71l_data->mutex);
-+		ret = rc71l_mode_change(hdev, drvdata->rc71l_data->mode);
-+		mutex_unlock(&drvdata->rc71l_data->mutex);
-+
-+		if (ret < 0) {
-+			hid_err(hdev, "Asus ROG Ally [RC71L] error setting the default controller mode: %d\n", ret);
-+			goto err_stop_hw;
-+		}
-+
-+		drvdata->rc71l_data->mcu_dev = platform_device_register_simple("asus-mcu", 0, NULL, 0);
-+		if (IS_ERR(drvdata->rc71l_data->mcu_dev)) {
-+			hid_err(hdev, "Error registering MCU platform device: %ld\n", PTR_ERR(drvdata->rc71l_data->mcu_dev));
-+			goto err_stop_hw;
-+		}
-+
-+		platform_set_drvdata(drvdata->rc71l_data->mcu_dev, hdev);
-+
-+		ret = devm_device_add_group(&drvdata->rc71l_data->mcu_dev->dev, &mcu_attr_group);
-+		if (ret != 0) {
-+			platform_device_unregister(drvdata->rc71l_data->mcu_dev);
-+			goto err_stop_hw;
-+		}
-+	}
-+
- 	if (drvdata->tp) {
- 		drvdata->input->name = "Asus TouchPad";
- 	} else {
-@@ -1140,6 +1973,16 @@ static void asus_remove(struct hid_device *hdev)
- 		cancel_work_sync(&drvdata->kbd_backlight->work);
- 	}
-
-+	struct asus_rc71l *rc71l_drvdata = drvdata->rc71l_data;
-+	if (rc71l_drvdata != NULL) {
-+		platform_device_unregister(rc71l_drvdata->mcu_dev);
-+
-+		mutex_lock(&rc71l_drvdata->mutex);
-+		platform_device_unregister(rc71l_drvdata->mcu_dev);
-+		// TODO: perform cleanup operations
-+		mutex_unlock(&rc71l_drvdata->mutex);
-+	}
-+
- 	hid_hw_stop(hdev);
- }
-
-@@ -1258,6 +2101,9 @@ static const struct hid_device_id asus_devices[] = {
- 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
- 	    USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD3),
- 	  QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD },
-+	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
-+	    USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY),
-+	  QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD },
- 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
- 	    USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD),
- 	  QUIRK_ROG_CLAYMORE_II_KEYBOARD },
-@@ -1294,6 +2140,8 @@ static struct hid_driver asus_driver = {
- 	.input_configured       = asus_input_configured,
- #ifdef CONFIG_PM
- 	.reset_resume           = asus_reset_resume,
-+	.resume			= asus_resume,
-+	.suspend		= asus_suspend,
- #endif
- 	.event			= asus_event,
- 	.raw_event		= asus_raw_event
-diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
-index d10ccfa17..213492ee8 100644
---- a/drivers/hid/hid-ids.h
-+++ b/drivers/hid/hid-ids.h
-@@ -208,6 +208,7 @@
- #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD	0x1866
- #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD2	0x19b6
- #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD3	0x1a30
-+#define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY	0x1abe
- #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD	0x196b
- #define USB_DEVICE_ID_ASUSTEK_FX503VD_KEYBOARD	0x1869
-
---
-2.43.0
-
diff --git a/patches/nobara/0002-drm-i915-add-kernel-parameter-to-disable-async-page-.patch b/patches/nobara/0002-drm-i915-add-kernel-parameter-to-disable-async-page-.patch
deleted file mode 100644
index 24f6807..0000000
--- a/patches/nobara/0002-drm-i915-add-kernel-parameter-to-disable-async-page-.patch
+++ /dev/null
@@ -1,54 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Jan200101 <sentrycraft123@gmail.com>
-Date: Wed, 8 Mar 2023 20:51:16 +0100
-Subject: [PATCH] drm/i915: add kernel parameter to disable async page flipping
-
-Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
----
- drivers/gpu/drm/i915/display/intel_display_driver.c | 2 +-
- drivers/gpu/drm/i915/i915_params.c           | 4 ++++
- drivers/gpu/drm/i915/i915_params.h           | 3 ++-
- 3 files changed, 7 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
-index ade744cccfea..119be26b5641 100644
---- a/drivers/gpu/drm/i915/i915_params.c
-+++ b/drivers/gpu/drm/i915/i915_params.c
-@@ -222,6 +222,10 @@ i915_param_named_unsafe(lmem_size, uint, 0400,
- i915_param_named_unsafe(lmem_bar_size, uint, 0400,
- 			"Set the lmem bar size(in MiB).");
- 
-+i915_param_named_unsafe(disable_async_page_flip, bool, 0400,
-+			"Disable async page flipping"
-+			"(0=disabled [default], 1=enabled)");
-+
- static void _param_print_bool(struct drm_printer *p, const char *name,
- 			      bool val)
- {
-diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
-index 3f51f90145b6..37f25ec1b874 100644
---- a/drivers/gpu/drm/i915/i915_params.h
-+++ b/drivers/gpu/drm/i915/i915_params.h
-@@ -85,7 +85,8 @@ struct drm_printer;
- 	param(bool, verbose_state_checks, true, 0) \
- 	param(bool, nuclear_pageflip, false, 0400) \
- 	param(bool, enable_dp_mst, true, 0600) \
--	param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0)
-+	param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0) \
-+	param(bool, disable_async_page_flip, false, 0400)
- 
- #define MEMBER(T, member, ...) T member;
- struct i915_params {
-diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c
-index b909814ae..918b8b589 100644
---- a/drivers/gpu/drm/i915/display/intel_display_driver.c
-+++ b/drivers/gpu/drm/i915/display/intel_display_driver.c
-@@ -121,7 +121,7 @@ static void intel_mode_config_init(struct drm_i915_private *i915)
- 	mode_config->funcs = &intel_mode_funcs;
- 	mode_config->helper_private = &intel_mode_config_funcs;
- 
--	mode_config->async_page_flip = HAS_ASYNC_FLIPS(i915);
-+	mode_config->async_page_flip = HAS_ASYNC_FLIPS(i915) && !i915->params.disable_async_page_flip;
- 
- 	/*
- 	 * Maximum framebuffer dimensions, chosen to match
diff --git a/patches/nobara/OpenRGB.patch b/patches/nobara/OpenRGB.patch
deleted file mode 100644
index 3ddf50e..0000000
--- a/patches/nobara/OpenRGB.patch
+++ /dev/null
@@ -1,703 +0,0 @@
-diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
-index 2ddca08f8a76..72647850f08e 100644
---- a/drivers/i2c/busses/Kconfig
-+++ b/drivers/i2c/busses/Kconfig
-@@ -217,6 +217,15 @@ config I2C_CHT_WC
- 	  combined with a FUSB302 Type-C port-controller as such it is advised
- 	  to also select CONFIG_TYPEC_FUSB302=m.
-
-+config I2C_NCT6775
-+	tristate "Nuvoton NCT6775 and compatible SMBus controller"
-+	help
-+		If you say yes to this option, support will be included for the
-+		Nuvoton NCT6775 and compatible SMBus controllers.
-+
-+		This driver can also be built as a module.  If so, the module
-+		will be called i2c-nct6775.
-+
- config I2C_NFORCE2
- 	tristate "Nvidia nForce2, nForce3 and nForce4"
- 	depends on PCI
-diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
-index 25d60889713c..3c2a9b237ac6 100644
---- a/drivers/i2c/busses/Makefile
-+++ b/drivers/i2c/busses/Makefile
-@@ -17,6 +17,7 @@ obj-$(CONFIG_I2C_CHT_WC)	+= i2c-cht-wc.o
- obj-$(CONFIG_I2C_I801)		+= i2c-i801.o
- obj-$(CONFIG_I2C_ISCH)		+= i2c-isch.o
- obj-$(CONFIG_I2C_ISMT)		+= i2c-ismt.o
-+obj-$(CONFIG_I2C_NCT6775)   += i2c-nct6775.o
- obj-$(CONFIG_I2C_NFORCE2)	+= i2c-nforce2.o
- obj-$(CONFIG_I2C_NFORCE2_S4985)	+= i2c-nforce2-s4985.o
- obj-$(CONFIG_I2C_NVIDIA_GPU)	+= i2c-nvidia-gpu.o
-diff --git a/drivers/i2c/busses/i2c-nct6775.c b/drivers/i2c/busses/i2c-nct6775.c
-new file mode 100644
-index 000000000000..0462f0952043
---- /dev/null
-+++ b/drivers/i2c/busses/i2c-nct6775.c
-@@ -0,0 +1,647 @@
-+/*
-+ * i2c-nct6775 - Driver for the SMBus master functionality of
-+ *	       Nuvoton NCT677x Super-I/O chips
-+ *
-+ * Copyright (C) 2019  Adam Honse <calcprogrammer1@gmail.com>
-+ *
-+ * Derived from nct6775 hwmon driver
-+ * Copyright (C) 2012  Guenter Roeck <linux@roeck-us.net>
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-+ *
-+ */
-+
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/slab.h>
-+#include <linux/jiffies.h>
-+#include <linux/platform_device.h>
-+#include <linux/hwmon.h>
-+#include <linux/hwmon-sysfs.h>
-+#include <linux/hwmon-vid.h>
-+#include <linux/err.h>
-+#include <linux/mutex.h>
-+#include <linux/delay.h>
-+#include <linux/ioport.h>
-+#include <linux/i2c.h>
-+#include <linux/acpi.h>
-+#include <linux/bitops.h>
-+#include <linux/dmi.h>
-+#include <linux/io.h>
-+#include <linux/nospec.h>
-+
-+#define DRVNAME "i2c-nct6775"
-+
-+/* Nuvoton SMBus address offsets */
-+#define SMBHSTDAT       (0 + nuvoton_nct6793d_smba)
-+#define SMBBLKSZ        (1 + nuvoton_nct6793d_smba)
-+#define SMBHSTCMD       (2 + nuvoton_nct6793d_smba)
-+#define SMBHSTIDX       (3 + nuvoton_nct6793d_smba)  //Index field is the Command field on other controllers
-+#define SMBHSTCTL       (4 + nuvoton_nct6793d_smba)
-+#define SMBHSTADD       (5 + nuvoton_nct6793d_smba)
-+#define SMBHSTERR       (9 + nuvoton_nct6793d_smba)
-+#define SMBHSTSTS       (0xE + nuvoton_nct6793d_smba)
-+
-+/* Command register */
-+#define NCT6793D_READ_BYTE      0
-+#define NCT6793D_READ_WORD      1
-+#define NCT6793D_READ_BLOCK     2
-+#define NCT6793D_BLOCK_WRITE_READ_PROC_CALL 3
-+#define NCT6793D_PROC_CALL      4
-+#define NCT6793D_WRITE_BYTE     8
-+#define NCT6793D_WRITE_WORD     9
-+#define NCT6793D_WRITE_BLOCK    10
-+
-+/* Control register */
-+#define NCT6793D_MANUAL_START   128
-+#define NCT6793D_SOFT_RESET     64
-+
-+/* Error register */
-+#define NCT6793D_NO_ACK         32
-+
-+/* Status register */
-+#define NCT6793D_FIFO_EMPTY     1
-+#define NCT6793D_FIFO_FULL      2
-+#define NCT6793D_MANUAL_ACTIVE  4
-+
-+#define NCT6775_LD_SMBUS		0x0B
-+
-+/* Other settings */
-+#define MAX_RETRIES		400
-+
-+enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793,
-+	     nct6795, nct6796, nct6798 };
-+
-+struct nct6775_sio_data {
-+	int sioreg;
-+	enum kinds kind;
-+};
-+
-+/* used to set data->name = nct6775_device_names[data->sio_kind] */
-+static const char * const nct6775_device_names[] = {
-+	"nct6106",
-+	"nct6775",
-+	"nct6776",
-+	"nct6779",
-+	"nct6791",
-+	"nct6792",
-+	"nct6793",
-+	"nct6795",
-+	"nct6796",
-+	"nct6798",
-+};
-+
-+static const char * const nct6775_sio_names[] __initconst = {
-+	"NCT6106D",
-+	"NCT6775F",
-+	"NCT6776D/F",
-+	"NCT6779D",
-+	"NCT6791D",
-+	"NCT6792D",
-+	"NCT6793D",
-+	"NCT6795D",
-+	"NCT6796D",
-+	"NCT6798D",
-+};
-+
-+#define SIO_REG_LDSEL		0x07	/* Logical device select */
-+#define SIO_REG_DEVID		0x20	/* Device ID (2 bytes) */
-+#define SIO_REG_SMBA		0x62	/* SMBus base address register */
-+
-+#define SIO_NCT6106_ID		0xc450
-+#define SIO_NCT6775_ID		0xb470
-+#define SIO_NCT6776_ID		0xc330
-+#define SIO_NCT6779_ID		0xc560
-+#define SIO_NCT6791_ID		0xc800
-+#define SIO_NCT6792_ID		0xc910
-+#define SIO_NCT6793_ID		0xd120
-+#define SIO_NCT6795_ID		0xd350
-+#define SIO_NCT6796_ID		0xd420
-+#define SIO_NCT6798_ID		0xd428
-+#define SIO_ID_MASK			0xFFF0
-+
-+static inline void
-+superio_outb(int ioreg, int reg, int val)
-+{
-+	outb(reg, ioreg);
-+	outb(val, ioreg + 1);
-+}
-+
-+static inline int
-+superio_inb(int ioreg, int reg)
-+{
-+	outb(reg, ioreg);
-+	return inb(ioreg + 1);
-+}
-+
-+static inline void
-+superio_select(int ioreg, int ld)
-+{
-+	outb(SIO_REG_LDSEL, ioreg);
-+	outb(ld, ioreg + 1);
-+}
-+
-+static inline int
-+superio_enter(int ioreg)
-+{
-+	/*
-+	 * Try to reserve <ioreg> and <ioreg + 1> for exclusive access.
-+	 */
-+	if (!request_muxed_region(ioreg, 2, DRVNAME))
-+		return -EBUSY;
-+
-+	outb(0x87, ioreg);
-+	outb(0x87, ioreg);
-+
-+	return 0;
-+}
-+
-+static inline void
-+superio_exit(int ioreg)
-+{
-+	outb(0xaa, ioreg);
-+	outb(0x02, ioreg);
-+	outb(0x02, ioreg + 1);
-+	release_region(ioreg, 2);
-+}
-+
-+/*
-+ * ISA constants
-+ */
-+
-+#define IOREGION_ALIGNMENT	(~7)
-+#define IOREGION_LENGTH		2
-+#define ADDR_REG_OFFSET		0
-+#define DATA_REG_OFFSET		1
-+
-+#define NCT6775_REG_BANK	0x4E
-+#define NCT6775_REG_CONFIG	0x40
-+
-+static struct i2c_adapter *nct6775_adapter;
-+
-+struct i2c_nct6775_adapdata {
-+	unsigned short smba;
-+};
-+
-+/* Return negative errno on error. */
-+static s32 nct6775_access(struct i2c_adapter * adap, u16 addr,
-+		 unsigned short flags, char read_write,
-+		 u8 command, int size, union i2c_smbus_data * data)
-+{
-+	struct i2c_nct6775_adapdata *adapdata = i2c_get_adapdata(adap);
-+	unsigned short nuvoton_nct6793d_smba = adapdata->smba;
-+	int i, len, cnt;
-+	union i2c_smbus_data tmp_data;
-+	int timeout = 0;
-+
-+	tmp_data.word = 0;
-+	cnt = 0;
-+	len = 0;
-+
-+	outb_p(NCT6793D_SOFT_RESET, SMBHSTCTL);
-+
-+	switch (size) {
-+		case I2C_SMBUS_QUICK:
-+			outb_p((addr << 1) | read_write,
-+			       SMBHSTADD);
-+			break;
-+		case I2C_SMBUS_BYTE_DATA:
-+			tmp_data.byte = data->byte;
-+		case I2C_SMBUS_BYTE:
-+			outb_p((addr << 1) | read_write,
-+			       SMBHSTADD);
-+			outb_p(command, SMBHSTIDX);
-+			if (read_write == I2C_SMBUS_WRITE) {
-+				outb_p(tmp_data.byte, SMBHSTDAT);
-+				outb_p(NCT6793D_WRITE_BYTE, SMBHSTCMD);
-+			}
-+			else {
-+				outb_p(NCT6793D_READ_BYTE, SMBHSTCMD);
-+			}
-+			break;
-+		case I2C_SMBUS_WORD_DATA:
-+			outb_p((addr << 1) | read_write,
-+			       SMBHSTADD);
-+			outb_p(command, SMBHSTIDX);
-+			if (read_write == I2C_SMBUS_WRITE) {
-+				outb_p(data->word & 0xff, SMBHSTDAT);
-+				outb_p((data->word & 0xff00) >> 8, SMBHSTDAT);
-+				outb_p(NCT6793D_WRITE_WORD, SMBHSTCMD);
-+			}
-+			else {
-+				outb_p(NCT6793D_READ_WORD, SMBHSTCMD);
-+			}
-+			break;
-+		case I2C_SMBUS_BLOCK_DATA:
-+			outb_p((addr << 1) | read_write,
-+			       SMBHSTADD);
-+			outb_p(command, SMBHSTIDX);
-+			if (read_write == I2C_SMBUS_WRITE) {
-+				len = data->block[0];
-+				if (len == 0 || len > I2C_SMBUS_BLOCK_MAX)
-+					return -EINVAL;
-+				outb_p(len, SMBBLKSZ);
-+
-+				cnt = 1;
-+				if (len >= 4) {
-+					for (i = cnt; i <= 4; i++) {
-+						outb_p(data->block[i], SMBHSTDAT);
-+					}
-+
-+					len -= 4;
-+					cnt += 4;
-+				}
-+				else {
-+					for (i = cnt; i <= len; i++ ) {
-+						outb_p(data->block[i], SMBHSTDAT);
-+					}
-+
-+					len = 0;
-+				}
-+
-+				outb_p(NCT6793D_WRITE_BLOCK, SMBHSTCMD);
-+			}
-+			else {
-+				return -ENOTSUPP;
-+			}
-+			break;
-+		default:
-+			dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
-+			return -EOPNOTSUPP;
-+	}
-+
-+	outb_p(NCT6793D_MANUAL_START, SMBHSTCTL);
-+
-+	while ((size == I2C_SMBUS_BLOCK_DATA) && (len > 0)) {
-+		if (read_write == I2C_SMBUS_WRITE) {
-+			timeout = 0;
-+			while ((inb_p(SMBHSTSTS) & NCT6793D_FIFO_EMPTY) == 0)
-+			{
-+				if(timeout > MAX_RETRIES)
-+				{
-+					return -ETIMEDOUT;
-+				}
-+				usleep_range(250, 500);
-+				timeout++;
-+			}
-+
-+			//Load more bytes into FIFO
-+			if (len >= 4) {
-+				for (i = cnt; i <= (cnt + 4); i++) {
-+					outb_p(data->block[i], SMBHSTDAT);
-+				}
-+
-+				len -= 4;
-+				cnt += 4;
-+			}
-+			else {
-+				for (i = cnt; i <= (cnt + len); i++) {
-+					outb_p(data->block[i], SMBHSTDAT);
-+				}
-+
-+				len = 0;
-+			}
-+		}
-+		else {
-+			return -ENOTSUPP;
-+		}
-+		
-+	}
-+
-+	//wait for manual mode to complete
-+	timeout = 0;
-+	while ((inb_p(SMBHSTSTS) & NCT6793D_MANUAL_ACTIVE) != 0)
-+	{
-+		if(timeout > MAX_RETRIES)
-+		{
-+			return -ETIMEDOUT;
-+		}
-+		usleep_range(250, 500);
-+		timeout++;
-+	}
-+
-+	if ((inb_p(SMBHSTERR) & NCT6793D_NO_ACK) != 0) {    	
-+		return -ENXIO;
-+	}
-+	else if ((read_write == I2C_SMBUS_WRITE) || (size == I2C_SMBUS_QUICK)) {
-+		return 0;
-+	}
-+
-+	switch (size) {
-+		case I2C_SMBUS_QUICK:
-+		case I2C_SMBUS_BYTE_DATA:
-+			data->byte = inb_p(SMBHSTDAT);
-+			break;
-+		case I2C_SMBUS_WORD_DATA:
-+			data->word = inb_p(SMBHSTDAT) + (inb_p(SMBHSTDAT) << 8);
-+			break;
-+	}
-+	return 0;
-+}
-+
-+static u32 nct6775_func(struct i2c_adapter *adapter)
-+{
-+	return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
-+	    I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
-+	    I2C_FUNC_SMBUS_BLOCK_DATA;
-+}
-+
-+static const struct i2c_algorithm smbus_algorithm = {
-+	.smbus_xfer	= nct6775_access,
-+	.functionality	= nct6775_func,
-+};
-+
-+static int nct6775_add_adapter(unsigned short smba, const char *name, struct i2c_adapter **padap)
-+{
-+	struct i2c_adapter *adap;
-+	struct i2c_nct6775_adapdata *adapdata;
-+	int retval;
-+
-+	adap = kzalloc(sizeof(*adap), GFP_KERNEL);
-+	if (adap == NULL) {
-+		return -ENOMEM;
-+	}
-+
-+	adap->owner = THIS_MODULE;
-+	adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
-+	adap->algo = &smbus_algorithm;
-+
-+	adapdata = kzalloc(sizeof(*adapdata), GFP_KERNEL);
-+	if (adapdata == NULL) {
-+		kfree(adap);
-+		return -ENOMEM;
-+	}
-+
-+	adapdata->smba = smba;
-+
-+	snprintf(adap->name, sizeof(adap->name),
-+		"SMBus NCT67xx adapter%s at %04x", name, smba);
-+
-+	i2c_set_adapdata(adap, adapdata);
-+
-+	retval = i2c_add_adapter(adap);
-+	if (retval) {
-+		kfree(adapdata);
-+		kfree(adap);
-+		return retval;
-+	}
-+
-+	*padap = adap;
-+	return 0;
-+}
-+
-+static void nct6775_remove_adapter(struct i2c_adapter *adap)
-+{
-+	struct i2c_nct6775_adapdata *adapdata = i2c_get_adapdata(adap);
-+
-+	if (adapdata->smba) {
-+		i2c_del_adapter(adap);
-+		kfree(adapdata);
-+		kfree(adap);
-+	}
-+}
-+
-+//static SIMPLE_DEV_PM_OPS(nct6775_dev_pm_ops, nct6775_suspend, nct6775_resume);
-+
-+/*
-+ * when Super-I/O functions move to a separate file, the Super-I/O
-+ * bus will manage the lifetime of the device and this module will only keep
-+ * track of the nct6775 driver. But since we use platform_device_alloc(), we
-+ * must keep track of the device
-+ */
-+static struct platform_device *pdev[2];
-+
-+static int nct6775_probe(struct platform_device *pdev)
-+{
-+	struct device *dev = &pdev->dev;
-+	struct nct6775_sio_data *sio_data = dev_get_platdata(dev);
-+	struct resource *res;
-+
-+	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
-+	if (!devm_request_region(&pdev->dev, res->start, IOREGION_LENGTH,
-+				 DRVNAME))
-+		return -EBUSY;
-+
-+	switch (sio_data->kind) {
-+	case nct6791:
-+	case nct6792:
-+	case nct6793:
-+	case nct6795:
-+	case nct6796:
-+	case nct6798:
-+		nct6775_add_adapter(res->start, "", &nct6775_adapter);
-+		break;
-+	default:
-+		return -ENODEV;
-+	}
-+
-+	return 0;
-+}
-+/*
-+static void nct6791_enable_io_mapping(int sioaddr)
-+{
-+	int val;
-+
-+	val = superio_inb(sioaddr, NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE);
-+	if (val & 0x10) {
-+		pr_info("Enabling hardware monitor logical device mappings.\n");
-+		superio_outb(sioaddr, NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE,
-+			     val & ~0x10);
-+	}
-+}*/
-+
-+static struct platform_driver i2c_nct6775_driver = {
-+	.driver = {
-+		.name	= DRVNAME,
-+//		.pm	= &nct6775_dev_pm_ops,
-+	},
-+	.probe		= nct6775_probe,
-+};
-+
-+static void __exit i2c_nct6775_exit(void)
-+{
-+	int i;
-+
-+	if(nct6775_adapter)
-+		nct6775_remove_adapter(nct6775_adapter);
-+
-+	for (i = 0; i < ARRAY_SIZE(pdev); i++) {
-+		if (pdev[i])
-+			platform_device_unregister(pdev[i]);
-+	}
-+	platform_driver_unregister(&i2c_nct6775_driver);
-+}
-+
-+/* nct6775_find() looks for a '627 in the Super-I/O config space */
-+static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data)
-+{
-+	u16 val;
-+	int err;
-+	int addr;
-+
-+	err = superio_enter(sioaddr);
-+	if (err)
-+		return err;
-+
-+	val = (superio_inb(sioaddr, SIO_REG_DEVID) << 8) |
-+		superio_inb(sioaddr, SIO_REG_DEVID + 1);
-+
-+	switch (val & SIO_ID_MASK) {
-+	case SIO_NCT6106_ID:
-+		sio_data->kind = nct6106;
-+		break;
-+	case SIO_NCT6775_ID:
-+		sio_data->kind = nct6775;
-+		break;
-+	case SIO_NCT6776_ID:
-+		sio_data->kind = nct6776;
-+		break;
-+	case SIO_NCT6779_ID:
-+		sio_data->kind = nct6779;
-+		break;
-+	case SIO_NCT6791_ID:
-+		sio_data->kind = nct6791;
-+		break;
-+	case SIO_NCT6792_ID:
-+		sio_data->kind = nct6792;
-+		break;
-+	case SIO_NCT6793_ID:
-+		sio_data->kind = nct6793;
-+		break;
-+	case SIO_NCT6795_ID:
-+		sio_data->kind = nct6795;
-+		break;
-+	case SIO_NCT6796_ID:
-+		sio_data->kind = nct6796;
-+		break;
-+	case SIO_NCT6798_ID:
-+		sio_data->kind = nct6798;
-+		break;
-+	default:
-+		if (val != 0xffff)
-+			pr_debug("unsupported chip ID: 0x%04x\n", val);
-+		superio_exit(sioaddr);
-+		return -ENODEV;
-+	}
-+
-+	/* We have a known chip, find the SMBus I/O address */
-+	superio_select(sioaddr, NCT6775_LD_SMBUS);
-+	val = (superio_inb(sioaddr, SIO_REG_SMBA) << 8)
-+	    | superio_inb(sioaddr, SIO_REG_SMBA + 1);
-+	addr = val & IOREGION_ALIGNMENT;
-+	if (addr == 0) {
-+		pr_err("Refusing to enable a Super-I/O device with a base I/O port 0\n");
-+		superio_exit(sioaddr);
-+		return -ENODEV;
-+	}
-+
-+	//if (sio_data->kind == nct6791 || sio_data->kind == nct6792 ||
-+	//    sio_data->kind == nct6793 || sio_data->kind == nct6795 ||
-+	//    sio_data->kind == nct6796)
-+	//	nct6791_enable_io_mapping(sioaddr);
-+
-+	superio_exit(sioaddr);
-+	pr_info("Found %s or compatible chip at %#x:%#x\n",
-+		nct6775_sio_names[sio_data->kind], sioaddr, addr);
-+	sio_data->sioreg = sioaddr;
-+
-+	return addr;
-+}
-+
-+static int __init i2c_nct6775_init(void)
-+{
-+	int i, err;
-+	bool found = false;
-+	int address;
-+	struct resource res;
-+	struct nct6775_sio_data sio_data;
-+	int sioaddr[2] = { 0x2e, 0x4e };
-+
-+	err = platform_driver_register(&i2c_nct6775_driver);
-+	if (err)
-+		return err;
-+
-+	/*
-+	 * initialize sio_data->kind and sio_data->sioreg.
-+	 *
-+	 * when Super-I/O functions move to a separate file, the Super-I/O
-+	 * driver will probe 0x2e and 0x4e and auto-detect the presence of a
-+	 * nct6775 hardware monitor, and call probe()
-+	 */
-+	for (i = 0; i < ARRAY_SIZE(pdev); i++) {
-+		address = nct6775_find(sioaddr[i], &sio_data);
-+		if (address <= 0)
-+			continue;
-+
-+		found = true;
-+
-+		pdev[i] = platform_device_alloc(DRVNAME, address);
-+		if (!pdev[i]) {
-+			err = -ENOMEM;
-+			goto exit_device_unregister;
-+		}
-+
-+		err = platform_device_add_data(pdev[i], &sio_data,
-+					       sizeof(struct nct6775_sio_data));
-+		if (err)
-+			goto exit_device_put;
-+
-+		memset(&res, 0, sizeof(res));
-+		res.name = DRVNAME;
-+		res.start = address;
-+		res.end = address + IOREGION_LENGTH - 1;
-+		res.flags = IORESOURCE_IO;
-+
-+		err = acpi_check_resource_conflict(&res);
-+		if (err) {
-+			platform_device_put(pdev[i]);
-+			pdev[i] = NULL;
-+			continue;
-+		}
-+
-+		err = platform_device_add_resources(pdev[i], &res, 1);
-+		if (err)
-+			goto exit_device_put;
-+
-+		/* platform_device_add calls probe() */
-+		err = platform_device_add(pdev[i]);
-+		if (err)
-+			goto exit_device_put;
-+	}
-+	if (!found) {
-+		err = -ENODEV;
-+		goto exit_unregister;
-+	}
-+
-+	return 0;
-+
-+exit_device_put:
-+	platform_device_put(pdev[i]);
-+exit_device_unregister:
-+	while (--i >= 0) {
-+		if (pdev[i])
-+			platform_device_unregister(pdev[i]);
-+	}
-+exit_unregister:
-+	platform_driver_unregister(&i2c_nct6775_driver);
-+	return err;
-+}
-+
-+MODULE_AUTHOR("Adam Honse <calcprogrammer1@gmail.com>");
-+MODULE_DESCRIPTION("SMBus driver for NCT6775F and compatible chips");
-+MODULE_LICENSE("GPL");
-+
-+module_init(i2c_nct6775_init);
-+module_exit(i2c_nct6775_exit);
-diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
-index 30ded6422e7b..e25ce84c26af 100644
---- a/drivers/i2c/busses/i2c-piix4.c
-+++ b/drivers/i2c/busses/i2c-piix4.c
-@@ -467,11 +467,11 @@ static int piix4_transaction(struct i2c_adapter *piix4_adapter)
- 	if (srvrworks_csb5_delay) /* Extra delay for SERVERWORKS_CSB5 */
- 		usleep_range(2000, 2100);
- 	else
--		usleep_range(250, 500);
-+		usleep_range(25, 50);
-
- 	while ((++timeout < MAX_TIMEOUT) &&
- 	       ((temp = inb_p(SMBHSTSTS)) & 0x01))
--		usleep_range(250, 500);
-+		usleep_range(25, 50);
-
- 	/* If the SMBus is still busy, we give up */
- 	if (timeout == MAX_TIMEOUT) {
diff --git a/patches/nobara/amdgpu-si-cik-default.patch b/patches/nobara/amdgpu-si-cik-default.patch
deleted file mode 100644
index d2d3178..0000000
--- a/patches/nobara/amdgpu-si-cik-default.patch
+++ /dev/null
@@ -1,70 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Jan200101 <sentrycraft123@gmail.com>
-Date: Mon, 27 Nov 2023 09:53:59 +0100
-Subject: [PATCH] drm/amdgpu: enable SI and CIK support by default
-
-Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
----
- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 ----------
- drivers/gpu/drm/radeon/radeon_drv.c     | 10 ++++++++++
- 2 files changed, 10 insertions(+), 10 deletions(-)
-
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
-index 81edf66dbea8..5021d03089ff 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
-@@ -582,13 +582,8 @@ module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644);
-  */
- #ifdef CONFIG_DRM_AMDGPU_SI
-
--#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE)
--int amdgpu_si_support = 0;
--MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))");
--#else
- int amdgpu_si_support = 1;
- MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)");
--#endif
-
- module_param_named(si_support, amdgpu_si_support, int, 0444);
- #endif
-@@ -601,13 +596,8 @@ module_param_named(si_support, amdgpu_si_support, int, 0444);
-  */
- #ifdef CONFIG_DRM_AMDGPU_CIK
-
--#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE)
--int amdgpu_cik_support = 0;
--MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))");
--#else
- int amdgpu_cik_support = 1;
- MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)");
--#endif
-
- module_param_named(cik_support, amdgpu_cik_support, int, 0444);
- #endif
-diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
-index 7bf08164140e..865f186f48c4 100644
---- a/drivers/gpu/drm/radeon/radeon_drv.c
-+++ b/drivers/gpu/drm/radeon/radeon_drv.c
-@@ -239,12 +239,22 @@ module_param_named(uvd, radeon_uvd, int, 0444);
- MODULE_PARM_DESC(vce, "vce enable/disable vce support (1 = enable, 0 = disable)");
- module_param_named(vce, radeon_vce, int, 0444);
-
-+#ifdef CONFIG_DRM_AMDGPU_SI
-+int radeon_si_support = 0;
-+MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))");
-+#else
- int radeon_si_support = 1;
- MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)");
-+#endif
- module_param_named(si_support, radeon_si_support, int, 0444);
-
-+#ifdef CONFIG_DRM_AMDGPU_CIK
-+int radeon_cik_support = 0;
-+MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))");
-+#else
- int radeon_cik_support = 1;
- MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)");
-+#endif
- module_param_named(cik_support, radeon_cik_support, int, 0444);
-
- static struct pci_device_id pciidlist[] = {
diff --git a/patches/nobara/lenovo-legion-laptop.patch b/patches/nobara/lenovo-legion-laptop.patch
deleted file mode 100644
index a2bb5c8..0000000
--- a/patches/nobara/lenovo-legion-laptop.patch
+++ /dev/null
@@ -1,6143 +0,0 @@
-From 26077d270f462eaf3da592ed047956df3436ed36 Mon Sep 17 00:00:00 2001
-From: John Martens <john.martens4@proton.me>
-Date: Fri, 29 Mar 2024 20:18:47 +0000
-Subject: [PATCH] Add legion-laptop v0.0.12
-
-Add extra support for Lenovo Legion laptops.
----
- drivers/platform/x86/Kconfig         |   10 +
- drivers/platform/x86/Makefile        |    1 +
- drivers/platform/x86/legion-laptop.c | 6089 ++++++++++++++++++++++++++
- 3 files changed, 6100 insertions(+)
- create mode 100644 drivers/platform/x86/legion-laptop.c
-
-diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
-index 49c2c4cd8..b7d70c20e 100644
---- a/drivers/platform/x86/Kconfig
-+++ b/drivers/platform/x86/Kconfig
-@@ -643,6 +643,16 @@ config THINKPAD_LMI
- 	  To compile this driver as a module, choose M here: the module will
- 	  be called think-lmi.
- 
-+config LEGION_LAPTOP
-+	tristate "Lenovo Legion Laptop Extras"
-+	depends on ACPI
-+	depends on ACPI_WMI || ACPI_WMI = n
-+	depends on HWMON || HWMON = n
-+	select ACPI_PLATFORM_PROFILE
-+	help
-+	  This is a driver for Lenovo Legion laptops and contains drivers for
-+	  hotkey, fan control, and power mode.
-+
- source "drivers/platform/x86/intel/Kconfig"
- 
- config MSI_EC
-diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
-index 52dfdf574..5f32dd9df 100644
---- a/drivers/platform/x86/Makefile
-+++ b/drivers/platform/x86/Makefile
-@@ -65,6 +65,7 @@ obj-$(CONFIG_LENOVO_YMC)	+= lenovo-ymc.o
- obj-$(CONFIG_SENSORS_HDAPS)	+= hdaps.o
- obj-$(CONFIG_THINKPAD_ACPI)	+= thinkpad_acpi.o
- obj-$(CONFIG_THINKPAD_LMI)	+= think-lmi.o
-+obj-$(CONFIG_LEGION_LAPTOP)	+= legion-laptop.o
- obj-$(CONFIG_YOGABOOK)		+= lenovo-yogabook.o
- 
- # Intel
-diff --git a/drivers/platform/x86/legion-laptop.c b/drivers/platform/x86/legion-laptop.c
-new file mode 100644
-index 000000000..5ec0a518f
---- /dev/null
-+++ b/drivers/platform/x86/legion-laptop.c
-@@ -0,0 +1,6089 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ *  legion-laptop.c - Extra Lenovo Legion laptop support, in
-+ *   particular for fan curve control and power mode.
-+ *
-+ *  Copyright (C) 2022 johnfan <johnfan (at) example (dot) com>
-+ *
-+ *
-+ *  This driver might work on other Lenovo Legion models. If you
-+ *  want to try it you can pass force=1 as argument
-+ *  to the module which will force it to load even when the DMI
-+ *  data doesn't match the model AND FIRMWARE.
-+ *
-+ *  Support for other hardware of this model is already partially
-+ *  provided by the module ideapad-laptop.
-+ *
-+ *  The development page for this driver is located at
-+ *  https://github.com/johnfanv2/LenovoLegionLinux
-+ *
-+ *  This driver exports the files:
-+ *    - /sys/kernel/debug/legion/fancurve (ro)
-+ *        The fan curve stored in the firmware in the form of a
-+ *        human readable table.
-+ *
-+ *    - /sys/module/legion_laptop/drivers/platform\:legion/PNP0C09\:00/powermode (rw)
-+ *       0: balanced mode (white)
-+ *       1: performance mode (red)
-+ *       2: quiet mode (blue)
-+ *       ?: custom mode (pink)
-+ *
-+ *  NOTE: Writing to this will load the default fan curve from
-+ *        the firmware for this mode, so the fan curve might
-+ *        have to be reconfigured if needed.
-+ *
-+ *  It implements the usual hwmon interface to monitor fan speed and temmperature
-+ *  and allows to set the fan curve inside the firware.
-+ *
-+ *    - /sys/class/hwmon/X/fan1_input or /sys/class/hwmon/X/fan2_input  (ro)
-+ *        Current fan speed of fan1/fan2.
-+ *    - /sys/class/hwmon/X/temp1_input (ro)
-+ *    - /sys/class/hwmon/X/temp2_input (ro)
-+ *    - /sys/class/hwmon/X/temp3_input (ro)
-+ *        Temperature (Celsius) of CPU, GPU, and IC used for fan control.
-+ *    - /sys/class/hwmon/X/pwmY_auto_pointZ_pwm (rw)
-+ *          PWM (0-255) of the fan at the Y-level in the fan curve
-+ *    - /sys/class/hwmon/X/pwmY_auto_pointZ_temp (rw)
-+ *          upper temperature of tempZ (CPU, GPU, or IC) at the Y-level in the fan curve
-+ *    - /sys/class/hwmon/X/pwmY_auto_pointZ_temp_hyst (rw)
-+ *          hysteris (CPU, GPU, or IC) at the Y-level in the fan curve. The lower
-+ *          temperatue of the level is the upper temperature minus the hysteris
-+ *
-+ *
-+ *  Credits for reverse engineering the firmware to:
-+ *      - David Woodhouse: heavily inspired by lenovo_laptop.c
-+ *      - Luke Cama: Windows version "LegionFanControl"
-+ *      - SmokelessCPU: reverse engineering of custom registers in EC
-+ *                      and commincation method with EC via ports
-+ *      - 0x1F9F1: additional reverse engineering for complete fan curve
-+ */
-+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-+
-+#include <linux/acpi.h>
-+#include <asm/io.h>
-+#include <linux/debugfs.h>
-+#include <linux/delay.h>
-+#include <linux/dmi.h>
-+#include <linux/leds.h>
-+#include <linux/hwmon.h>
-+#include <linux/hwmon-sysfs.h>
-+#include <linux/kernel.h>
-+#include <linux/module.h>
-+#include <linux/moduleparam.h>
-+#include <linux/platform_device.h>
-+#include <linux/platform_profile.h>
-+#include <linux/types.h>
-+#include <linux/wmi.h>
-+
-+MODULE_LICENSE("GPL");
-+MODULE_AUTHOR("johnfan");
-+MODULE_DESCRIPTION("Lenovo Legion laptop extras");
-+
-+static bool force;
-+module_param(force, bool, 0440);
-+MODULE_PARM_DESC(
-+	force,
-+	"Force loading this module even if model or BIOS does not match.");
-+
-+static bool ec_readonly;
-+module_param(ec_readonly, bool, 0440);
-+MODULE_PARM_DESC(
-+	ec_readonly,
-+	"Only read from embedded controller but do not write or change settings.");
-+
-+static bool enable_platformprofile = true;
-+module_param(enable_platformprofile, bool, 0440);
-+MODULE_PARM_DESC(
-+	enable_platformprofile,
-+	"Enable the platform profile sysfs API to read and write the power mode.");
-+
-+#define LEGIONFEATURES \
-+	"fancurve powermode platformprofile platformprofilenotify minifancurve"
-+
-+//Size of fancurve stored in embedded controller
-+#define MAXFANCURVESIZE 10
-+
-+#define LEGION_DRVR_SHORTNAME "legion"
-+#define LEGION_HWMON_NAME LEGION_DRVR_SHORTNAME "_hwmon"
-+
-+struct legion_private;
-+
-+/* =============================== */
-+/* Embedded Controller Description */
-+/* =============================== */
-+
-+/* The configuration and registers to access the embedded controller
-+ * depending on different the version of the software on the
-+ * embedded controller or and the BIOS/UEFI firmware.
-+ *
-+ * To control fan curve in the embedded controller (EC) one has to
-+ * write to its "RAM". There are different possibilities:
-+ *  - EC RAM is memory mapped (write to it with ioremap)
-+ *  - access EC RAM via ported mapped IO (outb/inb)
-+ *  - access EC RAM via ACPI methods. It is only possible to write
-+ *    to part of it (first 0xFF bytes?)
-+ *
-+ * In later models the firmware directly exposes ACPI methods to
-+ * set the fan curve directly, without writing to EC RAM. This
-+ * is done inside the ACPI method.
-+ */
-+
-+/**
-+ * Offsets for interesting values inside the EC RAM  (0 = start of
-+ * EC RAM) These might change depending on the software inside of
-+ * the EC, which can be updated by a BIOS update from Lenovo.
-+ */
-+// TODO: same order as in initialization
-+struct ec_register_offsets {
-+	// Super I/O Configuration Registers
-+	// 7.15 General Control (GCTRL)
-+	// General Control (GCTRL)
-+	// (see EC Interface Registers  and 6.2 Plug and Play Configuration (PNPCFG)) in datasheet
-+	// note: these are in two places saved
-+	// in EC Interface Registers  and in super io configuration registers
-+	// Chip ID
-+	u16 ECHIPID1;
-+	u16 ECHIPID2;
-+	// Chip Version
-+	u16 ECHIPVER;
-+	u16 ECDEBUG;
-+
-+	// Lenovo Custom OEM extension
-+	// Firmware of ITE can be extended by
-+	// custom program using its own "variables"
-+	// These are the offsets to these "variables"
-+	u16 EXT_FAN_CUR_POINT;
-+	u16 EXT_FAN_POINTS_SIZE;
-+	u16 EXT_FAN1_BASE;
-+	u16 EXT_FAN2_BASE;
-+	u16 EXT_FAN_ACC_BASE;
-+	u16 EXT_FAN_DEC_BASE;
-+	u16 EXT_CPU_TEMP;
-+	u16 EXT_CPU_TEMP_HYST;
-+	u16 EXT_GPU_TEMP;
-+	u16 EXT_GPU_TEMP_HYST;
-+	u16 EXT_VRM_TEMP;
-+	u16 EXT_VRM_TEMP_HYST;
-+	u16 EXT_FAN1_RPM_LSB;
-+	u16 EXT_FAN1_RPM_MSB;
-+	u16 EXT_FAN2_RPM_LSB;
-+	u16 EXT_FAN2_RPM_MSB;
-+	u16 EXT_FAN1_TARGET_RPM;
-+	u16 EXT_FAN2_TARGET_RPM;
-+	u16 EXT_POWERMODE;
-+	u16 EXT_MINIFANCURVE_ON_COOL;
-+	// values
-+	// 0x04: enable mini fan curve if left for too long on cool level
-+	//      - this might be due to potential temp failure
-+	//      - or just because of really cool temps
-+	// 0xA0: disable it
-+	u16 EXT_LOCKFANCONTROLLER;
-+	u16 EXT_MAXIMUMFANSPEED;
-+	u16 EXT_WHITE_KEYBOARD_BACKLIGHT;
-+	u16 EXT_IC_TEMP_INPUT;
-+	u16 EXT_CPU_TEMP_INPUT;
-+	u16 EXT_GPU_TEMP_INPUT;
-+};
-+
-+enum access_method {
-+	ACCESS_METHOD_NO_ACCESS = 0,
-+	ACCESS_METHOD_EC = 1,
-+	ACCESS_METHOD_ACPI = 2,
-+	ACCESS_METHOD_WMI = 3,
-+	ACCESS_METHOD_WMI2 = 4,
-+	ACCESS_METHOD_WMI3 = 5,
-+	ACCESS_METHOD_EC2 = 10, // ideapad fancurve method
-+	ACCESS_METHOD_EC3 = 11, // loq
-+};
-+
-+struct model_config {
-+	const struct ec_register_offsets *registers;
-+	bool check_embedded_controller_id;
-+	u16 embedded_controller_id;
-+
-+	// first addr in EC we access/scan
-+	phys_addr_t memoryio_physical_ec_start;
-+	size_t memoryio_size;
-+
-+	// TODO: maybe use bitfield
-+	bool has_minifancurve;
-+	bool has_custom_powermode;
-+	enum access_method access_method_powermode;
-+
-+	enum access_method access_method_keyboard;
-+	enum access_method access_method_temperature;
-+	enum access_method access_method_fanspeed;
-+	enum access_method access_method_fancurve;
-+	enum access_method access_method_fanfullspeed;
-+	bool three_state_keyboard;
-+
-+	bool acpi_check_dev;
-+
-+	phys_addr_t ramio_physical_start;
-+	size_t ramio_size;
-+};
-+
-+/* =================================== */
-+/* Configuration for different models */
-+/* =================================== */
-+
-+// Idea by SmokelesssCPU (modified)
-+// - all default names and register addresses are supported by datasheet
-+// - register addresses for custom firmware by SmokelesssCPU
-+static const struct ec_register_offsets ec_register_offsets_v0 = {
-+	.ECHIPID1 = 0x2000,
-+	.ECHIPID2 = 0x2001,
-+	.ECHIPVER = 0x2002,
-+	.ECDEBUG = 0x2003,
-+	.EXT_FAN_CUR_POINT = 0xC534,
-+	.EXT_FAN_POINTS_SIZE = 0xC535,
-+	.EXT_FAN1_BASE = 0xC540,
-+	.EXT_FAN2_BASE = 0xC550,
-+	.EXT_FAN_ACC_BASE = 0xC560,
-+	.EXT_FAN_DEC_BASE = 0xC570,
-+	.EXT_CPU_TEMP = 0xC580,
-+	.EXT_CPU_TEMP_HYST = 0xC590,
-+	.EXT_GPU_TEMP = 0xC5A0,
-+	.EXT_GPU_TEMP_HYST = 0xC5B0,
-+	.EXT_VRM_TEMP = 0xC5C0,
-+	.EXT_VRM_TEMP_HYST = 0xC5D0,
-+	.EXT_FAN1_RPM_LSB = 0xC5E0,
-+	.EXT_FAN1_RPM_MSB = 0xC5E1,
-+	.EXT_FAN2_RPM_LSB = 0xC5E2,
-+	.EXT_FAN2_RPM_MSB = 0xC5E3,
-+	.EXT_MINIFANCURVE_ON_COOL = 0xC536,
-+	.EXT_LOCKFANCONTROLLER = 0xc4AB,
-+	.EXT_CPU_TEMP_INPUT = 0xc538,
-+	.EXT_GPU_TEMP_INPUT = 0xc539,
-+	.EXT_IC_TEMP_INPUT = 0xC5E8,
-+	.EXT_POWERMODE = 0xc420,
-+	.EXT_FAN1_TARGET_RPM = 0xc600,
-+	.EXT_FAN2_TARGET_RPM = 0xc601,
-+	.EXT_MAXIMUMFANSPEED = 0xBD,
-+	.EXT_WHITE_KEYBOARD_BACKLIGHT = (0x3B + 0xC400)
-+};
-+
-+static const struct ec_register_offsets ec_register_offsets_v1 = {
-+	.ECHIPID1 = 0x2000,
-+	.ECHIPID2 = 0x2001,
-+	.ECHIPVER = 0x2002,
-+	.ECDEBUG = 0x2003,
-+	.EXT_FAN_CUR_POINT = 0xC534,
-+	.EXT_FAN_POINTS_SIZE = 0xC535,
-+	.EXT_FAN1_BASE = 0xC540,
-+	.EXT_FAN2_BASE = 0xC550,
-+	.EXT_FAN_ACC_BASE = 0xC560,
-+	.EXT_FAN_DEC_BASE = 0xC570,
-+	.EXT_CPU_TEMP = 0xC580,
-+	.EXT_CPU_TEMP_HYST = 0xC590,
-+	.EXT_GPU_TEMP = 0xC5A0,
-+	.EXT_GPU_TEMP_HYST = 0xC5B0,
-+	.EXT_VRM_TEMP = 0xC5C0,
-+	.EXT_VRM_TEMP_HYST = 0xC5D0,
-+	.EXT_FAN1_RPM_LSB = 0xC5E0,
-+	.EXT_FAN1_RPM_MSB = 0xC5E1,
-+	.EXT_FAN2_RPM_LSB = 0xC5E2,
-+	.EXT_FAN2_RPM_MSB = 0xC5E3,
-+	.EXT_MINIFANCURVE_ON_COOL = 0xC536,
-+	.EXT_LOCKFANCONTROLLER = 0xc4AB,
-+	.EXT_CPU_TEMP_INPUT = 0xc538,
-+	.EXT_GPU_TEMP_INPUT = 0xc539,
-+	.EXT_IC_TEMP_INPUT = 0xC5E8,
-+	.EXT_POWERMODE = 0xc41D,
-+	.EXT_FAN1_TARGET_RPM = 0xc600,
-+	.EXT_FAN2_TARGET_RPM = 0xc601,
-+	.EXT_MAXIMUMFANSPEED = 0xBD,
-+	.EXT_WHITE_KEYBOARD_BACKLIGHT = (0x3B + 0xC400)
-+};
-+
-+static const struct ec_register_offsets ec_register_offsets_ideapad_v0 = {
-+	.ECHIPID1 = 0x2000,
-+	.ECHIPID2 = 0x2001,
-+	.ECHIPVER = 0x2002,
-+	.ECDEBUG = 0x2003,
-+	.EXT_FAN_CUR_POINT = 0xC5a0, // not found yet
-+	.EXT_FAN_POINTS_SIZE = 0xC5a0, // constant 0
-+	.EXT_FAN1_BASE = 0xC5a0,
-+	.EXT_FAN2_BASE = 0xC5a8,
-+	.EXT_FAN_ACC_BASE = 0xC5a0, // not found yet
-+	.EXT_FAN_DEC_BASE = 0xC5a0, // not found yet
-+	.EXT_CPU_TEMP = 0xC550, // and repeated after 8 bytes
-+	.EXT_CPU_TEMP_HYST = 0xC590, // and repeated after 8 bytes
-+	.EXT_GPU_TEMP = 0xC5C0, // and repeated after 8 bytes
-+	.EXT_GPU_TEMP_HYST = 0xC5D0, // and repeated after 8 bytes
-+	.EXT_VRM_TEMP = 0xC5a0, // does not exists or not found
-+	.EXT_VRM_TEMP_HYST = 0xC5a0, // does not exists ot not found yet
-+	.EXT_FAN1_RPM_LSB = 0xC5a0, // not found yet
-+	.EXT_FAN1_RPM_MSB = 0xC5a0, // not found yet
-+	.EXT_FAN2_RPM_LSB = 0xC5a0, // not found yet
-+	.EXT_FAN2_RPM_MSB = 0xC5a0, // not found yet
-+	.EXT_MINIFANCURVE_ON_COOL = 0xC5a0, // does not exists or not found
-+	.EXT_LOCKFANCONTROLLER = 0xC5a0, // does not exists or not found
-+	.EXT_CPU_TEMP_INPUT = 0xC5a0, // not found yet
-+	.EXT_GPU_TEMP_INPUT = 0xC5a0, // not found yet
-+	.EXT_IC_TEMP_INPUT = 0xC5a0, // not found yet
-+	.EXT_POWERMODE = 0xC5a0, // not found yet
-+	.EXT_FAN1_TARGET_RPM = 0xC5a0, // not found yet
-+	.EXT_FAN2_TARGET_RPM = 0xC5a0, // not found yet
-+	.EXT_MAXIMUMFANSPEED = 0xC5a0, // not found yet
-+	.EXT_WHITE_KEYBOARD_BACKLIGHT = 0xC5a0 // not found yet
-+};
-+
-+static const struct ec_register_offsets ec_register_offsets_ideapad_v1 = {
-+	.ECHIPID1 = 0x2000,
-+	.ECHIPID2 = 0x2001,
-+	.ECHIPVER = 0x2002,
-+	.ECDEBUG = 0x2003,
-+	.EXT_FAN_CUR_POINT = 0xC5a0, // not found yet
-+	.EXT_FAN_POINTS_SIZE = 0xC5a0, // constant 0
-+	.EXT_FAN1_BASE = 0xC5a0,
-+	.EXT_FAN2_BASE = 0xC5a8,
-+	.EXT_FAN_ACC_BASE = 0xC5a0, // not found yet
-+	.EXT_FAN_DEC_BASE = 0xC5a0, // not found yet
-+	.EXT_CPU_TEMP = 0xC550, // and repeated after 8 bytes
-+	.EXT_CPU_TEMP_HYST = 0xC590, // and repeated after 8 bytes
-+	.EXT_GPU_TEMP = 0xC5C0, // and repeated after 8 bytes
-+	.EXT_GPU_TEMP_HYST = 0xC5D0, // and repeated after 8 bytes
-+	.EXT_VRM_TEMP = 0xC5a0, // does not exists or not found
-+	.EXT_VRM_TEMP_HYST = 0xC5a0, // does not exists ot not found yet
-+	.EXT_FAN1_RPM_LSB = 0xC5a0, // not found yet
-+	.EXT_FAN1_RPM_MSB = 0xC5a0, // not found yet
-+	.EXT_FAN2_RPM_LSB = 0xC5a0, // not found yet
-+	.EXT_FAN2_RPM_MSB = 0xC5a0, // not found yet
-+	.EXT_MINIFANCURVE_ON_COOL = 0xC5a0, // does not exists or not found
-+	.EXT_LOCKFANCONTROLLER = 0xC5a0, // does not exists or not found
-+	.EXT_CPU_TEMP_INPUT = 0xC5a0, // not found yet
-+	.EXT_GPU_TEMP_INPUT = 0xC5a0, // not found yet
-+	.EXT_IC_TEMP_INPUT = 0xC5a0, // not found yet
-+	.EXT_POWERMODE = 0xC5a0, // not found yet
-+	.EXT_FAN1_TARGET_RPM = 0xC5a0, // not found yet
-+	.EXT_FAN2_TARGET_RPM = 0xC5a0, // not found yet
-+	.EXT_MAXIMUMFANSPEED = 0xC5a0, // not found yet
-+	.EXT_WHITE_KEYBOARD_BACKLIGHT = 0xC5a0 // not found yet
-+};
-+
-+static const struct ec_register_offsets ec_register_offsets_loq_v0 = {
-+	.ECHIPID1 = 0x2000,
-+	.ECHIPID2 = 0x2001,
-+	.ECHIPVER = 0x2002,
-+	.ECDEBUG = 0x2003,
-+	.EXT_FAN_CUR_POINT = 0xC5a0,
-+	.EXT_FAN_POINTS_SIZE = 0xC5a0, // constant 0
-+	.EXT_FAN1_BASE = 0xC530,
-+	.EXT_FAN2_BASE = 0xC530, // same rpm as cpu
-+	.EXT_FAN_ACC_BASE = 0xC5a0, // not found yet
-+	.EXT_FAN_DEC_BASE = 0xC5a0, // not found yet
-+	.EXT_CPU_TEMP = 0xC52F,
-+	.EXT_CPU_TEMP_HYST = 0xC5a0, // not found yet
-+	.EXT_GPU_TEMP = 0xC531,
-+	.EXT_GPU_TEMP_HYST = 0xC5a0, // not found yet
-+	.EXT_VRM_TEMP = 0xC5a0, // not found yet
-+	.EXT_VRM_TEMP_HYST = 0xC5a0, // not found yet
-+	.EXT_FAN1_RPM_LSB = 0xC5a0, // not found yet
-+	.EXT_FAN1_RPM_MSB = 0xC5a0, // not found yet
-+	.EXT_FAN2_RPM_LSB = 0xC5a0, // not found yet
-+	.EXT_FAN2_RPM_MSB = 0xC5a0, // not found yet
-+	.EXT_MINIFANCURVE_ON_COOL = 0xC5a0, // not found yet
-+	.EXT_LOCKFANCONTROLLER = 0xC5a0, // not found yet
-+	.EXT_CPU_TEMP_INPUT = 0xC5a0, // not found yet
-+	.EXT_GPU_TEMP_INPUT = 0xC5a0, // not found yet
-+	.EXT_IC_TEMP_INPUT = 0xC5a0, // not found yet
-+	.EXT_POWERMODE = 0xc41D,
-+	.EXT_FAN1_TARGET_RPM = 0xC5a0, // not found yet
-+	.EXT_FAN2_TARGET_RPM = 0xC5a0, // not found yet
-+	.EXT_MAXIMUMFANSPEED = 0xC5a0, // not found yet
-+	.EXT_WHITE_KEYBOARD_BACKLIGHT = 0xC5a0 // not found yet
-+};
-+
-+static const struct model_config model_v0 = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_EC,
-+	.access_method_temperature = ACCESS_METHOD_EC,
-+	.access_method_fancurve = ACCESS_METHOD_EC,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = true,
-+	.ramio_physical_start = 0xFE00D400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_j2cn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_EC,
-+	.access_method_temperature = ACCESS_METHOD_EC,
-+	.access_method_fancurve = ACCESS_METHOD_EC,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = true,
-+	.ramio_physical_start = 0xFE00D400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_9vcn = {
-+	.registers = &ec_register_offsets_ideapad_v1,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x8226,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_WMI,
-+	.access_method_temperature = ACCESS_METHOD_WMI,
-+	.access_method_fancurve = ACCESS_METHOD_EC2,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = false,
-+	.ramio_physical_start = 0xFE00D400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_v2022 = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_EC,
-+	.access_method_temperature = ACCESS_METHOD_EC,
-+	.access_method_fancurve = ACCESS_METHOD_EC,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = true,
-+	.ramio_physical_start = 0xFE00D400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_4gcn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x8226,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_EC,
-+	.access_method_temperature = ACCESS_METHOD_EC,
-+	.access_method_fancurve = ACCESS_METHOD_EC,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = true,
-+	.ramio_physical_start = 0xFE00D400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_bvcn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = false,
-+	.embedded_controller_id = 0x8226,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_WMI,
-+	.access_method_temperature = ACCESS_METHOD_WMI,
-+	.access_method_fancurve = ACCESS_METHOD_NO_ACCESS,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = false,
-+	.ramio_physical_start = 0xFC7E0800,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_bhcn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x8226,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = false,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_ACPI,
-+	.access_method_fanspeed = ACCESS_METHOD_WMI,
-+	.access_method_temperature = ACCESS_METHOD_ACPI,
-+	.access_method_fancurve = ACCESS_METHOD_EC,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = true,
-+	.ramio_physical_start = 0xFF00D400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_kwcn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x5507,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = false,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_WMI3,
-+	.access_method_temperature = ACCESS_METHOD_WMI3,
-+	.access_method_fancurve = ACCESS_METHOD_WMI3,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = true,
-+	.ramio_physical_start = 0xFE0B0400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_m0cn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x5507,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_WMI3,
-+	.access_method_temperature = ACCESS_METHOD_WMI3,
-+	.access_method_fancurve = ACCESS_METHOD_WMI3,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = false,
-+	.ramio_physical_start = 0xFE0B0400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_m1cn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x5507,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_WMI3,
-+	.access_method_temperature = ACCESS_METHOD_WMI3,
-+	.access_method_fancurve = ACCESS_METHOD_WMI3,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = false,
-+	.ramio_physical_start = 0xFE0B0400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_m2cn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_WMI3,
-+	.access_method_temperature = ACCESS_METHOD_WMI3,
-+	.access_method_fancurve = ACCESS_METHOD_WMI3,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = false,
-+	.ramio_physical_start = 0xFE0B0400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_m6cn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_WMI3,
-+	.access_method_temperature = ACCESS_METHOD_WMI3,
-+	.access_method_fancurve = ACCESS_METHOD_WMI3,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = false,
-+	.ramio_physical_start = 0xFE0B0400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_k1cn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x5263,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = false,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_WMI3,
-+	.access_method_temperature = ACCESS_METHOD_WMI3,
-+	.access_method_fancurve = ACCESS_METHOD_WMI3,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = true,
-+	.ramio_physical_start = 0xFE0B0400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_lpcn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x5507,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = false,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_WMI3,
-+	.access_method_temperature = ACCESS_METHOD_WMI3,
-+	.access_method_fancurve = ACCESS_METHOD_WMI3,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = true,
-+	.ramio_physical_start = 0xFE0B0400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_kfcn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = false,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_EC,
-+	.access_method_temperature = ACCESS_METHOD_EC,
-+	.access_method_fancurve = ACCESS_METHOD_EC,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = true,
-+	.ramio_physical_start = 0xFE00D400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_hacn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = false,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_EC,
-+	.access_method_temperature = ACCESS_METHOD_EC,
-+	.access_method_fancurve = ACCESS_METHOD_EC,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = true,
-+	.ramio_physical_start = 0xFE00D400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_k9cn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = false,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400, // or replace 0xC400 by 0x0400  ?
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_EC,
-+	.access_method_temperature = ACCESS_METHOD_EC,
-+	.access_method_fancurve = ACCESS_METHOD_EC,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = true,
-+	.ramio_physical_start = 0xFE00D400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_eucn = {
-+	.registers = &ec_register_offsets_v1,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_EC,
-+	.access_method_temperature = ACCESS_METHOD_EC,
-+	.access_method_fancurve = ACCESS_METHOD_EC,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = true,
-+	.ramio_physical_start = 0xFE00D400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_fccn = {
-+	.registers = &ec_register_offsets_ideapad_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = false,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_WMI,
-+	.access_method_temperature = ACCESS_METHOD_ACPI,
-+	.access_method_fancurve = ACCESS_METHOD_EC2,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = true,
-+	.ramio_physical_start = 0xFE00D400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_h3cn = {
-+	//0xFE0B0800
-+	.registers = &ec_register_offsets_v1,
-+	.check_embedded_controller_id = false,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = false,
-+	.has_custom_powermode = false,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	// not implemented (properly) in WMI, RGB conrolled by USB
-+	.access_method_keyboard = ACCESS_METHOD_NO_ACCESS,
-+	// accessing fan speed is not implemented in ACPI
-+	// a variable in the operation region (or not found)
-+	// and not per WMI (methods returns constant 0)
-+	.access_method_fanspeed = ACCESS_METHOD_NO_ACCESS,
-+	.access_method_temperature = ACCESS_METHOD_WMI,
-+	.access_method_fancurve = ACCESS_METHOD_NO_ACCESS,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = false,
-+	.ramio_physical_start = 0xFE0B0800,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_e9cn = {
-+	//0xFE0B0800
-+	.registers = &ec_register_offsets_v1,
-+	.check_embedded_controller_id = false,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400, //0xFC7E0800
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = false,
-+	.has_custom_powermode = false,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	// not implemented (properly) in WMI, RGB conrolled by USB
-+	.access_method_keyboard = ACCESS_METHOD_NO_ACCESS,
-+	// accessing fan speed is not implemented in ACPI
-+	// a variable in the operation region (or not found)
-+	// and not per WMI (methods returns constant 0)
-+	.access_method_fanspeed = ACCESS_METHOD_WMI,
-+	.access_method_temperature = ACCESS_METHOD_WMI,
-+	.access_method_fancurve = ACCESS_METHOD_NO_ACCESS,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = false,
-+	.ramio_physical_start = 0xFC7E0800,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_8jcn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x8226,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_WMI,
-+	.access_method_temperature = ACCESS_METHOD_WMI,
-+	.access_method_fancurve = ACCESS_METHOD_EC,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = false,
-+	.ramio_physical_start = 0xFE00D400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct model_config model_jncn = {
-+	.registers = &ec_register_offsets_v1,
-+	.check_embedded_controller_id = false,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = false,
-+	.has_custom_powermode = false,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_NO_ACCESS,
-+	.access_method_fanspeed = ACCESS_METHOD_WMI,
-+	.access_method_temperature = ACCESS_METHOD_WMI,
-+	.access_method_fancurve = ACCESS_METHOD_NO_ACCESS,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = false,
-+	.ramio_physical_start = 0xFC7E0800,
-+	.ramio_size = 0x600
-+};
-+
-+// Yoga Model!
-+static const struct model_config model_j1cn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_EC,
-+	.access_method_temperature = ACCESS_METHOD_EC,
-+	.access_method_fancurve = ACCESS_METHOD_EC,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = true,
-+	.ramio_physical_start = 0xFE0B0400,
-+	.ramio_size = 0x600
-+};
-+
-+// Yoga Model!
-+static const struct model_config model_dmcn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_EC,
-+	.access_method_temperature = ACCESS_METHOD_EC,
-+	.access_method_fancurve = ACCESS_METHOD_EC,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = true,
-+	.ramio_physical_start = 0xFE700D00,
-+	.ramio_size = 0x600
-+};
-+
-+// Yoga Model!
-+static const struct model_config model_khcn = {
-+	.registers = &ec_register_offsets_v0,
-+	.check_embedded_controller_id = false,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_EC,
-+	.access_method_keyboard = ACCESS_METHOD_WMI,
-+	.access_method_fanspeed = ACCESS_METHOD_EC,
-+	.access_method_temperature = ACCESS_METHOD_EC,
-+	.access_method_fancurve = ACCESS_METHOD_EC,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI,
-+	.acpi_check_dev = false,
-+	.ramio_physical_start = 0xFE0B0400,
-+	.ramio_size = 0x600
-+};
-+
-+// LOQ Model
-+static const struct model_config model_lzcn = {
-+	.registers = &ec_register_offsets_loq_v0,
-+	.check_embedded_controller_id = true,
-+	.embedded_controller_id = 0x8227,
-+	.memoryio_physical_ec_start = 0xC400,
-+	.memoryio_size = 0x300,
-+	.has_minifancurve = true,
-+	.has_custom_powermode = true,
-+	.access_method_powermode = ACCESS_METHOD_WMI,
-+	.access_method_keyboard = ACCESS_METHOD_WMI2,
-+	.access_method_fanspeed = ACCESS_METHOD_WMI3,
-+	.access_method_temperature = ACCESS_METHOD_WMI3,
-+	.access_method_fancurve = ACCESS_METHOD_EC3,
-+	.access_method_fanfullspeed = ACCESS_METHOD_WMI3,
-+	.acpi_check_dev = false,
-+	.ramio_physical_start = 0xFE0B0400,
-+	.ramio_size = 0x600
-+};
-+
-+static const struct dmi_system_id denylist[] = { {} };
-+
-+static const struct dmi_system_id optimistic_allowlist[] = {
-+	{
-+		// Release year: 2021
-+		// Generation: 6
-+		// Name: Legion 5, Legion 5 pro, Legion 7
-+		// Family: Legion 5 15ACH6H, ...
-+		.ident = "GKCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "GKCN"),
-+		},
-+		.driver_data = (void *)&model_v0
-+	},
-+	{
-+		// Release year: 2020
-+		.ident = "EUCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "EUCN"),
-+		},
-+		.driver_data = (void *)&model_eucn
-+	},
-+	{
-+		// Release year: 2020
-+		.ident = "EFCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "EFCN"),
-+		},
-+		.driver_data = (void *)&model_v0
-+	},
-+	{
-+		// Release year: 2020
-+		.ident = "FSCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "FSCN"),
-+		},
-+		.driver_data = (void *)&model_v0
-+	},
-+	{
-+		// Release year: 2021
-+		.ident = "HHCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "HHCN"),
-+		},
-+		.driver_data = (void *)&model_v0
-+	},
-+	{
-+		// Release year: 2022
-+		.ident = "H1CN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "H1CN"),
-+		},
-+		.driver_data = (void *)&model_v0
-+	},
-+	{
-+		// Release year: 2022
-+		.ident = "J2CN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "J2CN"),
-+		},
-+		.driver_data = (void *)&model_v0
-+	},
-+	{
-+		// Release year: 2022
-+		.ident = "JUCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "JUCN"),
-+		},
-+		.driver_data = (void *)&model_v0
-+	},
-+	{
-+		// Release year: 2022
-+		.ident = "KFCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "KFCN"),
-+		},
-+		.driver_data = (void *)&model_kfcn
-+	},
-+	{
-+		// Release year: 2021
-+		.ident = "HACN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "HACN"),
-+		},
-+		.driver_data = (void *)&model_hacn
-+	},
-+	{
-+		// Release year: 2021
-+		.ident = "G9CN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "G9CN"),
-+		},
-+		.driver_data = (void *)&model_v0
-+	},
-+	{
-+		// Release year: 2022
-+		.ident = "K9CN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "K9CN"),
-+		},
-+		.driver_data = (void *)&model_k9cn
-+	},
-+	{
-+		// e.g. IdeaPad Gaming 3 15ARH05
-+		.ident = "FCCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "FCCN"),
-+		},
-+		.driver_data = (void *)&model_fccn
-+	},
-+	{
-+		// e.g. IdeaPad Gaming 3 15ARH05 (8K21)
-+		.ident = "H4CN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "H4CN"),
-+		},
-+		.driver_data = (void *)&model_fccn
-+	},
-+	{
-+		// e.g. Ideapad Gaming 3 15ACH6
-+		.ident = "H3CN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "H3CN"),
-+		},
-+		.driver_data = (void *)&model_h3cn
-+	},
-+	{
-+		// e.g. IdeaPad Gaming 3 15ARH7 (2022)
-+		.ident = "JNCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "JNCN"),
-+		},
-+		.driver_data = (void *)&model_jncn
-+	},
-+	{
-+		// 2020, seems very different in ACPI dissassembly
-+		.ident = "E9CN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "E9CN"),
-+		},
-+		.driver_data = (void *)&model_e9cn
-+	},
-+	{
-+		// e.g. Legion Y7000 (older version)
-+		.ident = "8JCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "8JCN"),
-+		},
-+		.driver_data = (void *)&model_8jcn
-+	},
-+	{
-+		// e.g. Legion 7i Pro 2023
-+		.ident = "KWCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "KWCN"),
-+		},
-+		.driver_data = (void *)&model_kwcn
-+	},
-+	{
-+		// e.g. Legion Pro 5 2023 or R9000P
-+		.ident = "LPCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "LPCN"),
-+		},
-+		.driver_data = (void *)&model_lpcn
-+	},
-+	{
-+		// e.g. Lenovo Legion 5i/Y7000 2019 PG0
-+		.ident = "BHCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "BHCN"),
-+		},
-+		.driver_data = (void *)&model_bhcn
-+	},
-+	{
-+		// e.g. Lenovo 7 16IAX7
-+		.ident = "K1CN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "K1CN"),
-+		},
-+		.driver_data = (void *)&model_k1cn
-+	},
-+	{
-+		// e.g. Legion Y720
-+		.ident = "4GCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "4GCN"),
-+		},
-+		.driver_data = (void *)&model_4gcn
-+	},
-+	{
-+		// e.g. Legion Slim 5 16APH8 2023
-+		.ident = "M3CN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "M3CN"),
-+		},
-+		.driver_data = (void *)&model_lpcn
-+	},
-+	{
-+		// e.g. Legion Y7000p-1060
-+		.ident = "9VCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "9VCN"),
-+		},
-+		.driver_data = (void *)&model_9vcn
-+	},
-+	{
-+		// e.g. Legion Y9000X
-+		.ident = "JYCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "JYCN"),
-+		},
-+		.driver_data = (void *)&model_v2022
-+	},
-+	{
-+		// e.g. Legion Y740-15IRH, older model e.g. with GTX 1660
-+		.ident = "BVCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "BVCN"),
-+		},
-+		.driver_data = (void *)&model_bvcn
-+	},
-+	{
-+		// e.g. Legion 5 Pro 16IAH7H with a RTX 3070 Ti
-+		.ident = "J2CN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "J2CN"),
-+		},
-+		.driver_data = (void *)&model_j2cn
-+	},
-+	{
-+		// e.g. Lenovo Yoga 7 16IAH7 with GPU Intel DG2 Arc A370M
-+		.ident = "J1CN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "J1CN"),
-+		},
-+		.driver_data = (void *)&model_j1cn
-+	},
-+	{
-+		// e.g. Legion Slim 7 16IRH8 (2023) with RTX 4070
-+		.ident = "M0CN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "M0CN"),
-+		},
-+		.driver_data = (void *)&model_m0cn
-+	},
-+	{
-+		// e.g. Legion Slim 7 16IRH8 (2023) AMD Ryzen 7 7840HS with RTX 4060
-+		.ident = "M1CN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "M1CN"),
-+		},
-+		.driver_data = (void *)&model_m1cn
-+	},
-+	{
-+		// e.g. Legion Slim 5 16IRH8 (2023) with RTX 4070
-+		.ident = "M2CN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "M2CN"),
-+		},
-+		.driver_data = (void *)&model_m2cn
-+	},
-+	{
-+		// e.g. Lenovo Yoga Slim 7 gen 8 (2023)
-+		.ident = "M6CN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "M6CN"),
-+		},
-+		.driver_data = (void *)&model_m6cn
-+	},
-+	{
-+		// e.g. Yoga Slim 7-14ARE05
-+		.ident = "DMCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "DMCN"),
-+		},
-+		.driver_data = (void *)&model_dmcn
-+	},
-+	{
-+		// e.g. Yoga Slim 7 Pro 14ARH7
-+		.ident = "KHCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "KHCN"),
-+		},
-+		.driver_data = (void *)&model_khcn
-+	},
-+	{
-+		// e.g. LOQ 15IRH8
-+		.ident = "LZCN",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+			DMI_MATCH(DMI_BIOS_VERSION, "LZCN"),
-+		},
-+		.driver_data = (void *)&model_lzcn
-+	},
-+	{}
-+};
-+
-+/* ================================= */
-+/* ACPI and WMI access               */
-+/* ================================= */
-+
-+// function from ideapad-laptop.c
-+static int eval_int(acpi_handle handle, const char *name, unsigned long *res)
-+{
-+	unsigned long long result;
-+	acpi_status status;
-+
-+	status = acpi_evaluate_integer(handle, (char *)name, NULL, &result);
-+	if (ACPI_FAILURE(status))
-+		return -EIO;
-+
-+	*res = result;
-+
-+	return 0;
-+}
-+
-+// function from ideapad-laptop.c
-+static int exec_simple_method(acpi_handle handle, const char *name,
-+			      unsigned long arg)
-+{
-+	acpi_status status =
-+		acpi_execute_simple_method(handle, (char *)name, arg);
-+
-+	return ACPI_FAILURE(status) ? -EIO : 0;
-+}
-+
-+// function from ideapad-laptop.c
-+static int exec_sbmc(acpi_handle handle, unsigned long arg)
-+{
-+	// \_SB.PCI0.LPC0.EC0.VPC0.SBMC
-+	return exec_simple_method(handle, "VPC0.SBMC", arg);
-+}
-+
-+//static int eval_qcho(acpi_handle handle, unsigned long *res)
-+//{
-+//	// \_SB.PCI0.LPC0.EC0.QCHO
-+//	return eval_int(handle, "QCHO", res);
-+//}
-+
-+static int eval_gbmd(acpi_handle handle, unsigned long *res)
-+{
-+	return eval_int(handle, "VPC0.GBMD", res);
-+}
-+
-+static int eval_spmo(acpi_handle handle, unsigned long *res)
-+{
-+	// \_SB.PCI0.LPC0.EC0.QCHO
-+	return eval_int(handle, "VPC0.BTSM", res);
-+}
-+
-+static int acpi_process_buffer_to_ints(const char *id_name, int id_nr,
-+				       acpi_status status,
-+				       struct acpi_buffer *out_buffer, u8 *res,
-+				       size_t ressize)
-+{
-+	// seto to NULL call kfree on NULL if next function call fails
-+	union acpi_object *out = NULL;
-+	size_t i;
-+	int error = 0;
-+
-+	if (ACPI_FAILURE(status)) {
-+		pr_info("ACPI evaluation error for: %s:%d\n", id_name, id_nr);
-+		error = -EFAULT;
-+		goto err;
-+	}
-+
-+	out = out_buffer->pointer;
-+	if (!out) {
-+		pr_info("Unexpected ACPI result for %s:%d\n", id_name, id_nr);
-+		error = -AE_ERROR;
-+		goto err;
-+	}
-+
-+	if (out->type != ACPI_TYPE_BUFFER || out->buffer.length != ressize) {
-+		pr_info("Unexpected ACPI result for %s:%d: expected type %d but got %d; expected length %lu but got %u;\n",
-+			id_name, id_nr, ACPI_TYPE_BUFFER, out->type, ressize,
-+			out->buffer.length);
-+		error = -AE_ERROR;
-+		goto err;
-+	}
-+
-+// Reduced verbosity (only printing when ACPI result have bad parameters)
-+//	pr_info("ACPI result for %s:%d: ACPI buffer length: %u\n", id_name,
-+//		id_nr, out->buffer.length);
-+
-+	for (i = 0; i < ressize; ++i)
-+		res[i] = out->buffer.pointer[i];
-+	error = 0;
-+
-+err:
-+	kfree(out);
-+	return error;
-+}
-+
-+//static int exec_ints(acpi_handle handle, const char *method_name,
-+//		     struct acpi_object_list *params, u8 *res, size_t ressize)
-+//{
-+//	acpi_status status;
-+//	struct acpi_buffer out_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-+
-+//	status = acpi_evaluate_object(handle, (acpi_string)method_name, params,
-+//				      &out_buffer);
-+
-+//	return acpi_process_buffer_to_ints(method_name, 0, status, &out_buffer,
-+//					   res, ressize);
-+//}
-+
-+static int wmi_exec_ints(const char *guid, u8 instance, u32 method_id,
-+			 const struct acpi_buffer *params, u8 *res,
-+			 size_t ressize)
-+{
-+	acpi_status status;
-+	struct acpi_buffer out_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-+
-+	status = wmi_evaluate_method(guid, instance, method_id, params,
-+				     &out_buffer);
-+	return acpi_process_buffer_to_ints(guid, method_id, status, &out_buffer,
-+					   res, ressize);
-+}
-+
-+static int wmi_exec_int(const char *guid, u8 instance, u32 method_id,
-+			const struct acpi_buffer *params, unsigned long *res)
-+{
-+	acpi_status status;
-+	struct acpi_buffer out_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-+	// set to NULL and call kfree on NULL if next function call fails
-+	union acpi_object *out = NULL;
-+	int error = 0;
-+
-+	status = wmi_evaluate_method(guid, instance, method_id, params,
-+				     &out_buffer);
-+
-+	if (ACPI_FAILURE(status)) {
-+		pr_info("WMI evaluation error for: %s:%d\n", guid, method_id);
-+		error = -EFAULT;
-+		goto err;
-+	}
-+
-+	out = out_buffer.pointer;
-+	if (!out) {
-+		pr_info("Unexpected ACPI result for %s:%d", guid, method_id);
-+		error = -AE_ERROR;
-+		goto err;
-+	}
-+
-+	if (out->type != ACPI_TYPE_INTEGER) {
-+		pr_info("Unexpected ACPI result for %s:%d: expected type %d but got %d\n",
-+			guid, method_id, ACPI_TYPE_INTEGER, out->type);
-+		error = -AE_ERROR;
-+		goto err;
-+	}
-+
-+	*res = out->integer.value;
-+	error = 0;
-+
-+err:
-+	kfree(out);
-+	return error;
-+}
-+
-+static int wmi_exec_noarg_int(const char *guid, u8 instance, u32 method_id,
-+			      unsigned long *res)
-+{
-+	struct acpi_buffer params;
-+
-+	params.length = 0;
-+	params.pointer = NULL;
-+	return wmi_exec_int(guid, instance, method_id, &params, res);
-+}
-+
-+static int wmi_exec_noarg_ints(const char *guid, u8 instance, u32 method_id,
-+			       u8 *res, size_t ressize)
-+{
-+	struct acpi_buffer params;
-+
-+	params.length = 0;
-+	params.pointer = NULL;
-+	return wmi_exec_ints(guid, instance, method_id, &params, res, ressize);
-+}
-+
-+static int wmi_exec_arg(const char *guid, u8 instance, u32 method_id, void *arg,
-+			size_t arg_size)
-+{
-+	struct acpi_buffer params;
-+	acpi_status status;
-+
-+	params.length = arg_size;
-+	params.pointer = arg;
-+	status = wmi_evaluate_method(guid, instance, method_id, &params, NULL);
-+
-+	if (ACPI_FAILURE(status))
-+		return -EIO;
-+	return 0;
-+}
-+
-+/* ================================= */
-+/* Lenovo WMI config                 */
-+/* ================================= */
-+#define LEGION_WMI_GAMEZONE_GUID "887B54E3-DDDC-4B2C-8B88-68A26A8835D0"
-+// GPU over clock
-+#define WMI_METHOD_ID_ISSUPPORTGPUOC 4
-+
-+// Fan speed
-+// only fully implemented for some models here
-+// often implemented in other classes and methods too
-+// below
-+#define WMI_METHOD_ID_GETFAN1SPEED 8
-+#define WMI_METHOD_ID_GETFAN2SPEED 9
-+
-+// Version of ACPI
-+#define WMI_METHOD_ID_GETVERSION 11
-+// Does it support CPU overclock?
-+#define WMI_METHOD_ID_ISSUPPORTCPUOC 14
-+// Temperatures
-+// only fully implemented for some models here
-+// often implemented in other classes and methods too
-+// below
-+#define WMI_METHOD_ID_GETCPUTEMP 18
-+#define WMI_METHOD_ID_GETGPUTEMP 19
-+
-+// two state keyboard light
-+#define WMI_METHOD_ID_GETKEYBOARDLIGHT 37
-+#define WMI_METHOD_ID_SETKEYBOARDLIGHT 36
-+// toggle win key
-+// 0 = win key enabled; 1 = win key disabled
-+#define WMI_METHOD_ID_ISSUPPORTDISABLEWINKEY 21
-+#define WMI_METHOD_ID_GETWINKEYSTATUS 23
-+#define WMI_METHOD_ID_SETWINKEYSTATUS 22
-+// toggle touchpad
-+//0 = touchpad enabled; 1 = touchpad disabled
-+#define WMI_METHOD_ID_ISSUPPORTDISABLETP 24
-+#define WMI_METHOD_ID_GETTPSTATUS 26
-+#define WMI_METHOD_ID_SETTPSTATUS 25
-+// GSync
-+#define WMI_METHOD_ID_ISSUPPORTGSYNC 40
-+#define WMI_METHOD_ID_GETGSYNCSTATUS 41
-+#define WMI_METHOD_ID_SETGSYNCSTATUS 42
-+//smartFanMode = powermode
-+#define WMI_METHOD_ID_ISSUPPORTSMARTFAN 49
-+#define WMI_METHOD_ID_GETSMARTFANMODE 45
-+#define WMI_METHOD_ID_SETSMARTFANMODE 44
-+// power charge mode
-+#define WMI_METHOD_ID_GETPOWERCHARGEMODE 47
-+// overdrive of display to reduce latency
-+// 0=off, 1=on
-+#define WMI_METHOD_ID_ISSUPPORTOD 49
-+#define WMI_METHOD_ID_GETODSTATUS 50
-+#define WMI_METHOD_ID_SETODSTATUS 51
-+// thermal mode = power mode used for cooling
-+#define WMI_METHOD_ID_GETTHERMALMODE 55
-+// get max frequency of core 0
-+#define WMI_METHOD_ID_GETCPUMAXFREQUENCY 60
-+// check if AC adapter has enough power to overclock
-+#define WMI_METHOD_ID_ISACFITFOROC 62
-+// set iGPU (GPU packaged with CPU) state
-+#define WMI_METHOD_ID_ISSUPPORTIGPUMODE 63
-+#define WMI_METHOD_ID_GETIGPUMODESTATUS 64
-+#define WMI_METHOD_ID_SETIGPUMODESTATUS 65
-+#define WMI_METHOD_ID_NOTIFYDGPUSTATUS 66
-+enum IGPUState {
-+	IGPUState_default = 0,
-+	IGPUState_iGPUOnly = 1,
-+	IGPUState_auto = 2
-+};
-+
-+#define WMI_GUID_LENOVO_CPU_METHOD "14afd777-106f-4c9b-b334-d388dc7809be"
-+#define WMI_METHOD_ID_CPU_GET_SUPPORT_OC_STATUS 15
-+#define WMI_METHOD_ID_CPU_GET_OC_STATUS 1
-+#define WMI_METHOD_ID_CPU_SET_OC_STATUS 2
-+
-+// ppt limit slow
-+#define WMI_METHOD_ID_CPU_GET_SHORTTERM_POWERLIMIT 3
-+#define WMI_METHOD_ID_CPU_SET_SHORTTERM_POWERLIMIT 4
-+// ppt stapm
-+#define WMI_METHOD_ID_CPU_GET_LONGTERM_POWERLIMIT 5
-+#define WMI_METHOD_ID_CPU_SET_LONGTERM_POWERLIMIT 6
-+// default power limit
-+#define WMI_METHOD_ID_CPU_GET_DEFAULT_POWERLIMIT 7
-+// peak power limit
-+#define WMI_METHOD_ID_CPU_GET_PEAK_POWERLIMIT 8
-+#define WMI_METHOD_ID_CPU_SET_PEAK_POWERLIMIT 9
-+// apu sppt powerlimit
-+#define WMI_METHOD_ID_CPU_GET_APU_SPPT_POWERLIMIT 12
-+#define WMI_METHOD_ID_CPU_SET_APU_SPPT_POWERLIMIT 13
-+// cross loading powerlimit
-+#define WMI_METHOD_ID_CPU_GET_CROSS_LOADING_POWERLIMIT 16
-+#define WMI_METHOD_ID_CPU_SET_CROSS_LOADING_POWERLIMIT 17
-+
-+#define WMI_GUID_LENOVO_GPU_METHOD "da7547f1-824d-405f-be79-d9903e29ced7"
-+// overclock GPU possible
-+#define WMI_METHOD_ID_GPU_GET_OC_STATUS 1
-+#define WMI_METHOD_ID_GPU_SET_OC_STATUS 2
-+// dynamic boost power
-+#define WMI_METHOD_ID_GPU_GET_PPAB_POWERLIMIT 3
-+#define WMI_METHOD_ID_GPU_SET_PPAB_POWERLIMIT 4
-+// configurable TGP (power)
-+#define WMI_METHOD_ID_GPU_GET_CTGP_POWERLIMIT 5
-+#define WMI_METHOD_ID_GPU_SET_CTGP_POWERLIMIT 6
-+// ppab/ctgp powerlimit
-+#define WMI_METHOD_ID_GPU_GET_DEFAULT_PPAB_CTGP_POWERLIMIT 7
-+// temperature limit
-+#define WMI_METHOD_ID_GPU_GET_TEMPERATURE_LIMIT 8
-+#define WMI_METHOD_ID_GPU_SET_TEMPERATURE_LIMIT 9
-+// boost clock
-+#define WMI_METHOD_ID_GPU_GET_BOOST_CLOCK 10
-+
-+#define WMI_GUID_LENOVO_FAN_METHOD "92549549-4bde-4f06-ac04-ce8bf898dbaa"
-+// set fan to maximal speed; dust cleaning mode
-+// only works in custom power mode
-+#define WMI_METHOD_ID_FAN_GET_FULLSPEED 1
-+#define WMI_METHOD_ID_FAN_SET_FULLSPEED 2
-+// max speed of fan
-+#define WMI_METHOD_ID_FAN_GET_MAXSPEED 3
-+#define WMI_METHOD_ID_FAN_SET_MAXSPEED 4
-+// fan table in custom mode
-+#define WMI_METHOD_ID_FAN_GET_TABLE 5
-+#define WMI_METHOD_ID_FAN_SET_TABLE 6
-+// get speed of fans
-+#define WMI_METHOD_ID_FAN_GETCURRENTFANSPEED 7
-+// get temperatures of CPU and GPU used for controlling cooling
-+#define WMI_METHOD_ID_FAN_GETCURRENTSENSORTEMPERATURE 8
-+
-+// do not implement following
-+// #define WMI_METHOD_ID_Fan_SetCurrentFanSpeed 9
-+
-+#define LEGION_WMI_KBBACKLIGHT_GUID "8C5B9127-ECD4-4657-980F-851019F99CA5"
-+// access the keyboard backlight with 3 states
-+#define WMI_METHOD_ID_KBBACKLIGHTGET 0x1
-+#define WMI_METHOD_ID_KBBACKLIGHTSET 0x2
-+
-+// new method in newer methods to get or set most of the values
-+// with the two methods GetFeatureValue or SetFeatureValue.
-+// They are called like GetFeatureValue(feature_id) where
-+// feature_id is a id for the feature
-+#define LEGION_WMI_LENOVO_OTHER_METHOD_GUID \
-+	"dc2a8805-3a8c-41ba-a6f7-092e0089cd3b"
-+#define WMI_METHOD_ID_GET_FEATURE_VALUE 17
-+#define WMI_METHOD_ID_SET_FEATURE_VALUE 18
-+
-+enum OtherMethodFeature {
-+	OtherMethodFeature_U1 = 0x010000, //->PC00.LPCB.EC0.REJF
-+	OtherMethodFeature_U2 = 0x0F0000, //->C00.PEG1.PXP._STA?
-+	OtherMethodFeature_U3 = 0x030000, //->PC00.LPCB.EC0.FLBT?
-+	OtherMethodFeature_CPU_SHORT_TERM_POWER_LIMIT = 0x01010000,
-+	OtherMethodFeature_CPU_LONG_TERM_POWER_LIMIT = 0x01020000,
-+	OtherMethodFeature_CPU_PEAK_POWER_LIMIT = 0x01030000,
-+	OtherMethodFeature_CPU_TEMPERATURE_LIMIT = 0x01040000,
-+
-+	OtherMethodFeature_APU_PPT_POWER_LIMIT = 0x01050000,
-+
-+	OtherMethodFeature_CPU_CROSS_LOAD_POWER_LIMIT = 0x01060000,
-+	OtherMethodFeature_CPU_L1_TAU = 0x01070000,
-+
-+	OtherMethodFeature_GPU_POWER_BOOST = 0x02010000,
-+	OtherMethodFeature_GPU_cTGP = 0x02020000,
-+	OtherMethodFeature_GPU_TEMPERATURE_LIMIT = 0x02030000,
-+	OtherMethodFeature_GPU_POWER_TARGET_ON_AC_OFFSET_FROM_BASELINE =
-+		0x02040000,
-+
-+	OtherMethodFeature_FAN_SPEED_1 = 0x04030001,
-+	OtherMethodFeature_FAN_SPEED_2 = 0x04030002,
-+
-+	OtherMethodFeature_C_U1 = 0x05010000,
-+	OtherMethodFeature_TEMP_CPU = 0x05040000,
-+	OtherMethodFeature_TEMP_GPU = 0x05050000,
-+};
-+
-+static ssize_t wmi_other_method_get_value(enum OtherMethodFeature feature_id,
-+					  int *value)
-+{
-+	struct acpi_buffer params;
-+	int error;
-+	unsigned long res;
-+	u32 param1 = feature_id;
-+
-+	params.length = sizeof(param1);
-+	params.pointer = &param1;
-+	error = wmi_exec_int(LEGION_WMI_LENOVO_OTHER_METHOD_GUID, 0,
-+			     WMI_METHOD_ID_GET_FEATURE_VALUE, &params, &res);
-+	if (!error)
-+		*value = res;
-+	return error;
-+}
-+
-+/* =================================== */
-+/* EC RAM Access with memory mapped IO */
-+/* =================================== */
-+
-+struct ecram_memoryio {
-+	// TODO: start of remapped memory in EC RAM is assumed to be 0
-+	// u16 ecram_start;
-+
-+	// physical address of remapped IO, depends on model and firmware
-+	phys_addr_t physical_start;
-+	// start adress of region in ec memory
-+	phys_addr_t physical_ec_start;
-+	// virtual address of remapped IO
-+	u8 *virtual_start;
-+	// size of remapped access
-+	size_t size;
-+};
-+
-+/**
-+ * physical_start : corresponds to EC RAM 0 inside EC
-+ * size: size of remapped region
-+ *
-+ * strong exception safety
-+ */
-+static ssize_t ecram_memoryio_init(struct ecram_memoryio *ec_memoryio,
-+				   phys_addr_t physical_start,
-+				   phys_addr_t physical_ec_start, size_t size)
-+{
-+	void *virtual_start = ioremap(physical_start, size);
-+
-+	if (!IS_ERR_OR_NULL(virtual_start)) {
-+		ec_memoryio->virtual_start = virtual_start;
-+		ec_memoryio->physical_start = physical_start;
-+		ec_memoryio->physical_ec_start = physical_ec_start;
-+		ec_memoryio->size = size;
-+		pr_info("Successfully mapped embedded controller: 0x%llx (in RAM)/0x%llx (in EC) to virtual 0x%p\n",
-+			ec_memoryio->physical_start,
-+			ec_memoryio->physical_ec_start,
-+			ec_memoryio->virtual_start);
-+	} else {
-+		pr_info("Error mapping embedded controller memory at 0x%llx\n",
-+			physical_start);
-+		return -ENOMEM;
-+	}
-+	return 0;
-+}
-+
-+static void ecram_memoryio_exit(struct ecram_memoryio *ec_memoryio)
-+{
-+	if (ec_memoryio->virtual_start != NULL) {
-+		pr_info("Unmapping embedded controller memory at 0x%llx (in RAM)/0x%llx (in EC) at virtual 0x%p\n",
-+			ec_memoryio->physical_start,
-+			ec_memoryio->physical_ec_start,
-+			ec_memoryio->virtual_start);
-+		iounmap(ec_memoryio->virtual_start);
-+		ec_memoryio->virtual_start = NULL;
-+	}
-+}
-+
-+/* Read a byte from the EC RAM.
-+ *
-+ * Return status because of commong signature for alle
-+ * methods to access EC RAM.
-+ */
-+static ssize_t ecram_memoryio_read(const struct ecram_memoryio *ec_memoryio,
-+				   u16 ec_offset, u8 *value)
-+{
-+	if (ec_offset < ec_memoryio->physical_ec_start) {
-+		pr_info("Unexpected read at offset %d into EC RAM\n",
-+			ec_offset);
-+		return -1;
-+	}
-+	*value = *(ec_memoryio->virtual_start +
-+		   (ec_offset - ec_memoryio->physical_ec_start));
-+	return 0;
-+}
-+
-+/* Write a byte to the EC RAM.
-+ *
-+ * Return status because of commong signature for alle
-+ * methods to access EC RAM.
-+ */
-+ssize_t ecram_memoryio_write(const struct ecram_memoryio *ec_memoryio,
-+			     u16 ec_offset, u8 value)
-+{
-+	if (ec_offset < ec_memoryio->physical_ec_start) {
-+		pr_info("Unexpected write at offset %d into EC RAM\n",
-+			ec_offset);
-+		return -1;
-+	}
-+	*(ec_memoryio->virtual_start +
-+	  (ec_offset - ec_memoryio->physical_ec_start)) = value;
-+	return 0;
-+}
-+
-+/* ================================= */
-+/* EC RAM Access with port-mapped IO */
-+/* ================================= */
-+
-+/*
-+ * See datasheet of e.g. IT8502E/F/G, e.g.
-+ * 6.2 Plug and Play Configuration (PNPCFG)
-+ *
-+ * Depending on configured BARDSEL register
-+ * the ports
-+ *   ECRAM_PORTIO_ADDR_PORT and
-+ *   ECRAM_PORTIO_DATA_PORT
-+ * are configured.
-+ *
-+ * By performing IO on these ports one can
-+ * read/write to registers in the EC.
-+ *
-+ * "To access a register of PNPCFG, write target index to
-+ *  address port and access this PNPCFG register via
-+ *  data port" [datasheet, 6.2 Plug and Play Configuration]
-+ */
-+
-+// IO ports used to write to communicate with embedded controller
-+// Start of used ports
-+#define ECRAM_PORTIO_START_PORT 0x4E
-+// Number of used ports
-+#define ECRAM_PORTIO_PORTS_SIZE 2
-+// Port used to specify address in EC RAM to read/write
-+// 0x4E/0x4F is the usual port for IO super controller
-+// 0x2E/0x2F also common (ITE can also be configured to use these)
-+#define ECRAM_PORTIO_ADDR_PORT 0x4E
-+// Port to send/receive the value to write/read
-+#define ECRAM_PORTIO_DATA_PORT 0x4F
-+// Name used to request ports
-+#define ECRAM_PORTIO_NAME "legion"
-+
-+struct ecram_portio {
-+	/* protects read/write to EC RAM performed
-+	 * as a certain sequence of outb, inb
-+	 * commands on the IO ports. There can
-+	 * be at most one.
-+	 */
-+	struct mutex io_port_mutex;
-+};
-+
-+static ssize_t ecram_portio_init(struct ecram_portio *ec_portio)
-+{
-+	if (!request_region(ECRAM_PORTIO_START_PORT, ECRAM_PORTIO_PORTS_SIZE,
-+			    ECRAM_PORTIO_NAME)) {
-+		pr_info("Cannot init ecram_portio the %x ports starting at %x\n",
-+			ECRAM_PORTIO_PORTS_SIZE, ECRAM_PORTIO_START_PORT);
-+		return -ENODEV;
-+	}
-+	//pr_info("Reserved %x ports starting at %x\n", ECRAM_PORTIO_PORTS_SIZE, ECRAM_PORTIO_START_PORT);
-+	mutex_init(&ec_portio->io_port_mutex);
-+	return 0;
-+}
-+
-+static void ecram_portio_exit(struct ecram_portio *ec_portio)
-+{
-+	release_region(ECRAM_PORTIO_START_PORT, ECRAM_PORTIO_PORTS_SIZE);
-+}
-+
-+/* Read a byte from the EC RAM.
-+ *
-+ * Return status because of commong signature for alle
-+ * methods to access EC RAM.
-+ */
-+static ssize_t ecram_portio_read(struct ecram_portio *ec_portio, u16 offset,
-+				 u8 *value)
-+{
-+	mutex_lock(&ec_portio->io_port_mutex);
-+
-+	outb(0x2E, ECRAM_PORTIO_ADDR_PORT);
-+	outb(0x11, ECRAM_PORTIO_DATA_PORT);
-+	outb(0x2F, ECRAM_PORTIO_ADDR_PORT);
-+	// TODO: no explicit cast between types seems to be sometimes
-+	// done and sometimes not
-+	outb((u8)((offset >> 8) & 0xFF), ECRAM_PORTIO_DATA_PORT);
-+
-+	outb(0x2E, ECRAM_PORTIO_ADDR_PORT);
-+	outb(0x10, ECRAM_PORTIO_DATA_PORT);
-+	outb(0x2F, ECRAM_PORTIO_ADDR_PORT);
-+	outb((u8)(offset & 0xFF), ECRAM_PORTIO_DATA_PORT);
-+
-+	outb(0x2E, ECRAM_PORTIO_ADDR_PORT);
-+	outb(0x12, ECRAM_PORTIO_DATA_PORT);
-+	outb(0x2F, ECRAM_PORTIO_ADDR_PORT);
-+	*value = inb(ECRAM_PORTIO_DATA_PORT);
-+
-+	mutex_unlock(&ec_portio->io_port_mutex);
-+	return 0;
-+}
-+
-+/* Write a byte to the EC RAM.
-+ *
-+ * Return status because of commong signature for alle
-+ * methods to access EC RAM.
-+ */
-+static ssize_t ecram_portio_write(struct ecram_portio *ec_portio, u16 offset,
-+				  u8 value)
-+{
-+	mutex_lock(&ec_portio->io_port_mutex);
-+
-+	outb(0x2E, ECRAM_PORTIO_ADDR_PORT);
-+	outb(0x11, ECRAM_PORTIO_DATA_PORT);
-+	outb(0x2F, ECRAM_PORTIO_ADDR_PORT);
-+	// TODO: no explicit cast between types seems to be sometimes
-+	// done and sometimes not
-+	outb((u8)((offset >> 8) & 0xFF), ECRAM_PORTIO_DATA_PORT);
-+
-+	outb(0x2E, ECRAM_PORTIO_ADDR_PORT);
-+	outb(0x10, ECRAM_PORTIO_DATA_PORT);
-+	outb(0x2F, ECRAM_PORTIO_ADDR_PORT);
-+	outb((u8)(offset & 0xFF), ECRAM_PORTIO_DATA_PORT);
-+
-+	outb(0x2E, ECRAM_PORTIO_ADDR_PORT);
-+	outb(0x12, ECRAM_PORTIO_DATA_PORT);
-+	outb(0x2F, ECRAM_PORTIO_ADDR_PORT);
-+	outb(value, ECRAM_PORTIO_DATA_PORT);
-+
-+	mutex_unlock(&ec_portio->io_port_mutex);
-+	// TODO: remove this
-+	//pr_info("Writing %d to addr %x\n", value, offset);
-+	return 0;
-+}
-+
-+/* =================================== */
-+/* EC RAM Access                       */
-+/* =================================== */
-+
-+struct ecram {
-+	struct ecram_portio portio;
-+};
-+
-+static ssize_t ecram_init(struct ecram *ecram,
-+			  phys_addr_t memoryio_ec_physical_start,
-+			  size_t region_size)
-+{
-+	ssize_t err;
-+
-+	err = ecram_portio_init(&ecram->portio);
-+	if (err) {
-+		pr_info("Failed ecram_portio_init\n");
-+		goto err_ecram_portio_init;
-+	}
-+
-+	return 0;
-+
-+err_ecram_portio_init:
-+	return err;
-+}
-+
-+static void ecram_exit(struct ecram *ecram)
-+{
-+	pr_info("Unloading legion ecram\n");
-+	ecram_portio_exit(&ecram->portio);
-+	pr_info("Unloading legion ecram done\n");
-+}
-+
-+/** Read from EC RAM
-+ * ecram_offset address on the EC
-+ */
-+static u8 ecram_read(struct ecram *ecram, u16 ecram_offset)
-+{
-+	u8 value;
-+	int err;
-+
-+	err = ecram_portio_read(&ecram->portio, ecram_offset, &value);
-+	if (err)
-+		pr_info("Error reading EC RAM at 0x%x.\n", ecram_offset);
-+	return value;
-+}
-+
-+static void ecram_write(struct ecram *ecram, u16 ecram_offset, u8 value)
-+{
-+	int err;
-+
-+	if (ec_readonly) {
-+		pr_info("Skipping writing EC RAM to 0x%x: Read-Only.\n",
-+			ecram_offset);
-+		return;
-+	}
-+	err = ecram_portio_write(&ecram->portio, ecram_offset, value);
-+	if (err)
-+		pr_info("Error writing EC RAM to 0x%x: Read-Only.\n", ecram_offset);
-+}
-+
-+/* =============================== */
-+/* Reads from EC  */
-+/* ===============================  */
-+
-+static u16 read_ec_id(struct ecram *ecram, const struct model_config *model)
-+{
-+	u8 id1 = ecram_read(ecram, model->registers->ECHIPID1);
-+	u8 id2 = ecram_read(ecram, model->registers->ECHIPID2);
-+
-+	return (id1 << 8) + id2;
-+}
-+
-+static u16 read_ec_version(struct ecram *ecram,
-+			   const struct model_config *model)
-+{
-+	u8 vers = ecram_read(ecram, model->registers->ECHIPVER);
-+	u8 debug = ecram_read(ecram, model->registers->ECDEBUG);
-+
-+	return (vers << 8) + debug;
-+}
-+
-+/* ============================= */
-+/* Data model for sensor values  */
-+/* ============================= */
-+
-+struct sensor_values {
-+	u16 fan1_rpm; // current speed in rpm of fan 1
-+	u16 fan2_rpm; // current speed in rpm of fan2
-+	u16 fan1_target_rpm; // target speed in rpm of fan 1
-+	u16 fan2_target_rpm; // target speed in rpm of fan 2
-+	u8 cpu_temp_celsius; // cpu temperature in celcius
-+	u8 gpu_temp_celsius; // gpu temperature in celcius
-+	u8 ic_temp_celsius; // ic temperature in celcius
-+};
-+
-+enum SENSOR_ATTR {
-+	SENSOR_CPU_TEMP_ID = 1,
-+	SENSOR_GPU_TEMP_ID = 2,
-+	SENSOR_IC_TEMP_ID = 3,
-+	SENSOR_FAN1_RPM_ID = 4,
-+	SENSOR_FAN2_RPM_ID = 5,
-+	SENSOR_FAN1_TARGET_RPM_ID = 6,
-+	SENSOR_FAN2_TARGET_RPM_ID = 7
-+};
-+
-+/* ============================= */
-+/* Data model for fan curve      */
-+/* ============================= */
-+
-+struct fancurve_point {
-+	// rpm1 devided by 100
-+	u8 rpm1_raw;
-+	// rpm2 devided by 100
-+	u8 rpm2_raw;
-+	// >=2 , <=5 (lower is faster); must increase by level
-+	u8 accel;
-+	// >=2 , <=5 (lower is faster); must increase by level
-+	u8 decel;
-+
-+	// min must be lower than or equal to max
-+	// last level max must be 127
-+	// <=127 cpu max temp for this level; must increase by level
-+	u8 cpu_max_temp_celsius;
-+	// <=127 cpu min temp for this level; must increase by level
-+	u8 cpu_min_temp_celsius;
-+	// <=127 gpu min temp for this level; must increase by level
-+	u8 gpu_max_temp_celsius;
-+	// <=127 gpu max temp for this level; must increase by level
-+	u8 gpu_min_temp_celsius;
-+	// <=127 ic max temp for this level; must increase by level
-+	u8 ic_max_temp_celsius;
-+	// <=127 ic max temp for this level; must increase by level
-+	u8 ic_min_temp_celsius;
-+};
-+
-+enum FANCURVE_ATTR {
-+	FANCURVE_ATTR_PWM1 = 1,
-+	FANCURVE_ATTR_PWM2 = 2,
-+	FANCURVE_ATTR_CPU_TEMP = 3,
-+	FANCURVE_ATTR_CPU_HYST = 4,
-+	FANCURVE_ATTR_GPU_TEMP = 5,
-+	FANCURVE_ATTR_GPU_HYST = 6,
-+	FANCURVE_ATTR_IC_TEMP = 7,
-+	FANCURVE_ATTR_IC_HYST = 8,
-+	FANCURVE_ATTR_ACCEL = 9,
-+	FANCURVE_ATTR_DECEL = 10,
-+	FANCURVE_SIZE = 11,
-+	FANCURVE_MINIFANCURVE_ON_COOL = 12
-+};
-+
-+// used for clearing table entries
-+static const struct fancurve_point fancurve_point_zero = { 0, 0, 0, 0, 0,
-+							   0, 0, 0, 0, 0 };
-+
-+struct fancurve {
-+	struct fancurve_point points[MAXFANCURVESIZE];
-+	// number of points used; must be <= MAXFANCURVESIZE
-+	size_t size;
-+	// the point at which fans are run currently
-+	size_t current_point_i;
-+};
-+
-+// validation functions
-+
-+static bool fancurve_is_valid_min_temp(int min_temp)
-+{
-+	return min_temp >= 0 && min_temp <= 127;
-+}
-+
-+static bool fancurve_is_valid_max_temp(int max_temp)
-+{
-+	return max_temp >= 0 && max_temp <= 127;
-+}
-+
-+// setters with validation
-+// - make hwmon implementation easier
-+// - keep fancurve valid, otherwise EC will not properly control fan
-+
-+static bool fancurve_set_rpm1(struct fancurve *fancurve, int point_id, int rpm)
-+{
-+	bool valid = point_id == 0 ? rpm == 0 : (rpm >= 0 && rpm <= 4500);
-+
-+	if (valid)
-+		fancurve->points[point_id].rpm1_raw = rpm / 100;
-+	return valid;
-+}
-+
-+static bool fancurve_set_rpm2(struct fancurve *fancurve, int point_id, int rpm)
-+{
-+	bool valid = point_id == 0 ? rpm == 0 : (rpm >= 0 && rpm <= 4500);
-+
-+	if (valid)
-+		fancurve->points[point_id].rpm2_raw = rpm / 100;
-+	return valid;
-+}
-+
-+// TODO: remove { ... } from single line if body
-+
-+static bool fancurve_set_accel(struct fancurve *fancurve, int point_id,
-+			       int accel)
-+{
-+	bool valid = accel >= 2 && accel <= 5;
-+
-+	if (valid)
-+		fancurve->points[point_id].accel = accel;
-+	return valid;
-+}
-+
-+static bool fancurve_set_decel(struct fancurve *fancurve, int point_id,
-+			       int decel)
-+{
-+	bool valid = decel >= 2 && decel <= 5;
-+
-+	if (valid)
-+		fancurve->points[point_id].decel = decel;
-+	return valid;
-+}
-+
-+static bool fancurve_set_cpu_temp_max(struct fancurve *fancurve, int point_id,
-+				      int value)
-+{
-+	bool valid = fancurve_is_valid_max_temp(value);
-+
-+	if (valid)
-+		fancurve->points[point_id].cpu_max_temp_celsius = value;
-+
-+	return valid;
-+}
-+
-+static bool fancurve_set_gpu_temp_max(struct fancurve *fancurve, int point_id,
-+				      int value)
-+{
-+	bool valid = fancurve_is_valid_max_temp(value);
-+
-+	if (valid)
-+		fancurve->points[point_id].gpu_max_temp_celsius = value;
-+	return valid;
-+}
-+
-+static bool fancurve_set_ic_temp_max(struct fancurve *fancurve, int point_id,
-+				     int value)
-+{
-+	bool valid = fancurve_is_valid_max_temp(value);
-+
-+	if (valid)
-+		fancurve->points[point_id].ic_max_temp_celsius = value;
-+	return valid;
-+}
-+
-+static bool fancurve_set_cpu_temp_min(struct fancurve *fancurve, int point_id,
-+				      int value)
-+{
-+	bool valid = fancurve_is_valid_max_temp(value);
-+
-+	if (valid)
-+		fancurve->points[point_id].cpu_min_temp_celsius = value;
-+	return valid;
-+}
-+
-+static bool fancurve_set_gpu_temp_min(struct fancurve *fancurve, int point_id,
-+				      int value)
-+{
-+	bool valid = fancurve_is_valid_min_temp(value);
-+
-+	if (valid)
-+		fancurve->points[point_id].gpu_min_temp_celsius = value;
-+	return valid;
-+}
-+
-+static bool fancurve_set_ic_temp_min(struct fancurve *fancurve, int point_id,
-+				     int value)
-+{
-+	bool valid = fancurve_is_valid_min_temp(value);
-+
-+	if (valid)
-+		fancurve->points[point_id].ic_min_temp_celsius = value;
-+	return valid;
-+}
-+
-+static bool fancurve_set_size(struct fancurve *fancurve, int size,
-+			      bool init_values)
-+{
-+	bool valid = size >= 1 && size <= MAXFANCURVESIZE;
-+
-+	if (!valid)
-+		return false;
-+	if (init_values && size < fancurve->size) {
-+		// fancurve size is decreased, but last entry always needs 127 temperatures
-+		// Note: size >=1
-+		fancurve->points[size - 1].cpu_max_temp_celsius = 127;
-+		fancurve->points[size - 1].ic_max_temp_celsius = 127;
-+		fancurve->points[size - 1].gpu_max_temp_celsius = 127;
-+	}
-+	if (init_values && size > fancurve->size) {
-+		// fancurve increased, so new entries need valid values
-+		int i;
-+		int last = fancurve->size > 0 ? fancurve->size - 1 : 0;
-+
-+		for (i = fancurve->size; i < size; ++i)
-+			fancurve->points[i] = fancurve->points[last];
-+	}
-+	return true;
-+}
-+
-+static ssize_t fancurve_print_seqfile(const struct fancurve *fancurve,
-+				      struct seq_file *s)
-+{
-+	int i;
-+
-+	seq_printf(
-+		s,
-+		"rpm1|rpm2|acceleration|deceleration|cpu_min_temp|cpu_max_temp|gpu_min_temp|gpu_max_temp|ic_min_temp|ic_max_temp\n");
-+	for (i = 0; i < fancurve->size; ++i) {
-+		const struct fancurve_point *point = &fancurve->points[i];
-+
-+		seq_printf(
-+			s, "%d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\n",
-+			point->rpm1_raw * 100, point->rpm2_raw * 100,
-+			point->accel, point->decel, point->cpu_min_temp_celsius,
-+			point->cpu_max_temp_celsius,
-+			point->gpu_min_temp_celsius,
-+			point->gpu_max_temp_celsius, point->ic_min_temp_celsius,
-+			point->ic_max_temp_celsius);
-+	}
-+	return 0;
-+}
-+
-+struct light {
-+	bool initialized;
-+	struct led_classdev led;
-+	unsigned int last_brightness;
-+	u8 light_id;
-+	unsigned int lower_limit;
-+	unsigned int upper_limit;
-+};
-+
-+/* =============================  */
-+/* Global and shared data between */
-+/* all calls to this module       */
-+/* =============================  */
-+// Implemented like ideapad-laptop.c but currently still
-+// without dynamic memory allocation (instead global _priv)
-+struct legion_private {
-+	struct platform_device *platform_device;
-+	// TODO: remove or keep? init?
-+	struct acpi_device *adev;
-+
-+	// Method to access ECRAM
-+	struct ecram ecram;
-+	// Configuration with registers and ECRAM access method
-+	const struct model_config *conf;
-+
-+	// TODO: maybe refactor and keep only local to each function
-+	// last known fan curve
-+	struct fancurve fancurve;
-+	// configured fan curve from user space
-+	struct fancurve fancurve_configured;
-+
-+	// update lock, when partial values of fancurve are changed
-+	struct mutex fancurve_mutex;
-+
-+	//interfaces
-+	struct dentry *debugfs_dir;
-+	struct device *hwmon_dev;
-+	struct platform_profile_handler platform_profile_handler;
-+
-+	struct light kbd_bl;
-+	struct light ylogo_light;
-+	struct light iport_light;
-+
-+	// TODO: remove?
-+	bool loaded;
-+
-+	// TODO: remove, only for reverse enginnering
-+	struct ecram_memoryio ec_memoryio;
-+};
-+
-+// shared between different drivers: WMI, platform and protected by mutex
-+static struct legion_private *legion_shared;
-+static struct legion_private _priv;
-+static DEFINE_MUTEX(legion_shared_mutex);
-+
-+static int legion_shared_init(struct legion_private *priv)
-+{
-+	int ret;
-+
-+	mutex_lock(&legion_shared_mutex);
-+
-+	if (!legion_shared) {
-+		legion_shared = priv;
-+		mutex_init(&legion_shared->fancurve_mutex);
-+		ret = 0;
-+	} else {
-+		pr_warn("Found multiple platform devices\n");
-+		ret = -EINVAL;
-+	}
-+
-+	priv->loaded = true;
-+	mutex_unlock(&legion_shared_mutex);
-+
-+	return ret;
-+}
-+
-+static void legion_shared_exit(struct legion_private *priv)
-+{
-+	pr_info("Unloading legion shared\n");
-+	mutex_lock(&legion_shared_mutex);
-+
-+	if (legion_shared == priv)
-+		legion_shared = NULL;
-+
-+	mutex_unlock(&legion_shared_mutex);
-+	pr_info("Unloading legion shared done\n");
-+}
-+
-+static int get_simple_wmi_attribute(struct legion_private *priv,
-+				    const char *guid, u8 instance,
-+				    u32 method_id, bool invert,
-+				    unsigned long scale, unsigned long *value)
-+{
-+	unsigned long state = 0;
-+	int err;
-+
-+	if (scale == 0) {
-+		pr_info("Scale cannot be 0\n");
-+		return -EINVAL;
-+	}
-+	err = wmi_exec_noarg_int(guid, instance, method_id, &state);
-+	if (err)
-+		return -EINVAL;
-+
-+	// TODO: remove later
-+	pr_info("%swith raw value: %ld\n", __func__, state);
-+
-+	state = state * scale;
-+
-+	if (invert)
-+		state = !state;
-+	*value = state;
-+	return 0;
-+}
-+
-+static int get_simple_wmi_attribute_bool(struct legion_private *priv,
-+					 const char *guid, u8 instance,
-+					 u32 method_id, bool invert,
-+					 unsigned long scale, bool *value)
-+{
-+	unsigned long int_val = *value;
-+	int err = get_simple_wmi_attribute(priv, guid, instance, method_id,
-+					   invert, scale, &int_val);
-+	*value = int_val;
-+	return err;
-+}
-+
-+static int set_simple_wmi_attribute(struct legion_private *priv,
-+				    const char *guid, u8 instance,
-+				    u32 method_id, bool invert, int scale,
-+				    int state)
-+{
-+	int err;
-+	u8 in_param;
-+
-+	if (scale == 0) {
-+		pr_info("Scale cannot be 0\n");
-+		return -EINVAL;
-+	}
-+
-+	if (invert)
-+		state = !state;
-+
-+	in_param = state / scale;
-+
-+	err = wmi_exec_arg(guid, instance, method_id, &in_param,
-+			   sizeof(in_param));
-+	return err;
-+}
-+
-+/* ============================= */
-+/* Sensor value reading/writing */
-+/* ============================= */
-+
-+static int ec_read_sensor_values(struct ecram *ecram,
-+				 const struct model_config *model,
-+				 struct sensor_values *values)
-+{
-+	values->fan1_target_rpm =
-+		100 * ecram_read(ecram, model->registers->EXT_FAN1_TARGET_RPM);
-+	values->fan2_target_rpm =
-+		100 * ecram_read(ecram, model->registers->EXT_FAN2_TARGET_RPM);
-+
-+	values->fan1_rpm =
-+		ecram_read(ecram, model->registers->EXT_FAN1_RPM_LSB) +
-+		(((int)ecram_read(ecram, model->registers->EXT_FAN1_RPM_MSB))
-+		 << 8);
-+	values->fan2_rpm =
-+		ecram_read(ecram, model->registers->EXT_FAN2_RPM_LSB) +
-+		(((int)ecram_read(ecram, model->registers->EXT_FAN2_RPM_MSB))
-+		 << 8);
-+
-+	values->cpu_temp_celsius =
-+		ecram_read(ecram, model->registers->EXT_CPU_TEMP_INPUT);
-+	values->gpu_temp_celsius =
-+		ecram_read(ecram, model->registers->EXT_GPU_TEMP_INPUT);
-+	values->ic_temp_celsius =
-+		ecram_read(ecram, model->registers->EXT_IC_TEMP_INPUT);
-+
-+	values->cpu_temp_celsius = ecram_read(ecram, 0xC5E6);
-+	values->gpu_temp_celsius = ecram_read(ecram, 0xC5E7);
-+	values->ic_temp_celsius = ecram_read(ecram, 0xC5E8);
-+
-+	return 0;
-+}
-+
-+static ssize_t ec_read_temperature(struct ecram *ecram,
-+				   const struct model_config *model,
-+				   int sensor_id, int *temperature)
-+{
-+	int err = 0;
-+	unsigned long res;
-+
-+	if (sensor_id == 0) {
-+		res = ecram_read(ecram, 0xC5E6);
-+	} else if (sensor_id == 1) {
-+		res = ecram_read(ecram, 0xC5E7);
-+	} else {
-+		// TODO: use all correct error codes
-+		return -EEXIST;
-+	}
-+	if (!err)
-+		*temperature = res;
-+	return err;
-+}
-+
-+static ssize_t ec_read_fanspeed(struct ecram *ecram,
-+				const struct model_config *model, int fan_id,
-+				int *fanspeed_rpm)
-+{
-+	int err = 0;
-+	unsigned long res;
-+
-+	if (fan_id == 0) {
-+		res = ecram_read(ecram, model->registers->EXT_FAN1_RPM_LSB) +
-+		      (((int)ecram_read(ecram,
-+					model->registers->EXT_FAN1_RPM_MSB))
-+		       << 8);
-+	} else if (fan_id == 1) {
-+		res = ecram_read(ecram, model->registers->EXT_FAN2_RPM_LSB) +
-+		      (((int)ecram_read(ecram,
-+					model->registers->EXT_FAN2_RPM_MSB))
-+		       << 8);
-+	} else {
-+		// TODO: use all correct error codes
-+		return -EEXIST;
-+	}
-+	if (!err)
-+		*fanspeed_rpm = res;
-+	return err;
-+}
-+
-+// '\_SB.PCI0.LPC0.EC0.FANS
-+#define ACPI_PATH_FAN_SPEED1 "FANS"
-+// '\_SB.PCI0.LPC0.EC0.FA2S
-+#define ACPI_PATH_FAN_SPEED2 "FA2S"
-+
-+static ssize_t acpi_read_fanspeed(struct legion_private *priv, int fan_id,
-+				  int *value)
-+{
-+	int err;
-+	unsigned long acpi_value;
-+	const char *acpi_path;
-+
-+	if (fan_id == 0) {
-+		acpi_path = ACPI_PATH_FAN_SPEED1;
-+	} else if (fan_id == 1) {
-+		acpi_path = ACPI_PATH_FAN_SPEED2;
-+	} else {
-+		// TODO: use all correct error codes
-+		return -EEXIST;
-+	}
-+	err = eval_int(priv->adev->handle, acpi_path, &acpi_value);
-+	if (!err)
-+		*value = (int)acpi_value * 100;
-+	return err;
-+}
-+
-+// '\_SB.PCI0.LPC0.EC0.CPUT
-+#define ACPI_PATH_CPU_TEMP "CPUT"
-+// '\_SB.PCI0.LPC0.EC0.GPUT
-+#define ACPI_PATH_GPU_TEMP "GPUT"
-+
-+static ssize_t acpi_read_temperature(struct legion_private *priv, int fan_id,
-+				     int *value)
-+{
-+	int err;
-+	unsigned long acpi_value;
-+	const char *acpi_path;
-+
-+	if (fan_id == 0) {
-+		acpi_path = ACPI_PATH_CPU_TEMP;
-+	} else if (fan_id == 1) {
-+		acpi_path = ACPI_PATH_GPU_TEMP;
-+	} else {
-+		// TODO: use all correct error codes
-+		return -EEXIST;
-+	}
-+	err = eval_int(priv->adev->handle, acpi_path, &acpi_value);
-+	if (!err)
-+		*value = (int)acpi_value;
-+	return err;
-+}
-+
-+// fan_id: 0 or 1
-+static ssize_t wmi_read_fanspeed(int fan_id, int *fanspeed_rpm)
-+{
-+	int err;
-+	unsigned long res;
-+	struct acpi_buffer params;
-+
-+	params.length = 1;
-+	params.pointer = &fan_id;
-+
-+	err = wmi_exec_int(WMI_GUID_LENOVO_FAN_METHOD, 0,
-+			   WMI_METHOD_ID_FAN_GETCURRENTFANSPEED, &params, &res);
-+
-+	if (!err)
-+		*fanspeed_rpm = res;
-+	return err;
-+}
-+
-+//sensor_id: cpu = 0, gpu = 1
-+static ssize_t wmi_read_temperature(int sensor_id, int *temperature)
-+{
-+	int err;
-+	unsigned long res;
-+	struct acpi_buffer params;
-+
-+	if (sensor_id == 0)
-+		sensor_id = 0x03;
-+	else if (sensor_id == 1)
-+		sensor_id = 0x04;
-+	else {
-+		// TODO: use all correct error codes
-+		return -EEXIST;
-+	}
-+
-+	params.length = 1;
-+	params.pointer = &sensor_id;
-+
-+	err = wmi_exec_int(WMI_GUID_LENOVO_FAN_METHOD, 0,
-+			   WMI_METHOD_ID_FAN_GETCURRENTSENSORTEMPERATURE,
-+			   &params, &res);
-+
-+	if (!err)
-+		*temperature = res;
-+	return err;
-+}
-+
-+// fan_id: 0 or 1
-+static ssize_t wmi_read_fanspeed_gz(int fan_id, int *fanspeed_rpm)
-+{
-+	int err;
-+	u32 method_id;
-+	unsigned long res;
-+
-+	if (fan_id == 0)
-+		method_id = WMI_METHOD_ID_GETFAN1SPEED;
-+	else if (fan_id == 1)
-+		method_id = WMI_METHOD_ID_GETFAN2SPEED;
-+	else {
-+		// TODO: use all correct error codes
-+		return -EEXIST;
-+	}
-+	err = wmi_exec_noarg_int(LEGION_WMI_GAMEZONE_GUID, 0, method_id, &res);
-+
-+	if (!err)
-+		*fanspeed_rpm = res;
-+	return err;
-+}
-+
-+//sensor_id: cpu = 0, gpu = 1
-+static ssize_t wmi_read_temperature_gz(int sensor_id, int *temperature)
-+{
-+	int err;
-+	u32 method_id;
-+	unsigned long res;
-+
-+	if (sensor_id == 0)
-+		method_id = WMI_METHOD_ID_GETCPUTEMP;
-+	else if (sensor_id == 1)
-+		method_id = WMI_METHOD_ID_GETGPUTEMP;
-+	else {
-+		// TODO: use all correct error codes
-+		return -EEXIST;
-+	}
-+
-+	err = wmi_exec_noarg_int(LEGION_WMI_GAMEZONE_GUID, 0, method_id, &res);
-+
-+	if (!err)
-+		*temperature = res;
-+	return err;
-+}
-+
-+// fan_id: 0 or 1
-+static ssize_t wmi_read_fanspeed_other(int fan_id, int *fanspeed_rpm)
-+{
-+	int err;
-+	enum OtherMethodFeature featured_id;
-+	int res;
-+
-+	if (fan_id == 0)
-+		featured_id = OtherMethodFeature_FAN_SPEED_1;
-+	else if (fan_id == 1)
-+		featured_id = OtherMethodFeature_FAN_SPEED_2;
-+	else {
-+		// TODO: use all correct error codes
-+		return -EEXIST;
-+	}
-+
-+	err = wmi_other_method_get_value(featured_id, &res);
-+
-+	if (!err)
-+		*fanspeed_rpm = res;
-+	return err;
-+}
-+
-+//sensor_id: cpu = 0, gpu = 1
-+static ssize_t wmi_read_temperature_other(int sensor_id, int *temperature)
-+{
-+	int err;
-+	enum OtherMethodFeature featured_id;
-+	int res;
-+
-+	if (sensor_id == 0)
-+		featured_id = OtherMethodFeature_TEMP_CPU;
-+	else if (sensor_id == 1)
-+		featured_id = OtherMethodFeature_TEMP_GPU;
-+	else {
-+		// TODO: use all correct error codes
-+		return -EEXIST;
-+	}
-+
-+	err = wmi_other_method_get_value(featured_id, &res);
-+	if (!err)
-+		*temperature = res;
-+	return err;
-+}
-+
-+static ssize_t read_fanspeed(struct legion_private *priv, int fan_id,
-+			     int *speed_rpm)
-+{
-+	// TODO: use enums or function pointers?
-+	switch (priv->conf->access_method_fanspeed) {
-+	case ACCESS_METHOD_EC:
-+		return ec_read_fanspeed(&priv->ecram, priv->conf, fan_id,
-+					speed_rpm);
-+	case ACCESS_METHOD_ACPI:
-+		return acpi_read_fanspeed(priv, fan_id, speed_rpm);
-+	case ACCESS_METHOD_WMI:
-+		return wmi_read_fanspeed_gz(fan_id, speed_rpm);
-+	case ACCESS_METHOD_WMI2:
-+		return wmi_read_fanspeed(fan_id, speed_rpm);
-+	case ACCESS_METHOD_WMI3:
-+		return wmi_read_fanspeed_other(fan_id, speed_rpm);
-+	default:
-+		pr_info("No access method for fanspeed: %d\n",
-+			priv->conf->access_method_fanspeed);
-+		return -EINVAL;
-+	}
-+}
-+
-+static ssize_t read_temperature(struct legion_private *priv, int sensor_id,
-+				int *temperature)
-+{
-+	// TODO: use enums or function pointers?
-+	switch (priv->conf->access_method_temperature) {
-+	case ACCESS_METHOD_EC:
-+		return ec_read_temperature(&priv->ecram, priv->conf, sensor_id,
-+					   temperature);
-+	case ACCESS_METHOD_ACPI:
-+		return acpi_read_temperature(priv, sensor_id, temperature);
-+	case ACCESS_METHOD_WMI:
-+		return wmi_read_temperature_gz(sensor_id, temperature);
-+	case ACCESS_METHOD_WMI2:
-+		return wmi_read_temperature(sensor_id, temperature);
-+	case ACCESS_METHOD_WMI3:
-+		return wmi_read_temperature_other(sensor_id, temperature);
-+	default:
-+		pr_info("No access method for temperature: %d\n",
-+			priv->conf->access_method_temperature);
-+		return -EINVAL;
-+	}
-+}
-+
-+/* ============================= */
-+/* Fancurve reading/writing      */
-+/* ============================= */
-+
-+/* Fancurve from WMI
-+ * This allows changing fewer parameters.
-+ * It is only available on newer models.
-+ */
-+
-+struct WMIFanTable {
-+	u8 FSTM; //FSMD
-+	u8 FSID;
-+	u32 FSTL; //FSST
-+	u16 FSS0;
-+	u16 FSS1;
-+	u16 FSS2;
-+	u16 FSS3;
-+	u16 FSS4;
-+	u16 FSS5;
-+	u16 FSS6;
-+	u16 FSS7;
-+	u16 FSS8;
-+	u16 FSS9;
-+} __packed;
-+
-+struct WMIFanTableRead {
-+	u32 FSFL;
-+	u32 FSS0;
-+	u32 FSS1;
-+	u32 FSS2;
-+	u32 FSS3;
-+	u32 FSS4;
-+	u32 FSS5;
-+	u32 FSS6;
-+	u32 FSS7;
-+	u32 FSS8;
-+	u32 FSS9;
-+	u32 FSSA;
-+} __packed;
-+
-+static ssize_t wmi_read_fancurve_custom(const struct model_config *model,
-+					struct fancurve *fancurve)
-+{
-+	u8 buffer[88];
-+	int err;
-+
-+	// The output buffer from the ACPI call is 88 bytes and larger
-+	// than the returned object
-+	pr_info("Size of object: %lu\n", sizeof(struct WMIFanTableRead));
-+	err = wmi_exec_noarg_ints(WMI_GUID_LENOVO_FAN_METHOD, 0,
-+				  WMI_METHOD_ID_FAN_GET_TABLE, buffer,
-+				  sizeof(buffer));
-+	print_hex_dump(KERN_INFO, "legion_laptop fan table wmi buffer",
-+		       DUMP_PREFIX_ADDRESS, 16, 1, buffer, sizeof(buffer),
-+		       true);
-+	if (!err) {
-+		struct WMIFanTableRead *fantable =
-+			(struct WMIFanTableRead *)&buffer[0];
-+		fancurve->current_point_i = 0;
-+		fancurve->size = 10;
-+		fancurve->points[0].rpm1_raw = fantable->FSS0;
-+		fancurve->points[1].rpm1_raw = fantable->FSS1;
-+		fancurve->points[2].rpm1_raw = fantable->FSS2;
-+		fancurve->points[3].rpm1_raw = fantable->FSS3;
-+		fancurve->points[4].rpm1_raw = fantable->FSS4;
-+		fancurve->points[5].rpm1_raw = fantable->FSS5;
-+		fancurve->points[6].rpm1_raw = fantable->FSS6;
-+		fancurve->points[7].rpm1_raw = fantable->FSS7;
-+		fancurve->points[8].rpm1_raw = fantable->FSS8;
-+		fancurve->points[9].rpm1_raw = fantable->FSS9;
-+		//fancurve->points[10].rpm1_raw = fantable->FSSA;
-+	}
-+	return err;
-+}
-+
-+static ssize_t wmi_write_fancurve_custom(const struct model_config *model,
-+					 const struct fancurve *fancurve)
-+{
-+	u8 buffer[0x20];
-+	int err;
-+
-+	// The buffer is read like this in ACPI firmware
-+	//
-+	// CreateByteField (Arg2, Zero, FSTM)
-+	// CreateByteField (Arg2, One, FSID)
-+	// CreateDWordField (Arg2, 0x02, FSTL)
-+	// CreateByteField (Arg2, 0x06, FSS0)
-+	// CreateByteField (Arg2, 0x08, FSS1)
-+	// CreateByteField (Arg2, 0x0A, FSS2)
-+	// CreateByteField (Arg2, 0x0C, FSS3)
-+	// CreateByteField (Arg2, 0x0E, FSS4)
-+	// CreateByteField (Arg2, 0x10, FSS5)
-+	// CreateByteField (Arg2, 0x12, FSS6)
-+	// CreateByteField (Arg2, 0x14, FSS7)
-+	// CreateByteField (Arg2, 0x16, FSS8)
-+	// CreateByteField (Arg2, 0x18, FSS9)
-+
-+	memset(buffer, 0, sizeof(buffer));
-+	buffer[0x06] = fancurve->points[0].rpm1_raw;
-+	buffer[0x08] = fancurve->points[1].rpm1_raw;
-+	buffer[0x0A] = fancurve->points[2].rpm1_raw;
-+	buffer[0x0C] = fancurve->points[3].rpm1_raw;
-+	buffer[0x0E] = fancurve->points[4].rpm1_raw;
-+	buffer[0x10] = fancurve->points[5].rpm1_raw;
-+	buffer[0x12] = fancurve->points[6].rpm1_raw;
-+	buffer[0x14] = fancurve->points[7].rpm1_raw;
-+	buffer[0x16] = fancurve->points[8].rpm1_raw;
-+	buffer[0x18] = fancurve->points[9].rpm1_raw;
-+
-+	print_hex_dump(KERN_INFO, "legion_laptop fan table wmi write buffer",
-+		       DUMP_PREFIX_ADDRESS, 16, 1, buffer, sizeof(buffer),
-+		       true);
-+	err = wmi_exec_arg(WMI_GUID_LENOVO_FAN_METHOD, 0,
-+			   WMI_METHOD_ID_FAN_SET_TABLE, buffer, sizeof(buffer));
-+	return err;
-+}
-+
-+/* Read the fan curve from the EC.
-+ *
-+ * In newer models (>=2022) there is an ACPI/WMI to read fan curve as
-+ * a whole. So read/write fan table as a whole to use the
-+ * same interface for both cases.
-+ *
-+ * It reads all points from EC memory, even if stored fancurve is smaller, so
-+ * it can contain 0 entries.
-+ */
-+static int ec_read_fancurve_legion(struct ecram *ecram,
-+				   const struct model_config *model,
-+				   struct fancurve *fancurve)
-+{
-+	size_t i = 0;
-+
-+	for (i = 0; i < MAXFANCURVESIZE; ++i) {
-+		struct fancurve_point *point = &fancurve->points[i];
-+
-+		point->rpm1_raw =
-+			ecram_read(ecram, model->registers->EXT_FAN1_BASE + i);
-+		point->rpm2_raw =
-+			ecram_read(ecram, model->registers->EXT_FAN2_BASE + i);
-+
-+		point->accel = ecram_read(
-+			ecram, model->registers->EXT_FAN_ACC_BASE + i);
-+		point->decel = ecram_read(
-+			ecram, model->registers->EXT_FAN_DEC_BASE + i);
-+		point->cpu_max_temp_celsius =
-+			ecram_read(ecram, model->registers->EXT_CPU_TEMP + i);
-+		point->cpu_min_temp_celsius = ecram_read(
-+			ecram, model->registers->EXT_CPU_TEMP_HYST + i);
-+		point->gpu_max_temp_celsius =
-+			ecram_read(ecram, model->registers->EXT_GPU_TEMP + i);
-+		point->gpu_min_temp_celsius = ecram_read(
-+			ecram, model->registers->EXT_GPU_TEMP_HYST + i);
-+		point->ic_max_temp_celsius =
-+			ecram_read(ecram, model->registers->EXT_VRM_TEMP + i);
-+		point->ic_min_temp_celsius = ecram_read(
-+			ecram, model->registers->EXT_VRM_TEMP_HYST + i);
-+	}
-+
-+	// Do not trust that hardware; It might suddenly report
-+	// a larger size, so clamp it.
-+	fancurve->size =
-+		ecram_read(ecram, model->registers->EXT_FAN_POINTS_SIZE);
-+	fancurve->size =
-+		min(fancurve->size, (typeof(fancurve->size))(MAXFANCURVESIZE));
-+	fancurve->current_point_i =
-+		ecram_read(ecram, model->registers->EXT_FAN_CUR_POINT);
-+	fancurve->current_point_i =
-+		min(fancurve->current_point_i, fancurve->size);
-+	return 0;
-+}
-+
-+static int ec_write_fancurve_legion(struct ecram *ecram,
-+				    const struct model_config *model,
-+				    const struct fancurve *fancurve,
-+				    bool write_size)
-+{
-+	size_t i;
-+
-+	// Reset fan update counters (try to avoid any race conditions)
-+	ecram_write(ecram, 0xC5FE, 0);
-+	ecram_write(ecram, 0xC5FF, 0);
-+	for (i = 0; i < MAXFANCURVESIZE; ++i) {
-+		// Entries for points larger than fancurve size should be cleared
-+		// to 0
-+		const struct fancurve_point *point =
-+			i < fancurve->size ? &fancurve->points[i] :
-+					     &fancurve_point_zero;
-+
-+		ecram_write(ecram, model->registers->EXT_FAN1_BASE + i,
-+			    point->rpm1_raw);
-+		ecram_write(ecram, model->registers->EXT_FAN2_BASE + i,
-+			    point->rpm2_raw);
-+
-+		ecram_write(ecram, model->registers->EXT_FAN_ACC_BASE + i,
-+			    point->accel);
-+		ecram_write(ecram, model->registers->EXT_FAN_DEC_BASE + i,
-+			    point->decel);
-+
-+		ecram_write(ecram, model->registers->EXT_CPU_TEMP + i,
-+			    point->cpu_max_temp_celsius);
-+		ecram_write(ecram, model->registers->EXT_CPU_TEMP_HYST + i,
-+			    point->cpu_min_temp_celsius);
-+		ecram_write(ecram, model->registers->EXT_GPU_TEMP + i,
-+			    point->gpu_max_temp_celsius);
-+		ecram_write(ecram, model->registers->EXT_GPU_TEMP_HYST + i,
-+			    point->gpu_min_temp_celsius);
-+		ecram_write(ecram, model->registers->EXT_VRM_TEMP + i,
-+			    point->ic_max_temp_celsius);
-+		ecram_write(ecram, model->registers->EXT_VRM_TEMP_HYST + i,
-+			    point->ic_min_temp_celsius);
-+	}
-+
-+	if (write_size) {
-+		ecram_write(ecram, model->registers->EXT_FAN_POINTS_SIZE,
-+			    fancurve->size);
-+	}
-+
-+	// Reset current fan level to 0, so algorithm in EC
-+	// selects fan curve point again and resetting hysterisis
-+	// effects
-+	ecram_write(ecram, model->registers->EXT_FAN_CUR_POINT, 0);
-+
-+	// Reset internal fan levels
-+	ecram_write(ecram, 0xC634, 0); // CPU
-+	ecram_write(ecram, 0xC635, 0); // GPU
-+	ecram_write(ecram, 0xC636, 0); // SENSOR
-+
-+	return 0;
-+}
-+
-+#define FANCURVESIZE_IDEAPDAD 8
-+
-+static int ec_read_fancurve_ideapad(struct ecram *ecram,
-+				    const struct model_config *model,
-+				    struct fancurve *fancurve)
-+{
-+	size_t i = 0;
-+
-+	for (i = 0; i < FANCURVESIZE_IDEAPDAD; ++i) {
-+		struct fancurve_point *point = &fancurve->points[i];
-+
-+		point->rpm1_raw =
-+			ecram_read(ecram, model->registers->EXT_FAN1_BASE + i);
-+		point->rpm2_raw =
-+			ecram_read(ecram, model->registers->EXT_FAN2_BASE + i);
-+
-+		point->accel = 0;
-+		point->decel = 0;
-+		point->cpu_max_temp_celsius =
-+			ecram_read(ecram, model->registers->EXT_CPU_TEMP + i);
-+		point->cpu_min_temp_celsius = ecram_read(
-+			ecram, model->registers->EXT_CPU_TEMP_HYST + i);
-+		point->gpu_max_temp_celsius =
-+			ecram_read(ecram, model->registers->EXT_GPU_TEMP + i);
-+		point->gpu_min_temp_celsius = ecram_read(
-+			ecram, model->registers->EXT_GPU_TEMP_HYST + i);
-+		point->ic_max_temp_celsius = 0;
-+		point->ic_min_temp_celsius = 0;
-+	}
-+
-+	// Do not trust that hardware; It might suddenly report
-+	// a larger size, so clamp it.
-+	fancurve->size = FANCURVESIZE_IDEAPDAD;
-+	fancurve->current_point_i =
-+		ecram_read(ecram, model->registers->EXT_FAN_CUR_POINT);
-+	fancurve->current_point_i =
-+		min(fancurve->current_point_i, fancurve->size);
-+	return 0;
-+}
-+
-+static int ec_write_fancurve_ideapad(struct ecram *ecram,
-+				     const struct model_config *model,
-+				     const struct fancurve *fancurve)
-+{
-+	size_t i;
-+	int valr1;
-+	int valr2;
-+
-+	// add this later: maybe other addresses needed
-+	// therefore, fan curve might not be effective immediately but
-+	// only after temp change
-+	// Reset fan update counters (try to avoid any race conditions)
-+	ecram_write(ecram, 0xC5FE, 0);
-+	ecram_write(ecram, 0xC5FF, 0);
-+	for (i = 0; i < FANCURVESIZE_IDEAPDAD; ++i) {
-+		const struct fancurve_point *point = &fancurve->points[i];
-+
-+		ecram_write(ecram, model->registers->EXT_FAN1_BASE + i,
-+			    point->rpm1_raw);
-+		valr1 = ecram_read(ecram, model->registers->EXT_FAN1_BASE + i);
-+		ecram_write(ecram, model->registers->EXT_FAN2_BASE + i,
-+			    point->rpm2_raw);
-+		valr2 = ecram_read(ecram, model->registers->EXT_FAN2_BASE + i);
-+		pr_info("Writing fan1: %d; reading fan1: %d\n", point->rpm1_raw,
-+			valr1);
-+		pr_info("Writing fan2: %d; reading fan2: %d\n", point->rpm2_raw,
-+			valr2);
-+
-+		// write to memory and repeat 8 bytes later again
-+		ecram_write(ecram, model->registers->EXT_CPU_TEMP + i,
-+			    point->cpu_max_temp_celsius);
-+		ecram_write(ecram, model->registers->EXT_CPU_TEMP + 8 + i,
-+			    point->cpu_max_temp_celsius);
-+		// write to memory and repeat 8 bytes later again
-+		ecram_write(ecram, model->registers->EXT_CPU_TEMP_HYST + i,
-+			    point->cpu_min_temp_celsius);
-+		ecram_write(ecram, model->registers->EXT_CPU_TEMP_HYST + 8 + i,
-+			    point->cpu_min_temp_celsius);
-+		// write to memory and repeat 8 bytes later again
-+		ecram_write(ecram, model->registers->EXT_GPU_TEMP + i,
-+			    point->gpu_max_temp_celsius);
-+		ecram_write(ecram, model->registers->EXT_GPU_TEMP + 8 + i,
-+			    point->gpu_max_temp_celsius);
-+		// write to memory and repeat 8 bytes later again
-+		ecram_write(ecram, model->registers->EXT_GPU_TEMP_HYST + i,
-+			    point->gpu_min_temp_celsius);
-+		ecram_write(ecram, model->registers->EXT_GPU_TEMP_HYST + 8 + i,
-+			    point->gpu_min_temp_celsius);
-+	}
-+
-+	// add this later: maybe other addresses needed
-+	// therefore, fan curve might not be effective immediately but
-+	// only after temp change
-+	// // Reset current fan level to 0, so algorithm in EC
-+	// // selects fan curve point again and resetting hysterisis
-+	// // effects
-+	// ecram_write(ecram, model->registers->EXT_FAN_CUR_POINT, 0);
-+
-+	// // Reset internal fan levels
-+	// ecram_write(ecram, 0xC634, 0); // CPU
-+	// ecram_write(ecram, 0xC635, 0); // GPU
-+	// ecram_write(ecram, 0xC636, 0); // SENSOR
-+
-+	return 0;
-+}
-+
-+#define FANCURVESIZE_LOQ 10
-+
-+static int ec_read_fancurve_loq(struct ecram *ecram,
-+				    const struct model_config *model,
-+				    struct fancurve *fancurve)
-+{
-+	size_t i = 0;
-+	size_t struct_offset = 3; // {cpu_temp: u8, rpm: u8, gpu_temp?: u8}
-+
-+	for (i = 0; i < FANCURVESIZE_LOQ; ++i) {
-+		struct fancurve_point *point = &fancurve->points[i];
-+
-+		point->rpm1_raw =
-+			ecram_read(ecram, model->registers->EXT_FAN1_BASE + (i * struct_offset));
-+		point->rpm2_raw =
-+			ecram_read(ecram, model->registers->EXT_FAN2_BASE + (i * struct_offset));
-+
-+		point->accel = 0;
-+		point->decel = 0;
-+		point->cpu_max_temp_celsius =
-+			ecram_read(ecram, model->registers->EXT_CPU_TEMP + (i * struct_offset));
-+		point->gpu_max_temp_celsius =
-+			ecram_read(ecram, model->registers->EXT_GPU_TEMP + (i * struct_offset));
-+		point->cpu_min_temp_celsius = 0;
-+		point->gpu_min_temp_celsius = 0;
-+		point->ic_max_temp_celsius = 0;
-+		point->ic_min_temp_celsius = 0;
-+	}
-+
-+	fancurve->size = FANCURVESIZE_LOQ;
-+	fancurve->current_point_i =
-+		ecram_read(ecram, model->registers->EXT_FAN_CUR_POINT);
-+	fancurve->current_point_i =
-+		min(fancurve->current_point_i, fancurve->size);
-+	return 0;
-+}
-+
-+static int ec_write_fancurve_loq(struct ecram *ecram,
-+				     const struct model_config *model,
-+				     const struct fancurve *fancurve)
-+{
-+	size_t i;
-+	int valr1;
-+	int valr2;
-+	size_t struct_offset = 3; // {cpu_temp: u8, rpm: u8, gpu_temp?: u8}
-+
-+	for (i = 0; i < FANCURVESIZE_LOQ; ++i) {
-+		const struct fancurve_point *point = &fancurve->points[i];
-+
-+		ecram_write(ecram, model->registers->EXT_FAN1_BASE + (i * struct_offset),
-+			    point->rpm1_raw);
-+		valr1 = ecram_read(ecram, model->registers->EXT_FAN1_BASE + (i * struct_offset));
-+		ecram_write(ecram, model->registers->EXT_FAN2_BASE + (i * struct_offset),
-+			    point->rpm2_raw);
-+		valr2 = ecram_read(ecram, model->registers->EXT_FAN2_BASE + (i * struct_offset));
-+		pr_info("Writing fan1: %d; reading fan1: %d\n", point->rpm1_raw,
-+			valr1);
-+		pr_info("Writing fan2: %d; reading fan2: %d\n", point->rpm2_raw,
-+			valr2);
-+
-+		// write to memory and repeat 8 bytes later again
-+		ecram_write(ecram, model->registers->EXT_CPU_TEMP + (i * struct_offset),
-+			    point->cpu_max_temp_celsius);
-+		// write to memory and repeat 8 bytes later again
-+		ecram_write(ecram, model->registers->EXT_GPU_TEMP + (i * struct_offset),
-+			    point->gpu_max_temp_celsius);
-+	}
-+
-+	return 0;
-+}
-+
-+static int read_fancurve(struct legion_private *priv, struct fancurve *fancurve)
-+{
-+	// TODO: use enums or function pointers?
-+	switch (priv->conf->access_method_fancurve) {
-+	case ACCESS_METHOD_EC:
-+		return ec_read_fancurve_legion(&priv->ecram, priv->conf,
-+					       fancurve);
-+	case ACCESS_METHOD_EC2:
-+		return ec_read_fancurve_ideapad(&priv->ecram, priv->conf,
-+						fancurve);
-+	case ACCESS_METHOD_EC3:
-+		return ec_read_fancurve_loq(&priv->ecram, priv->conf,
-+						fancurve);
-+	case ACCESS_METHOD_WMI3:
-+		return wmi_read_fancurve_custom(priv->conf, fancurve);
-+	default:
-+		pr_info("No access method for fancurve: %d\n",
-+			priv->conf->access_method_fancurve);
-+		return -EINVAL;
-+	}
-+}
-+
-+static int write_fancurve(struct legion_private *priv,
-+			  const struct fancurve *fancurve, bool write_size)
-+{
-+	// TODO: use enums or function pointers?
-+	switch (priv->conf->access_method_fancurve) {
-+	case ACCESS_METHOD_EC:
-+		return ec_write_fancurve_legion(&priv->ecram, priv->conf,
-+						fancurve, write_size);
-+	case ACCESS_METHOD_EC2:
-+		return ec_write_fancurve_ideapad(&priv->ecram, priv->conf,
-+						 fancurve);
-+	case ACCESS_METHOD_EC3:
-+		return ec_write_fancurve_loq(&priv->ecram, priv->conf,
-+						 fancurve);
-+	case ACCESS_METHOD_WMI3:
-+		return wmi_write_fancurve_custom(priv->conf, fancurve);
-+	default:
-+		pr_info("No access method for fancurve: %d\n",
-+			priv->conf->access_method_fancurve);
-+		return -EINVAL;
-+	}
-+}
-+
-+#define MINIFANCUVE_ON_COOL_ON 0x04
-+#define MINIFANCUVE_ON_COOL_OFF 0xA0
-+
-+static int ec_read_minifancurve(struct ecram *ecram,
-+				const struct model_config *model, bool *state)
-+{
-+	int value =
-+		ecram_read(ecram, model->registers->EXT_MINIFANCURVE_ON_COOL);
-+
-+	switch (value) {
-+	case MINIFANCUVE_ON_COOL_ON:
-+		*state = true;
-+		break;
-+	case MINIFANCUVE_ON_COOL_OFF:
-+		*state = false;
-+		break;
-+	default:
-+		pr_info("Unexpected value in MINIFANCURVE register: %d\n",
-+			value);
-+		return -1;
-+	}
-+	return 0;
-+}
-+
-+static ssize_t ec_write_minifancurve(struct ecram *ecram,
-+				     const struct model_config *model,
-+				     bool state)
-+{
-+	u8 val = state ? MINIFANCUVE_ON_COOL_ON : MINIFANCUVE_ON_COOL_OFF;
-+
-+	ecram_write(ecram, model->registers->EXT_MINIFANCURVE_ON_COOL, val);
-+	return 0;
-+}
-+
-+#define EC_LOCKFANCONTROLLER_ON 8
-+#define EC_LOCKFANCONTROLLER_OFF 0
-+
-+static ssize_t ec_write_lockfancontroller(struct ecram *ecram,
-+					  const struct model_config *model,
-+					  bool state)
-+{
-+	u8 val = state ? EC_LOCKFANCONTROLLER_ON : EC_LOCKFANCONTROLLER_OFF;
-+
-+	ecram_write(ecram, model->registers->EXT_LOCKFANCONTROLLER, val);
-+	return 0;
-+}
-+
-+static int ec_read_lockfancontroller(struct ecram *ecram,
-+				     const struct model_config *model,
-+				     bool *state)
-+{
-+	int value = ecram_read(ecram, model->registers->EXT_LOCKFANCONTROLLER);
-+
-+	switch (value) {
-+	case EC_LOCKFANCONTROLLER_ON:
-+		*state = true;
-+		break;
-+	case EC_LOCKFANCONTROLLER_OFF:
-+		*state = false;
-+		break;
-+	default:
-+		pr_info("Unexpected value in lockfanspeed register: %d\n",
-+			value);
-+		return -1;
-+	}
-+	return 0;
-+}
-+
-+#define EC_FANFULLSPEED_ON 0x40
-+#define EC_FANFULLSPEED_OFF 0x00
-+
-+static int ec_read_fanfullspeed(struct ecram *ecram,
-+				const struct model_config *model, bool *state)
-+{
-+	int value = ecram_read(ecram, model->registers->EXT_MAXIMUMFANSPEED);
-+
-+	switch (value) {
-+	case EC_FANFULLSPEED_ON:
-+		*state = true;
-+		break;
-+	case EC_FANFULLSPEED_OFF:
-+		*state = false;
-+		break;
-+	default:
-+		pr_info("Unexpected value in maximumfanspeed register: %d\n",
-+			value);
-+		return -1;
-+	}
-+	return 0;
-+}
-+
-+static ssize_t ec_write_fanfullspeed(struct ecram *ecram,
-+				     const struct model_config *model,
-+				     bool state)
-+{
-+	u8 val = state ? EC_FANFULLSPEED_ON : EC_FANFULLSPEED_OFF;
-+
-+	ecram_write(ecram, model->registers->EXT_MAXIMUMFANSPEED, val);
-+	return 0;
-+}
-+
-+static ssize_t wmi_read_fanfullspeed(struct legion_private *priv, bool *state)
-+{
-+	return get_simple_wmi_attribute_bool(priv, WMI_GUID_LENOVO_FAN_METHOD,
-+					     0, WMI_METHOD_ID_FAN_GET_FULLSPEED,
-+					     false, 1, state);
-+}
-+
-+static ssize_t wmi_write_fanfullspeed(struct legion_private *priv, bool state)
-+{
-+	return set_simple_wmi_attribute(priv, WMI_GUID_LENOVO_FAN_METHOD, 0,
-+					WMI_METHOD_ID_FAN_SET_FULLSPEED, false,
-+					1, state);
-+}
-+
-+static ssize_t read_fanfullspeed(struct legion_private *priv, bool *state)
-+{
-+	// TODO: use enums or function pointers?
-+	switch (priv->conf->access_method_fanfullspeed) {
-+	case ACCESS_METHOD_EC:
-+		return ec_read_fanfullspeed(&priv->ecram, priv->conf, state);
-+	case ACCESS_METHOD_WMI:
-+		return wmi_read_fanfullspeed(priv, state);
-+	default:
-+		pr_info("No access method for fan full speed: %d\n",
-+			priv->conf->access_method_fanfullspeed);
-+		return -EINVAL;
-+	}
-+}
-+
-+static ssize_t write_fanfullspeed(struct legion_private *priv, bool state)
-+{
-+	ssize_t res;
-+
-+	switch (priv->conf->access_method_fanfullspeed) {
-+	case ACCESS_METHOD_EC:
-+		res = ec_write_fanfullspeed(&priv->ecram, priv->conf, state);
-+		return res;
-+	case ACCESS_METHOD_WMI:
-+		return wmi_write_fanfullspeed(priv, state);
-+	default:
-+		pr_info("No access method for fan full speed: %d\n",
-+			priv->conf->access_method_fanfullspeed);
-+		return -EINVAL;
-+	}
-+}
-+
-+/* ============================= */
-+/* Power mode reading/writing    */
-+/* ============================= */
-+
-+enum legion_ec_powermode {
-+	LEGION_EC_POWERMODE_QUIET = 2,
-+	LEGION_EC_POWERMODE_BALANCED = 0,
-+	LEGION_EC_POWERMODE_PERFORMANCE = 1,
-+	LEGION_EC_POWERMODE_CUSTOM = 3
-+};
-+
-+enum legion_wmi_powermode {
-+	LEGION_WMI_POWERMODE_QUIET = 1,
-+	LEGION_WMI_POWERMODE_BALANCED = 2,
-+	LEGION_WMI_POWERMODE_PERFORMANCE = 3,
-+	LEGION_WMI_POWERMODE_CUSTOM = 255
-+};
-+
-+enum legion_wmi_powermode ec_to_wmi_powermode(int ec_mode)
-+{
-+	switch (ec_mode) {
-+	case LEGION_EC_POWERMODE_QUIET:
-+		return LEGION_WMI_POWERMODE_QUIET;
-+	case LEGION_EC_POWERMODE_BALANCED:
-+		return LEGION_WMI_POWERMODE_BALANCED;
-+	case LEGION_EC_POWERMODE_PERFORMANCE:
-+		return LEGION_WMI_POWERMODE_PERFORMANCE;
-+	case LEGION_EC_POWERMODE_CUSTOM:
-+		return LEGION_WMI_POWERMODE_CUSTOM;
-+	default:
-+		return LEGION_WMI_POWERMODE_BALANCED;
-+	}
-+}
-+
-+enum legion_ec_powermode wmi_to_ec_powermode(enum legion_wmi_powermode wmi_mode)
-+{
-+	switch (wmi_mode) {
-+	case LEGION_WMI_POWERMODE_QUIET:
-+		return LEGION_EC_POWERMODE_QUIET;
-+	case LEGION_WMI_POWERMODE_BALANCED:
-+		return LEGION_EC_POWERMODE_BALANCED;
-+	case LEGION_WMI_POWERMODE_PERFORMANCE:
-+		return LEGION_EC_POWERMODE_PERFORMANCE;
-+	case LEGION_WMI_POWERMODE_CUSTOM:
-+		return LEGION_EC_POWERMODE_CUSTOM;
-+	default:
-+		return LEGION_EC_POWERMODE_BALANCED;
-+	}
-+}
-+
-+static ssize_t ec_read_powermode(struct legion_private *priv, int *powermode)
-+{
-+	*powermode =
-+		ecram_read(&priv->ecram, priv->conf->registers->EXT_POWERMODE);
-+	return 0;
-+}
-+
-+static ssize_t ec_write_powermode(struct legion_private *priv, u8 value)
-+{
-+	if (!((value >= 0 && value <= 2) || value == 255)) {
-+		pr_info("Unexpected power mode value ignored: %d\n", value);
-+		return -ENOMEM;
-+	}
-+	ecram_write(&priv->ecram, priv->conf->registers->EXT_POWERMODE, value);
-+	return 0;
-+}
-+
-+static ssize_t acpi_read_powermode(struct legion_private *priv, int *powermode)
-+{
-+	unsigned long acpi_powermode;
-+	int err;
-+
-+	// spmo method not always available
-+	// \_SB.PCI0.LPC0.EC0.SPMO
-+	err = eval_spmo(priv->adev->handle, &acpi_powermode);
-+	*powermode = (int)acpi_powermode;
-+	return err;
-+}
-+
-+static ssize_t wmi_read_powermode(int *powermode)
-+{
-+	int err;
-+	unsigned long res;
-+
-+	err = wmi_exec_noarg_int(LEGION_WMI_GAMEZONE_GUID, 0,
-+				 WMI_METHOD_ID_GETSMARTFANMODE, &res);
-+
-+	if (!err)
-+		*powermode = res;
-+	return err;
-+}
-+
-+static ssize_t wmi_write_powermode(u8 value)
-+{
-+	if (!((value >= LEGION_WMI_POWERMODE_QUIET &&
-+	       value <= LEGION_WMI_POWERMODE_PERFORMANCE) ||
-+	      value == LEGION_WMI_POWERMODE_CUSTOM)) {
-+		pr_info("Unexpected power mode value ignored: %d\n", value);
-+		return -ENOMEM;
-+	}
-+	return wmi_exec_arg(LEGION_WMI_GAMEZONE_GUID, 0,
-+			    WMI_METHOD_ID_SETSMARTFANMODE, &value,
-+			    sizeof(value));
-+}
-+
-+static ssize_t read_powermode(struct legion_private *priv, int *powermode)
-+{
-+	ssize_t res;
-+
-+	switch (priv->conf->access_method_powermode) {
-+	case ACCESS_METHOD_EC:
-+		res = ec_read_powermode(priv, powermode);
-+		*powermode = ec_to_wmi_powermode(*powermode);
-+		return res;
-+	case ACCESS_METHOD_ACPI:
-+		return acpi_read_powermode(priv, powermode);
-+	case ACCESS_METHOD_WMI:
-+		return wmi_read_powermode(powermode);
-+	default:
-+		pr_info("No access method for powermode: %d\n",
-+			priv->conf->access_method_powermode);
-+		return -EINVAL;
-+	}
-+}
-+
-+static ssize_t write_powermode(struct legion_private *priv,
-+			       enum legion_wmi_powermode value)
-+{
-+	ssize_t res;
-+
-+	//TODO: remove again
-+	pr_info("Set powermode\n");
-+
-+	switch (priv->conf->access_method_powermode) {
-+	case ACCESS_METHOD_EC:
-+		res = ec_write_powermode(priv, wmi_to_ec_powermode(value));
-+		return res;
-+	case ACCESS_METHOD_WMI:
-+		return wmi_write_powermode(value);
-+	default:
-+		pr_info("No access method for powermode: %d\n",
-+			priv->conf->access_method_powermode);
-+		return -EINVAL;
-+	}
-+}
-+
-+/**
-+ * Shortly toggle powermode to a different mode
-+ * and switch back, e.g. to reset fan curve.
-+ */
-+static void toggle_powermode(struct legion_private *priv)
-+{
-+	int old_powermode;
-+	int next_powermode;
-+
-+	read_powermode(priv, &old_powermode);
-+	next_powermode = old_powermode == 0 ? 1 : 0;
-+
-+	write_powermode(priv, next_powermode);
-+	mdelay(1500);
-+	write_powermode(priv, old_powermode);
-+}
-+
-+/* ============================= */
-+/* Charging mode reading/writing */
-+/* ============================- */
-+
-+#define FCT_RAPID_CHARGE_ON 0x07
-+#define FCT_RAPID_CHARGE_OFF 0x08
-+#define RAPID_CHARGE_ON 0x0
-+#define RAPID_CHARGE_OFF 0x1
-+
-+static int acpi_read_rapidcharge(struct acpi_device *adev, bool *state)
-+{
-+	unsigned long result;
-+	int err;
-+
-+	//also works? which one is better?
-+	/*
-+	 * err = eval_qcho(adev->handle, &result);
-+	 * if (err)
-+	 *  return err;
-+	 * state = result;
-+	 * return 0;
-+	 */
-+
-+	err = eval_gbmd(adev->handle, &result);
-+	if (err)
-+		return err;
-+
-+	*state = result & 0x04;
-+	return 0;
-+}
-+
-+static int acpi_write_rapidcharge(struct acpi_device *adev, bool state)
-+{
-+	int err;
-+	unsigned long fct_nr = state > 0 ? FCT_RAPID_CHARGE_ON :
-+					   FCT_RAPID_CHARGE_OFF;
-+
-+	err = exec_sbmc(adev->handle, fct_nr);
-+	pr_info("Set rapidcharge to %d by calling %lu: result: %d\n", state,
-+		fct_nr, err);
-+	return err;
-+}
-+
-+/* ============================= */
-+/* Keyboard backlight read/write */
-+/* ============================= */
-+
-+static ssize_t legion_kbd_bl2_brightness_get(struct legion_private *priv)
-+{
-+	unsigned long state = 0;
-+	int err;
-+
-+	err = wmi_exec_noarg_int(LEGION_WMI_GAMEZONE_GUID, 0,
-+				 WMI_METHOD_ID_GETKEYBOARDLIGHT, &state);
-+	if (err)
-+		return -EINVAL;
-+
-+	return state;
-+}
-+
-+//static int legion_kbd_bl2_brightness_set(struct legion_private *priv,
-+//					 unsigned int brightness)
-+//{
-+//	u8 in_param = brightness;
-+
-+//	return wmi_exec_arg(LEGION_WMI_GAMEZONE_GUID, 0,
-+//			    WMI_METHOD_ID_SETKEYBOARDLIGHT, &in_param,
-+//			    sizeof(in_param));
-+//}
-+
-+//min: 1, max: 3
-+#define LIGHT_ID_KEYBOARD 0x00
-+//min: 0, max: 1
-+#define LIGHT_ID_YLOGO 0x03
-+//min: 1, max: 2
-+#define LIGHT_ID_IOPORT 0x05
-+
-+static int legion_wmi_light_get(struct legion_private *priv, u8 light_id,
-+				unsigned int min_value, unsigned int max_value)
-+{
-+	struct acpi_buffer params;
-+	u8 in;
-+	u8 result[2];
-+	u8 value;
-+	int err;
-+
-+	params.length = 1;
-+	params.pointer = &in;
-+	in = light_id;
-+	err = wmi_exec_ints(LEGION_WMI_KBBACKLIGHT_GUID, 0,
-+			    WMI_METHOD_ID_KBBACKLIGHTGET, &params, result,
-+			    ARRAY_SIZE(result));
-+	if (err) {
-+		pr_info("Error for WMI method call to get brightness\n");
-+		return -EIO;
-+	}
-+
-+	value = result[1];
-+	if (!(value >= min_value && value <= max_value)) {
-+		pr_info("Error WMI call for reading brightness: expected a value between %u and %u, but got %d\n",
-+			min_value, max_value, value);
-+		return -EFAULT;
-+	}
-+
-+	return value - min_value;
-+}
-+
-+static int legion_wmi_light_set(struct legion_private *priv, u8 light_id,
-+				unsigned int min_value, unsigned int max_value,
-+				unsigned int brightness)
-+{
-+	struct acpi_buffer buffer;
-+	u8 in_buffer_param[8];
-+	unsigned long result;
-+	int err;
-+
-+	buffer.length = 3;
-+	buffer.pointer = &in_buffer_param[0];
-+	in_buffer_param[0] = light_id;
-+	in_buffer_param[1] = 0x01;
-+	in_buffer_param[2] =
-+		clamp(brightness + min_value, min_value, max_value);
-+
-+	err = wmi_exec_int(LEGION_WMI_KBBACKLIGHT_GUID, 0,
-+			   WMI_METHOD_ID_KBBACKLIGHTSET, &buffer, &result);
-+	if (err) {
-+		pr_info("Error for WMI method call to set brightness on light: %d\n",
-+			light_id);
-+		return -EIO;
-+	}
-+
-+	return 0;
-+}
-+
-+static int legion_kbd_bl_brightness_get(struct legion_private *priv)
-+{
-+	return legion_wmi_light_get(priv, LIGHT_ID_KEYBOARD, 1, 3);
-+}
-+
-+static int legion_kbd_bl_brightness_set(struct legion_private *priv,
-+					unsigned int brightness)
-+{
-+	return legion_wmi_light_set(priv, LIGHT_ID_KEYBOARD, 1, 3, brightness);
-+}
-+
-+/* =============================  */
-+/* debugfs interface              */
-+/* ============================   */
-+
-+static int debugfs_ecmemory_show(struct seq_file *s, void *unused)
-+{
-+	struct legion_private *priv = s->private;
-+	size_t offset;
-+
-+	for (offset = 0; offset < priv->conf->memoryio_size; ++offset) {
-+		char value = ecram_read(&priv->ecram,
-+					priv->conf->memoryio_physical_ec_start +
-+						offset);
-+
-+		seq_write(s, &value, 1);
-+	}
-+	return 0;
-+}
-+
-+DEFINE_SHOW_ATTRIBUTE(debugfs_ecmemory);
-+
-+static int debugfs_ecmemoryram_show(struct seq_file *s, void *unused)
-+{
-+	struct legion_private *priv = s->private;
-+	size_t offset;
-+	ssize_t err;
-+	u8 value;
-+
-+	for (offset = 0; offset < priv->conf->ramio_size; ++offset) {
-+		err = ecram_memoryio_read(&priv->ec_memoryio, offset, &value);
-+		if (!err)
-+			seq_write(s, &value, 1);
-+		else
-+			return -EACCES;
-+	}
-+	return 0;
-+}
-+
-+DEFINE_SHOW_ATTRIBUTE(debugfs_ecmemoryram);
-+
-+//TODO: make (almost) all methods static
-+
-+static void seq_file_print_with_error(struct seq_file *s, const char *name,
-+				      ssize_t err, int value)
-+{
-+	seq_printf(s, "%s error: %ld\n", name, err);
-+	seq_printf(s, "%s: %d\n", name, value);
-+}
-+
-+static int debugfs_fancurve_show(struct seq_file *s, void *unused)
-+{
-+	struct legion_private *priv = s->private;
-+	bool is_minifancurve;
-+	bool is_lockfancontroller;
-+	bool is_maximumfanspeed;
-+	bool is_rapidcharge = false;
-+	int powermode;
-+	int temperature;
-+	int fanspeed;
-+	int err;
-+	unsigned long cfg;
-+	struct fancurve wmi_fancurve;
-+	//int kb_backlight;
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+
-+	seq_printf(s, "EC Chip ID: %x\n", read_ec_id(&priv->ecram, priv->conf));
-+	seq_printf(s, "EC Chip Version: %x\n",
-+		   read_ec_version(&priv->ecram, priv->conf));
-+	seq_printf(s, "legion_laptop features: %s\n", LEGIONFEATURES);
-+	seq_printf(s, "legion_laptop ec_readonly: %d\n", ec_readonly);
-+
-+	err = eval_int(priv->adev->handle, "VPC0._CFG", &cfg);
-+	seq_printf(s, "ACPI CFG error: %d\n", err);
-+	seq_printf(s, "ACPI CFG: %lu\n", cfg);
-+
-+	seq_printf(s, "temperature access method: %d\n",
-+		   priv->conf->access_method_temperature);
-+	err = read_temperature(priv, 0, &temperature);
-+	seq_file_print_with_error(s, "CPU temperature", err, temperature);
-+	err = ec_read_temperature(&priv->ecram, priv->conf, 0, &temperature);
-+	seq_file_print_with_error(s, "CPU temperature EC", err, temperature);
-+	err = acpi_read_temperature(priv, 0, &temperature);
-+	seq_file_print_with_error(s, "CPU temperature ACPI", err, temperature);
-+	err = wmi_read_temperature_gz(0, &temperature);
-+	seq_file_print_with_error(s, "CPU temperature WMI", err, temperature);
-+	err = wmi_read_temperature(0, &temperature);
-+	seq_file_print_with_error(s, "CPU temperature WMI2", err, temperature);
-+	err = wmi_read_temperature_other(0, &temperature);
-+	seq_file_print_with_error(s, "CPU temperature WMI3", err, temperature);
-+
-+	err = read_temperature(priv, 1, &temperature);
-+	seq_file_print_with_error(s, "GPU temperature", err, temperature);
-+	err = ec_read_temperature(&priv->ecram, priv->conf, 1, &temperature);
-+	seq_file_print_with_error(s, "GPU temperature EC", err, temperature);
-+	err = acpi_read_temperature(priv, 1, &temperature);
-+	seq_file_print_with_error(s, "GPU temperature ACPI", err, temperature);
-+	err = wmi_read_temperature_gz(1, &temperature);
-+	seq_file_print_with_error(s, "GPU temperature WMI", err, temperature);
-+	err = wmi_read_temperature(1, &temperature);
-+	seq_file_print_with_error(s, "GPU temperature WMI2", err, temperature);
-+	err = wmi_read_temperature_other(1, &temperature);
-+	seq_file_print_with_error(s, "GPU temperature WMI3", err, temperature);
-+
-+	seq_printf(s, "fan speed access method: %d\n",
-+		   priv->conf->access_method_fanspeed);
-+	err = read_fanspeed(priv, 0, &fanspeed);
-+	seq_file_print_with_error(s, "1 fanspeed", err, fanspeed);
-+	err = ec_read_fanspeed(&priv->ecram, priv->conf, 0, &fanspeed);
-+	seq_file_print_with_error(s, "1 fanspeed EC", err, fanspeed);
-+	err = acpi_read_fanspeed(priv, 0, &fanspeed);
-+	seq_file_print_with_error(s, "1 fanspeed ACPI", err, fanspeed);
-+	err = wmi_read_fanspeed_gz(0, &fanspeed);
-+	seq_file_print_with_error(s, "1 fanspeed WMI", err, fanspeed);
-+	err = wmi_read_fanspeed(0, &fanspeed);
-+	seq_file_print_with_error(s, "1 fanspeed WMI2", err, fanspeed);
-+	err = wmi_read_fanspeed_other(0, &fanspeed);
-+	seq_file_print_with_error(s, "1 fanspeed WMI3", err, fanspeed);
-+
-+	err = read_fanspeed(priv, 1, &fanspeed);
-+	seq_file_print_with_error(s, "2 fanspeed", err, fanspeed);
-+	err = ec_read_fanspeed(&priv->ecram, priv->conf, 1, &fanspeed);
-+	seq_file_print_with_error(s, "2 fanspeed EC", err, fanspeed);
-+	err = acpi_read_fanspeed(priv, 1, &fanspeed);
-+	seq_file_print_with_error(s, "2 fanspeed ACPI", err, fanspeed);
-+	err = wmi_read_fanspeed_gz(1, &fanspeed);
-+	seq_file_print_with_error(s, "2 fanspeed WMI", err, fanspeed);
-+	err = wmi_read_fanspeed(1, &fanspeed);
-+	seq_file_print_with_error(s, "2 fanspeed WMI2", err, fanspeed);
-+	err = wmi_read_fanspeed_other(1, &fanspeed);
-+	seq_file_print_with_error(s, "2 fanspeed WMI3", err, fanspeed);
-+
-+	seq_printf(s, "powermode access method: %d\n",
-+		   priv->conf->access_method_powermode);
-+	err = read_powermode(priv, &powermode);
-+	seq_file_print_with_error(s, "powermode", err, powermode);
-+	err = ec_read_powermode(priv, &powermode);
-+	seq_file_print_with_error(s, "powermode EC", err, powermode);
-+	err = acpi_read_powermode(priv, &powermode);
-+	seq_file_print_with_error(s, "powermode ACPI", err, powermode);
-+	err = wmi_read_powermode(&powermode);
-+	seq_file_print_with_error(s, "powermode WMI", err, powermode);
-+	seq_printf(s, "has custom powermode: %d\n",
-+		   priv->conf->has_custom_powermode);
-+
-+	err = acpi_read_rapidcharge(priv->adev, &is_rapidcharge);
-+	seq_printf(s, "ACPI rapidcharge error: %d\n", err);
-+	seq_printf(s, "ACPI rapidcharge: %d\n", is_rapidcharge);
-+
-+	seq_printf(s, "WMI backlight 2 state: %ld\n",
-+		   legion_kbd_bl2_brightness_get(priv));
-+	seq_printf(s, "WMI backlight 3 state: %d\n",
-+		   legion_kbd_bl_brightness_get(priv));
-+
-+	seq_printf(s, "WMI light IO port: %d\n",
-+		   legion_wmi_light_get(priv, LIGHT_ID_IOPORT, 0, 4));
-+
-+	seq_printf(s, "WMI light Y logo/lid: %d\n",
-+		   legion_wmi_light_get(priv, LIGHT_ID_YLOGO, 0, 4));
-+
-+	seq_printf(s, "EC minifancurve feature enabled: %d\n",
-+		   priv->conf->has_minifancurve);
-+	err = ec_read_minifancurve(&priv->ecram, priv->conf, &is_minifancurve);
-+	seq_printf(s, "EC minifancurve on cool: %s\n",
-+		   err ? "error" : (is_minifancurve ? "true" : "false"));
-+
-+	err = ec_read_lockfancontroller(&priv->ecram, priv->conf,
-+					&is_lockfancontroller);
-+	seq_printf(s, "EC lockfancontroller error: %d\n", err);
-+	seq_printf(s, "EC lockfancontroller: %s\n",
-+		   err ? "error" : (is_lockfancontroller ? "true" : "false"));
-+
-+	err = read_fanfullspeed(priv, &is_maximumfanspeed);
-+	seq_file_print_with_error(s, "fanfullspeed", err, is_maximumfanspeed);
-+
-+	err = ec_read_fanfullspeed(&priv->ecram, priv->conf,
-+				   &is_maximumfanspeed);
-+	seq_file_print_with_error(s, "fanfullspeed EC", err,
-+				  is_maximumfanspeed);
-+
-+	read_fancurve(priv, &priv->fancurve);
-+	seq_printf(s, "EC fan curve current point id: %ld\n",
-+		   priv->fancurve.current_point_i);
-+	seq_printf(s, "EC fan curve points size: %ld\n", priv->fancurve.size);
-+
-+	seq_puts(s, "Current fan curve in hardware:\n");
-+	fancurve_print_seqfile(&priv->fancurve, s);
-+	seq_puts(s, "=====================\n");
-+	mutex_unlock(&priv->fancurve_mutex);
-+
-+	seq_puts(s, "Current fan curve in hardware (WMI; might be empty)\n");
-+	wmi_fancurve.size = 0;
-+	err = wmi_read_fancurve_custom(priv->conf, &wmi_fancurve);
-+	fancurve_print_seqfile(&wmi_fancurve, s);
-+	seq_puts(s, "=====================\n");
-+	return 0;
-+}
-+
-+DEFINE_SHOW_ATTRIBUTE(debugfs_fancurve);
-+
-+static void legion_debugfs_init(struct legion_private *priv)
-+{
-+	struct dentry *dir;
-+
-+	// TODO: remove this note
-+	// Note: like other kernel modules, do not catch errors here
-+	// because if kernel is build without debugfs this
-+	// will return an error but module still has to
-+	// work, just without debugfs
-+	// TODO: what permissions; some modules do 400
-+	// other do 444
-+	dir = debugfs_create_dir(LEGION_DRVR_SHORTNAME, NULL);
-+	debugfs_create_file("fancurve", 0444, dir, priv,
-+			    &debugfs_fancurve_fops);
-+	debugfs_create_file("ecmemory", 0444, dir, priv,
-+			    &debugfs_ecmemory_fops);
-+	debugfs_create_file("ecmemoryram", 0444, dir, priv,
-+			    &debugfs_ecmemoryram_fops);
-+
-+	priv->debugfs_dir = dir;
-+}
-+
-+static void legion_debugfs_exit(struct legion_private *priv)
-+{
-+	pr_info("Unloading legion dubugfs\n");
-+	// The following is does nothing if pointer is NULL
-+	debugfs_remove_recursive(priv->debugfs_dir);
-+	priv->debugfs_dir = NULL;
-+	pr_info("Unloading legion dubugfs done\n");
-+}
-+
-+/* =============================  */
-+/* sysfs interface                */
-+/* ============================   */
-+
-+static int show_simple_wmi_attribute(struct device *dev,
-+				     struct device_attribute *attr, char *buf,
-+				     const char *guid, u8 instance,
-+				     u32 method_id, bool invert,
-+				     unsigned long scale)
-+{
-+	unsigned long state = 0;
-+	int err;
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+	err = get_simple_wmi_attribute(priv, guid, instance, method_id, invert,
-+				       scale, &state);
-+	mutex_unlock(&priv->fancurve_mutex);
-+
-+	if (err)
-+		return -EINVAL;
-+
-+	return sysfs_emit(buf, "%lu\n", state);
-+}
-+
-+static int show_simple_wmi_attribute_from_buffer(struct device *dev,
-+						 struct device_attribute *attr,
-+						 char *buf, const char *guid,
-+						 u8 instance, u32 method_id,
-+						 size_t ressize, size_t i,
-+						 int scale)
-+{
-+	u8 res[16];
-+	int err;
-+	int out;
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+
-+	if (ressize > ARRAY_SIZE(res)) {
-+		pr_info("Buffer too small for WMI result\n");
-+		return -EINVAL;
-+	}
-+	if (i >= ressize) {
-+		pr_info("Index not within buffer size\n");
-+		return -EINVAL;
-+	}
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+	err = wmi_exec_noarg_ints(guid, instance, method_id, res, ressize);
-+	mutex_unlock(&priv->fancurve_mutex);
-+	if (err)
-+		return -EINVAL;
-+
-+	out = scale * res[i];
-+	return sysfs_emit(buf, "%d\n", out);
-+}
-+
-+static int store_simple_wmi_attribute(struct device *dev,
-+				      struct device_attribute *attr,
-+				      const char *buf, size_t count,
-+				      const char *guid, u8 instance,
-+				      u32 method_id, bool invert, int scale)
-+{
-+	int state;
-+	int err;
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+
-+	err = kstrtouint(buf, 0, &state);
-+	if (err)
-+		return err;
-+	err = set_simple_wmi_attribute(priv, guid, instance, method_id, invert,
-+				       scale, state);
-+	if (err)
-+		return err;
-+	return count;
-+}
-+
-+static ssize_t lockfancontroller_show(struct device *dev,
-+				      struct device_attribute *attr, char *buf)
-+{
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+	bool is_lockfancontroller;
-+	int err;
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+	err = ec_read_lockfancontroller(&priv->ecram, priv->conf,
-+					&is_lockfancontroller);
-+	mutex_unlock(&priv->fancurve_mutex);
-+	if (err)
-+		return -EINVAL;
-+
-+	return sysfs_emit(buf, "%d\n", is_lockfancontroller);
-+}
-+
-+static ssize_t lockfancontroller_store(struct device *dev,
-+				       struct device_attribute *attr,
-+				       const char *buf, size_t count)
-+{
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+	bool is_lockfancontroller;
-+	int err;
-+
-+	err = kstrtobool(buf, &is_lockfancontroller);
-+	if (err)
-+		return err;
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+	err = ec_write_lockfancontroller(&priv->ecram, priv->conf,
-+					 is_lockfancontroller);
-+	mutex_unlock(&priv->fancurve_mutex);
-+	if (err)
-+		return -EINVAL;
-+
-+	return count;
-+}
-+
-+static DEVICE_ATTR_RW(lockfancontroller);
-+
-+static ssize_t rapidcharge_show(struct device *dev,
-+				struct device_attribute *attr, char *buf)
-+{
-+	bool state = false;
-+	int err;
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+	err = acpi_read_rapidcharge(priv->adev, &state);
-+	mutex_unlock(&priv->fancurve_mutex);
-+	if (err)
-+		return -EINVAL;
-+
-+	return sysfs_emit(buf, "%d\n", state);
-+}
-+
-+static ssize_t rapidcharge_store(struct device *dev,
-+				 struct device_attribute *attr, const char *buf,
-+				 size_t count)
-+{
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+	int state;
-+	int err;
-+
-+	err = kstrtouint(buf, 0, &state);
-+	if (err)
-+		return err;
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+	err = acpi_write_rapidcharge(priv->adev, state);
-+	mutex_unlock(&priv->fancurve_mutex);
-+	if (err)
-+		return -EINVAL;
-+
-+	return count;
-+}
-+
-+static DEVICE_ATTR_RW(rapidcharge);
-+
-+static ssize_t issupportgpuoc_show(struct device *dev,
-+				   struct device_attribute *attr, char *buf)
-+{
-+	return show_simple_wmi_attribute(dev, attr, buf,
-+					 LEGION_WMI_GAMEZONE_GUID, 0,
-+					 WMI_METHOD_ID_ISSUPPORTGPUOC, false,
-+					 1);
-+}
-+
-+static DEVICE_ATTR_RO(issupportgpuoc);
-+
-+static ssize_t aslcodeversion_show(struct device *dev,
-+				   struct device_attribute *attr, char *buf)
-+{
-+	return show_simple_wmi_attribute(dev, attr, buf,
-+					 LEGION_WMI_GAMEZONE_GUID, 0,
-+					 WMI_METHOD_ID_GETVERSION, false, 1);
-+}
-+
-+static DEVICE_ATTR_RO(aslcodeversion);
-+
-+static ssize_t issupportcpuoc_show(struct device *dev,
-+				   struct device_attribute *attr, char *buf)
-+{
-+	return show_simple_wmi_attribute(dev, attr, buf,
-+					 LEGION_WMI_GAMEZONE_GUID, 0,
-+					 WMI_METHOD_ID_ISSUPPORTCPUOC, false,
-+					 1);
-+}
-+
-+static DEVICE_ATTR_RO(issupportcpuoc);
-+
-+static ssize_t winkey_show(struct device *dev, struct device_attribute *attr,
-+			   char *buf)
-+{
-+	return show_simple_wmi_attribute(dev, attr, buf,
-+					 LEGION_WMI_GAMEZONE_GUID, 0,
-+					 WMI_METHOD_ID_GETWINKEYSTATUS, true,
-+					 1);
-+}
-+
-+static ssize_t winkey_store(struct device *dev, struct device_attribute *attr,
-+			    const char *buf, size_t count)
-+{
-+	return store_simple_wmi_attribute(dev, attr, buf, count,
-+					  LEGION_WMI_GAMEZONE_GUID, 0,
-+					  WMI_METHOD_ID_SETWINKEYSTATUS, true,
-+					  1);
-+}
-+
-+static DEVICE_ATTR_RW(winkey);
-+
-+// on newer models the touchpad feature in ideapad does not work anymore, so
-+// we need this
-+static ssize_t touchpad_show(struct device *dev, struct device_attribute *attr,
-+			     char *buf)
-+{
-+	return show_simple_wmi_attribute(dev, attr, buf,
-+					 LEGION_WMI_GAMEZONE_GUID, 0,
-+					 WMI_METHOD_ID_GETTPSTATUS, true, 1);
-+}
-+
-+static ssize_t touchpad_store(struct device *dev, struct device_attribute *attr,
-+			      const char *buf, size_t count)
-+{
-+	return store_simple_wmi_attribute(dev, attr, buf, count,
-+					  LEGION_WMI_GAMEZONE_GUID, 0,
-+					  WMI_METHOD_ID_SETTPSTATUS, true, 1);
-+}
-+
-+static DEVICE_ATTR_RW(touchpad);
-+
-+static ssize_t gsync_show(struct device *dev, struct device_attribute *attr,
-+			  char *buf)
-+{
-+	return show_simple_wmi_attribute(dev, attr, buf,
-+					 LEGION_WMI_GAMEZONE_GUID, 0,
-+					 WMI_METHOD_ID_GETGSYNCSTATUS, true, 1);
-+}
-+
-+static ssize_t gsync_store(struct device *dev, struct device_attribute *attr,
-+			   const char *buf, size_t count)
-+{
-+	return store_simple_wmi_attribute(dev, attr, buf, count,
-+					  LEGION_WMI_GAMEZONE_GUID, 0,
-+					  WMI_METHOD_ID_SETGSYNCSTATUS, true,
-+					  1);
-+}
-+
-+static DEVICE_ATTR_RW(gsync);
-+
-+static ssize_t powerchargemode_show(struct device *dev,
-+				    struct device_attribute *attr, char *buf)
-+{
-+	return show_simple_wmi_attribute(dev, attr, buf,
-+					 LEGION_WMI_GAMEZONE_GUID, 0,
-+					 WMI_METHOD_ID_GETPOWERCHARGEMODE,
-+					 false, 1);
-+}
-+static DEVICE_ATTR_RO(powerchargemode);
-+
-+static ssize_t overdrive_show(struct device *dev, struct device_attribute *attr,
-+			      char *buf)
-+{
-+	return show_simple_wmi_attribute(dev, attr, buf,
-+					 LEGION_WMI_GAMEZONE_GUID, 0,
-+					 WMI_METHOD_ID_GETODSTATUS, false, 1);
-+}
-+
-+static ssize_t overdrive_store(struct device *dev,
-+			       struct device_attribute *attr, const char *buf,
-+			       size_t count)
-+{
-+	return store_simple_wmi_attribute(dev, attr, buf, count,
-+					  LEGION_WMI_GAMEZONE_GUID, 0,
-+					  WMI_METHOD_ID_SETODSTATUS, false, 1);
-+}
-+
-+static DEVICE_ATTR_RW(overdrive);
-+
-+static ssize_t thermalmode_show(struct device *dev,
-+				struct device_attribute *attr, char *buf)
-+{
-+	return show_simple_wmi_attribute(dev, attr, buf,
-+					 LEGION_WMI_GAMEZONE_GUID, 0,
-+					 WMI_METHOD_ID_GETTHERMALMODE, false,
-+					 1);
-+}
-+static DEVICE_ATTR_RO(thermalmode);
-+
-+// TOOD: probably remove again because provided by other means; only useful for overclocking
-+static ssize_t cpumaxfrequency_show(struct device *dev,
-+				    struct device_attribute *attr, char *buf)
-+{
-+	return show_simple_wmi_attribute(dev, attr, buf,
-+					 LEGION_WMI_GAMEZONE_GUID, 0,
-+					 WMI_METHOD_ID_GETCPUMAXFREQUENCY,
-+					 false, 1);
-+}
-+static DEVICE_ATTR_RO(cpumaxfrequency);
-+
-+static ssize_t isacfitforoc_show(struct device *dev,
-+				 struct device_attribute *attr, char *buf)
-+{
-+	return show_simple_wmi_attribute(dev, attr, buf,
-+					 LEGION_WMI_GAMEZONE_GUID, 0,
-+					 WMI_METHOD_ID_ISACFITFOROC, false, 1);
-+}
-+static DEVICE_ATTR_RO(isacfitforoc);
-+
-+static ssize_t igpumode_show(struct device *dev, struct device_attribute *attr,
-+			     char *buf)
-+{
-+	return show_simple_wmi_attribute(dev, attr, buf,
-+					 LEGION_WMI_GAMEZONE_GUID, 0,
-+					 WMI_METHOD_ID_GETIGPUMODESTATUS, false,
-+					 1);
-+}
-+
-+static ssize_t igpumode_store(struct device *dev, struct device_attribute *attr,
-+			      const char *buf, size_t count)
-+{
-+	return store_simple_wmi_attribute(dev, attr, buf, count,
-+					  LEGION_WMI_GAMEZONE_GUID, 0,
-+					  WMI_METHOD_ID_SETIGPUMODESTATUS,
-+					  false, 1);
-+}
-+
-+static DEVICE_ATTR_RW(igpumode);
-+
-+static ssize_t cpu_oc_show(struct device *dev, struct device_attribute *attr,
-+			   char *buf)
-+{
-+	return show_simple_wmi_attribute_from_buffer(
-+		dev, attr, buf, WMI_GUID_LENOVO_CPU_METHOD, 0,
-+		WMI_METHOD_ID_CPU_GET_OC_STATUS, 16, 0, 1);
-+}
-+
-+static ssize_t cpu_oc_store(struct device *dev, struct device_attribute *attr,
-+			    const char *buf, size_t count)
-+{
-+	return store_simple_wmi_attribute(dev, attr, buf, count,
-+					  WMI_GUID_LENOVO_CPU_METHOD, 0,
-+					  WMI_METHOD_ID_CPU_SET_OC_STATUS,
-+					  false, 1);
-+}
-+
-+static DEVICE_ATTR_RW(cpu_oc);
-+
-+static ssize_t cpu_shortterm_powerlimit_show(struct device *dev,
-+					     struct device_attribute *attr,
-+					     char *buf)
-+{
-+	return show_simple_wmi_attribute_from_buffer(
-+		dev, attr, buf, WMI_GUID_LENOVO_CPU_METHOD, 0,
-+		WMI_METHOD_ID_CPU_GET_SHORTTERM_POWERLIMIT, 16, 0, 1);
-+}
-+
-+static ssize_t cpu_shortterm_powerlimit_store(struct device *dev,
-+					      struct device_attribute *attr,
-+					      const char *buf, size_t count)
-+{
-+	return store_simple_wmi_attribute(
-+		dev, attr, buf, count, WMI_GUID_LENOVO_CPU_METHOD, 0,
-+		WMI_METHOD_ID_CPU_SET_SHORTTERM_POWERLIMIT, false, 1);
-+}
-+
-+static DEVICE_ATTR_RW(cpu_shortterm_powerlimit);
-+
-+static ssize_t cpu_longterm_powerlimit_show(struct device *dev,
-+					    struct device_attribute *attr,
-+					    char *buf)
-+{
-+	return show_simple_wmi_attribute_from_buffer(
-+		dev, attr, buf, WMI_GUID_LENOVO_CPU_METHOD, 0,
-+		WMI_METHOD_ID_CPU_GET_LONGTERM_POWERLIMIT, 16, 0, 1);
-+}
-+
-+static ssize_t cpu_longterm_powerlimit_store(struct device *dev,
-+					     struct device_attribute *attr,
-+					     const char *buf, size_t count)
-+{
-+	return store_simple_wmi_attribute(
-+		dev, attr, buf, count, WMI_GUID_LENOVO_CPU_METHOD, 0,
-+		WMI_METHOD_ID_CPU_SET_LONGTERM_POWERLIMIT, false, 1);
-+}
-+
-+static DEVICE_ATTR_RW(cpu_longterm_powerlimit);
-+
-+static ssize_t cpu_default_powerlimit_show(struct device *dev,
-+					   struct device_attribute *attr,
-+					   char *buf)
-+{
-+	return show_simple_wmi_attribute(
-+		dev, attr, buf, WMI_GUID_LENOVO_CPU_METHOD, 0,
-+		WMI_METHOD_ID_CPU_GET_DEFAULT_POWERLIMIT, false, 1);
-+}
-+
-+static DEVICE_ATTR_RO(cpu_default_powerlimit);
-+
-+static ssize_t cpu_peak_powerlimit_show(struct device *dev,
-+					struct device_attribute *attr,
-+					char *buf)
-+{
-+	return show_simple_wmi_attribute(dev, attr, buf,
-+					 WMI_GUID_LENOVO_GPU_METHOD, 0,
-+					 WMI_METHOD_ID_CPU_GET_PEAK_POWERLIMIT,
-+					 false, 1);
-+}
-+
-+static ssize_t cpu_peak_powerlimit_store(struct device *dev,
-+					 struct device_attribute *attr,
-+					 const char *buf, size_t count)
-+{
-+	return store_simple_wmi_attribute(dev, attr, buf, count,
-+					  WMI_GUID_LENOVO_GPU_METHOD, 0,
-+					  WMI_METHOD_ID_CPU_SET_PEAK_POWERLIMIT,
-+					  false, 1);
-+}
-+
-+static DEVICE_ATTR_RW(cpu_peak_powerlimit);
-+
-+static ssize_t cpu_apu_sppt_powerlimit_show(struct device *dev,
-+					    struct device_attribute *attr,
-+					    char *buf)
-+{
-+	return show_simple_wmi_attribute(
-+		dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0,
-+		WMI_METHOD_ID_CPU_GET_APU_SPPT_POWERLIMIT, false, 1);
-+}
-+
-+static ssize_t cpu_apu_sppt_powerlimit_store(struct device *dev,
-+					     struct device_attribute *attr,
-+					     const char *buf, size_t count)
-+{
-+	return store_simple_wmi_attribute(
-+		dev, attr, buf, count, WMI_GUID_LENOVO_GPU_METHOD, 0,
-+		WMI_METHOD_ID_CPU_SET_APU_SPPT_POWERLIMIT, false, 1);
-+}
-+
-+static DEVICE_ATTR_RW(cpu_apu_sppt_powerlimit);
-+
-+static ssize_t cpu_cross_loading_powerlimit_show(struct device *dev,
-+						 struct device_attribute *attr,
-+						 char *buf)
-+{
-+	return show_simple_wmi_attribute(
-+		dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0,
-+		WMI_METHOD_ID_CPU_GET_CROSS_LOADING_POWERLIMIT, false, 1);
-+}
-+
-+static ssize_t cpu_cross_loading_powerlimit_store(struct device *dev,
-+						  struct device_attribute *attr,
-+						  const char *buf, size_t count)
-+{
-+	return store_simple_wmi_attribute(
-+		dev, attr, buf, count, WMI_GUID_LENOVO_GPU_METHOD, 0,
-+		WMI_METHOD_ID_CPU_SET_CROSS_LOADING_POWERLIMIT, false, 1);
-+}
-+
-+static DEVICE_ATTR_RW(cpu_cross_loading_powerlimit);
-+
-+static ssize_t gpu_oc_show(struct device *dev, struct device_attribute *attr,
-+			   char *buf)
-+{
-+	return show_simple_wmi_attribute(dev, attr, buf,
-+					 WMI_GUID_LENOVO_GPU_METHOD, 0,
-+					 WMI_METHOD_ID_GPU_GET_OC_STATUS, false,
-+					 1);
-+}
-+
-+static ssize_t gpu_oc_store(struct device *dev, struct device_attribute *attr,
-+			    const char *buf, size_t count)
-+{
-+	return store_simple_wmi_attribute(dev, attr, buf, count,
-+					  WMI_GUID_LENOVO_GPU_METHOD, 0,
-+					  WMI_METHOD_ID_GPU_SET_OC_STATUS,
-+					  false, 1);
-+}
-+
-+static DEVICE_ATTR_RW(gpu_oc);
-+
-+static ssize_t gpu_ppab_powerlimit_show(struct device *dev,
-+					struct device_attribute *attr,
-+					char *buf)
-+{
-+	return show_simple_wmi_attribute_from_buffer(
-+		dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0,
-+		WMI_METHOD_ID_GPU_GET_PPAB_POWERLIMIT, 16, 0, 1);
-+}
-+
-+static ssize_t gpu_ppab_powerlimit_store(struct device *dev,
-+					 struct device_attribute *attr,
-+					 const char *buf, size_t count)
-+{
-+	return store_simple_wmi_attribute(dev, attr, buf, count,
-+					  WMI_GUID_LENOVO_GPU_METHOD, 0,
-+					  WMI_METHOD_ID_GPU_SET_PPAB_POWERLIMIT,
-+					  false, 1);
-+}
-+
-+static DEVICE_ATTR_RW(gpu_ppab_powerlimit);
-+
-+static ssize_t gpu_ctgp_powerlimit_show(struct device *dev,
-+					struct device_attribute *attr,
-+					char *buf)
-+{
-+	return show_simple_wmi_attribute_from_buffer(
-+		dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0,
-+		WMI_METHOD_ID_GPU_GET_CTGP_POWERLIMIT, 16, 0, 1);
-+}
-+
-+static ssize_t gpu_ctgp_powerlimit_store(struct device *dev,
-+					 struct device_attribute *attr,
-+					 const char *buf, size_t count)
-+{
-+	return store_simple_wmi_attribute(dev, attr, buf, count,
-+					  WMI_GUID_LENOVO_GPU_METHOD, 0,
-+					  WMI_METHOD_ID_GPU_SET_CTGP_POWERLIMIT,
-+					  false, 1);
-+}
-+
-+static DEVICE_ATTR_RW(gpu_ctgp_powerlimit);
-+
-+static ssize_t gpu_ctgp2_powerlimit_show(struct device *dev,
-+					 struct device_attribute *attr,
-+					 char *buf)
-+{
-+	return show_simple_wmi_attribute_from_buffer(
-+		dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0,
-+		WMI_METHOD_ID_GPU_GET_CTGP_POWERLIMIT, 16, 0x0C, 1);
-+}
-+
-+static DEVICE_ATTR_RO(gpu_ctgp2_powerlimit);
-+
-+// TOOD: probably remove again because provided by other means; only useful for overclocking
-+static ssize_t
-+gpu_default_ppab_ctrgp_powerlimit_show(struct device *dev,
-+				       struct device_attribute *attr, char *buf)
-+{
-+	return show_simple_wmi_attribute(
-+		dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0,
-+		WMI_METHOD_ID_GPU_GET_DEFAULT_PPAB_CTGP_POWERLIMIT, false, 1);
-+}
-+static DEVICE_ATTR_RO(gpu_default_ppab_ctrgp_powerlimit);
-+
-+static ssize_t gpu_temperature_limit_show(struct device *dev,
-+					  struct device_attribute *attr,
-+					  char *buf)
-+{
-+	return show_simple_wmi_attribute(
-+		dev, attr, buf, WMI_GUID_LENOVO_GPU_METHOD, 0,
-+		WMI_METHOD_ID_GPU_GET_TEMPERATURE_LIMIT, false, 1);
-+}
-+
-+static ssize_t gpu_temperature_limit_store(struct device *dev,
-+					   struct device_attribute *attr,
-+					   const char *buf, size_t count)
-+{
-+	return store_simple_wmi_attribute(
-+		dev, attr, buf, count, WMI_GUID_LENOVO_GPU_METHOD, 0,
-+		WMI_METHOD_ID_GPU_SET_TEMPERATURE_LIMIT, false, 1);
-+}
-+
-+static DEVICE_ATTR_RW(gpu_temperature_limit);
-+
-+// TOOD: probably remove again because provided by other means; only useful for overclocking
-+static ssize_t gpu_boost_clock_show(struct device *dev,
-+				    struct device_attribute *attr, char *buf)
-+{
-+	return show_simple_wmi_attribute(dev, attr, buf,
-+					 WMI_GUID_LENOVO_GPU_METHOD, 0,
-+					 WMI_METHOD_ID_GPU_GET_BOOST_CLOCK,
-+					 false, 1);
-+}
-+static DEVICE_ATTR_RO(gpu_boost_clock);
-+
-+static ssize_t fan_fullspeed_show(struct device *dev,
-+				  struct device_attribute *attr, char *buf)
-+{
-+	bool state = false;
-+	int err;
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+	err = read_fanfullspeed(priv, &state);
-+	mutex_unlock(&priv->fancurve_mutex);
-+	if (err)
-+		return -EINVAL;
-+
-+	return sysfs_emit(buf, "%d\n", state);
-+}
-+
-+static ssize_t fan_fullspeed_store(struct device *dev,
-+				   struct device_attribute *attr,
-+				   const char *buf, size_t count)
-+{
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+	int state;
-+	int err;
-+
-+	err = kstrtouint(buf, 0, &state);
-+	if (err)
-+		return err;
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+	err = write_fanfullspeed(priv, state);
-+	mutex_unlock(&priv->fancurve_mutex);
-+	if (err)
-+		return -EINVAL;
-+
-+	return count;
-+}
-+
-+static DEVICE_ATTR_RW(fan_fullspeed);
-+
-+static ssize_t fan_maxspeed_show(struct device *dev,
-+				 struct device_attribute *attr, char *buf)
-+{
-+	return show_simple_wmi_attribute(dev, attr, buf,
-+					 WMI_GUID_LENOVO_FAN_METHOD, 0,
-+					 WMI_METHOD_ID_FAN_GET_MAXSPEED, false,
-+					 1);
-+}
-+
-+static ssize_t fan_maxspeed_store(struct device *dev,
-+				  struct device_attribute *attr,
-+				  const char *buf, size_t count)
-+{
-+	return store_simple_wmi_attribute(dev, attr, buf, count,
-+					  WMI_GUID_LENOVO_FAN_METHOD, 0,
-+					  WMI_METHOD_ID_FAN_SET_MAXSPEED, false,
-+					  1);
-+}
-+
-+static DEVICE_ATTR_RW(fan_maxspeed);
-+
-+static ssize_t powermode_show(struct device *dev, struct device_attribute *attr,
-+			      char *buf)
-+{
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+	int power_mode;
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+	read_powermode(priv, &power_mode);
-+	mutex_unlock(&priv->fancurve_mutex);
-+	return sysfs_emit(buf, "%d\n", power_mode);
-+}
-+
-+static void legion_platform_profile_notify(void);
-+
-+static ssize_t powermode_store(struct device *dev,
-+			       struct device_attribute *attr, const char *buf,
-+			       size_t count)
-+{
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+	int powermode;
-+	int err;
-+
-+	err = kstrtouint(buf, 0, &powermode);
-+	if (err)
-+		return err;
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+	err = write_powermode(priv, powermode);
-+	mutex_unlock(&priv->fancurve_mutex);
-+	if (err)
-+		return -EINVAL;
-+
-+	// TODO: better?
-+	// we have to wait a bit before change is done in hardware and
-+	// readback done after notifying returns correct value, otherwise
-+	// the notified reader will read old value
-+	msleep(500);
-+	legion_platform_profile_notify();
-+
-+	return count;
-+}
-+
-+static DEVICE_ATTR_RW(powermode);
-+
-+static struct attribute *legion_sysfs_attributes[] = {
-+	&dev_attr_powermode.attr,
-+	&dev_attr_lockfancontroller.attr,
-+	&dev_attr_rapidcharge.attr,
-+	&dev_attr_winkey.attr,
-+	&dev_attr_touchpad.attr,
-+	&dev_attr_gsync.attr,
-+	&dev_attr_powerchargemode.attr,
-+	&dev_attr_overdrive.attr,
-+	&dev_attr_cpumaxfrequency.attr,
-+	&dev_attr_isacfitforoc.attr,
-+	&dev_attr_cpu_oc.attr,
-+	&dev_attr_cpu_shortterm_powerlimit.attr,
-+	&dev_attr_cpu_longterm_powerlimit.attr,
-+	&dev_attr_cpu_apu_sppt_powerlimit.attr,
-+	&dev_attr_cpu_default_powerlimit.attr,
-+	&dev_attr_cpu_peak_powerlimit.attr,
-+	&dev_attr_cpu_cross_loading_powerlimit.attr,
-+	&dev_attr_gpu_oc.attr,
-+	&dev_attr_gpu_ppab_powerlimit.attr,
-+	&dev_attr_gpu_ctgp_powerlimit.attr,
-+	&dev_attr_gpu_ctgp2_powerlimit.attr,
-+	&dev_attr_gpu_default_ppab_ctrgp_powerlimit.attr,
-+	&dev_attr_gpu_temperature_limit.attr,
-+	&dev_attr_gpu_boost_clock.attr,
-+	&dev_attr_fan_fullspeed.attr,
-+	&dev_attr_fan_maxspeed.attr,
-+	&dev_attr_thermalmode.attr,
-+	&dev_attr_issupportcpuoc.attr,
-+	&dev_attr_issupportgpuoc.attr,
-+	&dev_attr_aslcodeversion.attr,
-+	&dev_attr_igpumode.attr,
-+	NULL
-+};
-+
-+static const struct attribute_group legion_attribute_group = {
-+	.attrs = legion_sysfs_attributes
-+};
-+
-+static int legion_sysfs_init(struct legion_private *priv)
-+{
-+	return device_add_group(&priv->platform_device->dev,
-+				&legion_attribute_group);
-+}
-+
-+static void legion_sysfs_exit(struct legion_private *priv)
-+{
-+	pr_info("Unloading legion sysfs\n");
-+	device_remove_group(&priv->platform_device->dev,
-+			    &legion_attribute_group);
-+	pr_info("Unloading legion sysfs done\n");
-+}
-+
-+/* =============================  */
-+/* WMI + ACPI                     */
-+/* ============================   */
-+// heavily based on ideapad_laptop.c
-+
-+// TODO: proper names if meaning of all events is clear
-+enum LEGION_WMI_EVENT {
-+	LEGION_WMI_EVENT_GAMEZONE = 1,
-+	LEGION_EVENT_A,
-+	LEGION_EVENT_B,
-+	LEGION_EVENT_C,
-+	LEGION_EVENT_D,
-+	LEGION_EVENT_E,
-+	LEGION_EVENT_F,
-+	LEGION_EVENT_G
-+};
-+
-+struct legion_wmi_private {
-+	enum LEGION_WMI_EVENT event;
-+};
-+
-+//static void legion_wmi_notify2(u32 value, void *context)
-+//    {
-+//	pr_info("WMI notify\n" );
-+//    }
-+
-+static void legion_wmi_notify(struct wmi_device *wdev, union acpi_object *data)
-+{
-+	struct legion_wmi_private *wpriv;
-+	struct legion_private *priv;
-+
-+	mutex_lock(&legion_shared_mutex);
-+	priv = legion_shared;
-+	if ((!priv) && (priv->loaded)) {
-+		pr_info("Received WMI event while not initialized!\n");
-+		goto unlock;
-+	}
-+
-+	wpriv = dev_get_drvdata(&wdev->dev);
-+	switch (wpriv->event) {
-+	case LEGION_EVENT_A:
-+		pr_info("Fan event: legion type: %d;  acpi type: %d (%d=integer)",
-+			wpriv->event, data->type, ACPI_TYPE_INTEGER);
-+		// TODO: here it is too early (first unlock mutext, then wait a bit)
-+		//legion_platform_profile_notify();
-+		break;
-+	default:
-+		pr_info("Event: legion type: %d;  acpi type: %d (%d=integer)",
-+			wpriv->event, data->type, ACPI_TYPE_INTEGER);
-+		break;
-+	}
-+
-+unlock:
-+	mutex_unlock(&legion_shared_mutex);
-+	// todo; fix that!
-+	// problem: we get an event just before the powermode change (from the key?),
-+	// so if we notify too early, it will read the old power mode/platform profile
-+	msleep(500);
-+	legion_platform_profile_notify();
-+}
-+
-+static int legion_wmi_probe(struct wmi_device *wdev, const void *context)
-+{
-+	struct legion_wmi_private *wpriv;
-+
-+	wpriv = devm_kzalloc(&wdev->dev, sizeof(*wpriv), GFP_KERNEL);
-+	if (!wpriv)
-+		return -ENOMEM;
-+
-+	*wpriv = *(const struct legion_wmi_private *)context;
-+
-+	dev_set_drvdata(&wdev->dev, wpriv);
-+	dev_info(&wdev->dev, "Register after probing for WMI.\n");
-+	return 0;
-+}
-+
-+static const struct legion_wmi_private legion_wmi_context_gamezone = {
-+	.event = LEGION_WMI_EVENT_GAMEZONE
-+};
-+static const struct legion_wmi_private legion_wmi_context_a = {
-+	.event = LEGION_EVENT_A
-+};
-+static const struct legion_wmi_private legion_wmi_context_b = {
-+	.event = LEGION_EVENT_B
-+};
-+static const struct legion_wmi_private legion_wmi_context_c = {
-+	.event = LEGION_EVENT_C
-+};
-+static const struct legion_wmi_private legion_wmi_context_d = {
-+	.event = LEGION_EVENT_D
-+};
-+static const struct legion_wmi_private legion_wmi_context_e = {
-+	.event = LEGION_EVENT_E
-+};
-+static const struct legion_wmi_private legion_wmi_context_f = {
-+	.event = LEGION_EVENT_F
-+};
-+
-+#define LEGION_WMI_GUID_FAN_EVENT "D320289E-8FEA-41E0-86F9-611D83151B5F"
-+#define LEGION_WMI_GUID_FAN2_EVENT "bc72a435-e8c1-4275-b3e2-d8b8074aba59"
-+#define LEGION_WMI_GUID_GAMEZONE_KEY_EVENT \
-+	"10afc6d9-ea8b-4590-a2e7-1cd3c84bb4b1"
-+#define LEGION_WMI_GUID_GAMEZONE_GPU_EVENT \
-+	"bfd42481-aee3-4502-a107-afb68425c5f8"
-+#define LEGION_WMI_GUID_GAMEZONE_OC_EVENT "d062906b-12d4-4510-999d-4831ee80e985"
-+#define LEGION_WMI_GUID_GAMEZONE_TEMP_EVENT \
-+	"bfd42481-aee3-4501-a107-afb68425c5f8"
-+//#define LEGION_WMI_GUID_GAMEZONE_DATA_EVENT  "887b54e3-dddc-4b2c-8b88-68a26a8835d0"
-+
-+static const struct wmi_device_id legion_wmi_ids[] = {
-+	{ LEGION_WMI_GAMEZONE_GUID, &legion_wmi_context_gamezone },
-+	{ LEGION_WMI_GUID_FAN_EVENT, &legion_wmi_context_a },
-+	{ LEGION_WMI_GUID_FAN2_EVENT, &legion_wmi_context_b },
-+	{ LEGION_WMI_GUID_GAMEZONE_KEY_EVENT, &legion_wmi_context_c },
-+	{ LEGION_WMI_GUID_GAMEZONE_GPU_EVENT, &legion_wmi_context_d },
-+	{ LEGION_WMI_GUID_GAMEZONE_OC_EVENT, &legion_wmi_context_e },
-+	{ LEGION_WMI_GUID_GAMEZONE_TEMP_EVENT, &legion_wmi_context_f },
-+	{ "8FC0DE0C-B4E4-43FD-B0F3-8871711C1294",
-+	  &legion_wmi_context_gamezone }, /* Legion 5 */
-+	{},
-+};
-+MODULE_DEVICE_TABLE(wmi, legion_wmi_ids);
-+
-+static struct wmi_driver legion_wmi_driver = {
-+	.driver = {
-+		.name = "legion_wmi",
-+	},
-+	.id_table = legion_wmi_ids,
-+	.probe = legion_wmi_probe,
-+	.notify = legion_wmi_notify,
-+};
-+
-+//acpi_status status = wmi_install_notify_handler(LEGION_WMI_GAMEZONE_GUID,
-+//				legion_wmi_notify2, NULL);
-+//if (ACPI_FAILURE(status)) {
-+//    return -ENODEV;
-+//}
-+//return 0;
-+
-+static int legion_wmi_init(void)
-+{
-+	return wmi_driver_register(&legion_wmi_driver);
-+}
-+
-+static void legion_wmi_exit(void)
-+{
-+	// TODO: remove this
-+	pr_info("Unloading legion WMI\n");
-+
-+	//wmi_remove_notify_handler(LEGION_WMI_GAMEZONE_GUID);
-+	wmi_driver_unregister(&legion_wmi_driver);
-+	pr_info("Unloading legion WMI done\n");
-+}
-+
-+/* =============================  */
-+/* Platform profile               */
-+/* ============================   */
-+
-+static void legion_platform_profile_notify(void)
-+{
-+	if (!enable_platformprofile)
-+		pr_info("Skipping platform_profile_notify because enable_platformprofile is false\n");
-+
-+	platform_profile_notify();
-+}
-+
-+static int legion_platform_profile_get(struct platform_profile_handler *pprof,
-+				       enum platform_profile_option *profile)
-+{
-+	int powermode;
-+	struct legion_private *priv;
-+
-+	priv = container_of(pprof, struct legion_private,
-+			    platform_profile_handler);
-+	read_powermode(priv, &powermode);
-+
-+	switch (powermode) {
-+	case LEGION_WMI_POWERMODE_BALANCED:
-+		*profile = PLATFORM_PROFILE_BALANCED;
-+		break;
-+	case LEGION_WMI_POWERMODE_PERFORMANCE:
-+		*profile = PLATFORM_PROFILE_PERFORMANCE;
-+		break;
-+	case LEGION_WMI_POWERMODE_QUIET:
-+		*profile = PLATFORM_PROFILE_QUIET;
-+		break;
-+	case LEGION_WMI_POWERMODE_CUSTOM:
-+		*profile = PLATFORM_PROFILE_BALANCED_PERFORMANCE;
-+		break;
-+	default:
-+		return -EINVAL;
-+	}
-+	return 0;
-+}
-+
-+static int legion_platform_profile_set(struct platform_profile_handler *pprof,
-+				       enum platform_profile_option profile)
-+{
-+	int powermode;
-+	struct legion_private *priv;
-+
-+	priv = container_of(pprof, struct legion_private,
-+			    platform_profile_handler);
-+
-+	switch (profile) {
-+	case PLATFORM_PROFILE_BALANCED:
-+		powermode = LEGION_WMI_POWERMODE_BALANCED;
-+		break;
-+	case PLATFORM_PROFILE_PERFORMANCE:
-+		powermode = LEGION_WMI_POWERMODE_PERFORMANCE;
-+		break;
-+	case PLATFORM_PROFILE_QUIET:
-+		powermode = LEGION_WMI_POWERMODE_QUIET;
-+		break;
-+	case PLATFORM_PROFILE_BALANCED_PERFORMANCE:
-+		powermode = LEGION_WMI_POWERMODE_CUSTOM;
-+		break;
-+	default:
-+		return -EOPNOTSUPP;
-+	}
-+
-+	return write_powermode(priv, powermode);
-+}
-+
-+static int legion_platform_profile_init(struct legion_private *priv)
-+{
-+	int err;
-+
-+	if (!enable_platformprofile) {
-+		pr_info("Skipping creating platform profile support because enable_platformprofile is false\n");
-+		return 0;
-+	}
-+
-+	priv->platform_profile_handler.profile_get =
-+		legion_platform_profile_get;
-+	priv->platform_profile_handler.profile_set =
-+		legion_platform_profile_set;
-+
-+	set_bit(PLATFORM_PROFILE_QUIET, priv->platform_profile_handler.choices);
-+	set_bit(PLATFORM_PROFILE_BALANCED,
-+		priv->platform_profile_handler.choices);
-+	set_bit(PLATFORM_PROFILE_PERFORMANCE,
-+		priv->platform_profile_handler.choices);
-+	if (priv->conf->has_custom_powermode &&
-+	    priv->conf->access_method_powermode == ACCESS_METHOD_WMI) {
-+		set_bit(PLATFORM_PROFILE_BALANCED_PERFORMANCE,
-+			priv->platform_profile_handler.choices);
-+	}
-+
-+	err = platform_profile_register(&priv->platform_profile_handler);
-+	if (err)
-+		return err;
-+
-+	return 0;
-+}
-+
-+static void legion_platform_profile_exit(struct legion_private *priv)
-+{
-+	if (!enable_platformprofile) {
-+		pr_info("Skipping unloading platform profile support because enable_platformprofile is false\n");
-+		return;
-+	}
-+	pr_info("Unloading legion platform profile\n");
-+	platform_profile_remove();
-+	pr_info("Unloading legion platform profile done\n");
-+}
-+
-+/* =============================  */
-+/* hwom interface              */
-+/* ============================   */
-+
-+// hw-mon interface
-+
-+// todo: register_group or register_info?
-+
-+// TODO: use one common function (like here) or one function per attribute?
-+static ssize_t sensor_label_show(struct device *dev,
-+				 struct device_attribute *attr, char *buf)
-+{
-+	int sensor_id = (to_sensor_dev_attr(attr))->index;
-+	const char *label;
-+
-+	switch (sensor_id) {
-+	case SENSOR_CPU_TEMP_ID:
-+		label = "CPU Temperature\n";
-+		break;
-+	case SENSOR_GPU_TEMP_ID:
-+		label = "GPU Temperature\n";
-+		break;
-+	case SENSOR_IC_TEMP_ID:
-+		label = "IC Temperature\n";
-+		break;
-+	case SENSOR_FAN1_RPM_ID:
-+		label = "Fan 1\n";
-+		break;
-+	case SENSOR_FAN2_RPM_ID:
-+		label = "Fan 2\n";
-+		break;
-+	case SENSOR_FAN1_TARGET_RPM_ID:
-+		label = "Fan 1 Target\n";
-+		break;
-+	case SENSOR_FAN2_TARGET_RPM_ID:
-+		label = "Fan 2 Target\n";
-+		break;
-+	default:
-+		return -EOPNOTSUPP;
-+	}
-+
-+	return sprintf(buf, label);
-+}
-+
-+// TODO: use one common function (like here) or one function per attribute?
-+static ssize_t sensor_show(struct device *dev, struct device_attribute *devattr,
-+			   char *buf)
-+{
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+	int sensor_id = (to_sensor_dev_attr(devattr))->index;
-+	struct sensor_values values;
-+	int outval;
-+	int err = -EIO;
-+
-+	switch (sensor_id) {
-+	case SENSOR_CPU_TEMP_ID:
-+		err = read_temperature(priv, 0, &outval);
-+		outval *= 1000;
-+		break;
-+	case SENSOR_GPU_TEMP_ID:
-+		err = read_temperature(priv, 1, &outval);
-+		outval *= 1000;
-+		break;
-+	case SENSOR_IC_TEMP_ID:
-+		ec_read_sensor_values(&priv->ecram, priv->conf, &values);
-+		outval = 1000 * values.ic_temp_celsius;
-+		err = 0;
-+		break;
-+	case SENSOR_FAN1_RPM_ID:
-+		err = read_fanspeed(priv, 0, &outval);
-+		break;
-+	case SENSOR_FAN2_RPM_ID:
-+		err = read_fanspeed(priv, 1, &outval);
-+		break;
-+	case SENSOR_FAN1_TARGET_RPM_ID:
-+		ec_read_sensor_values(&priv->ecram, priv->conf, &values);
-+		outval = values.fan1_target_rpm;
-+		err = 0;
-+		break;
-+	case SENSOR_FAN2_TARGET_RPM_ID:
-+		ec_read_sensor_values(&priv->ecram, priv->conf, &values);
-+		outval = values.fan2_target_rpm;
-+		err = 0;
-+		break;
-+	default:
-+		pr_info("Error reading sensor value with id %d\n", sensor_id);
-+		return -EOPNOTSUPP;
-+	}
-+	if (err)
-+		return err;
-+
-+	return sprintf(buf, "%d\n", outval);
-+}
-+
-+static SENSOR_DEVICE_ATTR_RO(temp1_input, sensor, SENSOR_CPU_TEMP_ID);
-+static SENSOR_DEVICE_ATTR_RO(temp1_label, sensor_label, SENSOR_CPU_TEMP_ID);
-+static SENSOR_DEVICE_ATTR_RO(temp2_input, sensor, SENSOR_GPU_TEMP_ID);
-+static SENSOR_DEVICE_ATTR_RO(temp2_label, sensor_label, SENSOR_GPU_TEMP_ID);
-+static SENSOR_DEVICE_ATTR_RO(temp3_input, sensor, SENSOR_IC_TEMP_ID);
-+static SENSOR_DEVICE_ATTR_RO(temp3_label, sensor_label, SENSOR_IC_TEMP_ID);
-+static SENSOR_DEVICE_ATTR_RO(fan1_input, sensor, SENSOR_FAN1_RPM_ID);
-+static SENSOR_DEVICE_ATTR_RO(fan1_label, sensor_label, SENSOR_FAN1_RPM_ID);
-+static SENSOR_DEVICE_ATTR_RO(fan2_input, sensor, SENSOR_FAN2_RPM_ID);
-+static SENSOR_DEVICE_ATTR_RO(fan2_label, sensor_label, SENSOR_FAN2_RPM_ID);
-+static SENSOR_DEVICE_ATTR_RO(fan1_target, sensor, SENSOR_FAN1_TARGET_RPM_ID);
-+static SENSOR_DEVICE_ATTR_RO(fan2_target, sensor, SENSOR_FAN2_TARGET_RPM_ID);
-+
-+static struct attribute *sensor_hwmon_attributes[] = {
-+	&sensor_dev_attr_temp1_input.dev_attr.attr,
-+	&sensor_dev_attr_temp1_label.dev_attr.attr,
-+	&sensor_dev_attr_temp2_input.dev_attr.attr,
-+	&sensor_dev_attr_temp2_label.dev_attr.attr,
-+	&sensor_dev_attr_temp3_input.dev_attr.attr,
-+	&sensor_dev_attr_temp3_label.dev_attr.attr,
-+	&sensor_dev_attr_fan1_input.dev_attr.attr,
-+	&sensor_dev_attr_fan1_label.dev_attr.attr,
-+	&sensor_dev_attr_fan2_input.dev_attr.attr,
-+	&sensor_dev_attr_fan2_label.dev_attr.attr,
-+	&sensor_dev_attr_fan1_target.dev_attr.attr,
-+	&sensor_dev_attr_fan2_target.dev_attr.attr,
-+	NULL
-+};
-+
-+static ssize_t autopoint_show(struct device *dev,
-+			      struct device_attribute *devattr, char *buf)
-+{
-+	struct fancurve fancurve;
-+	int err;
-+	int value;
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+	int fancurve_attr_id = to_sensor_dev_attr_2(devattr)->nr;
-+	int point_id = to_sensor_dev_attr_2(devattr)->index;
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+	err = read_fancurve(priv, &fancurve);
-+	mutex_unlock(&priv->fancurve_mutex);
-+
-+	if (err) {
-+		pr_info("Failed to read fancurve\n");
-+		return -EOPNOTSUPP;
-+	}
-+	if (!(point_id >= 0 && point_id < MAXFANCURVESIZE)) {
-+		pr_info("Failed to read fancurve due to wrong point id: %d\n",
-+			point_id);
-+		return -EOPNOTSUPP;
-+	}
-+
-+	switch (fancurve_attr_id) {
-+	case FANCURVE_ATTR_PWM1:
-+		value = fancurve.points[point_id].rpm1_raw * 100;
-+		break;
-+	case FANCURVE_ATTR_PWM2:
-+		value = fancurve.points[point_id].rpm2_raw * 100;
-+		break;
-+	case FANCURVE_ATTR_CPU_TEMP:
-+		value = fancurve.points[point_id].cpu_max_temp_celsius;
-+		break;
-+	case FANCURVE_ATTR_CPU_HYST:
-+		value = fancurve.points[point_id].cpu_min_temp_celsius;
-+		break;
-+	case FANCURVE_ATTR_GPU_TEMP:
-+		value = fancurve.points[point_id].gpu_max_temp_celsius;
-+		break;
-+	case FANCURVE_ATTR_GPU_HYST:
-+		value = fancurve.points[point_id].gpu_min_temp_celsius;
-+		break;
-+	case FANCURVE_ATTR_IC_TEMP:
-+		value = fancurve.points[point_id].ic_max_temp_celsius;
-+		break;
-+	case FANCURVE_ATTR_IC_HYST:
-+		value = fancurve.points[point_id].ic_min_temp_celsius;
-+		break;
-+	case FANCURVE_ATTR_ACCEL:
-+		value = fancurve.points[point_id].accel;
-+		break;
-+	case FANCURVE_ATTR_DECEL:
-+		value = fancurve.points[point_id].decel;
-+		break;
-+	case FANCURVE_SIZE:
-+		value = fancurve.size;
-+		break;
-+	default:
-+		pr_info("Failed to read fancurve due to wrong attribute id: %d\n",
-+			fancurve_attr_id);
-+		return -EOPNOTSUPP;
-+	}
-+
-+	return sprintf(buf, "%d\n", value);
-+}
-+
-+static ssize_t autopoint_store(struct device *dev,
-+			       struct device_attribute *devattr,
-+			       const char *buf, size_t count)
-+{
-+	struct fancurve fancurve;
-+	int err;
-+	int value;
-+	bool valid;
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+	int fancurve_attr_id = to_sensor_dev_attr_2(devattr)->nr;
-+	int point_id = to_sensor_dev_attr_2(devattr)->index;
-+	bool write_fancurve_size = false;
-+
-+	if (!(point_id >= 0 && point_id < MAXFANCURVESIZE)) {
-+		pr_info("Failed to read fancurve due to wrong point id: %d\n",
-+			point_id);
-+		err = -EOPNOTSUPP;
-+		goto error;
-+	}
-+
-+	err = kstrtoint(buf, 0, &value);
-+	if (err) {
-+		pr_info("Parsing hwmon store failed: error: %d; point_id: %d; fancurve_attr_id: %d\\n",
-+			err, point_id, fancurve_attr_id);
-+		goto error;
-+	}
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+	err = read_fancurve(priv, &fancurve);
-+
-+	if (err) {
-+		pr_info("Failed to read fancurve\n");
-+		err = -EOPNOTSUPP;
-+		goto error_mutex;
-+	}
-+
-+	switch (fancurve_attr_id) {
-+	case FANCURVE_ATTR_PWM1:
-+		valid = fancurve_set_rpm1(&fancurve, point_id, value);
-+		break;
-+	case FANCURVE_ATTR_PWM2:
-+		valid = fancurve_set_rpm2(&fancurve, point_id, value);
-+		break;
-+	case FANCURVE_ATTR_CPU_TEMP:
-+		valid = fancurve_set_cpu_temp_max(&fancurve, point_id, value);
-+		break;
-+	case FANCURVE_ATTR_CPU_HYST:
-+		valid = fancurve_set_cpu_temp_min(&fancurve, point_id, value);
-+		break;
-+	case FANCURVE_ATTR_GPU_TEMP:
-+		valid = fancurve_set_gpu_temp_max(&fancurve, point_id, value);
-+		break;
-+	case FANCURVE_ATTR_GPU_HYST:
-+		valid = fancurve_set_gpu_temp_min(&fancurve, point_id, value);
-+		break;
-+	case FANCURVE_ATTR_IC_TEMP:
-+		valid = fancurve_set_ic_temp_max(&fancurve, point_id, value);
-+		break;
-+	case FANCURVE_ATTR_IC_HYST:
-+		valid = fancurve_set_ic_temp_min(&fancurve, point_id, value);
-+		break;
-+	case FANCURVE_ATTR_ACCEL:
-+		valid = fancurve_set_accel(&fancurve, point_id, value);
-+		break;
-+	case FANCURVE_ATTR_DECEL:
-+		valid = fancurve_set_decel(&fancurve, point_id, value);
-+		break;
-+	case FANCURVE_SIZE:
-+		valid = fancurve_set_size(&fancurve, value, true);
-+		write_fancurve_size = true;
-+		break;
-+	default:
-+		pr_info("Failed to write fancurve due to wrong attribute id: %d\n",
-+			fancurve_attr_id);
-+		err = -EOPNOTSUPP;
-+		goto error_mutex;
-+	}
-+
-+	if (!valid) {
-+		pr_info("Ignoring invalid fancurve value %d for attribute %d at point %d\n",
-+			value, fancurve_attr_id, point_id);
-+		err = -EOPNOTSUPP;
-+		goto error_mutex;
-+	}
-+
-+	err = write_fancurve(priv, &fancurve, write_fancurve_size);
-+	if (err) {
-+		pr_info("Failed to write fancurve for accessing hwmon at point_id: %d\n",
-+			point_id);
-+		err = -EOPNOTSUPP;
-+		goto error_mutex;
-+	}
-+
-+	mutex_unlock(&priv->fancurve_mutex);
-+	return count;
-+
-+error_mutex:
-+	mutex_unlock(&priv->fancurve_mutex);
-+error:
-+	return count;
-+}
-+
-+// rpm1
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM1, 0);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM1, 1);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM1, 2);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM1, 3);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM1, 4);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM1, 5);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM1, 6);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM1, 7);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM1, 8);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM1, 9);
-+// rpm2
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point1_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM2, 0);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point2_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM2, 1);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point3_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM2, 2);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point4_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM2, 3);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point5_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM2, 4);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point6_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM2, 5);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point7_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM2, 6);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point8_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM2, 7);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point9_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM2, 8);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point10_pwm, autopoint,
-+			       FANCURVE_ATTR_PWM2, 9);
-+// CPU temp
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_temp, autopoint,
-+			       FANCURVE_ATTR_CPU_TEMP, 0);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_temp, autopoint,
-+			       FANCURVE_ATTR_CPU_TEMP, 1);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_temp, autopoint,
-+			       FANCURVE_ATTR_CPU_TEMP, 2);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_temp, autopoint,
-+			       FANCURVE_ATTR_CPU_TEMP, 3);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_temp, autopoint,
-+			       FANCURVE_ATTR_CPU_TEMP, 4);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_temp, autopoint,
-+			       FANCURVE_ATTR_CPU_TEMP, 5);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_temp, autopoint,
-+			       FANCURVE_ATTR_CPU_TEMP, 6);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_temp, autopoint,
-+			       FANCURVE_ATTR_CPU_TEMP, 7);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_temp, autopoint,
-+			       FANCURVE_ATTR_CPU_TEMP, 8);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_temp, autopoint,
-+			       FANCURVE_ATTR_CPU_TEMP, 9);
-+// CPU temp hyst
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_CPU_HYST, 0);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_CPU_HYST, 1);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_CPU_HYST, 2);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_CPU_HYST, 3);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_CPU_HYST, 4);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_CPU_HYST, 5);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_CPU_HYST, 6);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_CPU_HYST, 7);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_CPU_HYST, 8);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_CPU_HYST, 9);
-+// GPU temp
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point1_temp, autopoint,
-+			       FANCURVE_ATTR_GPU_TEMP, 0);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point2_temp, autopoint,
-+			       FANCURVE_ATTR_GPU_TEMP, 1);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point3_temp, autopoint,
-+			       FANCURVE_ATTR_GPU_TEMP, 2);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point4_temp, autopoint,
-+			       FANCURVE_ATTR_GPU_TEMP, 3);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point5_temp, autopoint,
-+			       FANCURVE_ATTR_GPU_TEMP, 4);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point6_temp, autopoint,
-+			       FANCURVE_ATTR_GPU_TEMP, 5);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point7_temp, autopoint,
-+			       FANCURVE_ATTR_GPU_TEMP, 6);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point8_temp, autopoint,
-+			       FANCURVE_ATTR_GPU_TEMP, 7);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point9_temp, autopoint,
-+			       FANCURVE_ATTR_GPU_TEMP, 8);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point10_temp, autopoint,
-+			       FANCURVE_ATTR_GPU_TEMP, 9);
-+// GPU temp hyst
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point1_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_GPU_HYST, 0);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point2_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_GPU_HYST, 1);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point3_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_GPU_HYST, 2);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point4_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_GPU_HYST, 3);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point5_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_GPU_HYST, 4);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point6_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_GPU_HYST, 5);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point7_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_GPU_HYST, 6);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point8_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_GPU_HYST, 7);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point9_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_GPU_HYST, 8);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm2_auto_point10_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_GPU_HYST, 9);
-+// IC temp
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point1_temp, autopoint,
-+			       FANCURVE_ATTR_IC_TEMP, 0);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point2_temp, autopoint,
-+			       FANCURVE_ATTR_IC_TEMP, 1);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point3_temp, autopoint,
-+			       FANCURVE_ATTR_IC_TEMP, 2);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point4_temp, autopoint,
-+			       FANCURVE_ATTR_IC_TEMP, 3);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point5_temp, autopoint,
-+			       FANCURVE_ATTR_IC_TEMP, 4);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point6_temp, autopoint,
-+			       FANCURVE_ATTR_IC_TEMP, 5);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point7_temp, autopoint,
-+			       FANCURVE_ATTR_IC_TEMP, 6);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point8_temp, autopoint,
-+			       FANCURVE_ATTR_IC_TEMP, 7);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point9_temp, autopoint,
-+			       FANCURVE_ATTR_IC_TEMP, 8);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point10_temp, autopoint,
-+			       FANCURVE_ATTR_IC_TEMP, 9);
-+// IC temp hyst
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point1_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_IC_HYST, 0);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point2_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_IC_HYST, 1);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point3_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_IC_HYST, 2);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point4_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_IC_HYST, 3);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point5_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_IC_HYST, 4);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point6_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_IC_HYST, 5);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point7_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_IC_HYST, 6);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point8_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_IC_HYST, 7);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point9_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_IC_HYST, 8);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm3_auto_point10_temp_hyst, autopoint,
-+			       FANCURVE_ATTR_IC_HYST, 9);
-+// accel
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_accel, autopoint,
-+			       FANCURVE_ATTR_ACCEL, 0);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_accel, autopoint,
-+			       FANCURVE_ATTR_ACCEL, 1);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_accel, autopoint,
-+			       FANCURVE_ATTR_ACCEL, 2);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_accel, autopoint,
-+			       FANCURVE_ATTR_ACCEL, 3);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_accel, autopoint,
-+			       FANCURVE_ATTR_ACCEL, 4);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_accel, autopoint,
-+			       FANCURVE_ATTR_ACCEL, 5);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_accel, autopoint,
-+			       FANCURVE_ATTR_ACCEL, 6);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_accel, autopoint,
-+			       FANCURVE_ATTR_ACCEL, 7);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_accel, autopoint,
-+			       FANCURVE_ATTR_ACCEL, 8);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_accel, autopoint,
-+			       FANCURVE_ATTR_ACCEL, 9);
-+// decel
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point1_decel, autopoint,
-+			       FANCURVE_ATTR_DECEL, 0);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point2_decel, autopoint,
-+			       FANCURVE_ATTR_DECEL, 1);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point3_decel, autopoint,
-+			       FANCURVE_ATTR_DECEL, 2);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point4_decel, autopoint,
-+			       FANCURVE_ATTR_DECEL, 3);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point5_decel, autopoint,
-+			       FANCURVE_ATTR_DECEL, 4);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point6_decel, autopoint,
-+			       FANCURVE_ATTR_DECEL, 5);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point7_decel, autopoint,
-+			       FANCURVE_ATTR_DECEL, 6);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point8_decel, autopoint,
-+			       FANCURVE_ATTR_DECEL, 7);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point9_decel, autopoint,
-+			       FANCURVE_ATTR_DECEL, 8);
-+static SENSOR_DEVICE_ATTR_2_RW(pwm1_auto_point10_decel, autopoint,
-+			       FANCURVE_ATTR_DECEL, 9);
-+//size
-+static SENSOR_DEVICE_ATTR_2_RW(auto_points_size, autopoint, FANCURVE_SIZE, 0);
-+
-+static ssize_t minifancurve_show(struct device *dev,
-+				 struct device_attribute *devattr, char *buf)
-+{
-+	bool value;
-+	int err;
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+	err = ec_read_minifancurve(&priv->ecram, priv->conf, &value);
-+	if (err) {
-+		err = -1;
-+		pr_info("Failed to read minifancurve\n");
-+		goto error_unlock;
-+	}
-+	mutex_unlock(&priv->fancurve_mutex);
-+	return sprintf(buf, "%d\n", value);
-+
-+error_unlock:
-+	mutex_unlock(&priv->fancurve_mutex);
-+	return -1;
-+}
-+
-+static ssize_t minifancurve_store(struct device *dev,
-+				  struct device_attribute *devattr,
-+				  const char *buf, size_t count)
-+{
-+	int value;
-+	int err;
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+
-+	err = kstrtoint(buf, 0, &value);
-+	if (err) {
-+		err = -1;
-+		pr_info("Parsing hwmon store failed: error:%d\n",
-+			err);
-+		goto error;
-+	}
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+	err = ec_write_minifancurve(&priv->ecram, priv->conf, value);
-+	if (err) {
-+		err = -1;
-+		pr_info("Failed to write minifancurve\n");
-+		goto error_unlock;
-+	}
-+	mutex_unlock(&priv->fancurve_mutex);
-+	return count;
-+
-+error_unlock:
-+	mutex_unlock(&priv->fancurve_mutex);
-+error:
-+	return err;
-+}
-+
-+static SENSOR_DEVICE_ATTR_RW(minifancurve, minifancurve, 0);
-+
-+static ssize_t pwm1_mode_show(struct device *dev,
-+			      struct device_attribute *devattr, char *buf)
-+{
-+	bool value;
-+	int err;
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+	err = ec_read_fanfullspeed(&priv->ecram, priv->conf, &value);
-+	if (err) {
-+		err = -1;
-+		pr_info("Failed to pwm1_mode/maximumfanspeed\n");
-+		goto error_unlock;
-+	}
-+	mutex_unlock(&priv->fancurve_mutex);
-+	return sprintf(buf, "%d\n", value ? 0 : 2);
-+
-+error_unlock:
-+	mutex_unlock(&priv->fancurve_mutex);
-+	return -1;
-+}
-+
-+// TODO: remove? or use WMI method?
-+static ssize_t pwm1_mode_store(struct device *dev,
-+			       struct device_attribute *devattr,
-+			       const char *buf, size_t count)
-+{
-+	int value;
-+	int is_maximumfanspeed;
-+	int err;
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+
-+	err = kstrtoint(buf, 0, &value);
-+	if (err) {
-+		err = -1;
-+		pr_info("Parsing hwmon store failed: error:%d\n",
-+			err);
-+		goto error;
-+	}
-+	is_maximumfanspeed = value == 0;
-+
-+	mutex_lock(&priv->fancurve_mutex);
-+	err = ec_write_fanfullspeed(&priv->ecram, priv->conf,
-+				    is_maximumfanspeed);
-+	if (err) {
-+		err = -1;
-+		pr_info("Failed to write pwm1_mode/maximumfanspeed\n");
-+		goto error_unlock;
-+	}
-+	mutex_unlock(&priv->fancurve_mutex);
-+	return count;
-+
-+error_unlock:
-+	mutex_unlock(&priv->fancurve_mutex);
-+error:
-+	return err;
-+}
-+
-+static SENSOR_DEVICE_ATTR_RW(pwm1_mode, pwm1_mode, 0);
-+
-+static struct attribute *fancurve_hwmon_attributes[] = {
-+	&sensor_dev_attr_pwm1_auto_point1_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point2_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point3_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point4_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point5_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point6_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point7_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point8_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point9_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point10_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point1_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point2_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point3_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point4_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point5_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point6_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point7_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point8_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point9_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point10_pwm.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point1_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point2_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point3_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point4_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point5_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point6_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point7_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point8_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point9_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point10_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point1_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point2_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point3_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point4_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point5_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point6_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point7_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point8_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point9_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point10_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point1_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point2_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point3_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point4_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point5_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point6_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point7_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point8_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point9_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point10_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point1_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point2_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point3_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point4_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point5_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point6_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point7_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point8_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point9_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm2_auto_point10_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point1_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point2_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point3_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point4_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point5_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point6_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point7_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point8_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point9_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point10_temp.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point1_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point2_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point3_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point4_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point5_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point6_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point7_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point8_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point9_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm3_auto_point10_temp_hyst.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point1_accel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point2_accel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point3_accel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point4_accel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point5_accel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point6_accel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point7_accel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point8_accel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point9_accel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point10_accel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point1_decel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point2_decel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point3_decel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point4_decel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point5_decel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point6_decel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point7_decel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point8_decel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point9_decel.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_auto_point10_decel.dev_attr.attr,
-+	//
-+	&sensor_dev_attr_auto_points_size.dev_attr.attr,
-+	&sensor_dev_attr_minifancurve.dev_attr.attr,
-+	&sensor_dev_attr_pwm1_mode.dev_attr.attr, NULL
-+};
-+
-+static umode_t legion_hwmon_is_visible(struct kobject *kobj,
-+				       struct attribute *attr, int idx)
-+{
-+	bool supported = true;
-+	struct device *dev = kobj_to_dev(kobj);
-+	struct legion_private *priv = dev_get_drvdata(dev);
-+
-+	if (attr == &sensor_dev_attr_minifancurve.dev_attr.attr)
-+		supported = priv->conf->has_minifancurve;
-+
-+	supported = supported && (priv->conf->access_method_fancurve !=
-+				  ACCESS_METHOD_NO_ACCESS);
-+
-+	return supported ? attr->mode : 0;
-+}
-+
-+static const struct attribute_group legion_hwmon_sensor_group = {
-+	.attrs = sensor_hwmon_attributes,
-+	.is_visible = NULL
-+};
-+
-+static const struct attribute_group legion_hwmon_fancurve_group = {
-+	.attrs = fancurve_hwmon_attributes,
-+	.is_visible = legion_hwmon_is_visible,
-+};
-+
-+static const struct attribute_group *legion_hwmon_groups[] = {
-+	&legion_hwmon_sensor_group, &legion_hwmon_fancurve_group, NULL
-+};
-+
-+static ssize_t legion_hwmon_init(struct legion_private *priv)
-+{
-+	//TODO: use hwmon_device_register_with_groups or
-+	// hwmon_device_register_with_info (latter means all hwmon functions have to be
-+	// changed)
-+	// some laptop driver do it in one way, some in the other
-+	// TODO: Use devm_hwmon_device_register_with_groups ?
-+	// some laptop drivers use this, some
-+	struct device *hwmon_dev = hwmon_device_register_with_groups(
-+		&priv->platform_device->dev, "legion_hwmon", priv,
-+		legion_hwmon_groups);
-+	if (IS_ERR_OR_NULL(hwmon_dev)) {
-+		pr_err("hwmon_device_register failed!\n");
-+		return PTR_ERR(hwmon_dev);
-+	}
-+	dev_set_drvdata(hwmon_dev, priv);
-+	priv->hwmon_dev = hwmon_dev;
-+	return 0;
-+}
-+
-+static void legion_hwmon_exit(struct legion_private *priv)
-+{
-+	pr_info("Unloading legion hwon\n");
-+	if (priv->hwmon_dev) {
-+		hwmon_device_unregister(priv->hwmon_dev);
-+		priv->hwmon_dev = NULL;
-+	}
-+	pr_info("Unloading legion hwon done\n");
-+}
-+
-+/* ACPI*/
-+
-+static int acpi_init(struct legion_private *priv, struct acpi_device *adev)
-+{
-+	int err;
-+	unsigned long cfg;
-+	bool skip_acpi_sta_check;
-+	struct device *dev = &priv->platform_device->dev;
-+
-+	priv->adev = adev;
-+	if (!priv->adev) {
-+		dev_info(dev, "Could not get ACPI handle\n");
-+		goto err_acpi_init;
-+	}
-+
-+	skip_acpi_sta_check = force || (!priv->conf->acpi_check_dev);
-+	if (!skip_acpi_sta_check) {
-+		err = eval_int(priv->adev->handle, "_STA", &cfg);
-+		if (err) {
-+			dev_info(dev, "Could not evaluate ACPI _STA\n");
-+			goto err_acpi_init;
-+		}
-+
-+		err = eval_int(priv->adev->handle, "VPC0._CFG", &cfg);
-+		if (err) {
-+			dev_info(dev, "Could not evaluate ACPI _CFG\n");
-+			goto err_acpi_init;
-+		}
-+		dev_info(dev, "ACPI CFG: %lu\n", cfg);
-+	} else {
-+		dev_info(dev, "Skipping ACPI _STA check");
-+	}
-+
-+	return 0;
-+
-+err_acpi_init:
-+	return err;
-+}
-+
-+/* =============================  */
-+/* White Keyboard Backlight       */
-+/* ============================   */
-+// In style of ideapad-driver and with code modified from ideapad-driver.
-+
-+static enum led_brightness
-+legion_kbd_bl_led_cdev_brightness_get(struct led_classdev *led_cdev)
-+{
-+	struct legion_private *priv =
-+		container_of(led_cdev, struct legion_private, kbd_bl.led);
-+
-+	return legion_kbd_bl_brightness_get(priv);
-+}
-+
-+static int legion_kbd_bl_led_cdev_brightness_set(struct led_classdev *led_cdev,
-+						 enum led_brightness brightness)
-+{
-+	struct legion_private *priv =
-+		container_of(led_cdev, struct legion_private, kbd_bl.led);
-+
-+	return legion_kbd_bl_brightness_set(priv, brightness);
-+}
-+
-+static int legion_kbd_bl_init(struct legion_private *priv)
-+{
-+	int brightness, err;
-+
-+	if (WARN_ON(priv->kbd_bl.initialized)) {
-+		pr_info("Keyboard backlight already initialized\n");
-+		return -EEXIST;
-+	}
-+
-+	if (priv->conf->access_method_keyboard == ACCESS_METHOD_NO_ACCESS) {
-+		pr_info("Keyboard backlight handling disabled by this driver\n");
-+		return -ENODEV;
-+	}
-+
-+	brightness = legion_kbd_bl_brightness_get(priv);
-+	if (brightness < 0) {
-+		pr_info("Error reading keyboard brightness\n");
-+		return brightness;
-+	}
-+
-+	priv->kbd_bl.last_brightness = brightness;
-+
-+	// will be renamed to "platform::kbd_backlight_1" if it exists already
-+	priv->kbd_bl.led.name = "platform::" LED_FUNCTION_KBD_BACKLIGHT;
-+	priv->kbd_bl.led.max_brightness = 2;
-+	priv->kbd_bl.led.brightness_get = legion_kbd_bl_led_cdev_brightness_get;
-+	priv->kbd_bl.led.brightness_set_blocking =
-+		legion_kbd_bl_led_cdev_brightness_set;
-+	priv->kbd_bl.led.flags = LED_BRIGHT_HW_CHANGED;
-+
-+	err = led_classdev_register(&priv->platform_device->dev,
-+				    &priv->kbd_bl.led);
-+	if (err)
-+		return err;
-+
-+	priv->kbd_bl.initialized = true;
-+
-+	return 0;
-+}
-+
-+/**
-+ * Deinit keyboard backlight.
-+ *
-+ * Can also be called if init was not successful.
-+ *
-+ */
-+static void legion_kbd_bl_exit(struct legion_private *priv)
-+{
-+	if (!priv->kbd_bl.initialized)
-+		return;
-+
-+	priv->kbd_bl.initialized = false;
-+
-+	led_classdev_unregister(&priv->kbd_bl.led);
-+}
-+
-+/* =============================  */
-+/* Additional light driver        */
-+/* ============================   */
-+
-+static enum led_brightness
-+legion_wmi_cdev_brightness_get(struct led_classdev *led_cdev)
-+{
-+	struct legion_private *priv =
-+		container_of(led_cdev, struct legion_private, kbd_bl.led);
-+	struct light *light_ins = container_of(led_cdev, struct light, led);
-+
-+	return legion_wmi_light_get(priv, light_ins->light_id,
-+				    light_ins->lower_limit,
-+				    light_ins->upper_limit);
-+}
-+
-+static int legion_wmi_cdev_brightness_set(struct led_classdev *led_cdev,
-+					  enum led_brightness brightness)
-+{
-+	struct legion_private *priv =
-+		container_of(led_cdev, struct legion_private, kbd_bl.led);
-+	struct light *light_ins = container_of(led_cdev, struct light, led);
-+
-+	return legion_wmi_light_set(priv, light_ins->light_id,
-+				    light_ins->lower_limit,
-+				    light_ins->upper_limit, brightness);
-+}
-+
-+static int legion_light_init(struct legion_private *priv,
-+			     struct light *light_ins, u8 light_id,
-+			     u8 lower_limit, u8 upper_limit, const char *name)
-+{
-+	int brightness, err;
-+
-+	if (WARN_ON(light_ins->initialized)) {
-+		pr_info("Light already initialized for light: %u\n",
-+			light_ins->light_id);
-+		return -EEXIST;
-+	}
-+
-+	light_ins->light_id = light_id;
-+	light_ins->lower_limit = lower_limit;
-+	light_ins->upper_limit = upper_limit;
-+
-+	brightness = legion_wmi_light_get(priv, light_ins->light_id,
-+					  light_ins->lower_limit,
-+					  light_ins->upper_limit);
-+	if (brightness < 0) {
-+		pr_info("Error reading brightness for light: %u\n",
-+			light_ins->light_id);
-+		return brightness;
-+	}
-+
-+	light_ins->led.name = name;
-+	light_ins->led.max_brightness =
-+		light_ins->upper_limit - light_ins->lower_limit;
-+	light_ins->led.brightness_get = legion_wmi_cdev_brightness_get;
-+	light_ins->led.brightness_set_blocking = legion_wmi_cdev_brightness_set;
-+	light_ins->led.flags = LED_BRIGHT_HW_CHANGED;
-+
-+	err = led_classdev_register(&priv->platform_device->dev,
-+				    &light_ins->led);
-+	if (err)
-+		return err;
-+
-+	light_ins->initialized = true;
-+
-+	return 0;
-+}
-+
-+/**
-+ * Deinit light.
-+ *
-+ * Can also be called if init was not successful.
-+ *
-+ */
-+static void legion_light_exit(struct legion_private *priv,
-+			      struct light *light_ins)
-+{
-+	if (!light_ins->initialized)
-+		return;
-+
-+	light_ins->initialized = false;
-+
-+	led_classdev_unregister(&light_ins->led);
-+}
-+
-+/* =============================  */
-+/* Platform driver                */
-+/* ============================   */
-+
-+static int legion_add(struct platform_device *pdev)
-+{
-+	struct legion_private *priv;
-+	const struct dmi_system_id *dmi_sys;
-+	int err;
-+	u16 ec_read_id;
-+	bool skip_ec_id_check;
-+	bool is_ec_id_valid;
-+	bool is_denied = true;
-+	bool is_allowed = false;
-+	bool do_load_by_list = false;
-+	bool do_load = false;
-+	//struct legion_private *priv = dev_get_drvdata(&pdev->dev);
-+	dev_info(&pdev->dev, "legion_laptop platform driver probing\n");
-+
-+	dev_info(
-+		&pdev->dev,
-+		"Read identifying information: DMI_SYS_VENDOR: %s; DMI_PRODUCT_NAME: %s; DMI_BIOS_VERSION:%s\n",
-+		dmi_get_system_info(DMI_SYS_VENDOR),
-+		dmi_get_system_info(DMI_PRODUCT_NAME),
-+		dmi_get_system_info(DMI_BIOS_VERSION));
-+
-+	// TODO: allocate?
-+	priv = &_priv;
-+	priv->platform_device = pdev;
-+	err = legion_shared_init(priv);
-+	if (err) {
-+		dev_info(&pdev->dev, "legion_laptop is forced to load.\n");
-+		goto err_legion_shared_init;
-+	}
-+	dev_set_drvdata(&pdev->dev, priv);
-+
-+	// TODO: remove
-+	pr_info("Read identifying information: DMI_SYS_VENDOR: %s; DMI_PRODUCT_NAME: %s; DMI_BIOS_VERSION:%s\n",
-+		dmi_get_system_info(DMI_SYS_VENDOR),
-+		dmi_get_system_info(DMI_PRODUCT_NAME),
-+		dmi_get_system_info(DMI_BIOS_VERSION));
-+
-+	dmi_sys = dmi_first_match(optimistic_allowlist);
-+	is_allowed = dmi_sys != NULL;
-+	is_denied = dmi_check_system(denylist);
-+	do_load_by_list = is_allowed && !is_denied;
-+	do_load = do_load_by_list || force;
-+
-+	dev_info(
-+		&pdev->dev,
-+		"is_denied: %d; is_allowed: %d; do_load_by_list: %d; do_load: %d\n",
-+		is_denied, is_allowed, do_load_by_list, do_load);
-+
-+	if (!(do_load)) {
-+		dev_info(
-+			&pdev->dev,
-+			"Module not usable for this laptop because it is not in allowlist. Notify the maintainer if you want to add your device or force load with param force.\n");
-+		err = -ENOMEM;
-+		goto err_model_mismtach;
-+	}
-+
-+	if (force)
-+		dev_info(&pdev->dev, "legion_laptop is forced to load.\n");
-+
-+	if (!do_load_by_list && do_load) {
-+		dev_info(
-+			&pdev->dev,
-+			"legion_laptop is forced to load and would otherwise not be loaded\n");
-+	}
-+
-+	// if forced and no module found, use config for first model
-+	if (dmi_sys == NULL)
-+		dmi_sys = &optimistic_allowlist[0];
-+	dev_info(&pdev->dev, "Using configuration for system: %s\n",
-+		 dmi_sys->ident);
-+
-+	priv->conf = dmi_sys->driver_data;
-+
-+	err = acpi_init(priv, ACPI_COMPANION(&pdev->dev));
-+	if (err) {
-+		dev_info(&pdev->dev, "Could not init ACPI access: %d\n", err);
-+		goto err_acpi_init;
-+	}
-+
-+	// TODO: remove; only used for reverse engineering
-+	pr_info("Creating RAM access to embedded controller\n");
-+	err = ecram_memoryio_init(&priv->ec_memoryio,
-+				  priv->conf->ramio_physical_start, 0,
-+				  priv->conf->ramio_size);
-+	if (err) {
-+		dev_info(
-+			&pdev->dev,
-+			"Could not init RAM access to embedded controller: %d\n",
-+			err);
-+		goto err_ecram_memoryio_init;
-+	}
-+
-+	err = ecram_init(&priv->ecram, priv->conf->memoryio_physical_ec_start,
-+			 priv->conf->memoryio_size);
-+	if (err) {
-+		dev_info(&pdev->dev,
-+			 "Could not init access to embedded controller: %d\n",
-+			 err);
-+		goto err_ecram_init;
-+	}
-+
-+	ec_read_id = read_ec_id(&priv->ecram, priv->conf);
-+	dev_info(&pdev->dev, "Read embedded controller ID 0x%x\n", ec_read_id);
-+	skip_ec_id_check = force || (!priv->conf->check_embedded_controller_id);
-+	is_ec_id_valid = skip_ec_id_check ||
-+			 (ec_read_id == priv->conf->embedded_controller_id);
-+	if (!is_ec_id_valid) {
-+		err = -ENOMEM;
-+		dev_info(&pdev->dev, "Expected EC chip id 0x%x but read 0x%x\n",
-+			 priv->conf->embedded_controller_id, ec_read_id);
-+		goto err_ecram_id;
-+	}
-+	if (skip_ec_id_check) {
-+		dev_info(&pdev->dev,
-+			 "Skipped checking embedded controller id\n");
-+	}
-+
-+	dev_info(&pdev->dev, "Creating debugfs interface\n");
-+	legion_debugfs_init(priv);
-+
-+	pr_info("Creating sysfs interface\n");
-+	err = legion_sysfs_init(priv);
-+	if (err) {
-+		dev_info(&pdev->dev, "Failed to create sysfs interface: %d\n",
-+			 err);
-+		goto err_sysfs_init;
-+	}
-+
-+	pr_info("Creating hwmon interface");
-+	err = legion_hwmon_init(priv);
-+	if (err) {
-+		dev_info(&pdev->dev, "Failed to create hwmon interface: %d\n",
-+			 err);
-+		goto err_hwmon_init;
-+	}
-+
-+	pr_info("Creating platform profile support\n");
-+	err = legion_platform_profile_init(priv);
-+	if (err) {
-+		dev_info(&pdev->dev, "Failed to create platform profile: %d\n",
-+			 err);
-+		goto err_platform_profile;
-+	}
-+
-+	pr_info("Init WMI driver support\n");
-+	err = legion_wmi_init();
-+	if (err) {
-+		dev_info(&pdev->dev, "Failed to init WMI driver: %d\n", err);
-+		goto err_wmi;
-+	}
-+
-+	pr_info("Init keyboard backlight LED driver\n");
-+	err = legion_kbd_bl_init(priv);
-+	if (err) {
-+		dev_info(
-+			&pdev->dev,
-+			"Failed to init keyboard backlight LED driver. Skipping ...\n");
-+	}
-+
-+	pr_info("Init Y-Logo LED driver\n");
-+	err = legion_light_init(priv, &priv->ylogo_light, LIGHT_ID_YLOGO, 0, 1,
-+				"platform::ylogo");
-+	if (err) {
-+		dev_info(&pdev->dev,
-+			 "Failed to init Y-Logo LED driver. Skipping ...\n");
-+	}
-+
-+	pr_info("Init IO-Port LED driver\n");
-+	err = legion_light_init(priv, &priv->iport_light, LIGHT_ID_IOPORT, 1, 2,
-+				"platform::ioport");
-+	if (err) {
-+		dev_info(&pdev->dev,
-+			 "Failed to init IO-Port LED driver. Skipping ...\n");
-+	}
-+
-+	dev_info(&pdev->dev, "legion_laptop loaded for this device\n");
-+	return 0;
-+
-+	// TODO: remove eventually
-+	legion_light_exit(priv, &priv->iport_light);
-+	legion_light_exit(priv, &priv->ylogo_light);
-+	legion_kbd_bl_exit(priv);
-+	legion_wmi_exit();
-+err_wmi:
-+	legion_platform_profile_exit(priv);
-+err_platform_profile:
-+	legion_hwmon_exit(priv);
-+err_hwmon_init:
-+	legion_sysfs_exit(priv);
-+err_sysfs_init:
-+	legion_debugfs_exit(priv);
-+err_ecram_id:
-+	ecram_exit(&priv->ecram);
-+err_ecram_init:
-+	ecram_memoryio_exit(&priv->ec_memoryio);
-+err_ecram_memoryio_init:
-+err_acpi_init:
-+	legion_shared_exit(priv);
-+err_legion_shared_init:
-+err_model_mismtach:
-+	dev_info(&pdev->dev, "legion_laptop not loaded for this device\n");
-+	return err;
-+}
-+
-+static int legion_remove(struct platform_device *pdev)
-+{
-+	struct legion_private *priv = dev_get_drvdata(&pdev->dev);
-+
-+	mutex_lock(&legion_shared_mutex);
-+	priv->loaded = false;
-+	mutex_unlock(&legion_shared_mutex);
-+
-+	legion_light_exit(priv, &priv->iport_light);
-+	legion_light_exit(priv, &priv->ylogo_light);
-+	legion_kbd_bl_exit(priv);
-+	// first unregister wmi, so toggling powermode does not
-+	// generate events anymore that even might be delayed
-+	legion_wmi_exit();
-+	legion_platform_profile_exit(priv);
-+
-+	// toggle power mode to load default setting from embedded controller
-+	// again
-+	toggle_powermode(priv);
-+
-+	legion_hwmon_exit(priv);
-+	legion_sysfs_exit(priv);
-+	legion_debugfs_exit(priv);
-+	ecram_exit(&priv->ecram);
-+	ecram_memoryio_exit(&priv->ec_memoryio);
-+	legion_shared_exit(priv);
-+
-+	pr_info("Legion platform unloaded\n");
-+	return 0;
-+}
-+
-+static int legion_resume(struct platform_device *pdev)
-+{
-+	//struct legion_private *priv = dev_get_drvdata(&pdev->dev);
-+	dev_info(&pdev->dev, "Resumed in legion-laptop\n");
-+
-+	return 0;
-+}
-+
-+#ifdef CONFIG_PM_SLEEP
-+static int legion_pm_resume(struct device *dev)
-+{
-+	//struct legion_private *priv = dev_get_drvdata(dev);
-+	dev_info(dev, "Resumed PM in legion-laptop\n");
-+
-+	return 0;
-+}
-+#endif
-+static SIMPLE_DEV_PM_OPS(legion_pm, NULL, legion_pm_resume);
-+
-+// same as ideapad
-+static const struct acpi_device_id legion_device_ids[] = {
-+	// todo: change to "VPC2004", and also ACPI paths
-+	{ "PNP0C09", 0 },
-+	{ "", 0 },
-+};
-+MODULE_DEVICE_TABLE(acpi, legion_device_ids);
-+
-+static struct platform_driver legion_driver = {
-+	.probe = legion_add,
-+	.remove = legion_remove,
-+	.resume = legion_resume,
-+	.driver = {
-+		.name   = "legion",
-+		.pm     = &legion_pm,
-+		.acpi_match_table = ACPI_PTR(legion_device_ids),
-+	},
-+};
-+
-+static int __init legion_init(void)
-+{
-+	int err;
-+
-+	pr_info("Loading legion_laptop\n");
-+	err = platform_driver_register(&legion_driver);
-+	if (err) {
-+		pr_info("legion_laptop: platform_driver_register failed\n");
-+		return err;
-+	}
-+
-+	return 0;
-+}
-+
-+module_init(legion_init);
-+
-+static void __exit legion_exit(void)
-+{
-+	platform_driver_unregister(&legion_driver);
-+	pr_info("legion_laptop exit\n");
-+}
-+
-+module_exit(legion_exit);
--- 
-2.43.2
diff --git a/patches/nobara/linux-surface.patch b/patches/nobara/linux-surface.patch
deleted file mode 100644
index 3378feb..0000000
--- a/patches/nobara/linux-surface.patch
+++ /dev/null
@@ -1,9117 +0,0 @@
-From da55b6ffe4a98a4af6ced4074317ba9d026f84dd Mon Sep 17 00:00:00 2001
-From: Tsuchiya Yuto <kitakar@gmail.com>
-Date: Sun, 18 Oct 2020 16:42:44 +0900
-Subject: [PATCH] (surface3-oemb) add DMI matches for Surface 3 with broken DMI
- table
-
-On some Surface 3, the DMI table gets corrupted for unknown reasons
-and breaks existing DMI matching used for device-specific quirks.
-
-This commit adds the (broken) DMI data into dmi_system_id tables used
-for quirks so that each driver can enable quirks even on the affected
-systems.
-
-On affected systems, DMI data will look like this:
-    $ grep . /sys/devices/virtual/dmi/id/{bios_vendor,board_name,board_vendor,\
-    chassis_vendor,product_name,sys_vendor}
-    /sys/devices/virtual/dmi/id/bios_vendor:American Megatrends Inc.
-    /sys/devices/virtual/dmi/id/board_name:OEMB
-    /sys/devices/virtual/dmi/id/board_vendor:OEMB
-    /sys/devices/virtual/dmi/id/chassis_vendor:OEMB
-    /sys/devices/virtual/dmi/id/product_name:OEMB
-    /sys/devices/virtual/dmi/id/sys_vendor:OEMB
-
-Expected:
-    $ grep . /sys/devices/virtual/dmi/id/{bios_vendor,board_name,board_vendor,\
-    chassis_vendor,product_name,sys_vendor}
-    /sys/devices/virtual/dmi/id/bios_vendor:American Megatrends Inc.
-    /sys/devices/virtual/dmi/id/board_name:Surface 3
-    /sys/devices/virtual/dmi/id/board_vendor:Microsoft Corporation
-    /sys/devices/virtual/dmi/id/chassis_vendor:Microsoft Corporation
-    /sys/devices/virtual/dmi/id/product_name:Surface 3
-    /sys/devices/virtual/dmi/id/sys_vendor:Microsoft Corporation
-
-Signed-off-by: Tsuchiya Yuto <kitakar@gmail.com>
-Patchset: surface3-oemb
----
- drivers/platform/surface/surface3-wmi.c           | 7 +++++++
- sound/soc/codecs/rt5645.c                         | 9 +++++++++
- sound/soc/intel/common/soc-acpi-intel-cht-match.c | 8 ++++++++
- 3 files changed, 24 insertions(+)
-
-diff --git a/drivers/platform/surface/surface3-wmi.c b/drivers/platform/surface/surface3-wmi.c
-index ca4602bcc7dea..490b9731068ae 100644
---- a/drivers/platform/surface/surface3-wmi.c
-+++ b/drivers/platform/surface/surface3-wmi.c
-@@ -37,6 +37,13 @@ static const struct dmi_system_id surface3_dmi_table[] = {
- 			DMI_MATCH(DMI_PRODUCT_NAME, "Surface 3"),
- 		},
- 	},
-+	{
-+		.matches = {
-+			DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
-+			DMI_MATCH(DMI_SYS_VENDOR, "OEMB"),
-+			DMI_MATCH(DMI_PRODUCT_NAME, "OEMB"),
-+		},
-+	},
- #endif
- 	{ }
- };
-diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
-index 7938b52d741d8..2d5f83b0cdb0b 100644
---- a/sound/soc/codecs/rt5645.c
-+++ b/sound/soc/codecs/rt5645.c
-@@ -3746,6 +3746,15 @@ static const struct dmi_system_id dmi_platform_data[] = {
- 		},
- 		.driver_data = (void *)&intel_braswell_platform_data,
- 	},
-+	{
-+		.ident = "Microsoft Surface 3",
-+		.matches = {
-+			DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
-+			DMI_MATCH(DMI_SYS_VENDOR, "OEMB"),
-+			DMI_MATCH(DMI_PRODUCT_NAME, "OEMB"),
-+		},
-+		.driver_data = (void *)&intel_braswell_platform_data,
-+	},
- 	{
- 		/*
- 		 * Match for the GPDwin which unfortunately uses somewhat
-diff --git a/sound/soc/intel/common/soc-acpi-intel-cht-match.c b/sound/soc/intel/common/soc-acpi-intel-cht-match.c
-index cdcbf04b8832f..958305779b125 100644
---- a/sound/soc/intel/common/soc-acpi-intel-cht-match.c
-+++ b/sound/soc/intel/common/soc-acpi-intel-cht-match.c
-@@ -27,6 +27,14 @@ static const struct dmi_system_id cht_table[] = {
- 			DMI_MATCH(DMI_PRODUCT_NAME, "Surface 3"),
- 		},
- 	},
-+	{
-+		.callback = cht_surface_quirk_cb,
-+		.matches = {
-+			DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
-+			DMI_MATCH(DMI_SYS_VENDOR, "OEMB"),
-+			DMI_MATCH(DMI_PRODUCT_NAME, "OEMB"),
-+		},
-+	},
- 	{ }
- };
- 
--- 
-2.42.0
-
-From 35b3c5195c9fc191de6b5a6e4361762aa37edad2 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= <verdre@v0yd.nl>
-Date: Tue, 3 Nov 2020 13:28:04 +0100
-Subject: [PATCH] mwifiex: Add quirk resetting the PCI bridge on MS Surface
- devices
-
-The most recent firmware of the 88W8897 card reports a hardcoded LTR
-value to the system during initialization, probably as an (unsuccessful)
-attempt of the developers to fix firmware crashes. This LTR value
-prevents most of the Microsoft Surface devices from entering deep
-powersaving states (either platform C-State 10 or S0ix state), because
-the exit latency of that state would be higher than what the card can
-tolerate.
-
-Turns out the card works just the same (including the firmware crashes)
-no matter if that hardcoded LTR value is reported or not, so it's kind
-of useless and only prevents us from saving power.
-
-To get rid of those hardcoded LTR reports, it's possible to reset the
-PCI bridge device after initializing the cards firmware. I'm not exactly
-sure why that works, maybe the power management subsystem of the PCH
-resets its stored LTR values when doing a function level reset of the
-bridge device. Doing the reset once after starting the wifi firmware
-works very well, probably because the firmware only reports that LTR
-value a single time during firmware startup.
-
-Patchset: mwifiex
----
- drivers/net/wireless/marvell/mwifiex/pcie.c   | 12 +++++++++
- .../wireless/marvell/mwifiex/pcie_quirks.c    | 26 +++++++++++++------
- .../wireless/marvell/mwifiex/pcie_quirks.h    |  1 +
- 3 files changed, 31 insertions(+), 8 deletions(-)
-
-diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c
-index 6697132ecc977..f06b4ebc5bd8e 100644
---- a/drivers/net/wireless/marvell/mwifiex/pcie.c
-+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
-@@ -1771,9 +1771,21 @@ mwifiex_pcie_send_boot_cmd(struct mwifiex_adapter *adapter, struct sk_buff *skb)
- static int mwifiex_pcie_init_fw_port(struct mwifiex_adapter *adapter)
- {
- 	struct pcie_service_card *card = adapter->card;
-+	struct pci_dev *pdev = card->dev;
-+	struct pci_dev *parent_pdev = pci_upstream_bridge(pdev);
- 	const struct mwifiex_pcie_card_reg *reg = card->pcie.reg;
- 	int tx_wrap = card->txbd_wrptr & reg->tx_wrap_mask;
- 
-+	/* Trigger a function level reset of the PCI bridge device, this makes
-+	 * the firmware of PCIe 88W8897 cards stop reporting a fixed LTR value
-+	 * that prevents the system from entering package C10 and S0ix powersaving
-+	 * states.
-+	 * We need to do it here because it must happen after firmware
-+	 * initialization and this function is called after that is done.
-+	 */
-+	if (card->quirks & QUIRK_DO_FLR_ON_BRIDGE)
-+		pci_reset_function(parent_pdev);
-+
- 	/* Write the RX ring read pointer in to reg->rx_rdptr */
- 	if (mwifiex_write_reg(adapter, reg->rx_rdptr, card->rxbd_rdptr |
- 			      tx_wrap)) {
-diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
-index dd6d21f1dbfd7..f46b06f8d6435 100644
---- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
-+++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
-@@ -13,7 +13,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
- 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
- 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 4"),
- 		},
--		.driver_data = (void *)QUIRK_FW_RST_D3COLD,
-+		.driver_data = (void *)(QUIRK_FW_RST_D3COLD |
-+					QUIRK_DO_FLR_ON_BRIDGE),
- 	},
- 	{
- 		.ident = "Surface Pro 5",
-@@ -22,7 +23,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
- 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
- 			DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_1796"),
- 		},
--		.driver_data = (void *)QUIRK_FW_RST_D3COLD,
-+		.driver_data = (void *)(QUIRK_FW_RST_D3COLD |
-+					QUIRK_DO_FLR_ON_BRIDGE),
- 	},
- 	{
- 		.ident = "Surface Pro 5 (LTE)",
-@@ -31,7 +33,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
- 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
- 			DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_1807"),
- 		},
--		.driver_data = (void *)QUIRK_FW_RST_D3COLD,
-+		.driver_data = (void *)(QUIRK_FW_RST_D3COLD |
-+					QUIRK_DO_FLR_ON_BRIDGE),
- 	},
- 	{
- 		.ident = "Surface Pro 6",
-@@ -39,7 +42,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
- 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
- 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 6"),
- 		},
--		.driver_data = (void *)QUIRK_FW_RST_D3COLD,
-+		.driver_data = (void *)(QUIRK_FW_RST_D3COLD |
-+					QUIRK_DO_FLR_ON_BRIDGE),
- 	},
- 	{
- 		.ident = "Surface Book 1",
-@@ -47,7 +51,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
- 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
- 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Book"),
- 		},
--		.driver_data = (void *)QUIRK_FW_RST_D3COLD,
-+		.driver_data = (void *)(QUIRK_FW_RST_D3COLD |
-+					QUIRK_DO_FLR_ON_BRIDGE),
- 	},
- 	{
- 		.ident = "Surface Book 2",
-@@ -55,7 +60,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
- 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
- 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Book 2"),
- 		},
--		.driver_data = (void *)QUIRK_FW_RST_D3COLD,
-+		.driver_data = (void *)(QUIRK_FW_RST_D3COLD |
-+					QUIRK_DO_FLR_ON_BRIDGE),
- 	},
- 	{
- 		.ident = "Surface Laptop 1",
-@@ -63,7 +69,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
- 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
- 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Laptop"),
- 		},
--		.driver_data = (void *)QUIRK_FW_RST_D3COLD,
-+		.driver_data = (void *)(QUIRK_FW_RST_D3COLD |
-+					QUIRK_DO_FLR_ON_BRIDGE),
- 	},
- 	{
- 		.ident = "Surface Laptop 2",
-@@ -71,7 +78,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
- 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
- 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Laptop 2"),
- 		},
--		.driver_data = (void *)QUIRK_FW_RST_D3COLD,
-+		.driver_data = (void *)(QUIRK_FW_RST_D3COLD |
-+					QUIRK_DO_FLR_ON_BRIDGE),
- 	},
- 	{}
- };
-@@ -89,6 +97,8 @@ void mwifiex_initialize_quirks(struct pcie_service_card *card)
- 		dev_info(&pdev->dev, "no quirks enabled\n");
- 	if (card->quirks & QUIRK_FW_RST_D3COLD)
- 		dev_info(&pdev->dev, "quirk reset_d3cold enabled\n");
-+	if (card->quirks & QUIRK_DO_FLR_ON_BRIDGE)
-+		dev_info(&pdev->dev, "quirk do_flr_on_bridge enabled\n");
- }
- 
- static void mwifiex_pcie_set_power_d3cold(struct pci_dev *pdev)
-diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
-index d6ff964aec5bf..5d30ae39d65ec 100644
---- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
-+++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
-@@ -4,6 +4,7 @@
- #include "pcie.h"
- 
- #define QUIRK_FW_RST_D3COLD	BIT(0)
-+#define QUIRK_DO_FLR_ON_BRIDGE	BIT(1)
- 
- void mwifiex_initialize_quirks(struct pcie_service_card *card);
- int mwifiex_pcie_reset_d3cold_quirk(struct pci_dev *pdev);
--- 
-2.42.0
-
-From 241da24644ea2f5b8119019448b638aa8df6ab26 Mon Sep 17 00:00:00 2001
-From: Tsuchiya Yuto <kitakar@gmail.com>
-Date: Sun, 4 Oct 2020 00:11:49 +0900
-Subject: [PATCH] mwifiex: pcie: disable bridge_d3 for Surface gen4+
-
-Currently, mwifiex fw will crash after suspend on recent kernel series.
-On Windows, it seems that the root port of wifi will never enter D3 state
-(stay on D0 state). And on Linux, disabling the D3 state for the
-bridge fixes fw crashing after suspend.
-
-This commit disables the D3 state of root port on driver initialization
-and fixes fw crashing after suspend.
-
-Signed-off-by: Tsuchiya Yuto <kitakar@gmail.com>
-Patchset: mwifiex
----
- drivers/net/wireless/marvell/mwifiex/pcie.c   |  7 +++++
- .../wireless/marvell/mwifiex/pcie_quirks.c    | 27 +++++++++++++------
- .../wireless/marvell/mwifiex/pcie_quirks.h    |  1 +
- 3 files changed, 27 insertions(+), 8 deletions(-)
-
-diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c
-index f06b4ebc5bd8e..07f13b52ddb92 100644
---- a/drivers/net/wireless/marvell/mwifiex/pcie.c
-+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
-@@ -370,6 +370,7 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev,
- 					const struct pci_device_id *ent)
- {
- 	struct pcie_service_card *card;
-+	struct pci_dev *parent_pdev = pci_upstream_bridge(pdev);
- 	int ret;
- 
- 	pr_debug("info: vendor=0x%4.04X device=0x%4.04X rev=%d\n",
-@@ -411,6 +412,12 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev,
- 		return -1;
- 	}
- 
-+	/* disable bridge_d3 for Surface gen4+ devices to fix fw crashing
-+	 * after suspend
-+	 */
-+	if (card->quirks & QUIRK_NO_BRIDGE_D3)
-+		parent_pdev->bridge_d3 = false;
-+
- 	return 0;
- }
- 
-diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
-index f46b06f8d6435..99b024ecbadea 100644
---- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
-+++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
-@@ -14,7 +14,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
- 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 4"),
- 		},
- 		.driver_data = (void *)(QUIRK_FW_RST_D3COLD |
--					QUIRK_DO_FLR_ON_BRIDGE),
-+					QUIRK_DO_FLR_ON_BRIDGE |
-+					QUIRK_NO_BRIDGE_D3),
- 	},
- 	{
- 		.ident = "Surface Pro 5",
-@@ -24,7 +25,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
- 			DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_1796"),
- 		},
- 		.driver_data = (void *)(QUIRK_FW_RST_D3COLD |
--					QUIRK_DO_FLR_ON_BRIDGE),
-+					QUIRK_DO_FLR_ON_BRIDGE |
-+					QUIRK_NO_BRIDGE_D3),
- 	},
- 	{
- 		.ident = "Surface Pro 5 (LTE)",
-@@ -34,7 +36,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
- 			DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_1807"),
- 		},
- 		.driver_data = (void *)(QUIRK_FW_RST_D3COLD |
--					QUIRK_DO_FLR_ON_BRIDGE),
-+					QUIRK_DO_FLR_ON_BRIDGE |
-+					QUIRK_NO_BRIDGE_D3),
- 	},
- 	{
- 		.ident = "Surface Pro 6",
-@@ -43,7 +46,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
- 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 6"),
- 		},
- 		.driver_data = (void *)(QUIRK_FW_RST_D3COLD |
--					QUIRK_DO_FLR_ON_BRIDGE),
-+					QUIRK_DO_FLR_ON_BRIDGE |
-+					QUIRK_NO_BRIDGE_D3),
- 	},
- 	{
- 		.ident = "Surface Book 1",
-@@ -52,7 +56,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
- 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Book"),
- 		},
- 		.driver_data = (void *)(QUIRK_FW_RST_D3COLD |
--					QUIRK_DO_FLR_ON_BRIDGE),
-+					QUIRK_DO_FLR_ON_BRIDGE |
-+					QUIRK_NO_BRIDGE_D3),
- 	},
- 	{
- 		.ident = "Surface Book 2",
-@@ -61,7 +66,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
- 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Book 2"),
- 		},
- 		.driver_data = (void *)(QUIRK_FW_RST_D3COLD |
--					QUIRK_DO_FLR_ON_BRIDGE),
-+					QUIRK_DO_FLR_ON_BRIDGE |
-+					QUIRK_NO_BRIDGE_D3),
- 	},
- 	{
- 		.ident = "Surface Laptop 1",
-@@ -70,7 +76,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
- 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Laptop"),
- 		},
- 		.driver_data = (void *)(QUIRK_FW_RST_D3COLD |
--					QUIRK_DO_FLR_ON_BRIDGE),
-+					QUIRK_DO_FLR_ON_BRIDGE |
-+					QUIRK_NO_BRIDGE_D3),
- 	},
- 	{
- 		.ident = "Surface Laptop 2",
-@@ -79,7 +86,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
- 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Laptop 2"),
- 		},
- 		.driver_data = (void *)(QUIRK_FW_RST_D3COLD |
--					QUIRK_DO_FLR_ON_BRIDGE),
-+					QUIRK_DO_FLR_ON_BRIDGE |
-+					QUIRK_NO_BRIDGE_D3),
- 	},
- 	{}
- };
-@@ -99,6 +107,9 @@ void mwifiex_initialize_quirks(struct pcie_service_card *card)
- 		dev_info(&pdev->dev, "quirk reset_d3cold enabled\n");
- 	if (card->quirks & QUIRK_DO_FLR_ON_BRIDGE)
- 		dev_info(&pdev->dev, "quirk do_flr_on_bridge enabled\n");
-+	if (card->quirks & QUIRK_NO_BRIDGE_D3)
-+		dev_info(&pdev->dev,
-+			 "quirk no_brigde_d3 enabled\n");
- }
- 
- static void mwifiex_pcie_set_power_d3cold(struct pci_dev *pdev)
-diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
-index 5d30ae39d65ec..c14eb56eb9118 100644
---- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
-+++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
-@@ -5,6 +5,7 @@
- 
- #define QUIRK_FW_RST_D3COLD	BIT(0)
- #define QUIRK_DO_FLR_ON_BRIDGE	BIT(1)
-+#define QUIRK_NO_BRIDGE_D3	BIT(2)
- 
- void mwifiex_initialize_quirks(struct pcie_service_card *card);
- int mwifiex_pcie_reset_d3cold_quirk(struct pci_dev *pdev);
--- 
-2.42.0
-
-From d20b58f9e2ccec57c66864e79c291c2618ab2dbe Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= <verdre@v0yd.nl>
-Date: Thu, 25 Mar 2021 11:33:02 +0100
-Subject: [PATCH] Bluetooth: btusb: Lower passive lescan interval on Marvell
- 88W8897
-
-The Marvell 88W8897 combined wifi and bluetooth card (pcie+usb version)
-is used in a lot of Microsoft Surface devices, and all those devices
-suffer from very low 2.4GHz wifi connection speeds while bluetooth is
-enabled. The reason for that is that the default passive scanning
-interval for Bluetooth Low Energy devices is quite high in Linux
-(interval of 60 msec and scan window of 30 msec, see hci_core.c), and
-the Marvell chip is known for its bad bt+wifi coexisting performance.
-
-So decrease that passive scan interval and make the scan window shorter
-on this particular device to allow for spending more time transmitting
-wifi signals: The new scan interval is 250 msec (0x190 * 0.625 msec) and
-the new scan window is 6.25 msec (0xa * 0,625 msec).
-
-This change has a very large impact on the 2.4GHz wifi speeds and gets
-it up to performance comparable with the Windows driver, which seems to
-apply a similar quirk.
-
-The interval and window length were tested and found to work very well
-with a lot of Bluetooth Low Energy devices, including the Surface Pen, a
-Bluetooth Speaker and two modern Bluetooth headphones. All devices were
-discovered immediately after turning them on. Even lower values were
-also tested, but they introduced longer delays until devices get
-discovered.
-
-Patchset: mwifiex
----
- drivers/bluetooth/btusb.c | 15 +++++++++++++++
- 1 file changed, 15 insertions(+)
-
-diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
-index 499f4809fcdf3..2d442e080ca28 100644
---- a/drivers/bluetooth/btusb.c
-+++ b/drivers/bluetooth/btusb.c
-@@ -65,6 +65,7 @@ static struct usb_driver btusb_driver;
- #define BTUSB_INTEL_BROKEN_INITIAL_NCMD BIT(25)
- #define BTUSB_INTEL_NO_WBS_SUPPORT	BIT(26)
- #define BTUSB_ACTIONS_SEMI		BIT(27)
-+#define BTUSB_LOWER_LESCAN_INTERVAL	BIT(28)
- 
- static const struct usb_device_id btusb_table[] = {
- 	/* Generic Bluetooth USB device */
-@@ -468,6 +469,7 @@ static const struct usb_device_id quirks_table[] = {
- 	{ USB_DEVICE(0x1286, 0x2044), .driver_info = BTUSB_MARVELL },
- 	{ USB_DEVICE(0x1286, 0x2046), .driver_info = BTUSB_MARVELL },
- 	{ USB_DEVICE(0x1286, 0x204e), .driver_info = BTUSB_MARVELL },
-+	{ USB_DEVICE(0x1286, 0x204c), .driver_info = BTUSB_LOWER_LESCAN_INTERVAL },
- 
- 	/* Intel Bluetooth devices */
- 	{ USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_COMBINED },
-@@ -4388,6 +4390,19 @@ static int btusb_probe(struct usb_interface *intf,
- 	if (id->driver_info & BTUSB_MARVELL)
- 		hdev->set_bdaddr = btusb_set_bdaddr_marvell;
- 
-+	/* The Marvell 88W8897 combined wifi and bluetooth card is known for
-+	 * very bad bt+wifi coexisting performance.
-+	 *
-+	 * Decrease the passive BT Low Energy scan interval a bit
-+	 * (0x0190 * 0.625 msec = 250 msec) and make the scan window shorter
-+	 * (0x000a * 0,625 msec = 6.25 msec). This allows for significantly
-+	 * higher wifi throughput while passively scanning for BT LE devices.
-+	 */
-+	if (id->driver_info & BTUSB_LOWER_LESCAN_INTERVAL) {
-+		hdev->le_scan_interval = 0x0190;
-+		hdev->le_scan_window = 0x000a;
-+	}
-+
- 	if (IS_ENABLED(CONFIG_BT_HCIBTUSB_MTK) &&
- 	    (id->driver_info & BTUSB_MEDIATEK)) {
- 		hdev->setup = btusb_mtk_setup;
--- 
-2.42.0
-
-From c6f0985fae241ed43ea1245c9e5861e2c728e21e Mon Sep 17 00:00:00 2001
-From: Maximilian Luz <luzmaximilian@gmail.com>
-Date: Sat, 27 Feb 2021 00:45:52 +0100
-Subject: [PATCH] ath10k: Add module parameters to override board files
-
-Some Surface devices, specifically the Surface Go and AMD version of the
-Surface Laptop 3 (wich both come with QCA6174 WiFi chips), work better
-with a different board file, as it seems that the firmeware included
-upstream is buggy.
-
-As it is generally not a good idea to randomly overwrite files, let
-alone doing so via packages, we add module parameters to override those
-file names in the driver. This allows us to package/deploy the override
-via a modprobe.d config.
-
-Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
-Patchset: ath10k
----
- drivers/net/wireless/ath/ath10k/core.c | 58 ++++++++++++++++++++++++++
- 1 file changed, 58 insertions(+)
-
-diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
-index 6cdb225b7eacc..19c036751fb16 100644
---- a/drivers/net/wireless/ath/ath10k/core.c
-+++ b/drivers/net/wireless/ath/ath10k/core.c
-@@ -38,6 +38,9 @@ static bool fw_diag_log;
- /* frame mode values are mapped as per enum ath10k_hw_txrx_mode */
- unsigned int ath10k_frame_mode = ATH10K_HW_TXRX_NATIVE_WIFI;
- 
-+static char *override_board = "";
-+static char *override_board2 = "";
-+
- unsigned long ath10k_coredump_mask = BIT(ATH10K_FW_CRASH_DUMP_REGISTERS) |
- 				     BIT(ATH10K_FW_CRASH_DUMP_CE_DATA);
- 
-@@ -50,6 +53,9 @@ module_param(fw_diag_log, bool, 0644);
- module_param_named(frame_mode, ath10k_frame_mode, uint, 0644);
- module_param_named(coredump_mask, ath10k_coredump_mask, ulong, 0444);
- 
-+module_param(override_board, charp, 0644);
-+module_param(override_board2, charp, 0644);
-+
- MODULE_PARM_DESC(debug_mask, "Debugging mask");
- MODULE_PARM_DESC(uart_print, "Uart target debugging");
- MODULE_PARM_DESC(skip_otp, "Skip otp failure for calibration in testmode");
-@@ -59,6 +65,9 @@ MODULE_PARM_DESC(frame_mode,
- MODULE_PARM_DESC(coredump_mask, "Bitfield of what to include in firmware crash file");
- MODULE_PARM_DESC(fw_diag_log, "Diag based fw log debugging");
- 
-+MODULE_PARM_DESC(override_board, "Override for board.bin file");
-+MODULE_PARM_DESC(override_board2, "Override for board-2.bin file");
-+
- static const struct ath10k_hw_params ath10k_hw_params_list[] = {
- 	{
- 		.id = QCA988X_HW_2_0_VERSION,
-@@ -911,6 +920,42 @@ static int ath10k_init_configure_target(struct ath10k *ar)
- 	return 0;
- }
- 
-+static const char *ath10k_override_board_fw_file(struct ath10k *ar,
-+						 const char *file)
-+{
-+	if (strcmp(file, "board.bin") == 0) {
-+		if (strcmp(override_board, "") == 0)
-+			return file;
-+
-+		if (strcmp(override_board, "none") == 0) {
-+			dev_info(ar->dev, "firmware override: pretending 'board.bin' does not exist\n");
-+			return NULL;
-+		}
-+
-+		dev_info(ar->dev, "firmware override: replacing 'board.bin' with '%s'\n",
-+			 override_board);
-+
-+		return override_board;
-+	}
-+
-+	if (strcmp(file, "board-2.bin") == 0) {
-+		if (strcmp(override_board2, "") == 0)
-+			return file;
-+
-+		if (strcmp(override_board2, "none") == 0) {
-+			dev_info(ar->dev, "firmware override: pretending 'board-2.bin' does not exist\n");
-+			return NULL;
-+		}
-+
-+		dev_info(ar->dev, "firmware override: replacing 'board-2.bin' with '%s'\n",
-+			 override_board2);
-+
-+		return override_board2;
-+	}
-+
-+	return file;
-+}
-+
- static const struct firmware *ath10k_fetch_fw_file(struct ath10k *ar,
- 						   const char *dir,
- 						   const char *file)
-@@ -925,6 +970,19 @@ static const struct firmware *ath10k_fetch_fw_file(struct ath10k *ar,
- 	if (dir == NULL)
- 		dir = ".";
- 
-+	/* HACK: Override board.bin and board-2.bin files if specified.
-+	 *
-+	 * Some Surface devices perform better with a different board
-+	 * configuration. To this end, one would need to replace the board.bin
-+	 * file with the modified config and remove the board-2.bin file.
-+	 * Unfortunately, that's not a solution that we can easily package. So
-+	 * we add module options to perform these overrides here.
-+	 */
-+
-+	file = ath10k_override_board_fw_file(ar, file);
-+	if (!file)
-+		return ERR_PTR(-ENOENT);
-+
- 	snprintf(filename, sizeof(filename), "%s/%s", dir, file);
- 	ret = firmware_request_nowarn(&fw, filename, ar->dev);
- 	ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot fw request '%s': %d\n",
--- 
-2.42.0
-
-From 986fe56f682f93925b2964f59fe78c7043758e47 Mon Sep 17 00:00:00 2001
-From: Dorian Stoll <dorian.stoll@tmsp.io>
-Date: Thu, 30 Jul 2020 13:21:53 +0200
-Subject: [PATCH] misc: mei: Add missing IPTS device IDs
-
-Patchset: ipts
----
- drivers/misc/mei/hw-me-regs.h | 1 +
- drivers/misc/mei/pci-me.c     | 1 +
- 2 files changed, 2 insertions(+)
-
-diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
-index bdc65d50b945f..08723c01d7275 100644
---- a/drivers/misc/mei/hw-me-regs.h
-+++ b/drivers/misc/mei/hw-me-regs.h
-@@ -92,6 +92,7 @@
- #define MEI_DEV_ID_CDF        0x18D3  /* Cedar Fork */
- 
- #define MEI_DEV_ID_ICP_LP     0x34E0  /* Ice Lake Point LP */
-+#define MEI_DEV_ID_ICP_LP_3   0x34E4  /* Ice Lake Point LP 3 (iTouch) */
- #define MEI_DEV_ID_ICP_N      0x38E0  /* Ice Lake Point N */
- 
- #define MEI_DEV_ID_JSP_N      0x4DE0  /* Jasper Lake Point N */
-diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
-index 676d566f38ddf..6b37dd1f8b2a3 100644
---- a/drivers/misc/mei/pci-me.c
-+++ b/drivers/misc/mei/pci-me.c
-@@ -97,6 +97,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
- 	{MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H_3, MEI_ME_PCH8_ITOUCH_CFG)},
- 
- 	{MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)},
-+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP_3, MEI_ME_PCH12_CFG)},
- 	{MEI_PCI_DEVICE(MEI_DEV_ID_ICP_N, MEI_ME_PCH12_CFG)},
- 
- 	{MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH15_CFG)},
--- 
-2.42.0
-
-From 72ee1cbf26ccc575dbfbaee5e7305ab13e1aeb1e Mon Sep 17 00:00:00 2001
-From: Liban Hannan <liban.p@gmail.com>
-Date: Tue, 12 Apr 2022 23:31:12 +0100
-Subject: [PATCH] iommu: ipts: use IOMMU passthrough mode for IPTS
-
-Adds a quirk so that IOMMU uses passthrough mode for the IPTS device.
-Otherwise, when IOMMU is enabled, IPTS produces DMAR errors like:
-
-DMAR: [DMA Read NO_PASID] Request device [00:16.4] fault addr
-0x104ea3000 [fault reason 0x06] PTE Read access is not set
-
-This is very similar to the bug described at:
-https://bugs.launchpad.net/bugs/1958004
-
-Fixed with the following patch which this patch basically copies:
-https://launchpadlibrarian.net/586396847/43255ca.diff
-Patchset: ipts
----
- drivers/iommu/intel/iommu.c | 24 ++++++++++++++++++++++++
- 1 file changed, 24 insertions(+)
-
-diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
-index 3685ba90ec88e..5a627e081797c 100644
---- a/drivers/iommu/intel/iommu.c
-+++ b/drivers/iommu/intel/iommu.c
-@@ -38,6 +38,8 @@
- #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
- #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
- #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
-+#define IS_IPTS(pdev) ((pdev)->vendor == PCI_VENDOR_ID_INTEL &&	\
-+			    ((pdev)->device == 0x9d3e))
- #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
- 
- #define IOAPIC_RANGE_START	(0xfee00000)
-@@ -292,12 +294,14 @@ int intel_iommu_enabled = 0;
- EXPORT_SYMBOL_GPL(intel_iommu_enabled);
- 
- static int dmar_map_gfx = 1;
-+static int dmar_map_ipts = 1;
- static int intel_iommu_superpage = 1;
- static int iommu_identity_mapping;
- static int iommu_skip_te_disable;
- 
- #define IDENTMAP_GFX		2
- #define IDENTMAP_AZALIA		4
-+#define IDENTMAP_IPTS		16
- 
- const struct iommu_ops intel_iommu_ops;
- 
-@@ -2542,6 +2546,9 @@ static int device_def_domain_type(struct device *dev)
- 
- 		if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
- 			return IOMMU_DOMAIN_IDENTITY;
-+
-+		if ((iommu_identity_mapping & IDENTMAP_IPTS) && IS_IPTS(pdev))
-+			return IOMMU_DOMAIN_IDENTITY;
- 	}
- 
- 	return 0;
-@@ -2849,6 +2856,9 @@ static int __init init_dmars(void)
- 	if (!dmar_map_gfx)
- 		iommu_identity_mapping |= IDENTMAP_GFX;
- 
-+	if (!dmar_map_ipts)
-+		iommu_identity_mapping |= IDENTMAP_IPTS;
-+
- 	check_tylersburg_isoch();
- 
- 	ret = si_domain_init(hw_pass_through);
-@@ -4828,6 +4838,17 @@ static void quirk_iommu_igfx(struct pci_dev *dev)
- 	dmar_map_gfx = 0;
- }
- 
-+static void quirk_iommu_ipts(struct pci_dev *dev)
-+{
-+	if (!IS_IPTS(dev))
-+		return;
-+
-+	if (risky_device(dev))
-+		return;
-+
-+	pci_info(dev, "Passthrough IOMMU for IPTS\n");
-+	dmar_map_ipts = 0;
-+}
- /* G4x/GM45 integrated gfx dmar support is totally busted. */
- DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
- DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
-@@ -4863,6 +4884,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
- DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
- DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
- 
-+/* disable IPTS dmar support */
-+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9D3E, quirk_iommu_ipts);
-+
- static void quirk_iommu_rwbf(struct pci_dev *dev)
- {
- 	if (risky_device(dev))
--- 
-2.42.0
-
-From 8330f9f39ce8c9796259a8aeffe919fa950e18f5 Mon Sep 17 00:00:00 2001
-From: Dorian Stoll <dorian.stoll@tmsp.io>
-Date: Sun, 11 Dec 2022 12:00:59 +0100
-Subject: [PATCH] hid: Add support for Intel Precise Touch and Stylus
-
-Based on linux-surface/intel-precise-touch@8abe268
-
-Signed-off-by: Dorian Stoll <dorian.stoll@tmsp.io>
-Patchset: ipts
----
- drivers/hid/Kconfig            |   2 +
- drivers/hid/Makefile           |   2 +
- drivers/hid/ipts/Kconfig       |  14 +
- drivers/hid/ipts/Makefile      |  16 ++
- drivers/hid/ipts/cmd.c         |  61 +++++
- drivers/hid/ipts/cmd.h         |  60 ++++
- drivers/hid/ipts/context.h     |  52 ++++
- drivers/hid/ipts/control.c     | 486 +++++++++++++++++++++++++++++++++
- drivers/hid/ipts/control.h     | 126 +++++++++
- drivers/hid/ipts/desc.h        |  80 ++++++
- drivers/hid/ipts/eds1.c        | 103 +++++++
- drivers/hid/ipts/eds1.h        |  35 +++
- drivers/hid/ipts/eds2.c        | 144 ++++++++++
- drivers/hid/ipts/eds2.h        |  35 +++
- drivers/hid/ipts/hid.c         | 225 +++++++++++++++
- drivers/hid/ipts/hid.h         |  24 ++
- drivers/hid/ipts/main.c        | 126 +++++++++
- drivers/hid/ipts/mei.c         | 188 +++++++++++++
- drivers/hid/ipts/mei.h         |  66 +++++
- drivers/hid/ipts/receiver.c    | 250 +++++++++++++++++
- drivers/hid/ipts/receiver.h    |  16 ++
- drivers/hid/ipts/resources.c   | 131 +++++++++
- drivers/hid/ipts/resources.h   |  41 +++
- drivers/hid/ipts/spec-data.h   | 100 +++++++
- drivers/hid/ipts/spec-device.h | 290 ++++++++++++++++++++
- drivers/hid/ipts/spec-hid.h    |  34 +++
- drivers/hid/ipts/thread.c      |  84 ++++++
- drivers/hid/ipts/thread.h      |  59 ++++
- 28 files changed, 2850 insertions(+)
- create mode 100644 drivers/hid/ipts/Kconfig
- create mode 100644 drivers/hid/ipts/Makefile
- create mode 100644 drivers/hid/ipts/cmd.c
- create mode 100644 drivers/hid/ipts/cmd.h
- create mode 100644 drivers/hid/ipts/context.h
- create mode 100644 drivers/hid/ipts/control.c
- create mode 100644 drivers/hid/ipts/control.h
- create mode 100644 drivers/hid/ipts/desc.h
- create mode 100644 drivers/hid/ipts/eds1.c
- create mode 100644 drivers/hid/ipts/eds1.h
- create mode 100644 drivers/hid/ipts/eds2.c
- create mode 100644 drivers/hid/ipts/eds2.h
- create mode 100644 drivers/hid/ipts/hid.c
- create mode 100644 drivers/hid/ipts/hid.h
- create mode 100644 drivers/hid/ipts/main.c
- create mode 100644 drivers/hid/ipts/mei.c
- create mode 100644 drivers/hid/ipts/mei.h
- create mode 100644 drivers/hid/ipts/receiver.c
- create mode 100644 drivers/hid/ipts/receiver.h
- create mode 100644 drivers/hid/ipts/resources.c
- create mode 100644 drivers/hid/ipts/resources.h
- create mode 100644 drivers/hid/ipts/spec-data.h
- create mode 100644 drivers/hid/ipts/spec-device.h
- create mode 100644 drivers/hid/ipts/spec-hid.h
- create mode 100644 drivers/hid/ipts/thread.c
- create mode 100644 drivers/hid/ipts/thread.h
-
-diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
-index 790aa908e2a78..0b9d245d10e54 100644
---- a/drivers/hid/Kconfig
-+++ b/drivers/hid/Kconfig
-@@ -1345,4 +1345,6 @@ source "drivers/hid/amd-sfh-hid/Kconfig"
- 
- source "drivers/hid/surface-hid/Kconfig"
- 
-+source "drivers/hid/ipts/Kconfig"
-+
- endif # HID_SUPPORT
-diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
-index 8a06d0f840bcb..2ef21b257d0b5 100644
---- a/drivers/hid/Makefile
-+++ b/drivers/hid/Makefile
-@@ -169,3 +169,5 @@ obj-$(INTEL_ISH_FIRMWARE_DOWNLOADER)	+= intel-ish-hid/
- obj-$(CONFIG_AMD_SFH_HID)       += amd-sfh-hid/
- 
- obj-$(CONFIG_SURFACE_HID_CORE)  += surface-hid/
-+
-+obj-$(CONFIG_HID_IPTS)          += ipts/
-diff --git a/drivers/hid/ipts/Kconfig b/drivers/hid/ipts/Kconfig
-new file mode 100644
-index 0000000000000..297401bd388dd
---- /dev/null
-+++ b/drivers/hid/ipts/Kconfig
-@@ -0,0 +1,14 @@
-+# SPDX-License-Identifier: GPL-2.0-or-later
-+
-+config HID_IPTS
-+	tristate "Intel Precise Touch & Stylus"
-+	depends on INTEL_MEI
-+	depends on HID
-+	help
-+	  Say Y here if your system has a touchscreen using Intels
-+	  Precise Touch & Stylus (IPTS) technology.
-+
-+	  If unsure say N.
-+
-+	  To compile this driver as a module, choose M here: the
-+	  module will be called ipts.
-diff --git a/drivers/hid/ipts/Makefile b/drivers/hid/ipts/Makefile
-new file mode 100644
-index 0000000000000..883896f68e6ad
---- /dev/null
-+++ b/drivers/hid/ipts/Makefile
-@@ -0,0 +1,16 @@
-+# SPDX-License-Identifier: GPL-2.0-or-later
-+#
-+# Makefile for the IPTS touchscreen driver
-+#
-+
-+obj-$(CONFIG_HID_IPTS) += ipts.o
-+ipts-objs := cmd.o
-+ipts-objs += control.o
-+ipts-objs += eds1.o
-+ipts-objs += eds2.o
-+ipts-objs += hid.o
-+ipts-objs += main.o
-+ipts-objs += mei.o
-+ipts-objs += receiver.o
-+ipts-objs += resources.o
-+ipts-objs += thread.o
-diff --git a/drivers/hid/ipts/cmd.c b/drivers/hid/ipts/cmd.c
-new file mode 100644
-index 0000000000000..63a4934bbc5fa
---- /dev/null
-+++ b/drivers/hid/ipts/cmd.c
-@@ -0,0 +1,61 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#include <linux/errno.h>
-+#include <linux/types.h>
-+
-+#include "cmd.h"
-+#include "context.h"
-+#include "mei.h"
-+#include "spec-device.h"
-+
-+int ipts_cmd_recv_timeout(struct ipts_context *ipts, enum ipts_command_code code,
-+			  struct ipts_response *rsp, u64 timeout)
-+{
-+	int ret = 0;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!rsp)
-+		return -EFAULT;
-+
-+	/*
-+	 * In a response, the command code will have the most significant bit flipped to 1.
-+	 * If code is passed to ipts_mei_recv as is, no messages will be received.
-+	 */
-+	ret = ipts_mei_recv(&ipts->mei, code | IPTS_RSP_BIT, rsp, timeout);
-+	if (ret < 0)
-+		return ret;
-+
-+	dev_dbg(ipts->dev, "Received 0x%02X with status 0x%02X\n", code, rsp->status);
-+
-+	/*
-+	 * Some devices will always return this error.
-+	 * It is allowed to ignore it and to try continuing.
-+	 */
-+	if (rsp->status == IPTS_STATUS_COMPAT_CHECK_FAIL)
-+		rsp->status = IPTS_STATUS_SUCCESS;
-+
-+	return 0;
-+}
-+
-+int ipts_cmd_send(struct ipts_context *ipts, enum ipts_command_code code, void *data, size_t size)
-+{
-+	struct ipts_command cmd = { 0 };
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	cmd.cmd = code;
-+
-+	if (data && size > 0)
-+		memcpy(cmd.payload, data, size);
-+
-+	dev_dbg(ipts->dev, "Sending 0x%02X with %ld bytes payload\n", code, size);
-+	return ipts_mei_send(&ipts->mei, &cmd, sizeof(cmd.cmd) + size);
-+}
-diff --git a/drivers/hid/ipts/cmd.h b/drivers/hid/ipts/cmd.h
-new file mode 100644
-index 0000000000000..2b4079075b642
---- /dev/null
-+++ b/drivers/hid/ipts/cmd.h
-@@ -0,0 +1,60 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_CMD_H
-+#define IPTS_CMD_H
-+
-+#include <linux/types.h>
-+
-+#include "context.h"
-+#include "spec-device.h"
-+
-+/*
-+ * The default timeout for receiving responses
-+ */
-+#define IPTS_CMD_DEFAULT_TIMEOUT 1000
-+
-+/**
-+ * ipts_cmd_recv_timeout() - Receives a response to a command.
-+ * @ipts: The IPTS driver context.
-+ * @code: The type of the command / response.
-+ * @rsp: The address that the received response will be copied to.
-+ * @timeout: How many milliseconds the function will wait at most.
-+ *
-+ * A negative timeout means to wait forever.
-+ *
-+ * Returns: 0 on success, <0 on error, -EAGAIN if no response has been received.
-+ */
-+int ipts_cmd_recv_timeout(struct ipts_context *ipts, enum ipts_command_code code,
-+			  struct ipts_response *rsp, u64 timeout);
-+
-+/**
-+ * ipts_cmd_recv() - Receives a response to a command.
-+ * @ipts: The IPTS driver context.
-+ * @code: The type of the command / response.
-+ * @rsp: The address that the received response will be copied to.
-+ *
-+ * Returns: 0 on success, <0 on error, -EAGAIN if no response has been received.
-+ */
-+static inline int ipts_cmd_recv(struct ipts_context *ipts, enum ipts_command_code code,
-+				struct ipts_response *rsp)
-+{
-+	return ipts_cmd_recv_timeout(ipts, code, rsp, IPTS_CMD_DEFAULT_TIMEOUT);
-+}
-+
-+/**
-+ * ipts_cmd_send() - Executes a command on the device.
-+ * @ipts: The IPTS driver context.
-+ * @code: The type of the command to execute.
-+ * @data: The payload containing parameters for the command.
-+ * @size: The size of the payload.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_cmd_send(struct ipts_context *ipts, enum ipts_command_code code, void *data, size_t size);
-+
-+#endif /* IPTS_CMD_H */
-diff --git a/drivers/hid/ipts/context.h b/drivers/hid/ipts/context.h
-new file mode 100644
-index 0000000000000..ba33259f1f7c5
---- /dev/null
-+++ b/drivers/hid/ipts/context.h
-@@ -0,0 +1,52 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_CONTEXT_H
-+#define IPTS_CONTEXT_H
-+
-+#include <linux/completion.h>
-+#include <linux/device.h>
-+#include <linux/hid.h>
-+#include <linux/mei_cl_bus.h>
-+#include <linux/mutex.h>
-+#include <linux/sched.h>
-+#include <linux/types.h>
-+
-+#include "mei.h"
-+#include "resources.h"
-+#include "spec-device.h"
-+#include "thread.h"
-+
-+struct ipts_context {
-+	struct device *dev;
-+	struct ipts_mei mei;
-+
-+	enum ipts_mode mode;
-+
-+	/*
-+	 * Prevents concurrent GET_FEATURE reports.
-+	 */
-+	struct mutex feature_lock;
-+	struct completion feature_event;
-+
-+	/*
-+	 * These are not inside of struct ipts_resources
-+	 * because they don't own the memory they point to.
-+	 */
-+	struct ipts_buffer feature_report;
-+	struct ipts_buffer descriptor;
-+
-+	bool hid_active;
-+	struct hid_device *hid;
-+
-+	struct ipts_device_info info;
-+	struct ipts_resources resources;
-+
-+	struct ipts_thread receiver_loop;
-+};
-+
-+#endif /* IPTS_CONTEXT_H */
-diff --git a/drivers/hid/ipts/control.c b/drivers/hid/ipts/control.c
-new file mode 100644
-index 0000000000000..5360842d260ba
---- /dev/null
-+++ b/drivers/hid/ipts/control.c
-@@ -0,0 +1,486 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#include <linux/delay.h>
-+#include <linux/dev_printk.h>
-+#include <linux/errno.h>
-+#include <linux/kernel.h>
-+#include <linux/kthread.h>
-+#include <linux/types.h>
-+
-+#include "cmd.h"
-+#include "context.h"
-+#include "control.h"
-+#include "desc.h"
-+#include "hid.h"
-+#include "receiver.h"
-+#include "resources.h"
-+#include "spec-data.h"
-+#include "spec-device.h"
-+
-+static int ipts_control_get_device_info(struct ipts_context *ipts, struct ipts_device_info *info)
-+{
-+	int ret = 0;
-+	struct ipts_response rsp = { 0 };
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!info)
-+		return -EFAULT;
-+
-+	ret = ipts_cmd_send(ipts, IPTS_CMD_GET_DEVICE_INFO, NULL, 0);
-+	if (ret) {
-+		dev_err(ipts->dev, "GET_DEVICE_INFO: send failed: %d\n", ret);
-+		return ret;
-+	}
-+
-+	ret = ipts_cmd_recv(ipts, IPTS_CMD_GET_DEVICE_INFO, &rsp);
-+	if (ret) {
-+		dev_err(ipts->dev, "GET_DEVICE_INFO: recv failed: %d\n", ret);
-+		return ret;
-+	}
-+
-+	if (rsp.status != IPTS_STATUS_SUCCESS) {
-+		dev_err(ipts->dev, "GET_DEVICE_INFO: cmd failed: %d\n", rsp.status);
-+		return -EBADR;
-+	}
-+
-+	memcpy(info, rsp.payload, sizeof(*info));
-+	return 0;
-+}
-+
-+static int ipts_control_set_mode(struct ipts_context *ipts, enum ipts_mode mode)
-+{
-+	int ret = 0;
-+	struct ipts_set_mode cmd = { 0 };
-+	struct ipts_response rsp = { 0 };
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	cmd.mode = mode;
-+
-+	ret = ipts_cmd_send(ipts, IPTS_CMD_SET_MODE, &cmd, sizeof(cmd));
-+	if (ret) {
-+		dev_err(ipts->dev, "SET_MODE: send failed: %d\n", ret);
-+		return ret;
-+	}
-+
-+	ret = ipts_cmd_recv(ipts, IPTS_CMD_SET_MODE, &rsp);
-+	if (ret) {
-+		dev_err(ipts->dev, "SET_MODE: recv failed: %d\n", ret);
-+		return ret;
-+	}
-+
-+	if (rsp.status != IPTS_STATUS_SUCCESS) {
-+		dev_err(ipts->dev, "SET_MODE: cmd failed: %d\n", rsp.status);
-+		return -EBADR;
-+	}
-+
-+	return 0;
-+}
-+
-+static int ipts_control_set_mem_window(struct ipts_context *ipts, struct ipts_resources *res)
-+{
-+	int i = 0;
-+	int ret = 0;
-+	struct ipts_mem_window cmd = { 0 };
-+	struct ipts_response rsp = { 0 };
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!res)
-+		return -EFAULT;
-+
-+	for (i = 0; i < IPTS_BUFFERS; i++) {
-+		cmd.data_addr_lower[i] = lower_32_bits(res->data[i].dma_address);
-+		cmd.data_addr_upper[i] = upper_32_bits(res->data[i].dma_address);
-+		cmd.feedback_addr_lower[i] = lower_32_bits(res->feedback[i].dma_address);
-+		cmd.feedback_addr_upper[i] = upper_32_bits(res->feedback[i].dma_address);
-+	}
-+
-+	cmd.workqueue_addr_lower = lower_32_bits(res->workqueue.dma_address);
-+	cmd.workqueue_addr_upper = upper_32_bits(res->workqueue.dma_address);
-+
-+	cmd.doorbell_addr_lower = lower_32_bits(res->doorbell.dma_address);
-+	cmd.doorbell_addr_upper = upper_32_bits(res->doorbell.dma_address);
-+
-+	cmd.hid2me_addr_lower = lower_32_bits(res->hid2me.dma_address);
-+	cmd.hid2me_addr_upper = upper_32_bits(res->hid2me.dma_address);
-+
-+	cmd.workqueue_size = IPTS_WORKQUEUE_SIZE;
-+	cmd.workqueue_item_size = IPTS_WORKQUEUE_ITEM_SIZE;
-+
-+	ret = ipts_cmd_send(ipts, IPTS_CMD_SET_MEM_WINDOW, &cmd, sizeof(cmd));
-+	if (ret) {
-+		dev_err(ipts->dev, "SET_MEM_WINDOW: send failed: %d\n", ret);
-+		return ret;
-+	}
-+
-+	ret = ipts_cmd_recv(ipts, IPTS_CMD_SET_MEM_WINDOW, &rsp);
-+	if (ret) {
-+		dev_err(ipts->dev, "SET_MEM_WINDOW: recv failed: %d\n", ret);
-+		return ret;
-+	}
-+
-+	if (rsp.status != IPTS_STATUS_SUCCESS) {
-+		dev_err(ipts->dev, "SET_MEM_WINDOW: cmd failed: %d\n", rsp.status);
-+		return -EBADR;
-+	}
-+
-+	return 0;
-+}
-+
-+static int ipts_control_get_descriptor(struct ipts_context *ipts)
-+{
-+	int ret = 0;
-+	struct ipts_data_header *header = NULL;
-+	struct ipts_get_descriptor cmd = { 0 };
-+	struct ipts_response rsp = { 0 };
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!ipts->resources.descriptor.address)
-+		return -EFAULT;
-+
-+	memset(ipts->resources.descriptor.address, 0, ipts->resources.descriptor.size);
-+
-+	cmd.addr_lower = lower_32_bits(ipts->resources.descriptor.dma_address);
-+	cmd.addr_upper = upper_32_bits(ipts->resources.descriptor.dma_address);
-+	cmd.magic = 8;
-+
-+	ret = ipts_cmd_send(ipts, IPTS_CMD_GET_DESCRIPTOR, &cmd, sizeof(cmd));
-+	if (ret) {
-+		dev_err(ipts->dev, "GET_DESCRIPTOR: send failed: %d\n", ret);
-+		return ret;
-+	}
-+
-+	ret = ipts_cmd_recv(ipts, IPTS_CMD_GET_DESCRIPTOR, &rsp);
-+	if (ret) {
-+		dev_err(ipts->dev, "GET_DESCRIPTOR: recv failed: %d\n", ret);
-+		return ret;
-+	}
-+
-+	if (rsp.status != IPTS_STATUS_SUCCESS) {
-+		dev_err(ipts->dev, "GET_DESCRIPTOR: cmd failed: %d\n", rsp.status);
-+		return -EBADR;
-+	}
-+
-+	header = (struct ipts_data_header *)ipts->resources.descriptor.address;
-+
-+	if (header->type == IPTS_DATA_TYPE_DESCRIPTOR) {
-+		ipts->descriptor.address = &header->data[8];
-+		ipts->descriptor.size = header->size - 8;
-+
-+		return 0;
-+	}
-+
-+	return -ENODATA;
-+}
-+
-+int ipts_control_request_flush(struct ipts_context *ipts)
-+{
-+	int ret = 0;
-+	struct ipts_quiesce_io cmd = { 0 };
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	ret = ipts_cmd_send(ipts, IPTS_CMD_QUIESCE_IO, &cmd, sizeof(cmd));
-+	if (ret)
-+		dev_err(ipts->dev, "QUIESCE_IO: send failed: %d\n", ret);
-+
-+	return ret;
-+}
-+
-+int ipts_control_wait_flush(struct ipts_context *ipts)
-+{
-+	int ret = 0;
-+	struct ipts_response rsp = { 0 };
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	ret = ipts_cmd_recv(ipts, IPTS_CMD_QUIESCE_IO, &rsp);
-+	if (ret) {
-+		dev_err(ipts->dev, "QUIESCE_IO: recv failed: %d\n", ret);
-+		return ret;
-+	}
-+
-+	if (rsp.status == IPTS_STATUS_TIMEOUT)
-+		return -EAGAIN;
-+
-+	if (rsp.status != IPTS_STATUS_SUCCESS) {
-+		dev_err(ipts->dev, "QUIESCE_IO: cmd failed: %d\n", rsp.status);
-+		return -EBADR;
-+	}
-+
-+	return 0;
-+}
-+
-+int ipts_control_request_data(struct ipts_context *ipts)
-+{
-+	int ret = 0;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	ret = ipts_cmd_send(ipts, IPTS_CMD_READY_FOR_DATA, NULL, 0);
-+	if (ret)
-+		dev_err(ipts->dev, "READY_FOR_DATA: send failed: %d\n", ret);
-+
-+	return ret;
-+}
-+
-+int ipts_control_wait_data(struct ipts_context *ipts, bool shutdown)
-+{
-+	int ret = 0;
-+	struct ipts_response rsp = { 0 };
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!shutdown)
-+		ret = ipts_cmd_recv_timeout(ipts, IPTS_CMD_READY_FOR_DATA, &rsp, 0);
-+	else
-+		ret = ipts_cmd_recv(ipts, IPTS_CMD_READY_FOR_DATA, &rsp);
-+
-+	if (ret) {
-+		if (ret != -EAGAIN)
-+			dev_err(ipts->dev, "READY_FOR_DATA: recv failed: %d\n", ret);
-+
-+		return ret;
-+	}
-+
-+	/*
-+	 * During shutdown, it is possible that the sensor has already been disabled.
-+	 */
-+	if (rsp.status == IPTS_STATUS_SENSOR_DISABLED)
-+		return 0;
-+
-+	if (rsp.status == IPTS_STATUS_TIMEOUT)
-+		return -EAGAIN;
-+
-+	if (rsp.status != IPTS_STATUS_SUCCESS) {
-+		dev_err(ipts->dev, "READY_FOR_DATA: cmd failed: %d\n", rsp.status);
-+		return -EBADR;
-+	}
-+
-+	return 0;
-+}
-+
-+int ipts_control_send_feedback(struct ipts_context *ipts, u32 buffer)
-+{
-+	int ret = 0;
-+	struct ipts_feedback cmd = { 0 };
-+	struct ipts_response rsp = { 0 };
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	cmd.buffer = buffer;
-+
-+	ret = ipts_cmd_send(ipts, IPTS_CMD_FEEDBACK, &cmd, sizeof(cmd));
-+	if (ret) {
-+		dev_err(ipts->dev, "FEEDBACK: send failed: %d\n", ret);
-+		return ret;
-+	}
-+
-+	ret = ipts_cmd_recv(ipts, IPTS_CMD_FEEDBACK, &rsp);
-+	if (ret) {
-+		dev_err(ipts->dev, "FEEDBACK: recv failed: %d\n", ret);
-+		return ret;
-+	}
-+
-+	/*
-+	 * We don't know what feedback data looks like so we are sending zeros.
-+	 * See also ipts_control_refill_buffer.
-+	 */
-+	if (rsp.status == IPTS_STATUS_INVALID_PARAMS)
-+		return 0;
-+
-+	if (rsp.status != IPTS_STATUS_SUCCESS) {
-+		dev_err(ipts->dev, "FEEDBACK: cmd failed: %d\n", rsp.status);
-+		return -EBADR;
-+	}
-+
-+	return 0;
-+}
-+
-+int ipts_control_hid2me_feedback(struct ipts_context *ipts, enum ipts_feedback_cmd_type cmd,
-+				 enum ipts_feedback_data_type type, void *data, size_t size)
-+{
-+	struct ipts_feedback_header *header = NULL;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!ipts->resources.hid2me.address)
-+		return -EFAULT;
-+
-+	memset(ipts->resources.hid2me.address, 0, ipts->resources.hid2me.size);
-+	header = (struct ipts_feedback_header *)ipts->resources.hid2me.address;
-+
-+	header->cmd_type = cmd;
-+	header->data_type = type;
-+	header->size = size;
-+	header->buffer = IPTS_HID2ME_BUFFER;
-+
-+	if (size + sizeof(*header) > ipts->resources.hid2me.size)
-+		return -EINVAL;
-+
-+	if (data && size > 0)
-+		memcpy(header->payload, data, size);
-+
-+	return ipts_control_send_feedback(ipts, IPTS_HID2ME_BUFFER);
-+}
-+
-+int ipts_control_start(struct ipts_context *ipts)
-+{
-+	int ret = 0;
-+	struct ipts_device_info info = { 0 };
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	dev_info(ipts->dev, "Starting IPTS\n");
-+
-+	ret = ipts_control_get_device_info(ipts, &info);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to get device info: %d\n", ret);
-+		return ret;
-+	}
-+
-+	ipts->info = info;
-+
-+	ret = ipts_resources_init(&ipts->resources, ipts->dev, info.data_size, info.feedback_size);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to allocate buffers: %d", ret);
-+		return ret;
-+	}
-+
-+	dev_info(ipts->dev, "IPTS EDS Version: %d\n", info.intf_eds);
-+
-+	/*
-+	 * Handle newer devices
-+	 */
-+	if (info.intf_eds > 1) {
-+		/*
-+		 * Fetching the descriptor will only work on newer devices.
-+		 * For older devices, a fallback descriptor will be used.
-+		 */
-+		ret = ipts_control_get_descriptor(ipts);
-+		if (ret) {
-+			dev_err(ipts->dev, "Failed to fetch HID descriptor: %d\n", ret);
-+			return ret;
-+		}
-+
-+		/*
-+		 * Newer devices can be directly initialized in polling mode.
-+		 */
-+		ipts->mode = IPTS_MODE_POLL;
-+	}
-+
-+	ret = ipts_control_set_mode(ipts, ipts->mode);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to set mode: %d\n", ret);
-+		return ret;
-+	}
-+
-+	ret = ipts_control_set_mem_window(ipts, &ipts->resources);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to set memory window: %d\n", ret);
-+		return ret;
-+	}
-+
-+	ret = ipts_receiver_start(ipts);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to start receiver: %d\n", ret);
-+		return ret;
-+	}
-+
-+	ret = ipts_control_request_data(ipts);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to request data: %d\n", ret);
-+		return ret;
-+	}
-+
-+	ipts_hid_enable(ipts);
-+
-+	ret = ipts_hid_init(ipts, info);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to initialize HID device: %d\n", ret);
-+		return ret;
-+	}
-+
-+	return 0;
-+}
-+
-+static int _ipts_control_stop(struct ipts_context *ipts)
-+{
-+	int ret = 0;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	ipts_hid_disable(ipts);
-+	dev_info(ipts->dev, "Stopping IPTS\n");
-+
-+	ret = ipts_receiver_stop(ipts);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to stop receiver: %d\n", ret);
-+		return ret;
-+	}
-+
-+	ret = ipts_resources_free(&ipts->resources);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to free resources: %d\n", ret);
-+		return ret;
-+	}
-+
-+	return 0;
-+}
-+
-+int ipts_control_stop(struct ipts_context *ipts)
-+{
-+	int ret = 0;
-+
-+	ret = _ipts_control_stop(ipts);
-+	if (ret)
-+		return ret;
-+
-+	ret = ipts_hid_free(ipts);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to free HID device: %d\n", ret);
-+		return ret;
-+	}
-+
-+	return 0;
-+}
-+
-+int ipts_control_restart(struct ipts_context *ipts)
-+{
-+	int ret = 0;
-+
-+	ret = _ipts_control_stop(ipts);
-+	if (ret)
-+		return ret;
-+
-+	/*
-+	 * Wait a second to give the sensor time to fully shut down.
-+	 */
-+	msleep(1000);
-+
-+	ret = ipts_control_start(ipts);
-+	if (ret)
-+		return ret;
-+
-+	return 0;
-+}
-diff --git a/drivers/hid/ipts/control.h b/drivers/hid/ipts/control.h
-new file mode 100644
-index 0000000000000..26629c5144edb
---- /dev/null
-+++ b/drivers/hid/ipts/control.h
-@@ -0,0 +1,126 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_CONTROL_H
-+#define IPTS_CONTROL_H
-+
-+#include <linux/types.h>
-+
-+#include "context.h"
-+#include "spec-data.h"
-+#include "spec-device.h"
-+
-+/**
-+ * ipts_control_request_flush() - Stop the data flow.
-+ * @ipts: The IPTS driver context.
-+ *
-+ * Runs the command to stop the data flow on the device.
-+ * All outstanding data needs to be acknowledged using feedback before the command will return.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_control_request_flush(struct ipts_context *ipts);
-+
-+/**
-+ * ipts_control_wait_flush() - Wait until data flow has been stopped.
-+ * @ipts: The IPTS driver context.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_control_wait_flush(struct ipts_context *ipts);
-+
-+/**
-+ * ipts_control_wait_flush() - Notify the device that the driver can receive new data.
-+ * @ipts: The IPTS driver context.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_control_request_data(struct ipts_context *ipts);
-+
-+/**
-+ * ipts_control_wait_data() - Wait until new data is available.
-+ * @ipts: The IPTS driver context.
-+ * @block: Whether to block execution until data is available.
-+ *
-+ * In poll mode, this function will never return while the data flow is active. Instead,
-+ * the poll will be incremented when new data is available.
-+ *
-+ * Returns: 0 on success, <0 on error, -EAGAIN if no data is available.
-+ */
-+int ipts_control_wait_data(struct ipts_context *ipts, bool block);
-+
-+/**
-+ * ipts_control_send_feedback() - Submits a feedback buffer to the device.
-+ * @ipts: The IPTS driver context.
-+ * @buffer: The ID of the buffer containing feedback data.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_control_send_feedback(struct ipts_context *ipts, u32 buffer);
-+
-+/**
-+ * ipts_control_hid2me_feedback() - Sends HID2ME feedback, a special type of feedback.
-+ * @ipts: The IPTS driver context.
-+ * @cmd: The command that will be run on the device.
-+ * @type: The type of the payload that is sent to the device.
-+ * @data: The payload of the feedback command.
-+ * @size: The size of the payload.
-+ *
-+ * HID2ME feedback is a special type of feedback, because it allows interfacing with
-+ * the HID API of the device at any moment, without requiring a buffer that has to
-+ * be acknowledged.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_control_hid2me_feedback(struct ipts_context *ipts, enum ipts_feedback_cmd_type cmd,
-+				 enum ipts_feedback_data_type type, void *data, size_t size);
-+
-+/**
-+ * ipts_control_refill_buffer() - Acknowledges that data in a buffer has been processed.
-+ * @ipts: The IPTS driver context.
-+ * @buffer: The buffer that has been processed and can be refilled.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+static inline int ipts_control_refill_buffer(struct ipts_context *ipts, u32 buffer)
-+{
-+	/*
-+	 * IPTS expects structured data in the feedback buffer matching the buffer that will be
-+	 * refilled. We don't know what that data looks like, so we just keep the buffer empty.
-+	 * This results in an INVALID_PARAMS error, but the buffer gets refilled without an issue.
-+	 * Sending a minimal structure with the buffer ID fixes the error, but breaks refilling
-+	 * the buffers on some devices.
-+	 */
-+
-+	return ipts_control_send_feedback(ipts, buffer);
-+}
-+
-+/**
-+ * ipts_control_start() - Initialized the device and starts the data flow.
-+ * @ipts: The IPTS driver context.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_control_start(struct ipts_context *ipts);
-+
-+/**
-+ * ipts_control_stop() - Stops the data flow and resets the device.
-+ * @ipts: The IPTS driver context.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_control_stop(struct ipts_context *ipts);
-+
-+/**
-+ * ipts_control_restart() - Stops the device and starts it again.
-+ * @ipts: The IPTS driver context.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_control_restart(struct ipts_context *ipts);
-+
-+#endif /* IPTS_CONTROL_H */
-diff --git a/drivers/hid/ipts/desc.h b/drivers/hid/ipts/desc.h
-new file mode 100644
-index 0000000000000..307438c7c80cd
---- /dev/null
-+++ b/drivers/hid/ipts/desc.h
-@@ -0,0 +1,80 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2022-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_DESC_H
-+#define IPTS_DESC_H
-+
-+#include <linux/types.h>
-+
-+#define IPTS_HID_REPORT_SINGLETOUCH 64
-+#define IPTS_HID_REPORT_DATA	    65
-+#define IPTS_HID_REPORT_SET_MODE    66
-+
-+#define IPTS_HID_REPORT_DATA_SIZE 7485
-+
-+/*
-+ * HID descriptor for singletouch data.
-+ * This descriptor should be present on all IPTS devices.
-+ */
-+static const u8 ipts_singletouch_descriptor[] = {
-+	0x05, 0x0D,	  /*  Usage Page (Digitizer),            */
-+	0x09, 0x04,	  /*  Usage (Touchscreen),               */
-+	0xA1, 0x01,	  /*  Collection (Application),          */
-+	0x85, 0x40,	  /*      Report ID (64),                */
-+	0x09, 0x42,	  /*      Usage (Tip Switch),            */
-+	0x15, 0x00,	  /*      Logical Minimum (0),           */
-+	0x25, 0x01,	  /*      Logical Maximum (1),           */
-+	0x75, 0x01,	  /*      Report Size (1),               */
-+	0x95, 0x01,	  /*      Report Count (1),              */
-+	0x81, 0x02,	  /*      Input (Variable),              */
-+	0x95, 0x07,	  /*      Report Count (7),              */
-+	0x81, 0x03,	  /*      Input (Constant, Variable),    */
-+	0x05, 0x01,	  /*      Usage Page (Desktop),          */
-+	0x09, 0x30,	  /*      Usage (X),                     */
-+	0x75, 0x10,	  /*      Report Size (16),              */
-+	0x95, 0x01,	  /*      Report Count (1),              */
-+	0xA4,		  /*      Push,                          */
-+	0x55, 0x0E,	  /*      Unit Exponent (14),            */
-+	0x65, 0x11,	  /*      Unit (Centimeter),             */
-+	0x46, 0x76, 0x0B, /*      Physical Maximum (2934),       */
-+	0x26, 0xFF, 0x7F, /*      Logical Maximum (32767),       */
-+	0x81, 0x02,	  /*      Input (Variable),              */
-+	0x09, 0x31,	  /*      Usage (Y),                     */
-+	0x46, 0x74, 0x06, /*      Physical Maximum (1652),       */
-+	0x26, 0xFF, 0x7F, /*      Logical Maximum (32767),       */
-+	0x81, 0x02,	  /*      Input (Variable),              */
-+	0xB4,		  /*      Pop,                           */
-+	0xC0,		  /*  End Collection                     */
-+};
-+
-+/*
-+ * Fallback HID descriptor for older devices that do not have
-+ * the ability to query their HID descriptor.
-+ */
-+static const u8 ipts_fallback_descriptor[] = {
-+	0x05, 0x0D,	  /*  Usage Page (Digitizer),            */
-+	0x09, 0x0F,	  /*  Usage (Capacitive Hm Digitizer),   */
-+	0xA1, 0x01,	  /*  Collection (Application),          */
-+	0x85, 0x41,	  /*      Report ID (65),                */
-+	0x09, 0x56,	  /*      Usage (Scan Time),             */
-+	0x95, 0x01,	  /*      Report Count (1),              */
-+	0x75, 0x10,	  /*      Report Size (16),              */
-+	0x81, 0x02,	  /*      Input (Variable),              */
-+	0x09, 0x61,	  /*      Usage (Gesture Char Quality),  */
-+	0x75, 0x08,	  /*      Report Size (8),               */
-+	0x96, 0x3D, 0x1D, /*      Report Count (7485),           */
-+	0x81, 0x03,	  /*      Input (Constant, Variable),    */
-+	0x85, 0x42,	  /*      Report ID (66),                */
-+	0x06, 0x00, 0xFF, /*      Usage Page (FF00h),            */
-+	0x09, 0xC8,	  /*      Usage (C8h),                   */
-+	0x75, 0x08,	  /*      Report Size (8),               */
-+	0x95, 0x01,	  /*      Report Count (1),              */
-+	0xB1, 0x02,	  /*      Feature (Variable),            */
-+	0xC0,		  /*  End Collection,                    */
-+};
-+
-+#endif /* IPTS_DESC_H */
-diff --git a/drivers/hid/ipts/eds1.c b/drivers/hid/ipts/eds1.c
-new file mode 100644
-index 0000000000000..ecbb3a8bdaf60
---- /dev/null
-+++ b/drivers/hid/ipts/eds1.c
-@@ -0,0 +1,103 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Copyright (c) 2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#include <linux/err.h>
-+#include <linux/gfp.h>
-+#include <linux/hid.h>
-+#include <linux/slab.h>
-+#include <linux/types.h>
-+
-+#include "context.h"
-+#include "control.h"
-+#include "desc.h"
-+#include "spec-device.h"
-+
-+int ipts_eds1_get_descriptor(struct ipts_context *ipts, u8 **desc_buffer, size_t *desc_size)
-+{
-+	size_t size = 0;
-+	u8 *buffer = NULL;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!desc_buffer)
-+		return -EFAULT;
-+
-+	if (!desc_size)
-+		return -EFAULT;
-+
-+	size = sizeof(ipts_singletouch_descriptor) + sizeof(ipts_fallback_descriptor);
-+
-+	buffer = kzalloc(size, GFP_KERNEL);
-+	if (!buffer)
-+		return -ENOMEM;
-+
-+	memcpy(buffer, ipts_singletouch_descriptor, sizeof(ipts_singletouch_descriptor));
-+	memcpy(&buffer[sizeof(ipts_singletouch_descriptor)], ipts_fallback_descriptor,
-+	       sizeof(ipts_fallback_descriptor));
-+
-+	*desc_size = size;
-+	*desc_buffer = buffer;
-+
-+	return 0;
-+}
-+
-+static int ipts_eds1_switch_mode(struct ipts_context *ipts, enum ipts_mode mode)
-+{
-+	int ret = 0;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (ipts->mode == mode)
-+		return 0;
-+
-+	ipts->mode = mode;
-+
-+	ret = ipts_control_restart(ipts);
-+	if (ret)
-+		dev_err(ipts->dev, "Failed to switch modes: %d\n", ret);
-+
-+	return ret;
-+}
-+
-+int ipts_eds1_raw_request(struct ipts_context *ipts, u8 *buffer, size_t size, u8 report_id,
-+			  enum hid_report_type report_type, enum hid_class_request request_type)
-+{
-+	int ret = 0;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!buffer)
-+		return -EFAULT;
-+
-+	if (report_id != IPTS_HID_REPORT_SET_MODE)
-+		return -EIO;
-+
-+	if (report_type != HID_FEATURE_REPORT)
-+		return -EIO;
-+
-+	if (size != 2)
-+		return -EINVAL;
-+
-+	/*
-+	 * Implement mode switching report for older devices without native HID support.
-+	 */
-+
-+	if (request_type == HID_REQ_GET_REPORT) {
-+		memset(buffer, 0, size);
-+		buffer[0] = report_id;
-+		buffer[1] = ipts->mode;
-+	} else if (request_type == HID_REQ_SET_REPORT) {
-+		return ipts_eds1_switch_mode(ipts, buffer[1]);
-+	} else {
-+		return -EIO;
-+	}
-+
-+	return ret;
-+}
-diff --git a/drivers/hid/ipts/eds1.h b/drivers/hid/ipts/eds1.h
-new file mode 100644
-index 0000000000000..eeeb6575e3e89
---- /dev/null
-+++ b/drivers/hid/ipts/eds1.h
-@@ -0,0 +1,35 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Copyright (c) 2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#include <linux/hid.h>
-+#include <linux/types.h>
-+
-+#include "context.h"
-+
-+/**
-+ * ipts_eds1_get_descriptor() - Assembles the HID descriptor of the device.
-+ * @ipts: The IPTS driver context.
-+ * @desc_buffer: A pointer to the location where the address of the allocated buffer is stored.
-+ * @desc_size: A pointer to the location where the size of the allocated buffer is stored.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_eds1_get_descriptor(struct ipts_context *ipts, u8 **desc_buffer, size_t *desc_size);
-+
-+/**
-+ * ipts_eds1_raw_request() - Executes an output or feature report on the device.
-+ * @ipts: The IPTS driver context.
-+ * @buffer: The buffer containing the report.
-+ * @size: The size of the buffer.
-+ * @report_id: The HID report ID.
-+ * @report_type: Whether this report is an output or a feature report.
-+ * @request_type: Whether this report requests or sends data.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_eds1_raw_request(struct ipts_context *ipts, u8 *buffer, size_t size, u8 report_id,
-+			  enum hid_report_type report_type, enum hid_class_request request_type);
-diff --git a/drivers/hid/ipts/eds2.c b/drivers/hid/ipts/eds2.c
-new file mode 100644
-index 0000000000000..198dc65d78876
---- /dev/null
-+++ b/drivers/hid/ipts/eds2.c
-@@ -0,0 +1,144 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Copyright (c) 2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#include <linux/completion.h>
-+#include <linux/err.h>
-+#include <linux/gfp.h>
-+#include <linux/mutex.h>
-+#include <linux/slab.h>
-+#include <linux/types.h>
-+
-+#include "context.h"
-+#include "control.h"
-+#include "desc.h"
-+#include "spec-data.h"
-+
-+int ipts_eds2_get_descriptor(struct ipts_context *ipts, u8 **desc_buffer, size_t *desc_size)
-+{
-+	size_t size = 0;
-+	u8 *buffer = NULL;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!desc_buffer)
-+		return -EFAULT;
-+
-+	if (!desc_size)
-+		return -EFAULT;
-+
-+	size = sizeof(ipts_singletouch_descriptor) + ipts->descriptor.size;
-+
-+	buffer = kzalloc(size, GFP_KERNEL);
-+	if (!buffer)
-+		return -ENOMEM;
-+
-+	memcpy(buffer, ipts_singletouch_descriptor, sizeof(ipts_singletouch_descriptor));
-+	memcpy(&buffer[sizeof(ipts_singletouch_descriptor)], ipts->descriptor.address,
-+	       ipts->descriptor.size);
-+
-+	*desc_size = size;
-+	*desc_buffer = buffer;
-+
-+	return 0;
-+}
-+
-+static int ipts_eds2_get_feature(struct ipts_context *ipts, u8 *buffer, size_t size, u8 report_id,
-+				 enum ipts_feedback_data_type type)
-+{
-+	int ret = 0;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!buffer)
-+		return -EFAULT;
-+
-+	mutex_lock(&ipts->feature_lock);
-+
-+	memset(buffer, 0, size);
-+	buffer[0] = report_id;
-+
-+	memset(&ipts->feature_report, 0, sizeof(ipts->feature_report));
-+	reinit_completion(&ipts->feature_event);
-+
-+	ret = ipts_control_hid2me_feedback(ipts, IPTS_FEEDBACK_CMD_TYPE_NONE, type, buffer, size);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to send hid2me feedback: %d\n", ret);
-+		goto out;
-+	}
-+
-+	ret = wait_for_completion_timeout(&ipts->feature_event, msecs_to_jiffies(5000));
-+	if (ret == 0) {
-+		dev_warn(ipts->dev, "GET_FEATURES timed out!\n");
-+		ret = -EIO;
-+		goto out;
-+	}
-+
-+	if (!ipts->feature_report.address) {
-+		ret = -EFAULT;
-+		goto out;
-+	}
-+
-+	if (ipts->feature_report.size > size) {
-+		ret = -ETOOSMALL;
-+		goto out;
-+	}
-+
-+	ret = ipts->feature_report.size;
-+	memcpy(buffer, ipts->feature_report.address, ipts->feature_report.size);
-+
-+out:
-+	mutex_unlock(&ipts->feature_lock);
-+	return ret;
-+}
-+
-+static int ipts_eds2_set_feature(struct ipts_context *ipts, u8 *buffer, size_t size, u8 report_id,
-+				 enum ipts_feedback_data_type type)
-+{
-+	int ret = 0;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!buffer)
-+		return -EFAULT;
-+
-+	buffer[0] = report_id;
-+
-+	ret = ipts_control_hid2me_feedback(ipts, IPTS_FEEDBACK_CMD_TYPE_NONE, type, buffer, size);
-+	if (ret)
-+		dev_err(ipts->dev, "Failed to send hid2me feedback: %d\n", ret);
-+
-+	return ret;
-+}
-+
-+int ipts_eds2_raw_request(struct ipts_context *ipts, u8 *buffer, size_t size, u8 report_id,
-+			  enum hid_report_type report_type, enum hid_class_request request_type)
-+{
-+	enum ipts_feedback_data_type feedback_type = IPTS_FEEDBACK_DATA_TYPE_VENDOR;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!buffer)
-+		return -EFAULT;
-+
-+	if (report_type == HID_OUTPUT_REPORT && request_type == HID_REQ_SET_REPORT)
-+		feedback_type = IPTS_FEEDBACK_DATA_TYPE_OUTPUT_REPORT;
-+	else if (report_type == HID_FEATURE_REPORT && request_type == HID_REQ_GET_REPORT)
-+		feedback_type = IPTS_FEEDBACK_DATA_TYPE_GET_FEATURES;
-+	else if (report_type == HID_FEATURE_REPORT && request_type == HID_REQ_SET_REPORT)
-+		feedback_type = IPTS_FEEDBACK_DATA_TYPE_SET_FEATURES;
-+	else
-+		return -EIO;
-+
-+	if (request_type == HID_REQ_GET_REPORT)
-+		return ipts_eds2_get_feature(ipts, buffer, size, report_id, feedback_type);
-+	else
-+		return ipts_eds2_set_feature(ipts, buffer, size, report_id, feedback_type);
-+}
-diff --git a/drivers/hid/ipts/eds2.h b/drivers/hid/ipts/eds2.h
-new file mode 100644
-index 0000000000000..064e3716907ab
---- /dev/null
-+++ b/drivers/hid/ipts/eds2.h
-@@ -0,0 +1,35 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Copyright (c) 2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#include <linux/hid.h>
-+#include <linux/types.h>
-+
-+#include "context.h"
-+
-+/**
-+ * ipts_eds2_get_descriptor() - Assembles the HID descriptor of the device.
-+ * @ipts: The IPTS driver context.
-+ * @desc_buffer: A pointer to the location where the address of the allocated buffer is stored.
-+ * @desc_size: A pointer to the location where the size of the allocated buffer is stored.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_eds2_get_descriptor(struct ipts_context *ipts, u8 **desc_buffer, size_t *desc_size);
-+
-+/**
-+ * ipts_eds2_raw_request() - Executes an output or feature report on the device.
-+ * @ipts: The IPTS driver context.
-+ * @buffer: The buffer containing the report.
-+ * @size: The size of the buffer.
-+ * @report_id: The HID report ID.
-+ * @report_type: Whether this report is an output or a feature report.
-+ * @request_type: Whether this report requests or sends data.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_eds2_raw_request(struct ipts_context *ipts, u8 *buffer, size_t size, u8 report_id,
-+			  enum hid_report_type report_type, enum hid_class_request request_type);
-diff --git a/drivers/hid/ipts/hid.c b/drivers/hid/ipts/hid.c
-new file mode 100644
-index 0000000000000..e34a1a4f9fa77
---- /dev/null
-+++ b/drivers/hid/ipts/hid.c
-@@ -0,0 +1,225 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Copyright (c) 2022-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#include <linux/completion.h>
-+#include <linux/err.h>
-+#include <linux/gfp.h>
-+#include <linux/hid.h>
-+#include <linux/mutex.h>
-+#include <linux/slab.h>
-+#include <linux/types.h>
-+
-+#include "context.h"
-+#include "desc.h"
-+#include "eds1.h"
-+#include "eds2.h"
-+#include "hid.h"
-+#include "spec-data.h"
-+#include "spec-hid.h"
-+
-+void ipts_hid_enable(struct ipts_context *ipts)
-+{
-+	WRITE_ONCE(ipts->hid_active, true);
-+}
-+
-+void ipts_hid_disable(struct ipts_context *ipts)
-+{
-+	WRITE_ONCE(ipts->hid_active, false);
-+}
-+
-+static int ipts_hid_start(struct hid_device *hid)
-+{
-+	return 0;
-+}
-+
-+static void ipts_hid_stop(struct hid_device *hid)
-+{
-+}
-+
-+static int ipts_hid_parse(struct hid_device *hid)
-+{
-+	int ret = 0;
-+	struct ipts_context *ipts = NULL;
-+
-+	u8 *buffer = NULL;
-+	size_t size = 0;
-+
-+	if (!hid)
-+		return -ENODEV;
-+
-+	ipts = hid->driver_data;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!READ_ONCE(ipts->hid_active))
-+		return -ENODEV;
-+
-+	if (ipts->info.intf_eds == 1)
-+		ret = ipts_eds1_get_descriptor(ipts, &buffer, &size);
-+	else
-+		ret = ipts_eds2_get_descriptor(ipts, &buffer, &size);
-+
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to allocate HID descriptor: %d\n", ret);
-+		return ret;
-+	}
-+
-+	ret = hid_parse_report(hid, buffer, size);
-+	kfree(buffer);
-+
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to parse HID descriptor: %d\n", ret);
-+		return ret;
-+	}
-+
-+	return 0;
-+}
-+
-+static int ipts_hid_raw_request(struct hid_device *hid, unsigned char report_id, __u8 *buffer,
-+				size_t size, unsigned char report_type, int request_type)
-+{
-+	struct ipts_context *ipts = NULL;
-+
-+	if (!hid)
-+		return -ENODEV;
-+
-+	ipts = hid->driver_data;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!READ_ONCE(ipts->hid_active))
-+		return -ENODEV;
-+
-+	if (ipts->info.intf_eds == 1) {
-+		return ipts_eds1_raw_request(ipts, buffer, size, report_id, report_type,
-+					     request_type);
-+	} else {
-+		return ipts_eds2_raw_request(ipts, buffer, size, report_id, report_type,
-+					     request_type);
-+	}
-+}
-+
-+static struct hid_ll_driver ipts_hid_driver = {
-+	.start = ipts_hid_start,
-+	.stop = ipts_hid_stop,
-+	.open = ipts_hid_start,
-+	.close = ipts_hid_stop,
-+	.parse = ipts_hid_parse,
-+	.raw_request = ipts_hid_raw_request,
-+};
-+
-+int ipts_hid_input_data(struct ipts_context *ipts, u32 buffer)
-+{
-+	u8 *temp = NULL;
-+	struct ipts_hid_header *frame = NULL;
-+	struct ipts_data_header *header = NULL;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!ipts->hid)
-+		return -ENODEV;
-+
-+	if (!READ_ONCE(ipts->hid_active))
-+		return -ENODEV;
-+
-+	header = (struct ipts_data_header *)ipts->resources.data[buffer].address;
-+
-+	temp = ipts->resources.report.address;
-+	memset(temp, 0, ipts->resources.report.size);
-+
-+	if (!header)
-+		return -EFAULT;
-+
-+	if (header->size == 0)
-+		return 0;
-+
-+	if (header->type == IPTS_DATA_TYPE_HID)
-+		return hid_input_report(ipts->hid, HID_INPUT_REPORT, header->data, header->size, 1);
-+
-+	if (header->type == IPTS_DATA_TYPE_GET_FEATURES) {
-+		ipts->feature_report.address = header->data;
-+		ipts->feature_report.size = header->size;
-+
-+		complete_all(&ipts->feature_event);
-+		return 0;
-+	}
-+
-+	if (header->type != IPTS_DATA_TYPE_FRAME)
-+		return 0;
-+
-+	if (header->size + 3 + sizeof(struct ipts_hid_header) > IPTS_HID_REPORT_DATA_SIZE)
-+		return -ERANGE;
-+
-+	/*
-+	 * Synthesize a HID report matching the devices that natively send HID reports
-+	 */
-+	temp[0] = IPTS_HID_REPORT_DATA;
-+
-+	frame = (struct ipts_hid_header *)&temp[3];
-+	frame->type = IPTS_HID_FRAME_TYPE_RAW;
-+	frame->size = header->size + sizeof(*frame);
-+
-+	memcpy(frame->data, header->data, header->size);
-+
-+	return hid_input_report(ipts->hid, HID_INPUT_REPORT, temp, IPTS_HID_REPORT_DATA_SIZE, 1);
-+}
-+
-+int ipts_hid_init(struct ipts_context *ipts, struct ipts_device_info info)
-+{
-+	int ret = 0;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (ipts->hid)
-+		return 0;
-+
-+	ipts->hid = hid_allocate_device();
-+	if (IS_ERR(ipts->hid)) {
-+		int err = PTR_ERR(ipts->hid);
-+
-+		dev_err(ipts->dev, "Failed to allocate HID device: %d\n", err);
-+		return err;
-+	}
-+
-+	ipts->hid->driver_data = ipts;
-+	ipts->hid->dev.parent = ipts->dev;
-+	ipts->hid->ll_driver = &ipts_hid_driver;
-+
-+	ipts->hid->vendor = info.vendor;
-+	ipts->hid->product = info.product;
-+	ipts->hid->group = HID_GROUP_GENERIC;
-+
-+	snprintf(ipts->hid->name, sizeof(ipts->hid->name), "IPTS %04X:%04X", info.vendor,
-+		 info.product);
-+
-+	ret = hid_add_device(ipts->hid);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to add HID device: %d\n", ret);
-+		ipts_hid_free(ipts);
-+		return ret;
-+	}
-+
-+	return 0;
-+}
-+
-+int ipts_hid_free(struct ipts_context *ipts)
-+{
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (!ipts->hid)
-+		return 0;
-+
-+	hid_destroy_device(ipts->hid);
-+	ipts->hid = NULL;
-+
-+	return 0;
-+}
-diff --git a/drivers/hid/ipts/hid.h b/drivers/hid/ipts/hid.h
-new file mode 100644
-index 0000000000000..1ebe77447903a
---- /dev/null
-+++ b/drivers/hid/ipts/hid.h
-@@ -0,0 +1,24 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2022-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_HID_H
-+#define IPTS_HID_H
-+
-+#include <linux/types.h>
-+
-+#include "context.h"
-+#include "spec-device.h"
-+
-+void ipts_hid_enable(struct ipts_context *ipts);
-+void ipts_hid_disable(struct ipts_context *ipts);
-+
-+int ipts_hid_input_data(struct ipts_context *ipts, u32 buffer);
-+
-+int ipts_hid_init(struct ipts_context *ipts, struct ipts_device_info info);
-+int ipts_hid_free(struct ipts_context *ipts);
-+
-+#endif /* IPTS_HID_H */
-diff --git a/drivers/hid/ipts/main.c b/drivers/hid/ipts/main.c
-new file mode 100644
-index 0000000000000..fb5b5c13ee3ea
---- /dev/null
-+++ b/drivers/hid/ipts/main.c
-@@ -0,0 +1,126 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#include <linux/completion.h>
-+#include <linux/delay.h>
-+#include <linux/device.h>
-+#include <linux/dma-mapping.h>
-+#include <linux/mei_cl_bus.h>
-+#include <linux/mod_devicetable.h>
-+#include <linux/module.h>
-+#include <linux/mutex.h>
-+#include <linux/slab.h>
-+#include <linux/stddef.h>
-+#include <linux/types.h>
-+
-+#include "context.h"
-+#include "control.h"
-+#include "mei.h"
-+#include "receiver.h"
-+#include "spec-device.h"
-+
-+/*
-+ * The MEI client ID for IPTS functionality.
-+ */
-+#define IPTS_ID UUID_LE(0x3e8d0870, 0x271a, 0x4208, 0x8e, 0xb5, 0x9a, 0xcb, 0x94, 0x02, 0xae, 0x04)
-+
-+static int ipts_set_dma_mask(struct mei_cl_device *cldev)
-+{
-+	if (!cldev)
-+		return -EFAULT;
-+
-+	if (!dma_coerce_mask_and_coherent(&cldev->dev, DMA_BIT_MASK(64)))
-+		return 0;
-+
-+	return dma_coerce_mask_and_coherent(&cldev->dev, DMA_BIT_MASK(32));
-+}
-+
-+static int ipts_probe(struct mei_cl_device *cldev, const struct mei_cl_device_id *id)
-+{
-+	int ret = 0;
-+	struct ipts_context *ipts = NULL;
-+
-+	if (!cldev)
-+		return -EFAULT;
-+
-+	ret = ipts_set_dma_mask(cldev);
-+	if (ret) {
-+		dev_err(&cldev->dev, "Failed to set DMA mask for IPTS: %d\n", ret);
-+		return ret;
-+	}
-+
-+	ret = mei_cldev_enable(cldev);
-+	if (ret) {
-+		dev_err(&cldev->dev, "Failed to enable MEI device: %d\n", ret);
-+		return ret;
-+	}
-+
-+	ipts = devm_kzalloc(&cldev->dev, sizeof(*ipts), GFP_KERNEL);
-+	if (!ipts) {
-+		mei_cldev_disable(cldev);
-+		return -ENOMEM;
-+	}
-+
-+	ret = ipts_mei_init(&ipts->mei, cldev);
-+	if (ret) {
-+		dev_err(&cldev->dev, "Failed to init MEI bus logic: %d\n", ret);
-+		return ret;
-+	}
-+
-+	ipts->dev = &cldev->dev;
-+	ipts->mode = IPTS_MODE_EVENT;
-+
-+	mutex_init(&ipts->feature_lock);
-+	init_completion(&ipts->feature_event);
-+
-+	mei_cldev_set_drvdata(cldev, ipts);
-+
-+	ret = ipts_control_start(ipts);
-+	if (ret) {
-+		dev_err(&cldev->dev, "Failed to start IPTS: %d\n", ret);
-+		return ret;
-+	}
-+
-+	return 0;
-+}
-+
-+static void ipts_remove(struct mei_cl_device *cldev)
-+{
-+	int ret = 0;
-+	struct ipts_context *ipts = NULL;
-+
-+	if (!cldev) {
-+		pr_err("MEI device is NULL!");
-+		return;
-+	}
-+
-+	ipts = mei_cldev_get_drvdata(cldev);
-+
-+	ret = ipts_control_stop(ipts);
-+	if (ret)
-+		dev_err(&cldev->dev, "Failed to stop IPTS: %d\n", ret);
-+
-+	mei_cldev_disable(cldev);
-+}
-+
-+static struct mei_cl_device_id ipts_device_id_table[] = {
-+	{ .uuid = IPTS_ID, .version = MEI_CL_VERSION_ANY },
-+	{},
-+};
-+MODULE_DEVICE_TABLE(mei, ipts_device_id_table);
-+
-+static struct mei_cl_driver ipts_driver = {
-+	.id_table = ipts_device_id_table,
-+	.name = "ipts",
-+	.probe = ipts_probe,
-+	.remove = ipts_remove,
-+};
-+module_mei_cl_driver(ipts_driver);
-+
-+MODULE_DESCRIPTION("IPTS touchscreen driver");
-+MODULE_AUTHOR("Dorian Stoll <dorian.stoll@tmsp.io>");
-+MODULE_LICENSE("GPL");
-diff --git a/drivers/hid/ipts/mei.c b/drivers/hid/ipts/mei.c
-new file mode 100644
-index 0000000000000..1e0395ceae4a4
---- /dev/null
-+++ b/drivers/hid/ipts/mei.c
-@@ -0,0 +1,188 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Copyright (c) 2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#include <linux/device.h>
-+#include <linux/errno.h>
-+#include <linux/jiffies.h>
-+#include <linux/list.h>
-+#include <linux/mei_cl_bus.h>
-+#include <linux/printk.h>
-+#include <linux/rwsem.h>
-+#include <linux/types.h>
-+#include <linux/wait.h>
-+
-+#include "context.h"
-+#include "mei.h"
-+
-+static void locked_list_add(struct list_head *new, struct list_head *head,
-+			    struct rw_semaphore *lock)
-+{
-+	down_write(lock);
-+	list_add(new, head);
-+	up_write(lock);
-+}
-+
-+static void locked_list_del(struct list_head *entry, struct rw_semaphore *lock)
-+{
-+	down_write(lock);
-+	list_del(entry);
-+	up_write(lock);
-+}
-+
-+static void ipts_mei_incoming(struct mei_cl_device *cldev)
-+{
-+	ssize_t ret = 0;
-+	struct ipts_mei_message *entry = NULL;
-+	struct ipts_context *ipts = NULL;
-+
-+	if (!cldev) {
-+		pr_err("MEI device is NULL!");
-+		return;
-+	}
-+
-+	ipts = mei_cldev_get_drvdata(cldev);
-+	if (!ipts) {
-+		pr_err("IPTS driver context is NULL!");
-+		return;
-+	}
-+
-+	entry = devm_kzalloc(ipts->dev, sizeof(*entry), GFP_KERNEL);
-+	if (!entry)
-+		return;
-+
-+	INIT_LIST_HEAD(&entry->list);
-+
-+	do {
-+		ret = mei_cldev_recv(cldev, (u8 *)&entry->rsp, sizeof(entry->rsp));
-+	} while (ret == -EINTR);
-+
-+	if (ret < 0) {
-+		dev_err(ipts->dev, "Error while reading response: %ld\n", ret);
-+		return;
-+	}
-+
-+	if (ret == 0) {
-+		dev_err(ipts->dev, "Received empty response\n");
-+		return;
-+	}
-+
-+	locked_list_add(&entry->list, &ipts->mei.messages, &ipts->mei.message_lock);
-+	wake_up_all(&ipts->mei.message_queue);
-+}
-+
-+static int ipts_mei_search(struct ipts_mei *mei, enum ipts_command_code code,
-+			   struct ipts_response *rsp)
-+{
-+	struct ipts_mei_message *entry = NULL;
-+
-+	if (!mei)
-+		return -EFAULT;
-+
-+	if (!rsp)
-+		return -EFAULT;
-+
-+	down_read(&mei->message_lock);
-+
-+	/*
-+	 * Iterate over the list of received messages, and check if there is one
-+	 * matching the requested command code.
-+	 */
-+	list_for_each_entry(entry, &mei->messages, list) {
-+		if (entry->rsp.cmd == code)
-+			break;
-+	}
-+
-+	up_read(&mei->message_lock);
-+
-+	/*
-+	 * If entry is not the list head, this means that the loop above has been stopped early,
-+	 * and that we found a matching element. We drop the message from the list and return it.
-+	 */
-+	if (!list_entry_is_head(entry, &mei->messages, list)) {
-+		locked_list_del(&entry->list, &mei->message_lock);
-+
-+		*rsp = entry->rsp;
-+		devm_kfree(&mei->cldev->dev, entry);
-+
-+		return 0;
-+	}
-+
-+	return -EAGAIN;
-+}
-+
-+int ipts_mei_recv(struct ipts_mei *mei, enum ipts_command_code code, struct ipts_response *rsp,
-+		  u64 timeout)
-+{
-+	int ret = 0;
-+
-+	if (!mei)
-+		return -EFAULT;
-+
-+	/*
-+	 * A timeout of 0 means check and return immideately.
-+	 */
-+	if (timeout == 0)
-+		return ipts_mei_search(mei, code, rsp);
-+
-+	/*
-+	 * A timeout of less than 0 means to wait forever.
-+	 */
-+	if (timeout < 0) {
-+		wait_event(mei->message_queue, ipts_mei_search(mei, code, rsp) == 0);
-+		return 0;
-+	}
-+
-+	ret = wait_event_timeout(mei->message_queue, ipts_mei_search(mei, code, rsp) == 0,
-+				 msecs_to_jiffies(timeout));
-+
-+	if (ret > 0)
-+		return 0;
-+
-+	return -EAGAIN;
-+}
-+
-+int ipts_mei_send(struct ipts_mei *mei, void *data, size_t length)
-+{
-+	int ret = 0;
-+
-+	if (!mei)
-+		return -EFAULT;
-+
-+	if (!mei->cldev)
-+		return -EFAULT;
-+
-+	if (!data)
-+		return -EFAULT;
-+
-+	do {
-+		ret = mei_cldev_send(mei->cldev, (u8 *)data, length);
-+	} while (ret == -EINTR);
-+
-+	if (ret < 0)
-+		return ret;
-+
-+	return 0;
-+}
-+
-+int ipts_mei_init(struct ipts_mei *mei, struct mei_cl_device *cldev)
-+{
-+	if (!mei)
-+		return -EFAULT;
-+
-+	if (!cldev)
-+		return -EFAULT;
-+
-+	mei->cldev = cldev;
-+
-+	INIT_LIST_HEAD(&mei->messages);
-+	init_waitqueue_head(&mei->message_queue);
-+	init_rwsem(&mei->message_lock);
-+
-+	mei_cldev_register_rx_cb(cldev, ipts_mei_incoming);
-+
-+	return 0;
-+}
-diff --git a/drivers/hid/ipts/mei.h b/drivers/hid/ipts/mei.h
-new file mode 100644
-index 0000000000000..973bade6b0fdd
---- /dev/null
-+++ b/drivers/hid/ipts/mei.h
-@@ -0,0 +1,66 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_MEI_H
-+#define IPTS_MEI_H
-+
-+#include <linux/list.h>
-+#include <linux/mei_cl_bus.h>
-+#include <linux/rwsem.h>
-+#include <linux/types.h>
-+#include <linux/wait.h>
-+
-+#include "spec-device.h"
-+
-+struct ipts_mei_message {
-+	struct list_head list;
-+	struct ipts_response rsp;
-+};
-+
-+struct ipts_mei {
-+	struct mei_cl_device *cldev;
-+
-+	struct list_head messages;
-+
-+	wait_queue_head_t message_queue;
-+	struct rw_semaphore message_lock;
-+};
-+
-+/**
-+ * ipts_mei_recv() - Receive data from a MEI device.
-+ * @mei: The IPTS MEI device context.
-+ * @code: The IPTS command code to look for.
-+ * @rsp: The address that the received data will be copied to.
-+ * @timeout: How many milliseconds the function will wait at most.
-+ *
-+ * A negative timeout means to wait forever.
-+ *
-+ * Returns: 0 on success, <0 on error, -EAGAIN if no response has been received.
-+ */
-+int ipts_mei_recv(struct ipts_mei *mei, enum ipts_command_code code, struct ipts_response *rsp,
-+		  u64 timeout);
-+
-+/**
-+ * ipts_mei_send() - Send data to a MEI device.
-+ * @ipts: The IPTS MEI device context.
-+ * @data: The data to send.
-+ * @size: The size of the data.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_mei_send(struct ipts_mei *mei, void *data, size_t length);
-+
-+/**
-+ * ipts_mei_init() - Initialize the MEI device context.
-+ * @mei: The MEI device context to initialize.
-+ * @cldev: The MEI device the context will be bound to.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_mei_init(struct ipts_mei *mei, struct mei_cl_device *cldev);
-+
-+#endif /* IPTS_MEI_H */
-diff --git a/drivers/hid/ipts/receiver.c b/drivers/hid/ipts/receiver.c
-new file mode 100644
-index 0000000000000..ef66c3c9db807
---- /dev/null
-+++ b/drivers/hid/ipts/receiver.c
-@@ -0,0 +1,250 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#include <linux/delay.h>
-+#include <linux/err.h>
-+#include <linux/kthread.h>
-+#include <linux/time64.h>
-+#include <linux/timekeeping.h>
-+#include <linux/types.h>
-+
-+#include "cmd.h"
-+#include "context.h"
-+#include "control.h"
-+#include "hid.h"
-+#include "resources.h"
-+#include "spec-device.h"
-+#include "thread.h"
-+
-+static void ipts_receiver_next_doorbell(struct ipts_context *ipts)
-+{
-+	u32 *doorbell = (u32 *)ipts->resources.doorbell.address;
-+	*doorbell = *doorbell + 1;
-+}
-+
-+static u32 ipts_receiver_current_doorbell(struct ipts_context *ipts)
-+{
-+	u32 *doorbell = (u32 *)ipts->resources.doorbell.address;
-+	return *doorbell;
-+}
-+
-+static void ipts_receiver_backoff(time64_t last, u32 n)
-+{
-+	/*
-+	 * If the last change was less than n seconds ago,
-+	 * sleep for a shorter period so that new data can be
-+	 * processed quickly. If there was no change for more than
-+	 * n seconds, sleep longer to avoid wasting CPU cycles.
-+	 */
-+	if (last + n > ktime_get_seconds())
-+		usleep_range(1 * USEC_PER_MSEC, 5 * USEC_PER_MSEC);
-+	else
-+		msleep(200);
-+}
-+
-+static int ipts_receiver_event_loop(struct ipts_thread *thread)
-+{
-+	int ret = 0;
-+	u32 buffer = 0;
-+
-+	struct ipts_context *ipts = NULL;
-+	time64_t last = ktime_get_seconds();
-+
-+	if (!thread)
-+		return -EFAULT;
-+
-+	ipts = thread->data;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	dev_info(ipts->dev, "IPTS running in event mode\n");
-+
-+	while (!ipts_thread_should_stop(thread)) {
-+		int i = 0;
-+
-+		for (i = 0; i < IPTS_BUFFERS; i++) {
-+			ret = ipts_control_wait_data(ipts, false);
-+			if (ret == -EAGAIN)
-+				break;
-+
-+			if (ret) {
-+				dev_err(ipts->dev, "Failed to wait for data: %d\n", ret);
-+				continue;
-+			}
-+
-+			buffer = ipts_receiver_current_doorbell(ipts) % IPTS_BUFFERS;
-+			ipts_receiver_next_doorbell(ipts);
-+
-+			ret = ipts_hid_input_data(ipts, buffer);
-+			if (ret)
-+				dev_err(ipts->dev, "Failed to process buffer: %d\n", ret);
-+
-+			ret = ipts_control_refill_buffer(ipts, buffer);
-+			if (ret)
-+				dev_err(ipts->dev, "Failed to send feedback: %d\n", ret);
-+
-+			ret = ipts_control_request_data(ipts);
-+			if (ret)
-+				dev_err(ipts->dev, "Failed to request data: %d\n", ret);
-+
-+			last = ktime_get_seconds();
-+		}
-+
-+		ipts_receiver_backoff(last, 5);
-+	}
-+
-+	ret = ipts_control_request_flush(ipts);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to request flush: %d\n", ret);
-+		return ret;
-+	}
-+
-+	ret = ipts_control_wait_data(ipts, true);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to wait for data: %d\n", ret);
-+
-+		if (ret != -EAGAIN)
-+			return ret;
-+		else
-+			return 0;
-+	}
-+
-+	ret = ipts_control_wait_flush(ipts);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to wait for flush: %d\n", ret);
-+
-+		if (ret != -EAGAIN)
-+			return ret;
-+		else
-+			return 0;
-+	}
-+
-+	return 0;
-+}
-+
-+static int ipts_receiver_poll_loop(struct ipts_thread *thread)
-+{
-+	int ret = 0;
-+	u32 buffer = 0;
-+
-+	u32 doorbell = 0;
-+	u32 lastdb = 0;
-+
-+	struct ipts_context *ipts = NULL;
-+	time64_t last = ktime_get_seconds();
-+
-+	if (!thread)
-+		return -EFAULT;
-+
-+	ipts = thread->data;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	dev_info(ipts->dev, "IPTS running in poll mode\n");
-+
-+	while (true) {
-+		if (ipts_thread_should_stop(thread)) {
-+			ret = ipts_control_request_flush(ipts);
-+			if (ret) {
-+				dev_err(ipts->dev, "Failed to request flush: %d\n", ret);
-+				return ret;
-+			}
-+		}
-+
-+		doorbell = ipts_receiver_current_doorbell(ipts);
-+
-+		/*
-+		 * After filling up one of the data buffers, IPTS will increment
-+		 * the doorbell. The value of the doorbell stands for the *next*
-+		 * buffer that IPTS is going to fill.
-+		 */
-+		while (lastdb != doorbell) {
-+			buffer = lastdb % IPTS_BUFFERS;
-+
-+			ret = ipts_hid_input_data(ipts, buffer);
-+			if (ret)
-+				dev_err(ipts->dev, "Failed to process buffer: %d\n", ret);
-+
-+			ret = ipts_control_refill_buffer(ipts, buffer);
-+			if (ret)
-+				dev_err(ipts->dev, "Failed to send feedback: %d\n", ret);
-+
-+			last = ktime_get_seconds();
-+			lastdb++;
-+		}
-+
-+		if (ipts_thread_should_stop(thread))
-+			break;
-+
-+		ipts_receiver_backoff(last, 5);
-+	}
-+
-+	ret = ipts_control_wait_data(ipts, true);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to wait for data: %d\n", ret);
-+
-+		if (ret != -EAGAIN)
-+			return ret;
-+		else
-+			return 0;
-+	}
-+
-+	ret = ipts_control_wait_flush(ipts);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to wait for flush: %d\n", ret);
-+
-+		if (ret != -EAGAIN)
-+			return ret;
-+		else
-+			return 0;
-+	}
-+
-+	return 0;
-+}
-+
-+int ipts_receiver_start(struct ipts_context *ipts)
-+{
-+	int ret = 0;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	if (ipts->mode == IPTS_MODE_EVENT) {
-+		ret = ipts_thread_start(&ipts->receiver_loop, ipts_receiver_event_loop, ipts,
-+					"ipts_event");
-+	} else if (ipts->mode == IPTS_MODE_POLL) {
-+		ret = ipts_thread_start(&ipts->receiver_loop, ipts_receiver_poll_loop, ipts,
-+					"ipts_poll");
-+	} else {
-+		ret = -EINVAL;
-+	}
-+
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to start receiver loop: %d\n", ret);
-+		return ret;
-+	}
-+
-+	return 0;
-+}
-+
-+int ipts_receiver_stop(struct ipts_context *ipts)
-+{
-+	int ret = 0;
-+
-+	if (!ipts)
-+		return -EFAULT;
-+
-+	ret = ipts_thread_stop(&ipts->receiver_loop);
-+	if (ret) {
-+		dev_err(ipts->dev, "Failed to stop receiver loop: %d\n", ret);
-+		return ret;
-+	}
-+
-+	return 0;
-+}
-diff --git a/drivers/hid/ipts/receiver.h b/drivers/hid/ipts/receiver.h
-new file mode 100644
-index 0000000000000..3de7da62d40c1
---- /dev/null
-+++ b/drivers/hid/ipts/receiver.h
-@@ -0,0 +1,16 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_RECEIVER_H
-+#define IPTS_RECEIVER_H
-+
-+#include "context.h"
-+
-+int ipts_receiver_start(struct ipts_context *ipts);
-+int ipts_receiver_stop(struct ipts_context *ipts);
-+
-+#endif /* IPTS_RECEIVER_H */
-diff --git a/drivers/hid/ipts/resources.c b/drivers/hid/ipts/resources.c
-new file mode 100644
-index 0000000000000..cc14653b2a9f5
---- /dev/null
-+++ b/drivers/hid/ipts/resources.c
-@@ -0,0 +1,131 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#include <linux/dma-mapping.h>
-+#include <linux/slab.h>
-+#include <linux/types.h>
-+
-+#include "desc.h"
-+#include "resources.h"
-+#include "spec-device.h"
-+
-+static int ipts_resources_alloc_buffer(struct ipts_buffer *buffer, struct device *dev, size_t size)
-+{
-+	if (!buffer)
-+		return -EFAULT;
-+
-+	if (buffer->address)
-+		return 0;
-+
-+	buffer->address = dma_alloc_coherent(dev, size, &buffer->dma_address, GFP_KERNEL);
-+
-+	if (!buffer->address)
-+		return -ENOMEM;
-+
-+	buffer->size = size;
-+	buffer->device = dev;
-+
-+	return 0;
-+}
-+
-+static void ipts_resources_free_buffer(struct ipts_buffer *buffer)
-+{
-+	if (!buffer->address)
-+		return;
-+
-+	dma_free_coherent(buffer->device, buffer->size, buffer->address, buffer->dma_address);
-+
-+	buffer->address = NULL;
-+	buffer->size = 0;
-+
-+	buffer->dma_address = 0;
-+	buffer->device = NULL;
-+}
-+
-+int ipts_resources_init(struct ipts_resources *res, struct device *dev, size_t ds, size_t fs)
-+{
-+	int ret = 0;
-+
-+	/*
-+	 * Some compilers (AOSP clang) complain about a redefined
-+	 * variable when this is declared inside of the for loop.
-+	 */
-+	int i = 0;
-+
-+	if (!res)
-+		return -EFAULT;
-+
-+	for (i = 0; i < IPTS_BUFFERS; i++) {
-+		ret = ipts_resources_alloc_buffer(&res->data[i], dev, ds);
-+		if (ret)
-+			goto err;
-+	}
-+
-+	for (i = 0; i < IPTS_BUFFERS; i++) {
-+		ret = ipts_resources_alloc_buffer(&res->feedback[i], dev, fs);
-+		if (ret)
-+			goto err;
-+	}
-+
-+	ret = ipts_resources_alloc_buffer(&res->doorbell, dev, sizeof(u32));
-+	if (ret)
-+		goto err;
-+
-+	ret = ipts_resources_alloc_buffer(&res->workqueue, dev, sizeof(u32));
-+	if (ret)
-+		goto err;
-+
-+	ret = ipts_resources_alloc_buffer(&res->hid2me, dev, fs);
-+	if (ret)
-+		goto err;
-+
-+	ret = ipts_resources_alloc_buffer(&res->descriptor, dev, ds + 8);
-+	if (ret)
-+		goto err;
-+
-+	if (!res->report.address) {
-+		res->report.size = IPTS_HID_REPORT_DATA_SIZE;
-+		res->report.address = kzalloc(res->report.size, GFP_KERNEL);
-+
-+		if (!res->report.address) {
-+			ret = -ENOMEM;
-+			goto err;
-+		}
-+	}
-+
-+	return 0;
-+
-+err:
-+
-+	ipts_resources_free(res);
-+	return ret;
-+}
-+
-+int ipts_resources_free(struct ipts_resources *res)
-+{
-+	int i = 0;
-+
-+	if (!res)
-+		return -EFAULT;
-+
-+	for (i = 0; i < IPTS_BUFFERS; i++)
-+		ipts_resources_free_buffer(&res->data[i]);
-+
-+	for (i = 0; i < IPTS_BUFFERS; i++)
-+		ipts_resources_free_buffer(&res->feedback[i]);
-+
-+	ipts_resources_free_buffer(&res->doorbell);
-+	ipts_resources_free_buffer(&res->workqueue);
-+	ipts_resources_free_buffer(&res->hid2me);
-+	ipts_resources_free_buffer(&res->descriptor);
-+
-+	kfree(res->report.address);
-+	res->report.address = NULL;
-+	res->report.size = 0;
-+
-+	return 0;
-+}
-diff --git a/drivers/hid/ipts/resources.h b/drivers/hid/ipts/resources.h
-new file mode 100644
-index 0000000000000..2068e13285f0e
---- /dev/null
-+++ b/drivers/hid/ipts/resources.h
-@@ -0,0 +1,41 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_RESOURCES_H
-+#define IPTS_RESOURCES_H
-+
-+#include <linux/device.h>
-+#include <linux/types.h>
-+
-+#include "spec-device.h"
-+
-+struct ipts_buffer {
-+	u8 *address;
-+	size_t size;
-+
-+	dma_addr_t dma_address;
-+	struct device *device;
-+};
-+
-+struct ipts_resources {
-+	struct ipts_buffer data[IPTS_BUFFERS];
-+	struct ipts_buffer feedback[IPTS_BUFFERS];
-+
-+	struct ipts_buffer doorbell;
-+	struct ipts_buffer workqueue;
-+	struct ipts_buffer hid2me;
-+
-+	struct ipts_buffer descriptor;
-+
-+	// Buffer for synthesizing HID reports
-+	struct ipts_buffer report;
-+};
-+
-+int ipts_resources_init(struct ipts_resources *res, struct device *dev, size_t ds, size_t fs);
-+int ipts_resources_free(struct ipts_resources *res);
-+
-+#endif /* IPTS_RESOURCES_H */
-diff --git a/drivers/hid/ipts/spec-data.h b/drivers/hid/ipts/spec-data.h
-new file mode 100644
-index 0000000000000..e8dd98895a7ee
---- /dev/null
-+++ b/drivers/hid/ipts/spec-data.h
-@@ -0,0 +1,100 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2016 Intel Corporation
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_SPEC_DATA_H
-+#define IPTS_SPEC_DATA_H
-+
-+#include <linux/build_bug.h>
-+#include <linux/types.h>
-+
-+/**
-+ * enum ipts_feedback_cmd_type - Commands that can be executed on the sensor through feedback.
-+ */
-+enum ipts_feedback_cmd_type {
-+	IPTS_FEEDBACK_CMD_TYPE_NONE = 0,
-+	IPTS_FEEDBACK_CMD_TYPE_SOFT_RESET = 1,
-+	IPTS_FEEDBACK_CMD_TYPE_GOTO_ARMED = 2,
-+	IPTS_FEEDBACK_CMD_TYPE_GOTO_SENSING = 3,
-+	IPTS_FEEDBACK_CMD_TYPE_GOTO_SLEEP = 4,
-+	IPTS_FEEDBACK_CMD_TYPE_GOTO_DOZE = 5,
-+	IPTS_FEEDBACK_CMD_TYPE_HARD_RESET = 6,
-+};
-+
-+/**
-+ * enum ipts_feedback_data_type - Defines what data a feedback buffer contains.
-+ * @IPTS_FEEDBACK_DATA_TYPE_VENDOR:        The buffer contains vendor specific feedback.
-+ * @IPTS_FEEDBACK_DATA_TYPE_SET_FEATURES:  The buffer contains a HID set features report.
-+ * @IPTS_FEEDBACK_DATA_TYPE_GET_FEATURES:  The buffer contains a HID get features report.
-+ * @IPTS_FEEDBACK_DATA_TYPE_OUTPUT_REPORT: The buffer contains a HID output report.
-+ * @IPTS_FEEDBACK_DATA_TYPE_STORE_DATA:    The buffer contains calibration data for the sensor.
-+ */
-+enum ipts_feedback_data_type {
-+	IPTS_FEEDBACK_DATA_TYPE_VENDOR = 0,
-+	IPTS_FEEDBACK_DATA_TYPE_SET_FEATURES = 1,
-+	IPTS_FEEDBACK_DATA_TYPE_GET_FEATURES = 2,
-+	IPTS_FEEDBACK_DATA_TYPE_OUTPUT_REPORT = 3,
-+	IPTS_FEEDBACK_DATA_TYPE_STORE_DATA = 4,
-+};
-+
-+/**
-+ * struct ipts_feedback_header - Header that is prefixed to the data in a feedback buffer.
-+ * @cmd_type:   A command that should be executed on the sensor.
-+ * @size:       The size of the payload to be written.
-+ * @buffer:     The ID of the buffer that contains this feedback data.
-+ * @protocol:   The protocol version of the EDS.
-+ * @data_type:  The type of data that the buffer contains.
-+ * @spi_offset: The offset at which to write the payload data to the sensor.
-+ * @payload:    Payload for the feedback command, or 0 if no payload is sent.
-+ */
-+struct ipts_feedback_header {
-+	enum ipts_feedback_cmd_type cmd_type;
-+	u32 size;
-+	u32 buffer;
-+	u32 protocol;
-+	enum ipts_feedback_data_type data_type;
-+	u32 spi_offset;
-+	u8 reserved[40];
-+	u8 payload[];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_feedback_header) == 64);
-+
-+/**
-+ * enum ipts_data_type - Defines what type of data a buffer contains.
-+ * @IPTS_DATA_TYPE_FRAME:        Raw data frame.
-+ * @IPTS_DATA_TYPE_ERROR:        Error data.
-+ * @IPTS_DATA_TYPE_VENDOR:       Vendor specific data.
-+ * @IPTS_DATA_TYPE_HID:          A HID report.
-+ * @IPTS_DATA_TYPE_GET_FEATURES: The response to a GET_FEATURES HID2ME command.
-+ */
-+enum ipts_data_type {
-+	IPTS_DATA_TYPE_FRAME = 0x00,
-+	IPTS_DATA_TYPE_ERROR = 0x01,
-+	IPTS_DATA_TYPE_VENDOR = 0x02,
-+	IPTS_DATA_TYPE_HID = 0x03,
-+	IPTS_DATA_TYPE_GET_FEATURES = 0x04,
-+	IPTS_DATA_TYPE_DESCRIPTOR = 0x05,
-+};
-+
-+/**
-+ * struct ipts_data_header - Header that is prefixed to the data in a data buffer.
-+ * @type: What data the buffer contains.
-+ * @size: How much data the buffer contains.
-+ * @buffer: Which buffer the data is in.
-+ */
-+struct ipts_data_header {
-+	enum ipts_data_type type;
-+	u32 size;
-+	u32 buffer;
-+	u8 reserved[52];
-+	u8 data[];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_data_header) == 64);
-+
-+#endif /* IPTS_SPEC_DATA_H */
-diff --git a/drivers/hid/ipts/spec-device.h b/drivers/hid/ipts/spec-device.h
-new file mode 100644
-index 0000000000000..41845f9d90257
---- /dev/null
-+++ b/drivers/hid/ipts/spec-device.h
-@@ -0,0 +1,290 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2016 Intel Corporation
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_SPEC_DEVICE_H
-+#define IPTS_SPEC_DEVICE_H
-+
-+#include <linux/build_bug.h>
-+#include <linux/types.h>
-+
-+/*
-+ * The amount of buffers that IPTS can use for data transfer.
-+ */
-+#define IPTS_BUFFERS 16
-+
-+/*
-+ * The buffer ID that is used for HID2ME feedback
-+ */
-+#define IPTS_HID2ME_BUFFER IPTS_BUFFERS
-+
-+/**
-+ * enum ipts_command - Commands that can be sent to the IPTS hardware.
-+ * @IPTS_CMD_GET_DEVICE_INFO:  Retrieves vendor information from the device.
-+ * @IPTS_CMD_SET_MODE:         Changes the mode that the device will operate in.
-+ * @IPTS_CMD_SET_MEM_WINDOW:   Configures memory buffers for passing data between device and driver.
-+ * @IPTS_CMD_QUIESCE_IO:       Stops the data flow from the device to the driver.
-+ * @IPTS_CMD_READY_FOR_DATA:   Informs the device that the driver is ready to receive data.
-+ * @IPTS_CMD_FEEDBACK:         Informs the device that a buffer was processed and can be refilled.
-+ * @IPTS_CMD_CLEAR_MEM_WINDOW: Stops the data flow and clears the buffer addresses on the device.
-+ * @IPTS_CMD_RESET_SENSOR:     Resets the sensor to its default state.
-+ * @IPTS_CMD_GET_DESCRIPTOR:   Retrieves the HID descriptor of the device.
-+ */
-+enum ipts_command_code {
-+	IPTS_CMD_GET_DEVICE_INFO = 0x01,
-+	IPTS_CMD_SET_MODE = 0x02,
-+	IPTS_CMD_SET_MEM_WINDOW = 0x03,
-+	IPTS_CMD_QUIESCE_IO = 0x04,
-+	IPTS_CMD_READY_FOR_DATA = 0x05,
-+	IPTS_CMD_FEEDBACK = 0x06,
-+	IPTS_CMD_CLEAR_MEM_WINDOW = 0x07,
-+	IPTS_CMD_RESET_SENSOR = 0x0B,
-+	IPTS_CMD_GET_DESCRIPTOR = 0x0F,
-+};
-+
-+/**
-+ * enum ipts_status - Possible status codes returned by the IPTS device.
-+ * @IPTS_STATUS_SUCCESS:                 Operation completed successfully.
-+ * @IPTS_STATUS_INVALID_PARAMS:          Command contained an invalid payload.
-+ * @IPTS_STATUS_ACCESS_DENIED:           ME could not validate a buffer address.
-+ * @IPTS_STATUS_CMD_SIZE_ERROR:          Command contains an invalid payload.
-+ * @IPTS_STATUS_NOT_READY:               Buffer addresses have not been set.
-+ * @IPTS_STATUS_REQUEST_OUTSTANDING:     There is an outstanding command of the same type.
-+ * @IPTS_STATUS_NO_SENSOR_FOUND:         No sensor could be found.
-+ * @IPTS_STATUS_OUT_OF_MEMORY:           Not enough free memory for requested operation.
-+ * @IPTS_STATUS_INTERNAL_ERROR:          An unexpected error occurred.
-+ * @IPTS_STATUS_SENSOR_DISABLED:         The sensor has been disabled and must be reinitialized.
-+ * @IPTS_STATUS_COMPAT_CHECK_FAIL:       Compatibility revision check between sensor and ME failed.
-+ *                                       The host can ignore this error and attempt to continue.
-+ * @IPTS_STATUS_SENSOR_EXPECTED_RESET:   The sensor went through a reset initiated by the driver.
-+ * @IPTS_STATUS_SENSOR_UNEXPECTED_RESET: The sensor went through an unexpected reset.
-+ * @IPTS_STATUS_RESET_FAILED:            Requested sensor reset failed to complete.
-+ * @IPTS_STATUS_TIMEOUT:                 The operation timed out.
-+ * @IPTS_STATUS_TEST_MODE_FAIL:          Test mode pattern did not match expected values.
-+ * @IPTS_STATUS_SENSOR_FAIL_FATAL:       The sensor reported an error during reset sequence.
-+ *                                       Further progress is not possible.
-+ * @IPTS_STATUS_SENSOR_FAIL_NONFATAL:    The sensor reported an error during reset sequence.
-+ *                                       The driver can attempt to continue.
-+ * @IPTS_STATUS_INVALID_DEVICE_CAPS:     The device reported invalid capabilities.
-+ * @IPTS_STATUS_QUIESCE_IO_IN_PROGRESS:  Command cannot be completed until Quiesce IO is done.
-+ */
-+enum ipts_status {
-+	IPTS_STATUS_SUCCESS = 0x00,
-+	IPTS_STATUS_INVALID_PARAMS = 0x01,
-+	IPTS_STATUS_ACCESS_DENIED = 0x02,
-+	IPTS_STATUS_CMD_SIZE_ERROR = 0x03,
-+	IPTS_STATUS_NOT_READY = 0x04,
-+	IPTS_STATUS_REQUEST_OUTSTANDING = 0x05,
-+	IPTS_STATUS_NO_SENSOR_FOUND = 0x06,
-+	IPTS_STATUS_OUT_OF_MEMORY = 0x07,
-+	IPTS_STATUS_INTERNAL_ERROR = 0x08,
-+	IPTS_STATUS_SENSOR_DISABLED = 0x09,
-+	IPTS_STATUS_COMPAT_CHECK_FAIL = 0x0A,
-+	IPTS_STATUS_SENSOR_EXPECTED_RESET = 0x0B,
-+	IPTS_STATUS_SENSOR_UNEXPECTED_RESET = 0x0C,
-+	IPTS_STATUS_RESET_FAILED = 0x0D,
-+	IPTS_STATUS_TIMEOUT = 0x0E,
-+	IPTS_STATUS_TEST_MODE_FAIL = 0x0F,
-+	IPTS_STATUS_SENSOR_FAIL_FATAL = 0x10,
-+	IPTS_STATUS_SENSOR_FAIL_NONFATAL = 0x11,
-+	IPTS_STATUS_INVALID_DEVICE_CAPS = 0x12,
-+	IPTS_STATUS_QUIESCE_IO_IN_PROGRESS = 0x13,
-+};
-+
-+/**
-+ * struct ipts_command - Message that is sent to the device for calling a command.
-+ * @cmd:     The command that will be called.
-+ * @payload: Payload containing parameters for the called command.
-+ */
-+struct ipts_command {
-+	enum ipts_command_code cmd;
-+	u8 payload[320];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_command) == 324);
-+
-+/**
-+ * enum ipts_mode - Configures what data the device produces and how its sent.
-+ * @IPTS_MODE_EVENT: The device will send an event once a buffer was filled.
-+ *                   Older devices will return singletouch data in this mode.
-+ * @IPTS_MODE_POLL:  The device will notify the driver by incrementing the doorbell value.
-+ *                   Older devices will return multitouch data in this mode.
-+ */
-+enum ipts_mode {
-+	IPTS_MODE_EVENT = 0x00,
-+	IPTS_MODE_POLL = 0x01,
-+};
-+
-+/**
-+ * struct ipts_set_mode - Payload for the SET_MODE command.
-+ * @mode: Changes the mode that IPTS will operate in.
-+ */
-+struct ipts_set_mode {
-+	enum ipts_mode mode;
-+	u8 reserved[12];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_set_mode) == 16);
-+
-+#define IPTS_WORKQUEUE_SIZE	 8192
-+#define IPTS_WORKQUEUE_ITEM_SIZE 16
-+
-+/**
-+ * struct ipts_mem_window - Payload for the SET_MEM_WINDOW command.
-+ * @data_addr_lower:      Lower 32 bits of the data buffer addresses.
-+ * @data_addr_upper:      Upper 32 bits of the data buffer addresses.
-+ * @workqueue_addr_lower: Lower 32 bits of the workqueue buffer address.
-+ * @workqueue_addr_upper: Upper 32 bits of the workqueue buffer address.
-+ * @doorbell_addr_lower:  Lower 32 bits of the doorbell buffer address.
-+ * @doorbell_addr_upper:  Upper 32 bits of the doorbell buffer address.
-+ * @feedbackaddr_lower:   Lower 32 bits of the feedback buffer addresses.
-+ * @feedbackaddr_upper:   Upper 32 bits of the feedback buffer addresses.
-+ * @hid2me_addr_lower:    Lower 32 bits of the hid2me buffer address.
-+ * @hid2me_addr_upper:    Upper 32 bits of the hid2me buffer address.
-+ * @hid2me_size:          Size of the hid2me feedback buffer.
-+ * @workqueue_item_size:  Magic value. Must be 16.
-+ * @workqueue_size:       Magic value. Must be 8192.
-+ *
-+ * The workqueue related items in this struct are required for using
-+ * GuC submission with binary processing firmware. Since this driver does
-+ * not use GuC submission and instead exports raw data to userspace, these
-+ * items are not actually used, but they need to be allocated and passed
-+ * to the device, otherwise initialization will fail.
-+ */
-+struct ipts_mem_window {
-+	u32 data_addr_lower[IPTS_BUFFERS];
-+	u32 data_addr_upper[IPTS_BUFFERS];
-+	u32 workqueue_addr_lower;
-+	u32 workqueue_addr_upper;
-+	u32 doorbell_addr_lower;
-+	u32 doorbell_addr_upper;
-+	u32 feedback_addr_lower[IPTS_BUFFERS];
-+	u32 feedback_addr_upper[IPTS_BUFFERS];
-+	u32 hid2me_addr_lower;
-+	u32 hid2me_addr_upper;
-+	u32 hid2me_size;
-+	u8 reserved1;
-+	u8 workqueue_item_size;
-+	u16 workqueue_size;
-+	u8 reserved[32];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_mem_window) == 320);
-+
-+/**
-+ * struct ipts_quiesce_io - Payload for the QUIESCE_IO command.
-+ */
-+struct ipts_quiesce_io {
-+	u8 reserved[12];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_quiesce_io) == 12);
-+
-+/**
-+ * struct ipts_feedback - Payload for the FEEDBACK command.
-+ * @buffer: The buffer that the device should refill.
-+ */
-+struct ipts_feedback {
-+	u32 buffer;
-+	u8 reserved[12];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_feedback) == 16);
-+
-+/**
-+ * enum ipts_reset_type - Possible ways of resetting the device.
-+ * @IPTS_RESET_TYPE_HARD: Perform hardware reset using GPIO pin.
-+ * @IPTS_RESET_TYPE_SOFT: Perform software reset using SPI command.
-+ */
-+enum ipts_reset_type {
-+	IPTS_RESET_TYPE_HARD = 0x00,
-+	IPTS_RESET_TYPE_SOFT = 0x01,
-+};
-+
-+/**
-+ * struct ipts_reset - Payload for the RESET_SENSOR command.
-+ * @type: How the device should get reset.
-+ */
-+struct ipts_reset_sensor {
-+	enum ipts_reset_type type;
-+	u8 reserved[4];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_reset_sensor) == 8);
-+
-+/**
-+ * struct ipts_get_descriptor - Payload for the GET_DESCRIPTOR command.
-+ * @addr_lower: The lower 32 bits of the descriptor buffer address.
-+ * @addr_upper: The upper 32 bits of the descriptor buffer address.
-+ * @magic:      A magic value. Must be 8.
-+ */
-+struct ipts_get_descriptor {
-+	u32 addr_lower;
-+	u32 addr_upper;
-+	u32 magic;
-+	u8 reserved[12];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_get_descriptor) == 24);
-+
-+/*
-+ * The type of a response is indicated by a
-+ * command code, with the most significant bit flipped to 1.
-+ */
-+#define IPTS_RSP_BIT BIT(31)
-+
-+/**
-+ * struct ipts_response - Data returned from the device in response to a command.
-+ * @cmd:     The command that this response answers (IPTS_RSP_BIT will be 1).
-+ * @status:  The return code of the command.
-+ * @payload: The data that was produced by the command.
-+ */
-+struct ipts_response {
-+	enum ipts_command_code cmd;
-+	enum ipts_status status;
-+	u8 payload[80];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_response) == 88);
-+
-+/**
-+ * struct ipts_device_info - Vendor information of the IPTS device.
-+ * @vendor:         Vendor ID of this device.
-+ * @product:        Product ID of this device.
-+ * @hw_version:     Hardware revision of this device.
-+ * @fw_version:     Firmware revision of this device.
-+ * @data_size:      Requested size for a data buffer.
-+ * @feedback_size:  Requested size for a feedback buffer.
-+ * @mode:           Mode that the device currently operates in.
-+ * @max_contacts:   Maximum amount of concurrent touches the sensor can process.
-+ * @sensor_min_eds: The minimum EDS version supported by the sensor.
-+ * @sensor_max_eds: The maximum EDS version supported by the sensor.
-+ * @me_min_eds:     The minimum EDS version supported by the ME for communicating with the sensor.
-+ * @me_max_eds:     The maximum EDS version supported by the ME for communicating with the sensor.
-+ * @intf_eds:       The EDS version implemented by the interface between ME and host.
-+ */
-+struct ipts_device_info {
-+	u16 vendor;
-+	u16 product;
-+	u32 hw_version;
-+	u32 fw_version;
-+	u32 data_size;
-+	u32 feedback_size;
-+	enum ipts_mode mode;
-+	u8 max_contacts;
-+	u8 reserved1[3];
-+	u8 sensor_min_eds;
-+	u8 sensor_maj_eds;
-+	u8 me_min_eds;
-+	u8 me_maj_eds;
-+	u8 intf_eds;
-+	u8 reserved2[11];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_device_info) == 44);
-+
-+#endif /* IPTS_SPEC_DEVICE_H */
-diff --git a/drivers/hid/ipts/spec-hid.h b/drivers/hid/ipts/spec-hid.h
-new file mode 100644
-index 0000000000000..5a58d4a0a610f
---- /dev/null
-+++ b/drivers/hid/ipts/spec-hid.h
-@@ -0,0 +1,34 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2020-2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_SPEC_HID_H
-+#define IPTS_SPEC_HID_H
-+
-+#include <linux/build_bug.h>
-+#include <linux/types.h>
-+
-+/*
-+ * Made-up type for passing raw IPTS data in a HID report.
-+ */
-+#define IPTS_HID_FRAME_TYPE_RAW 0xEE
-+
-+/**
-+ * struct ipts_hid_frame - Header that is prefixed to raw IPTS data wrapped in a HID report.
-+ * @size: Size of the data inside the report, including this header.
-+ * @type: What type of data does this report contain.
-+ */
-+struct ipts_hid_header {
-+	u32 size;
-+	u8 reserved1;
-+	u8 type;
-+	u8 reserved2;
-+	u8 data[];
-+} __packed;
-+
-+static_assert(sizeof(struct ipts_hid_header) == 7);
-+
-+#endif /* IPTS_SPEC_HID_H */
-diff --git a/drivers/hid/ipts/thread.c b/drivers/hid/ipts/thread.c
-new file mode 100644
-index 0000000000000..355e92bea26f8
---- /dev/null
-+++ b/drivers/hid/ipts/thread.c
-@@ -0,0 +1,84 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Copyright (c) 2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#include <linux/completion.h>
-+#include <linux/err.h>
-+#include <linux/kthread.h>
-+#include <linux/mutex.h>
-+
-+#include "thread.h"
-+
-+bool ipts_thread_should_stop(struct ipts_thread *thread)
-+{
-+	if (!thread)
-+		return false;
-+
-+	return READ_ONCE(thread->should_stop);
-+}
-+
-+static int ipts_thread_runner(void *data)
-+{
-+	int ret = 0;
-+	struct ipts_thread *thread = data;
-+
-+	if (!thread)
-+		return -EFAULT;
-+
-+	if (!thread->threadfn)
-+		return -EFAULT;
-+
-+	ret = thread->threadfn(thread);
-+	complete_all(&thread->done);
-+
-+	return ret;
-+}
-+
-+int ipts_thread_start(struct ipts_thread *thread, int (*threadfn)(struct ipts_thread *thread),
-+		      void *data, const char *name)
-+{
-+	if (!thread)
-+		return -EFAULT;
-+
-+	if (!threadfn)
-+		return -EFAULT;
-+
-+	init_completion(&thread->done);
-+
-+	thread->data = data;
-+	thread->should_stop = false;
-+	thread->threadfn = threadfn;
-+
-+	thread->thread = kthread_run(ipts_thread_runner, thread, name);
-+	return PTR_ERR_OR_ZERO(thread->thread);
-+}
-+
-+int ipts_thread_stop(struct ipts_thread *thread)
-+{
-+	int ret = 0;
-+
-+	if (!thread)
-+		return -EFAULT;
-+
-+	if (!thread->thread)
-+		return 0;
-+
-+	WRITE_ONCE(thread->should_stop, true);
-+
-+	/*
-+	 * Make sure that the write has gone through before waiting.
-+	 */
-+	wmb();
-+
-+	wait_for_completion(&thread->done);
-+	ret = kthread_stop(thread->thread);
-+
-+	thread->thread = NULL;
-+	thread->data = NULL;
-+	thread->threadfn = NULL;
-+
-+	return ret;
-+}
-diff --git a/drivers/hid/ipts/thread.h b/drivers/hid/ipts/thread.h
-new file mode 100644
-index 0000000000000..1f966b8b32c45
---- /dev/null
-+++ b/drivers/hid/ipts/thread.h
-@@ -0,0 +1,59 @@
-+/* SPDX-License-Identifier: GPL-2.0-or-later */
-+/*
-+ * Copyright (c) 2023 Dorian Stoll
-+ *
-+ * Linux driver for Intel Precise Touch & Stylus
-+ */
-+
-+#ifndef IPTS_THREAD_H
-+#define IPTS_THREAD_H
-+
-+#include <linux/completion.h>
-+#include <linux/mutex.h>
-+#include <linux/sched.h>
-+
-+/*
-+ * This wrapper over kthread is necessary, because calling kthread_stop makes it impossible
-+ * to issue MEI commands from that thread while it shuts itself down. By using a custom
-+ * boolean variable and a completion object, we can call kthread_stop only when the thread
-+ * already finished all of its work and has returned.
-+ */
-+struct ipts_thread {
-+	struct task_struct *thread;
-+
-+	bool should_stop;
-+	struct completion done;
-+
-+	void *data;
-+	int (*threadfn)(struct ipts_thread *thread);
-+};
-+
-+/**
-+ * ipts_thread_should_stop() - Returns true if the thread is asked to terminate.
-+ * @thread: The current thread.
-+ *
-+ * Returns: true if the thread should stop, false if not.
-+ */
-+bool ipts_thread_should_stop(struct ipts_thread *thread);
-+
-+/**
-+ * ipts_thread_start() - Starts an IPTS thread.
-+ * @thread: The thread to initialize and start.
-+ * @threadfn: The function to execute.
-+ * @data: An argument that will be passed to threadfn.
-+ * @name: The name of the new thread.
-+ *
-+ * Returns: 0 on success, <0 on error.
-+ */
-+int ipts_thread_start(struct ipts_thread *thread, int (*threadfn)(struct ipts_thread *thread),
-+		      void *data, const char name[]);
-+
-+/**
-+ * ipts_thread_stop() - Asks the thread to terminate and waits until it has finished.
-+ * @thread: The thread that should stop.
-+ *
-+ * Returns: The return value of the thread function.
-+ */
-+int ipts_thread_stop(struct ipts_thread *thread);
-+
-+#endif /* IPTS_THREAD_H */
--- 
-2.42.0
-
-From 033de13abc9653b2d773f06182465e03d5d6463b Mon Sep 17 00:00:00 2001
-From: Dorian Stoll <dorian.stoll@tmsp.io>
-Date: Sun, 11 Dec 2022 12:03:38 +0100
-Subject: [PATCH] iommu: intel: Disable source id verification for ITHC
-
-Signed-off-by: Dorian Stoll <dorian.stoll@tmsp.io>
-Patchset: ithc
----
- drivers/iommu/intel/irq_remapping.c | 16 ++++++++++++++++
- 1 file changed, 16 insertions(+)
-
-diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
-index 29b9e55dcf26c..986e91c813ae1 100644
---- a/drivers/iommu/intel/irq_remapping.c
-+++ b/drivers/iommu/intel/irq_remapping.c
-@@ -386,6 +386,22 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev)
- 	data.busmatch_count = 0;
- 	pci_for_each_dma_alias(dev, set_msi_sid_cb, &data);
- 
-+	/*
-+	 * The Intel Touch Host Controller is at 00:10.6, but for some reason
-+	 * the MSI interrupts have request id 01:05.0.
-+	 * Disable id verification to work around this.
-+	 * FIXME Find proper fix or turn this into a quirk.
-+	 */
-+	if (dev->vendor == PCI_VENDOR_ID_INTEL && (dev->class >> 8) == PCI_CLASS_INPUT_PEN) {
-+		switch(dev->device) {
-+		case 0x98d0: case 0x98d1: // LKF
-+		case 0xa0d0: case 0xa0d1: // TGL LP
-+		case 0x43d0: case 0x43d1: // TGL H
-+			set_irte_sid(irte, SVT_NO_VERIFY, SQ_ALL_16, 0);
-+			return 0;
-+		}
-+	}
-+
- 	/*
- 	 * DMA alias provides us with a PCI device and alias.  The only case
- 	 * where the it will return an alias on a different bus than the
--- 
-2.42.0
-
-From 0dd32bcfb70f9e36cfa009d94cd6c86a4839cff3 Mon Sep 17 00:00:00 2001
-From: Dorian Stoll <dorian.stoll@tmsp.io>
-Date: Sun, 11 Dec 2022 12:10:54 +0100
-Subject: [PATCH] hid: Add support for Intel Touch Host Controller
-
-Based on quo/ithc-linux@55803a2
-
-Signed-off-by: Dorian Stoll <dorian.stoll@tmsp.io>
-Patchset: ithc
----
- drivers/hid/Kconfig           |   2 +
- drivers/hid/Makefile          |   1 +
- drivers/hid/ithc/Kbuild       |   6 +
- drivers/hid/ithc/Kconfig      |  12 +
- drivers/hid/ithc/ithc-debug.c |  96 ++++++
- drivers/hid/ithc/ithc-dma.c   | 258 ++++++++++++++++
- drivers/hid/ithc/ithc-dma.h   |  67 +++++
- drivers/hid/ithc/ithc-main.c  | 534 ++++++++++++++++++++++++++++++++++
- drivers/hid/ithc/ithc-regs.c  |  64 ++++
- drivers/hid/ithc/ithc-regs.h  | 186 ++++++++++++
- drivers/hid/ithc/ithc.h       |  60 ++++
- 11 files changed, 1286 insertions(+)
- create mode 100644 drivers/hid/ithc/Kbuild
- create mode 100644 drivers/hid/ithc/Kconfig
- create mode 100644 drivers/hid/ithc/ithc-debug.c
- create mode 100644 drivers/hid/ithc/ithc-dma.c
- create mode 100644 drivers/hid/ithc/ithc-dma.h
- create mode 100644 drivers/hid/ithc/ithc-main.c
- create mode 100644 drivers/hid/ithc/ithc-regs.c
- create mode 100644 drivers/hid/ithc/ithc-regs.h
- create mode 100644 drivers/hid/ithc/ithc.h
-
-diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
-index 0b9d245d10e54..8ba1c309228be 100644
---- a/drivers/hid/Kconfig
-+++ b/drivers/hid/Kconfig
-@@ -1347,4 +1347,6 @@ source "drivers/hid/surface-hid/Kconfig"
- 
- source "drivers/hid/ipts/Kconfig"
- 
-+source "drivers/hid/ithc/Kconfig"
-+
- endif # HID_SUPPORT
-diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
-index 2ef21b257d0b5..e94b79727b489 100644
---- a/drivers/hid/Makefile
-+++ b/drivers/hid/Makefile
-@@ -171,3 +171,4 @@ obj-$(CONFIG_AMD_SFH_HID)       += amd-sfh-hid/
- obj-$(CONFIG_SURFACE_HID_CORE)  += surface-hid/
- 
- obj-$(CONFIG_HID_IPTS)          += ipts/
-+obj-$(CONFIG_HID_ITHC)          += ithc/
-diff --git a/drivers/hid/ithc/Kbuild b/drivers/hid/ithc/Kbuild
-new file mode 100644
-index 0000000000000..aea83f2ac07b4
---- /dev/null
-+++ b/drivers/hid/ithc/Kbuild
-@@ -0,0 +1,6 @@
-+obj-$(CONFIG_HID_ITHC) := ithc.o
-+
-+ithc-objs := ithc-main.o ithc-regs.o ithc-dma.o ithc-debug.o
-+
-+ccflags-y := -std=gnu11 -Wno-declaration-after-statement
-+
-diff --git a/drivers/hid/ithc/Kconfig b/drivers/hid/ithc/Kconfig
-new file mode 100644
-index 0000000000000..ede7130236096
---- /dev/null
-+++ b/drivers/hid/ithc/Kconfig
-@@ -0,0 +1,12 @@
-+config HID_ITHC
-+	tristate "Intel Touch Host Controller"
-+	depends on PCI
-+	depends on HID
-+	help
-+	  Say Y here if your system has a touchscreen using Intels
-+	  Touch Host Controller (ITHC / IPTS) technology.
-+
-+	  If unsure say N.
-+
-+	  To compile this driver as a module, choose M here: the
-+	  module will be called ithc.
-diff --git a/drivers/hid/ithc/ithc-debug.c b/drivers/hid/ithc/ithc-debug.c
-new file mode 100644
-index 0000000000000..57bf125c45bd5
---- /dev/null
-+++ b/drivers/hid/ithc/ithc-debug.c
-@@ -0,0 +1,96 @@
-+#include "ithc.h"
-+
-+void ithc_log_regs(struct ithc *ithc) {
-+	if (!ithc->prev_regs) return;
-+	u32 __iomem *cur = (__iomem void*)ithc->regs;
-+	u32 *prev = (void*)ithc->prev_regs;
-+	for (int i = 1024; i < sizeof *ithc->regs / 4; i++) {
-+		u32 x = readl(cur + i);
-+		if (x != prev[i]) {
-+			pci_info(ithc->pci, "reg %04x: %08x -> %08x\n", i * 4, prev[i], x);
-+			prev[i] = x;
-+		}
-+	}
-+}
-+
-+static ssize_t ithc_debugfs_cmd_write(struct file *f, const char __user *buf, size_t len, loff_t *offset) {
-+	struct ithc *ithc = file_inode(f)->i_private;
-+	char cmd[256];
-+	if (!ithc || !ithc->pci) return -ENODEV;
-+	if (!len) return -EINVAL;
-+	if (len >= sizeof cmd) return -EINVAL;
-+	if (copy_from_user(cmd, buf, len)) return -EFAULT;
-+	cmd[len] = 0;
-+	if (cmd[len-1] == '\n') cmd[len-1] = 0;
-+	pci_info(ithc->pci, "debug command: %s\n", cmd);
-+	u32 n = 0;
-+	const char *s = cmd + 1;
-+	u32 a[32];
-+	while (*s && *s != '\n') {
-+		if (n >= ARRAY_SIZE(a)) return -EINVAL;
-+		if (*s++ != ' ') return -EINVAL;
-+		char *e;
-+		a[n++] = simple_strtoul(s, &e, 0);
-+		if (e == s) return -EINVAL;
-+		s = e;
-+	}
-+	ithc_log_regs(ithc);
-+	switch(cmd[0]) {
-+	case 'x': // reset
-+		ithc_reset(ithc);
-+		break;
-+	case 'w': // write register: offset mask value
-+		if (n != 3 || (a[0] & 3)) return -EINVAL;
-+		pci_info(ithc->pci, "debug write 0x%04x = 0x%08x (mask 0x%08x)\n", a[0], a[2], a[1]);
-+		bitsl(((__iomem u32 *)ithc->regs) + a[0] / 4, a[1], a[2]);
-+		break;
-+	case 'r': // read register: offset
-+		if (n != 1 || (a[0] & 3)) return -EINVAL;
-+		pci_info(ithc->pci, "debug read 0x%04x = 0x%08x\n", a[0], readl(((__iomem u32 *)ithc->regs) + a[0] / 4));
-+		break;
-+	case 's': // spi command: cmd offset len data...
-+		// read config: s 4 0 64 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-+		// set touch cfg: s 6 12 4 XX
-+		if (n < 3 || a[2] > (n - 3) * 4) return -EINVAL;
-+		pci_info(ithc->pci, "debug spi command %u with %u bytes of data\n", a[0], a[2]);
-+		if (!CHECK(ithc_spi_command, ithc, a[0], a[1], a[2], a + 3))
-+			for (u32 i = 0; i < (a[2] + 3) / 4; i++) pci_info(ithc->pci, "resp %u = 0x%08x\n", i, a[3+i]);
-+		break;
-+	case 'd': // dma command: cmd len data...
-+		// get report descriptor: d 7 8 0 0
-+		// enable multitouch: d 3 2 0x0105
-+		if (n < 2 || a[1] > (n - 2) * 4) return -EINVAL;
-+		pci_info(ithc->pci, "debug dma command %u with %u bytes of data\n", a[0], a[1]);
-+		if (ithc_dma_tx(ithc, a[0], a[1], a + 2)) pci_err(ithc->pci, "dma tx failed\n");
-+		break;
-+	default:
-+		return -EINVAL;
-+	}
-+	ithc_log_regs(ithc);
-+	return len;
-+}
-+
-+static const struct file_operations ithc_debugfops_cmd = {
-+	.owner = THIS_MODULE,
-+	.write = ithc_debugfs_cmd_write,
-+};
-+
-+static void ithc_debugfs_devres_release(struct device *dev, void *res) {
-+	struct dentry **dbgm = res;
-+	if (*dbgm) debugfs_remove_recursive(*dbgm);
-+}
-+
-+int ithc_debug_init(struct ithc *ithc) {
-+	struct dentry **dbgm = devres_alloc(ithc_debugfs_devres_release, sizeof *dbgm, GFP_KERNEL);
-+	if (!dbgm) return -ENOMEM;
-+	devres_add(&ithc->pci->dev, dbgm);
-+	struct dentry *dbg = debugfs_create_dir(DEVNAME, NULL);
-+	if (IS_ERR(dbg)) return PTR_ERR(dbg);
-+	*dbgm = dbg;
-+
-+	struct dentry *cmd = debugfs_create_file("cmd", 0220, dbg, ithc, &ithc_debugfops_cmd);
-+	if (IS_ERR(cmd)) return PTR_ERR(cmd);
-+
-+	return 0;
-+}
-+
-diff --git a/drivers/hid/ithc/ithc-dma.c b/drivers/hid/ithc/ithc-dma.c
-new file mode 100644
-index 0000000000000..7e89b3496918d
---- /dev/null
-+++ b/drivers/hid/ithc/ithc-dma.c
-@@ -0,0 +1,258 @@
-+#include "ithc.h"
-+
-+static int ithc_dma_prd_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *p, unsigned num_buffers, unsigned num_pages, enum dma_data_direction dir) {
-+	p->num_pages = num_pages;
-+	p->dir = dir;
-+	p->size = round_up(num_buffers * num_pages * sizeof(struct ithc_phys_region_desc), PAGE_SIZE);
-+	p->addr = dmam_alloc_coherent(&ithc->pci->dev, p->size, &p->dma_addr, GFP_KERNEL);
-+	if (!p->addr) return -ENOMEM;
-+	if (p->dma_addr & (PAGE_SIZE - 1)) return -EFAULT;
-+	return 0;
-+}
-+
-+struct ithc_sg_table {
-+	void *addr;
-+	struct sg_table sgt;
-+	enum dma_data_direction dir;
-+};
-+static void ithc_dma_sgtable_free(struct sg_table *sgt) {
-+	struct scatterlist *sg;
-+	int i;
-+	for_each_sgtable_sg(sgt, sg, i) {
-+		struct page *p = sg_page(sg);
-+		if (p) __free_page(p);
-+	}
-+	sg_free_table(sgt);
-+}
-+static void ithc_dma_data_devres_release(struct device *dev, void *res) {
-+	struct ithc_sg_table *sgt = res;
-+	if (sgt->addr) vunmap(sgt->addr);
-+	dma_unmap_sgtable(dev, &sgt->sgt, sgt->dir, 0);
-+	ithc_dma_sgtable_free(&sgt->sgt);
-+}
-+
-+static int ithc_dma_data_alloc(struct ithc* ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b) {
-+	// We don't use dma_alloc_coherent for data buffers, because they don't have to be contiguous (we can use one PRD per page) or coherent (they are unidirectional).
-+	// Instead we use an sg_table of individually allocated pages (5.13 has dma_alloc_noncontiguous for this, but we'd like to support 5.10 for now).
-+	struct page *pages[16];
-+	if (prds->num_pages == 0 || prds->num_pages > ARRAY_SIZE(pages)) return -EINVAL;
-+	b->active_idx = -1;
-+	struct ithc_sg_table *sgt = devres_alloc(ithc_dma_data_devres_release, sizeof *sgt, GFP_KERNEL);
-+	if (!sgt) return -ENOMEM;
-+	sgt->dir = prds->dir;
-+	if (!sg_alloc_table(&sgt->sgt, prds->num_pages, GFP_KERNEL)) {
-+		struct scatterlist *sg;
-+		int i;
-+		bool ok = true;
-+		for_each_sgtable_sg(&sgt->sgt, sg, i) {
-+			struct page *p = pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); // don't need __GFP_DMA for PCI DMA
-+			if (!p) { ok = false; break; }
-+			sg_set_page(sg, p, PAGE_SIZE, 0);
-+		}
-+		if (ok && !dma_map_sgtable(&ithc->pci->dev, &sgt->sgt, prds->dir, 0)) {
-+			devres_add(&ithc->pci->dev, sgt);
-+			b->sgt = &sgt->sgt;
-+			b->addr = sgt->addr = vmap(pages, prds->num_pages, 0, PAGE_KERNEL);
-+			if (!b->addr) return -ENOMEM;
-+			return 0;
-+		}
-+		ithc_dma_sgtable_free(&sgt->sgt);
-+	}
-+	devres_free(sgt);
-+	return -ENOMEM;
-+}
-+
-+static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) {
-+	struct ithc_phys_region_desc *prd = prds->addr;
-+	prd += idx * prds->num_pages;
-+	if (b->active_idx >= 0) { pci_err(ithc->pci, "buffer already active\n"); return -EINVAL; }
-+	b->active_idx = idx;
-+	if (prds->dir == DMA_TO_DEVICE) {
-+		if (b->data_size > PAGE_SIZE) return -EINVAL;
-+		prd->addr = sg_dma_address(b->sgt->sgl) >> 10;
-+		prd->size = b->data_size | PRD_FLAG_END;
-+		flush_kernel_vmap_range(b->addr, b->data_size);
-+	} else if (prds->dir == DMA_FROM_DEVICE) {
-+		struct scatterlist *sg;
-+		int i;
-+		for_each_sgtable_dma_sg(b->sgt, sg, i) {
-+			prd->addr = sg_dma_address(sg) >> 10;
-+			prd->size = sg_dma_len(sg);
-+			prd++;
-+		}
-+		prd[-1].size |= PRD_FLAG_END;
-+	}
-+	dma_wmb(); // for the prds
-+	dma_sync_sgtable_for_device(&ithc->pci->dev, b->sgt, prds->dir);
-+	return 0;
-+}
-+
-+static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) {
-+	struct ithc_phys_region_desc *prd = prds->addr;
-+	prd += idx * prds->num_pages;
-+	if (b->active_idx != idx) { pci_err(ithc->pci, "wrong buffer index\n"); return -EINVAL; }
-+	b->active_idx = -1;
-+	if (prds->dir == DMA_FROM_DEVICE) {
-+		dma_rmb(); // for the prds
-+		b->data_size = 0;
-+		struct scatterlist *sg;
-+		int i;
-+		for_each_sgtable_dma_sg(b->sgt, sg, i) {
-+			unsigned size = prd->size;
-+			b->data_size += size & PRD_SIZE_MASK;
-+			if (size & PRD_FLAG_END) break;
-+			if ((size & PRD_SIZE_MASK) != sg_dma_len(sg)) { pci_err(ithc->pci, "truncated prd\n"); break; }
-+			prd++;
-+		}
-+		invalidate_kernel_vmap_range(b->addr, b->data_size);
-+	}
-+	dma_sync_sgtable_for_cpu(&ithc->pci->dev, b->sgt, prds->dir);
-+	return 0;
-+}
-+
-+int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname) {
-+	struct ithc_dma_rx *rx = &ithc->dma_rx[channel];
-+	mutex_init(&rx->mutex);
-+	u32 buf_size = DEVCFG_DMA_RX_SIZE(ithc->config.dma_buf_sizes);
-+	unsigned num_pages = (buf_size + PAGE_SIZE - 1) / PAGE_SIZE;
-+	pci_dbg(ithc->pci, "allocating rx buffers: num = %u, size = %u, pages = %u\n", NUM_RX_BUF, buf_size, num_pages);
-+	CHECK_RET(ithc_dma_prd_alloc, ithc, &rx->prds, NUM_RX_BUF, num_pages, DMA_FROM_DEVICE);
-+	for (unsigned i = 0; i < NUM_RX_BUF; i++)
-+		CHECK_RET(ithc_dma_data_alloc, ithc, &rx->prds, &rx->bufs[i]);
-+	writeb(DMA_RX_CONTROL2_RESET, &ithc->regs->dma_rx[channel].control2);
-+	lo_hi_writeq(rx->prds.dma_addr, &ithc->regs->dma_rx[channel].addr);
-+	writeb(NUM_RX_BUF - 1, &ithc->regs->dma_rx[channel].num_bufs);
-+	writeb(num_pages - 1, &ithc->regs->dma_rx[channel].num_prds);
-+	u8 head = readb(&ithc->regs->dma_rx[channel].head);
-+	if (head) { pci_err(ithc->pci, "head is nonzero (%u)\n", head); return -EIO; }
-+	for (unsigned i = 0; i < NUM_RX_BUF; i++)
-+		CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, &rx->bufs[i], i);
-+	writeb(head ^ DMA_RX_WRAP_FLAG, &ithc->regs->dma_rx[channel].tail);
-+	return 0;
-+}
-+void ithc_dma_rx_enable(struct ithc *ithc, u8 channel) {
-+	bitsb_set(&ithc->regs->dma_rx[channel].control, DMA_RX_CONTROL_ENABLE | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_DATA);
-+	CHECK(waitl, ithc, &ithc->regs->dma_rx[1].status, DMA_RX_STATUS_ENABLED, DMA_RX_STATUS_ENABLED);
-+}
-+
-+int ithc_dma_tx_init(struct ithc *ithc) {
-+	struct ithc_dma_tx *tx = &ithc->dma_tx;
-+	mutex_init(&tx->mutex);
-+	tx->max_size = DEVCFG_DMA_TX_SIZE(ithc->config.dma_buf_sizes);
-+	unsigned num_pages = (tx->max_size + PAGE_SIZE - 1) / PAGE_SIZE;
-+	pci_dbg(ithc->pci, "allocating tx buffers: size = %u, pages = %u\n", tx->max_size, num_pages);
-+	CHECK_RET(ithc_dma_prd_alloc, ithc, &tx->prds, 1, num_pages, DMA_TO_DEVICE);
-+	CHECK_RET(ithc_dma_data_alloc, ithc, &tx->prds, &tx->buf);
-+	lo_hi_writeq(tx->prds.dma_addr, &ithc->regs->dma_tx.addr);
-+	writeb(num_pages - 1, &ithc->regs->dma_tx.num_prds);
-+	CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0);
-+	return 0;
-+}
-+
-+static int ithc_dma_rx_process_buf(struct ithc *ithc, struct ithc_dma_data_buffer *data, u8 channel, u8 buf) {
-+	if (buf >= NUM_RX_BUF) {
-+		pci_err(ithc->pci, "invalid dma ringbuffer index\n");
-+		return -EINVAL;
-+	}
-+	ithc_set_active(ithc);
-+	u32 len = data->data_size;
-+	struct ithc_dma_rx_header *hdr = data->addr;
-+	u8 *hiddata = (void *)(hdr + 1);
-+	if (len >= sizeof *hdr && hdr->code == DMA_RX_CODE_RESET) {
-+		CHECK(ithc_reset, ithc);
-+	} else if (len < sizeof *hdr || len != sizeof *hdr + hdr->data_size) {
-+		if (hdr->code == DMA_RX_CODE_INPUT_REPORT) {
-+			// When the CPU enters a low power state during DMA, we can get truncated messages.
-+			// Typically this will be a single touch HID report that is only 1 byte, or a multitouch report that is 257 bytes.
-+			// See also ithc_set_active().
-+		} else {
-+			pci_err(ithc->pci, "invalid dma rx data! channel %u, buffer %u, size %u, code %u, data size %u\n", channel, buf, len, hdr->code, hdr->data_size);
-+			print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0);
-+		}
-+	} else if (hdr->code == DMA_RX_CODE_REPORT_DESCRIPTOR && hdr->data_size > 8) {
-+		CHECK(hid_parse_report, ithc->hid, hiddata + 8, hdr->data_size - 8);
-+		WRITE_ONCE(ithc->hid_parse_done, true);
-+		wake_up(&ithc->wait_hid_parse);
-+	} else if (hdr->code == DMA_RX_CODE_INPUT_REPORT) {
-+		CHECK(hid_input_report, ithc->hid, HID_INPUT_REPORT, hiddata, hdr->data_size, 1);
-+	} else if (hdr->code == DMA_RX_CODE_FEATURE_REPORT) {
-+		bool done = false;
-+		mutex_lock(&ithc->hid_get_feature_mutex);
-+		if (ithc->hid_get_feature_buf) {
-+			if (hdr->data_size < ithc->hid_get_feature_size) ithc->hid_get_feature_size = hdr->data_size;
-+			memcpy(ithc->hid_get_feature_buf, hiddata, ithc->hid_get_feature_size);
-+			ithc->hid_get_feature_buf = NULL;
-+			done = true;
-+		}
-+		mutex_unlock(&ithc->hid_get_feature_mutex);
-+		if (done) wake_up(&ithc->wait_hid_get_feature);
-+		else CHECK(hid_input_report, ithc->hid, HID_FEATURE_REPORT, hiddata, hdr->data_size, 1);
-+	} else {
-+		pci_dbg(ithc->pci, "unhandled dma rx data! channel %u, buffer %u, size %u, code %u\n", channel, buf, len, hdr->code);
-+		print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0);
-+	}
-+	return 0;
-+}
-+
-+static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) {
-+	struct ithc_dma_rx *rx = &ithc->dma_rx[channel];
-+	unsigned n = rx->num_received;
-+	u8 head_wrap = readb(&ithc->regs->dma_rx[channel].head);
-+	while (1) {
-+		u8 tail = n % NUM_RX_BUF;
-+		u8 tail_wrap = tail | ((n / NUM_RX_BUF) & 1 ? 0 : DMA_RX_WRAP_FLAG);
-+		writeb(tail_wrap, &ithc->regs->dma_rx[channel].tail);
-+		// ringbuffer is full if tail_wrap == head_wrap
-+		// ringbuffer is empty if tail_wrap == head_wrap ^ WRAP_FLAG
-+		if (tail_wrap == (head_wrap ^ DMA_RX_WRAP_FLAG)) return 0;
-+
-+		// take the buffer that the device just filled
-+		struct ithc_dma_data_buffer *b = &rx->bufs[n % NUM_RX_BUF];
-+		CHECK_RET(ithc_dma_data_buffer_get, ithc, &rx->prds, b, tail);
-+		rx->num_received = ++n;
-+
-+		// process data
-+		CHECK(ithc_dma_rx_process_buf, ithc, b, channel, tail);
-+
-+		// give the buffer back to the device
-+		CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, b, tail);
-+	}
-+}
-+int ithc_dma_rx(struct ithc *ithc, u8 channel) {
-+	struct ithc_dma_rx *rx = &ithc->dma_rx[channel];
-+	mutex_lock(&rx->mutex);
-+	int ret = ithc_dma_rx_unlocked(ithc, channel);
-+	mutex_unlock(&rx->mutex);
-+	return ret;
-+}
-+
-+static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) {
-+	pci_dbg(ithc->pci, "dma tx command %u, size %u\n", cmdcode, datasize);
-+	struct ithc_dma_tx_header *hdr;
-+	u8 padding = datasize & 3 ? 4 - (datasize & 3) : 0;
-+	unsigned fullsize = sizeof *hdr + datasize + padding;
-+	if (fullsize > ithc->dma_tx.max_size || fullsize > PAGE_SIZE) return -EINVAL;
-+	CHECK_RET(ithc_dma_data_buffer_get, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0);
-+
-+	ithc->dma_tx.buf.data_size = fullsize;
-+	hdr = ithc->dma_tx.buf.addr;
-+	hdr->code = cmdcode;
-+	hdr->data_size = datasize;
-+	u8 *dest = (void *)(hdr + 1);
-+	memcpy(dest, data, datasize);
-+	dest += datasize;
-+	for (u8 p = 0; p < padding; p++) *dest++ = 0;
-+	CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0);
-+
-+	bitsb_set(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND);
-+	CHECK_RET(waitb, ithc, &ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND, 0);
-+	writel(DMA_TX_STATUS_DONE, &ithc->regs->dma_tx.status);
-+	return 0;
-+}
-+int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) {
-+	mutex_lock(&ithc->dma_tx.mutex);
-+	int ret = ithc_dma_tx_unlocked(ithc, cmdcode, datasize, data);
-+	mutex_unlock(&ithc->dma_tx.mutex);
-+	return ret;
-+}
-+
-diff --git a/drivers/hid/ithc/ithc-dma.h b/drivers/hid/ithc/ithc-dma.h
-new file mode 100644
-index 0000000000000..d9f2c19a13f3a
---- /dev/null
-+++ b/drivers/hid/ithc/ithc-dma.h
-@@ -0,0 +1,67 @@
-+#define PRD_SIZE_MASK            0xffffff
-+#define PRD_FLAG_END             0x1000000
-+#define PRD_FLAG_SUCCESS         0x2000000
-+#define PRD_FLAG_ERROR           0x4000000
-+
-+struct ithc_phys_region_desc {
-+	u64 addr; // physical addr/1024
-+	u32 size; // num bytes, PRD_FLAG_END marks last prd for data split over multiple prds
-+	u32 unused;
-+};
-+
-+#define DMA_RX_CODE_INPUT_REPORT          3
-+#define DMA_RX_CODE_FEATURE_REPORT        4
-+#define DMA_RX_CODE_REPORT_DESCRIPTOR     5
-+#define DMA_RX_CODE_RESET                 7
-+
-+struct ithc_dma_rx_header {
-+	u32 code;
-+	u32 data_size;
-+	u32 _unknown[14];
-+};
-+
-+#define DMA_TX_CODE_SET_FEATURE           3
-+#define DMA_TX_CODE_GET_FEATURE           4
-+#define DMA_TX_CODE_OUTPUT_REPORT         5
-+#define DMA_TX_CODE_GET_REPORT_DESCRIPTOR 7
-+
-+struct ithc_dma_tx_header {
-+	u32 code;
-+	u32 data_size;
-+};
-+
-+struct ithc_dma_prd_buffer {
-+	void *addr;
-+	dma_addr_t dma_addr;
-+	u32 size;
-+	u32 num_pages; // per data buffer
-+	enum dma_data_direction dir;
-+};
-+
-+struct ithc_dma_data_buffer {
-+	void *addr;
-+	struct sg_table *sgt;
-+	int active_idx;
-+	u32 data_size;
-+};
-+
-+struct ithc_dma_tx {
-+	struct mutex mutex;
-+	u32 max_size;
-+	struct ithc_dma_prd_buffer prds;
-+	struct ithc_dma_data_buffer buf;
-+};
-+
-+struct ithc_dma_rx {
-+	struct mutex mutex;
-+	u32 num_received;
-+	struct ithc_dma_prd_buffer prds;
-+	struct ithc_dma_data_buffer bufs[NUM_RX_BUF];
-+};
-+
-+int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname);
-+void ithc_dma_rx_enable(struct ithc *ithc, u8 channel);
-+int ithc_dma_tx_init(struct ithc *ithc);
-+int ithc_dma_rx(struct ithc *ithc, u8 channel);
-+int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *cmddata);
-+
-diff --git a/drivers/hid/ithc/ithc-main.c b/drivers/hid/ithc/ithc-main.c
-new file mode 100644
-index 0000000000000..09512b9cb4d31
---- /dev/null
-+++ b/drivers/hid/ithc/ithc-main.c
-@@ -0,0 +1,534 @@
-+#include "ithc.h"
-+
-+MODULE_DESCRIPTION("Intel Touch Host Controller driver");
-+MODULE_LICENSE("Dual BSD/GPL");
-+
-+// Lakefield
-+#define PCI_DEVICE_ID_INTEL_THC_LKF_PORT1    0x98d0
-+#define PCI_DEVICE_ID_INTEL_THC_LKF_PORT2    0x98d1
-+// Tiger Lake
-+#define PCI_DEVICE_ID_INTEL_THC_TGL_LP_PORT1 0xa0d0
-+#define PCI_DEVICE_ID_INTEL_THC_TGL_LP_PORT2 0xa0d1
-+#define PCI_DEVICE_ID_INTEL_THC_TGL_H_PORT1  0x43d0
-+#define PCI_DEVICE_ID_INTEL_THC_TGL_H_PORT2  0x43d1
-+// Alder Lake
-+#define PCI_DEVICE_ID_INTEL_THC_ADL_S_PORT1  0x7ad8
-+#define PCI_DEVICE_ID_INTEL_THC_ADL_S_PORT2  0x7ad9
-+#define PCI_DEVICE_ID_INTEL_THC_ADL_P_PORT1  0x51d0
-+#define PCI_DEVICE_ID_INTEL_THC_ADL_P_PORT2  0x51d1
-+#define PCI_DEVICE_ID_INTEL_THC_ADL_M_PORT1  0x54d0
-+#define PCI_DEVICE_ID_INTEL_THC_ADL_M_PORT2  0x54d1
-+// Raptor Lake
-+#define PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT1  0x7a58
-+#define PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT2  0x7a59
-+// Meteor Lake
-+#define PCI_DEVICE_ID_INTEL_THC_MTL_PORT1    0x7e48
-+#define PCI_DEVICE_ID_INTEL_THC_MTL_PORT2    0x7e4a
-+
-+static const struct pci_device_id ithc_pci_tbl[] = {
-+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_LKF_PORT1) },
-+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_LKF_PORT2) },
-+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_TGL_LP_PORT1) },
-+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_TGL_LP_PORT2) },
-+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_TGL_H_PORT1) },
-+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_TGL_H_PORT2) },
-+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_S_PORT1) },
-+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_S_PORT2) },
-+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_P_PORT1) },
-+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_P_PORT2) },
-+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_M_PORT1) },
-+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_ADL_M_PORT2) },
-+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT1) },
-+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT2) },
-+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT1) },
-+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT2) },
-+	{}
-+};
-+MODULE_DEVICE_TABLE(pci, ithc_pci_tbl);
-+
-+// Module parameters
-+
-+static bool ithc_use_polling = false;
-+module_param_named(poll, ithc_use_polling, bool, 0);
-+MODULE_PARM_DESC(poll, "Use polling instead of interrupts");
-+
-+static bool ithc_use_rx0 = false;
-+module_param_named(rx0, ithc_use_rx0, bool, 0);
-+MODULE_PARM_DESC(rx0, "Use DMA RX channel 0");
-+
-+static bool ithc_use_rx1 = true;
-+module_param_named(rx1, ithc_use_rx1, bool, 0);
-+MODULE_PARM_DESC(rx1, "Use DMA RX channel 1");
-+
-+static bool ithc_log_regs_enabled = false;
-+module_param_named(logregs, ithc_log_regs_enabled, bool, 0);
-+MODULE_PARM_DESC(logregs, "Log changes in register values (for debugging)");
-+
-+// Sysfs attributes
-+
-+static bool ithc_is_config_valid(struct ithc *ithc) {
-+	return ithc->config.device_id == DEVCFG_DEVICE_ID_TIC;
-+}
-+
-+static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, char *buf) {
-+	struct ithc *ithc = dev_get_drvdata(dev);
-+	if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV;
-+	return sprintf(buf, "0x%04x", ithc->config.vendor_id);
-+}
-+static DEVICE_ATTR_RO(vendor);
-+static ssize_t product_show(struct device *dev, struct device_attribute *attr, char *buf) {
-+	struct ithc *ithc = dev_get_drvdata(dev);
-+	if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV;
-+	return sprintf(buf, "0x%04x", ithc->config.product_id);
-+}
-+static DEVICE_ATTR_RO(product);
-+static ssize_t revision_show(struct device *dev, struct device_attribute *attr, char *buf) {
-+	struct ithc *ithc = dev_get_drvdata(dev);
-+	if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV;
-+	return sprintf(buf, "%u", ithc->config.revision);
-+}
-+static DEVICE_ATTR_RO(revision);
-+static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf) {
-+	struct ithc *ithc = dev_get_drvdata(dev);
-+	if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV;
-+	u32 v = ithc->config.fw_version;
-+	return sprintf(buf, "%i.%i.%i.%i", v >> 24, v >> 16 & 0xff, v >> 8 & 0xff, v & 0xff);
-+}
-+static DEVICE_ATTR_RO(fw_version);
-+
-+static const struct attribute_group *ithc_attribute_groups[] = {
-+	&(const struct attribute_group){
-+		.name = DEVNAME,
-+		.attrs = (struct attribute *[]){
-+			&dev_attr_vendor.attr,
-+			&dev_attr_product.attr,
-+			&dev_attr_revision.attr,
-+			&dev_attr_fw_version.attr,
-+			NULL
-+		},
-+	},
-+	NULL
-+};
-+
-+// HID setup
-+
-+static int ithc_hid_start(struct hid_device *hdev) { return 0; }
-+static void ithc_hid_stop(struct hid_device *hdev) { }
-+static int ithc_hid_open(struct hid_device *hdev) { return 0; }
-+static void ithc_hid_close(struct hid_device *hdev) { }
-+
-+static int ithc_hid_parse(struct hid_device *hdev) {
-+	struct ithc *ithc = hdev->driver_data;
-+	u64 val = 0;
-+	WRITE_ONCE(ithc->hid_parse_done, false);
-+	CHECK_RET(ithc_dma_tx, ithc, DMA_TX_CODE_GET_REPORT_DESCRIPTOR, sizeof val, &val);
-+	if (!wait_event_timeout(ithc->wait_hid_parse, READ_ONCE(ithc->hid_parse_done), msecs_to_jiffies(1000))) return -ETIMEDOUT;
-+	return 0;
-+}
-+
-+static int ithc_hid_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, unsigned char rtype, int reqtype) {
-+	struct ithc *ithc = hdev->driver_data;
-+	if (!buf || !len) return -EINVAL;
-+	u32 code;
-+	if (rtype == HID_OUTPUT_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_OUTPUT_REPORT;
-+	else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_SET_FEATURE;
-+	else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_GET_REPORT) code = DMA_TX_CODE_GET_FEATURE;
-+	else {
-+		pci_err(ithc->pci, "unhandled hid request %i %i for report id %i\n", rtype, reqtype, reportnum);
-+		return -EINVAL;
-+	}
-+	buf[0] = reportnum;
-+	if (reqtype == HID_REQ_GET_REPORT) {
-+		mutex_lock(&ithc->hid_get_feature_mutex);
-+		ithc->hid_get_feature_buf = buf;
-+		ithc->hid_get_feature_size = len;
-+		mutex_unlock(&ithc->hid_get_feature_mutex);
-+		int r = CHECK(ithc_dma_tx, ithc, code, 1, buf);
-+		if (!r) {
-+			r = wait_event_interruptible_timeout(ithc->wait_hid_get_feature, !ithc->hid_get_feature_buf, msecs_to_jiffies(1000));
-+			if (!r) r = -ETIMEDOUT;
-+			else if (r < 0) r = -EINTR;
-+			else r = 0;
-+		}
-+		mutex_lock(&ithc->hid_get_feature_mutex);
-+		ithc->hid_get_feature_buf = NULL;
-+		if (!r) r = ithc->hid_get_feature_size;
-+		mutex_unlock(&ithc->hid_get_feature_mutex);
-+		return r;
-+	}
-+	CHECK_RET(ithc_dma_tx, ithc, code, len, buf);
-+	return 0;
-+}
-+
-+static struct hid_ll_driver ithc_ll_driver = {
-+	.start = ithc_hid_start,
-+	.stop = ithc_hid_stop,
-+	.open = ithc_hid_open,
-+	.close = ithc_hid_close,
-+	.parse = ithc_hid_parse,
-+	.raw_request = ithc_hid_raw_request,
-+};
-+
-+static void ithc_hid_devres_release(struct device *dev, void *res) {
-+	struct hid_device **hidm = res;
-+	if (*hidm) hid_destroy_device(*hidm);
-+}
-+
-+static int ithc_hid_init(struct ithc *ithc) {
-+	struct hid_device **hidm = devres_alloc(ithc_hid_devres_release, sizeof *hidm, GFP_KERNEL);
-+	if (!hidm) return -ENOMEM;
-+	devres_add(&ithc->pci->dev, hidm);
-+	struct hid_device *hid = hid_allocate_device();
-+	if (IS_ERR(hid)) return PTR_ERR(hid);
-+	*hidm = hid;
-+
-+	strscpy(hid->name, DEVFULLNAME, sizeof(hid->name));
-+	strscpy(hid->phys, ithc->phys, sizeof(hid->phys));
-+	hid->ll_driver = &ithc_ll_driver;
-+	hid->bus = BUS_PCI;
-+	hid->vendor = ithc->config.vendor_id;
-+	hid->product = ithc->config.product_id;
-+	hid->version = 0x100;
-+	hid->dev.parent = &ithc->pci->dev;
-+	hid->driver_data = ithc;
-+
-+	ithc->hid = hid;
-+	return 0;
-+}
-+
-+// Interrupts/polling
-+
-+static void ithc_activity_timer_callback(struct timer_list *t) {
-+	struct ithc *ithc = container_of(t, struct ithc, activity_timer);
-+	cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE);
-+}
-+
-+void ithc_set_active(struct ithc *ithc) {
-+	// When CPU usage is very low, the CPU can enter various low power states (C2-C10).
-+	// This disrupts DMA, causing truncated DMA messages. ERROR_FLAG_DMA_UNKNOWN_12 will be set when this happens.
-+	// The amount of truncated messages can become very high, resulting in user-visible effects (laggy/stuttering cursor).
-+	// To avoid this, we use a CPU latency QoS request to prevent the CPU from entering low power states during touch interactions.
-+	cpu_latency_qos_update_request(&ithc->activity_qos, 0);
-+	mod_timer(&ithc->activity_timer, jiffies + msecs_to_jiffies(1000));
-+}
-+
-+static int ithc_set_device_enabled(struct ithc *ithc, bool enable) {
-+	u32 x = ithc->config.touch_cfg = (ithc->config.touch_cfg & ~(u32)DEVCFG_TOUCH_MASK) | DEVCFG_TOUCH_UNKNOWN_2
-+		| (enable ? DEVCFG_TOUCH_ENABLE | DEVCFG_TOUCH_UNKNOWN_3 | DEVCFG_TOUCH_UNKNOWN_4 : 0);
-+	return ithc_spi_command(ithc, SPI_CMD_CODE_WRITE, offsetof(struct ithc_device_config, touch_cfg), sizeof x, &x);
-+}
-+
-+static void ithc_disable_interrupts(struct ithc *ithc) {
-+	writel(0, &ithc->regs->error_control);
-+	bitsb(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_IRQ, 0);
-+	bitsb(&ithc->regs->dma_rx[0].control, DMA_RX_CONTROL_IRQ_UNKNOWN_1 | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_UNKNOWN_4 | DMA_RX_CONTROL_IRQ_DATA, 0);
-+	bitsb(&ithc->regs->dma_rx[1].control, DMA_RX_CONTROL_IRQ_UNKNOWN_1 | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_UNKNOWN_4 | DMA_RX_CONTROL_IRQ_DATA, 0);
-+	bitsb(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_IRQ, 0);
-+}
-+
-+static void ithc_clear_dma_rx_interrupts(struct ithc *ithc, unsigned channel) {
-+	writel(DMA_RX_STATUS_ERROR | DMA_RX_STATUS_UNKNOWN_4 | DMA_RX_STATUS_HAVE_DATA, &ithc->regs->dma_rx[channel].status);
-+}
-+
-+static void ithc_clear_interrupts(struct ithc *ithc) {
-+	writel(0xffffffff, &ithc->regs->error_flags);
-+	writel(ERROR_STATUS_DMA | ERROR_STATUS_SPI, &ithc->regs->error_status);
-+	writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status);
-+	ithc_clear_dma_rx_interrupts(ithc, 0);
-+	ithc_clear_dma_rx_interrupts(ithc, 1);
-+	writel(DMA_TX_STATUS_DONE | DMA_TX_STATUS_ERROR | DMA_TX_STATUS_UNKNOWN_2, &ithc->regs->dma_tx.status);
-+}
-+
-+static void ithc_process(struct ithc *ithc) {
-+	ithc_log_regs(ithc);
-+
-+	// read and clear error bits
-+	u32 err = readl(&ithc->regs->error_flags);
-+	if (err) {
-+		if (err & ~ERROR_FLAG_DMA_UNKNOWN_12) pci_err(ithc->pci, "error flags: 0x%08x\n", err);
-+		writel(err, &ithc->regs->error_flags);
-+	}
-+
-+	// process DMA rx
-+	if (ithc_use_rx0) {
-+		ithc_clear_dma_rx_interrupts(ithc, 0);
-+		ithc_dma_rx(ithc, 0);
-+	}
-+	if (ithc_use_rx1) {
-+		ithc_clear_dma_rx_interrupts(ithc, 1);
-+		ithc_dma_rx(ithc, 1);
-+	}
-+
-+	ithc_log_regs(ithc);
-+}
-+
-+static irqreturn_t ithc_interrupt_thread(int irq, void *arg) {
-+	struct ithc *ithc = arg;
-+	pci_dbg(ithc->pci, "IRQ! err=%08x/%08x/%08x, cmd=%02x/%08x, rx0=%02x/%08x, rx1=%02x/%08x, tx=%02x/%08x\n",
-+		readl(&ithc->regs->error_control), readl(&ithc->regs->error_status), readl(&ithc->regs->error_flags),
-+		readb(&ithc->regs->spi_cmd.control), readl(&ithc->regs->spi_cmd.status),
-+		readb(&ithc->regs->dma_rx[0].control), readl(&ithc->regs->dma_rx[0].status),
-+		readb(&ithc->regs->dma_rx[1].control), readl(&ithc->regs->dma_rx[1].status),
-+		readb(&ithc->regs->dma_tx.control), readl(&ithc->regs->dma_tx.status));
-+	ithc_process(ithc);
-+	return IRQ_HANDLED;
-+}
-+
-+static int ithc_poll_thread(void *arg) {
-+	struct ithc *ithc = arg;
-+	unsigned sleep = 100;
-+	while (!kthread_should_stop()) {
-+		u32 n = ithc->dma_rx[1].num_received;
-+		ithc_process(ithc);
-+		if (n != ithc->dma_rx[1].num_received) sleep = 20;
-+		else sleep = min(200u, sleep + (sleep >> 4) + 1);
-+		msleep_interruptible(sleep);
-+	}
-+	return 0;
-+}
-+
-+// Device initialization and shutdown
-+
-+static void ithc_disable(struct ithc *ithc) {
-+	bitsl_set(&ithc->regs->control_bits, CONTROL_QUIESCE);
-+	CHECK(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, CONTROL_IS_QUIESCED);
-+	bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0);
-+	bitsb(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_SEND, 0);
-+	bitsb(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND, 0);
-+	bitsb(&ithc->regs->dma_rx[0].control, DMA_RX_CONTROL_ENABLE, 0);
-+	bitsb(&ithc->regs->dma_rx[1].control, DMA_RX_CONTROL_ENABLE, 0);
-+	ithc_disable_interrupts(ithc);
-+	ithc_clear_interrupts(ithc);
-+}
-+
-+static int ithc_init_device(struct ithc *ithc) {
-+	ithc_log_regs(ithc);
-+	bool was_enabled = (readl(&ithc->regs->control_bits) & CONTROL_NRESET) != 0;
-+	ithc_disable(ithc);
-+	CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_READY, CONTROL_READY);
-+	ithc_set_spi_config(ithc, 10, 0);
-+	bitsl_set(&ithc->regs->dma_rx[0].unknown_init_bits, 0x80000000); // seems to help with reading config
-+
-+	if (was_enabled) if (msleep_interruptible(100)) return -EINTR;
-+	bitsl(&ithc->regs->control_bits, CONTROL_QUIESCE, 0);
-+	CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, 0);
-+	for (int retries = 0; ; retries++) {
-+		ithc_log_regs(ithc);
-+		bitsl_set(&ithc->regs->control_bits, CONTROL_NRESET);
-+		if (!waitl(ithc, &ithc->regs->state, 0xf, 2)) break;
-+		if (retries > 5) {
-+			pci_err(ithc->pci, "too many retries, failed to reset device\n");
-+			return -ETIMEDOUT;
-+		}
-+		pci_err(ithc->pci, "invalid state, retrying reset\n");
-+		bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0);
-+		if (msleep_interruptible(1000)) return -EINTR;
-+	}
-+	ithc_log_regs(ithc);
-+
-+	CHECK(waitl, ithc, &ithc->regs->dma_rx[0].status, DMA_RX_STATUS_UNKNOWN_4, DMA_RX_STATUS_UNKNOWN_4);
-+
-+	// read config
-+	for (int retries = 0; ; retries++) {
-+		ithc_log_regs(ithc);
-+		memset(&ithc->config, 0, sizeof ithc->config);
-+		CHECK_RET(ithc_spi_command, ithc, SPI_CMD_CODE_READ, 0, sizeof ithc->config, &ithc->config);
-+		u32 *p = (void *)&ithc->config;
-+		pci_info(ithc->pci, "config: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
-+			p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
-+		if (ithc_is_config_valid(ithc)) break;
-+		if (retries > 10) {
-+			pci_err(ithc->pci, "failed to read config, unknown device ID 0x%08x\n", ithc->config.device_id);
-+			return -EIO;
-+		}
-+		pci_err(ithc->pci, "failed to read config, retrying\n");
-+		if (msleep_interruptible(100)) return -EINTR;
-+	}
-+	ithc_log_regs(ithc);
-+
-+	CHECK_RET(ithc_set_spi_config, ithc, DEVCFG_SPI_MAX_FREQ(ithc->config.spi_config), DEVCFG_SPI_MODE(ithc->config.spi_config));
-+	CHECK_RET(ithc_set_device_enabled, ithc, true);
-+	ithc_log_regs(ithc);
-+	return 0;
-+}
-+
-+int ithc_reset(struct ithc *ithc) {
-+	// FIXME This should probably do devres_release_group()+ithc_start(). But because this is called during DMA
-+	// processing, that would have to be done asynchronously (schedule_work()?). And with extra locking?
-+	pci_err(ithc->pci, "reset\n");
-+	CHECK(ithc_init_device, ithc);
-+	if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0);
-+	if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1);
-+	ithc_log_regs(ithc);
-+	pci_dbg(ithc->pci, "reset completed\n");
-+	return 0;
-+}
-+
-+static void ithc_stop(void *res) {
-+	struct ithc *ithc = res;
-+	pci_dbg(ithc->pci, "stopping\n");
-+	ithc_log_regs(ithc);
-+	if (ithc->poll_thread) CHECK(kthread_stop, ithc->poll_thread);
-+	if (ithc->irq >= 0) disable_irq(ithc->irq);
-+	CHECK(ithc_set_device_enabled, ithc, false);
-+	ithc_disable(ithc);
-+	del_timer_sync(&ithc->activity_timer);
-+	cpu_latency_qos_remove_request(&ithc->activity_qos);
-+	// clear dma config
-+	for(unsigned i = 0; i < 2; i++) {
-+		CHECK(waitl, ithc, &ithc->regs->dma_rx[i].status, DMA_RX_STATUS_ENABLED, 0);
-+		lo_hi_writeq(0, &ithc->regs->dma_rx[i].addr);
-+		writeb(0, &ithc->regs->dma_rx[i].num_bufs);
-+		writeb(0, &ithc->regs->dma_rx[i].num_prds);
-+	}
-+	lo_hi_writeq(0, &ithc->regs->dma_tx.addr);
-+	writeb(0, &ithc->regs->dma_tx.num_prds);
-+	ithc_log_regs(ithc);
-+	pci_dbg(ithc->pci, "stopped\n");
-+}
-+
-+static void ithc_clear_drvdata(void *res) {
-+	struct pci_dev *pci = res;
-+	pci_set_drvdata(pci, NULL);
-+}
-+
-+static int ithc_start(struct pci_dev *pci) {
-+	pci_dbg(pci, "starting\n");
-+	if (pci_get_drvdata(pci)) {
-+		pci_err(pci, "device already initialized\n");
-+		return -EINVAL;
-+	}
-+	if (!devres_open_group(&pci->dev, ithc_start, GFP_KERNEL)) return -ENOMEM;
-+
-+	struct ithc *ithc = devm_kzalloc(&pci->dev, sizeof *ithc, GFP_KERNEL);
-+	if (!ithc) return -ENOMEM;
-+	ithc->irq = -1;
-+	ithc->pci = pci;
-+	snprintf(ithc->phys, sizeof ithc->phys, "pci-%s/" DEVNAME, pci_name(pci));
-+	init_waitqueue_head(&ithc->wait_hid_parse);
-+	init_waitqueue_head(&ithc->wait_hid_get_feature);
-+	mutex_init(&ithc->hid_get_feature_mutex);
-+	pci_set_drvdata(pci, ithc);
-+	CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_clear_drvdata, pci);
-+	if (ithc_log_regs_enabled) ithc->prev_regs = devm_kzalloc(&pci->dev, sizeof *ithc->prev_regs, GFP_KERNEL);
-+
-+	CHECK_RET(pcim_enable_device, pci);
-+	pci_set_master(pci);
-+	CHECK_RET(pcim_iomap_regions, pci, BIT(0), DEVNAME " regs");
-+	CHECK_RET(dma_set_mask_and_coherent, &pci->dev, DMA_BIT_MASK(64));
-+	CHECK_RET(pci_set_power_state, pci, PCI_D0);
-+	ithc->regs = pcim_iomap_table(pci)[0];
-+
-+	if (!ithc_use_polling) {
-+		CHECK_RET(pci_alloc_irq_vectors, pci, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX);
-+		ithc->irq = CHECK(pci_irq_vector, pci, 0);
-+		if (ithc->irq < 0) return ithc->irq;
-+	}
-+
-+	CHECK_RET(ithc_init_device, ithc);
-+	CHECK(devm_device_add_groups, &pci->dev, ithc_attribute_groups);
-+	if (ithc_use_rx0) CHECK_RET(ithc_dma_rx_init, ithc, 0, ithc_use_rx1 ? DEVNAME "0" : DEVNAME);
-+	if (ithc_use_rx1) CHECK_RET(ithc_dma_rx_init, ithc, 1, ithc_use_rx0 ? DEVNAME "1" : DEVNAME);
-+	CHECK_RET(ithc_dma_tx_init, ithc);
-+
-+	CHECK_RET(ithc_hid_init, ithc);
-+
-+	cpu_latency_qos_add_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE);
-+	timer_setup(&ithc->activity_timer, ithc_activity_timer_callback, 0);
-+
-+	// add ithc_stop callback AFTER setting up DMA buffers, so that polling/irqs/DMA are disabled BEFORE the buffers are freed
-+	CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_stop, ithc);
-+
-+	if (ithc_use_polling) {
-+		pci_info(pci, "using polling instead of irq\n");
-+		// use a thread instead of simple timer because we want to be able to sleep
-+		ithc->poll_thread = kthread_run(ithc_poll_thread, ithc, DEVNAME "poll");
-+		if (IS_ERR(ithc->poll_thread)) {
-+			int err = PTR_ERR(ithc->poll_thread);
-+			ithc->poll_thread = NULL;
-+			return err;
-+		}
-+	} else {
-+		CHECK_RET(devm_request_threaded_irq, &pci->dev, ithc->irq, NULL, ithc_interrupt_thread, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, DEVNAME, ithc);
-+	}
-+
-+	if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0);
-+	if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1);
-+
-+	// hid_add_device can only be called after irq/polling is started and DMA is enabled, because it calls ithc_hid_parse which reads the report descriptor via DMA
-+	CHECK_RET(hid_add_device, ithc->hid);
-+
-+	CHECK(ithc_debug_init, ithc);
-+
-+	pci_dbg(pci, "started\n");
-+	return 0;
-+}
-+
-+static int ithc_probe(struct pci_dev *pci, const struct pci_device_id *id) {
-+	pci_dbg(pci, "device probe\n");
-+	return ithc_start(pci);
-+}
-+
-+static void ithc_remove(struct pci_dev *pci) {
-+	pci_dbg(pci, "device remove\n");
-+	// all cleanup is handled by devres
-+}
-+
-+static int ithc_suspend(struct device *dev) {
-+	struct pci_dev *pci = to_pci_dev(dev);
-+	pci_dbg(pci, "pm suspend\n");
-+	devres_release_group(dev, ithc_start);
-+	return 0;
-+}
-+
-+static int ithc_resume(struct device *dev) {
-+	struct pci_dev *pci = to_pci_dev(dev);
-+	pci_dbg(pci, "pm resume\n");
-+	return ithc_start(pci);
-+}
-+
-+static int ithc_freeze(struct device *dev) {
-+	struct pci_dev *pci = to_pci_dev(dev);
-+	pci_dbg(pci, "pm freeze\n");
-+	devres_release_group(dev, ithc_start);
-+	return 0;
-+}
-+
-+static int ithc_thaw(struct device *dev) {
-+	struct pci_dev *pci = to_pci_dev(dev);
-+	pci_dbg(pci, "pm thaw\n");
-+	return ithc_start(pci);
-+}
-+
-+static int ithc_restore(struct device *dev) {
-+	struct pci_dev *pci = to_pci_dev(dev);
-+	pci_dbg(pci, "pm restore\n");
-+	return ithc_start(pci);
-+}
-+
-+static struct pci_driver ithc_driver = {
-+	.name = DEVNAME,
-+	.id_table = ithc_pci_tbl,
-+	.probe = ithc_probe,
-+	.remove = ithc_remove,
-+	.driver.pm = &(const struct dev_pm_ops) {
-+		.suspend = ithc_suspend,
-+		.resume = ithc_resume,
-+		.freeze = ithc_freeze,
-+		.thaw = ithc_thaw,
-+		.restore = ithc_restore,
-+	},
-+	//.dev_groups = ithc_attribute_groups, // could use this (since 5.14), however the attributes won't have valid values until config has been read anyway
-+};
-+
-+static int __init ithc_init(void) {
-+	return pci_register_driver(&ithc_driver);
-+}
-+
-+static void __exit ithc_exit(void) {
-+	pci_unregister_driver(&ithc_driver);
-+}
-+
-+module_init(ithc_init);
-+module_exit(ithc_exit);
-+
-diff --git a/drivers/hid/ithc/ithc-regs.c b/drivers/hid/ithc/ithc-regs.c
-new file mode 100644
-index 0000000000000..85d567b05761f
---- /dev/null
-+++ b/drivers/hid/ithc/ithc-regs.c
-@@ -0,0 +1,64 @@
-+#include "ithc.h"
-+
-+#define reg_num(r) (0x1fff & (u16)(__force u64)(r))
-+
-+void bitsl(__iomem u32 *reg, u32 mask, u32 val) {
-+	if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask);
-+	writel((readl(reg) & ~mask) | (val & mask), reg);
-+}
-+
-+void bitsb(__iomem u8 *reg, u8 mask, u8 val) {
-+	if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask);
-+	writeb((readb(reg) & ~mask) | (val & mask), reg);
-+}
-+
-+int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val) {
-+	pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val);
-+	u32 x;
-+	if (readl_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) {
-+		pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val);
-+		return -ETIMEDOUT;
-+	}
-+	pci_dbg(ithc->pci, "done waiting\n");
-+	return 0;
-+}
-+
-+int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val) {
-+	pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val);
-+	u8 x;
-+	if (readb_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) {
-+		pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val);
-+		return -ETIMEDOUT;
-+	}
-+	pci_dbg(ithc->pci, "done waiting\n");
-+	return 0;
-+}
-+
-+int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode) {
-+	pci_dbg(ithc->pci, "setting SPI speed to %i, mode %i\n", speed, mode);
-+	if (mode == 3) mode = 2;
-+	bitsl(&ithc->regs->spi_config,
-+		SPI_CONFIG_MODE(0xff) | SPI_CONFIG_SPEED(0xff) | SPI_CONFIG_UNKNOWN_18(0xff) | SPI_CONFIG_SPEED2(0xff),
-+		SPI_CONFIG_MODE(mode) | SPI_CONFIG_SPEED(speed) | SPI_CONFIG_UNKNOWN_18(0) | SPI_CONFIG_SPEED2(speed));
-+	return 0;
-+}
-+
-+int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data) {
-+	pci_dbg(ithc->pci, "SPI command %u, size %u, offset %u\n", command, size, offset);
-+	if (size > sizeof ithc->regs->spi_cmd.data) return -EINVAL;
-+	CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0);
-+	writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status);
-+	writeb(command, &ithc->regs->spi_cmd.code);
-+	writew(size, &ithc->regs->spi_cmd.size);
-+	writel(offset, &ithc->regs->spi_cmd.offset);
-+	u32 *p = data, n = (size + 3) / 4;
-+	for (u32 i = 0; i < n; i++) writel(p[i], &ithc->regs->spi_cmd.data[i]);
-+	bitsb_set(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_SEND);
-+	CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0);
-+	if ((readl(&ithc->regs->spi_cmd.status) & (SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR)) != SPI_CMD_STATUS_DONE) return -EIO;
-+	if (readw(&ithc->regs->spi_cmd.size) != size) return -EMSGSIZE;
-+	for (u32 i = 0; i < n; i++) p[i] = readl(&ithc->regs->spi_cmd.data[i]);
-+	writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status);
-+	return 0;
-+}
-+
-diff --git a/drivers/hid/ithc/ithc-regs.h b/drivers/hid/ithc/ithc-regs.h
-new file mode 100644
-index 0000000000000..1a96092ed7eed
---- /dev/null
-+++ b/drivers/hid/ithc/ithc-regs.h
-@@ -0,0 +1,186 @@
-+#define CONTROL_QUIESCE                     BIT(1)
-+#define CONTROL_IS_QUIESCED                 BIT(2)
-+#define CONTROL_NRESET                      BIT(3)
-+#define CONTROL_READY                       BIT(29)
-+
-+#define SPI_CONFIG_MODE(x)                  (((x) & 3) << 2)
-+#define SPI_CONFIG_SPEED(x)                 (((x) & 7) << 4)
-+#define SPI_CONFIG_UNKNOWN_18(x)            (((x) & 3) << 18)
-+#define SPI_CONFIG_SPEED2(x)                (((x) & 0xf) << 20) // high bit = high speed mode?
-+
-+#define ERROR_CONTROL_UNKNOWN_0             BIT(0)
-+#define ERROR_CONTROL_DISABLE_DMA           BIT(1) // clears DMA_RX_CONTROL_ENABLE when a DMA error occurs
-+#define ERROR_CONTROL_UNKNOWN_2             BIT(2)
-+#define ERROR_CONTROL_UNKNOWN_3             BIT(3)
-+#define ERROR_CONTROL_IRQ_DMA_UNKNOWN_9     BIT(9)
-+#define ERROR_CONTROL_IRQ_DMA_UNKNOWN_10    BIT(10)
-+#define ERROR_CONTROL_IRQ_DMA_UNKNOWN_12    BIT(12)
-+#define ERROR_CONTROL_IRQ_DMA_UNKNOWN_13    BIT(13)
-+#define ERROR_CONTROL_UNKNOWN_16(x)         (((x) & 0xff) << 16) // spi error code irq?
-+#define ERROR_CONTROL_SET_DMA_STATUS        BIT(29) // sets DMA_RX_STATUS_ERROR when a DMA error occurs
-+
-+#define ERROR_STATUS_DMA                    BIT(28)
-+#define ERROR_STATUS_SPI                    BIT(30)
-+
-+#define ERROR_FLAG_DMA_UNKNOWN_9            BIT(9)
-+#define ERROR_FLAG_DMA_UNKNOWN_10           BIT(10)
-+#define ERROR_FLAG_DMA_UNKNOWN_12           BIT(12) // set when we receive a truncated DMA message
-+#define ERROR_FLAG_DMA_UNKNOWN_13           BIT(13)
-+#define ERROR_FLAG_SPI_BUS_TURNAROUND       BIT(16)
-+#define ERROR_FLAG_SPI_RESPONSE_TIMEOUT     BIT(17)
-+#define ERROR_FLAG_SPI_INTRA_PACKET_TIMEOUT BIT(18)
-+#define ERROR_FLAG_SPI_INVALID_RESPONSE     BIT(19)
-+#define ERROR_FLAG_SPI_HS_RX_TIMEOUT        BIT(20)
-+#define ERROR_FLAG_SPI_TOUCH_IC_INIT        BIT(21)
-+
-+#define SPI_CMD_CONTROL_SEND                BIT(0) // cleared by device when sending is complete
-+#define SPI_CMD_CONTROL_IRQ                 BIT(1)
-+
-+#define SPI_CMD_CODE_READ                   4
-+#define SPI_CMD_CODE_WRITE                  6
-+
-+#define SPI_CMD_STATUS_DONE                 BIT(0)
-+#define SPI_CMD_STATUS_ERROR                BIT(1)
-+#define SPI_CMD_STATUS_BUSY                 BIT(3)
-+
-+#define DMA_TX_CONTROL_SEND                 BIT(0) // cleared by device when sending is complete
-+#define DMA_TX_CONTROL_IRQ                  BIT(3)
-+
-+#define DMA_TX_STATUS_DONE                  BIT(0)
-+#define DMA_TX_STATUS_ERROR                 BIT(1)
-+#define DMA_TX_STATUS_UNKNOWN_2             BIT(2)
-+#define DMA_TX_STATUS_UNKNOWN_3             BIT(3) // busy?
-+
-+#define DMA_RX_CONTROL_ENABLE               BIT(0)
-+#define DMA_RX_CONTROL_IRQ_UNKNOWN_1        BIT(1) // rx1 only?
-+#define DMA_RX_CONTROL_IRQ_ERROR            BIT(3) // rx1 only?
-+#define DMA_RX_CONTROL_IRQ_UNKNOWN_4        BIT(4) // rx0 only?
-+#define DMA_RX_CONTROL_IRQ_DATA             BIT(5)
-+
-+#define DMA_RX_CONTROL2_UNKNOWN_5           BIT(5) // rx0 only?
-+#define DMA_RX_CONTROL2_RESET               BIT(7) // resets ringbuffer indices
-+
-+#define DMA_RX_WRAP_FLAG                    BIT(7)
-+
-+#define DMA_RX_STATUS_ERROR                 BIT(3)
-+#define DMA_RX_STATUS_UNKNOWN_4             BIT(4) // set in rx0 after using CONTROL_NRESET when it becomes possible to read config (can take >100ms)
-+#define DMA_RX_STATUS_HAVE_DATA             BIT(5)
-+#define DMA_RX_STATUS_ENABLED               BIT(8)
-+
-+#define COUNTER_RESET                       BIT(31)
-+
-+struct ithc_registers {
-+	/* 0000 */ u32 _unknown_0000[1024];
-+	/* 1000 */ u32 _unknown_1000;
-+	/* 1004 */ u32 _unknown_1004;
-+	/* 1008 */ u32 control_bits;
-+	/* 100c */ u32 _unknown_100c;
-+	/* 1010 */ u32 spi_config;
-+	/* 1014 */ u32 _unknown_1014[3];
-+	/* 1020 */ u32 error_control;
-+	/* 1024 */ u32 error_status; // write to clear
-+	/* 1028 */ u32 error_flags; // write to clear
-+	/* 102c */ u32 _unknown_102c[5];
-+	struct {
-+		/* 1040 */ u8 control;
-+		/* 1041 */ u8 code;
-+		/* 1042 */ u16 size;
-+		/* 1044 */ u32 status; // write to clear
-+		/* 1048 */ u32 offset;
-+		/* 104c */ u32 data[16];
-+		/* 108c */ u32 _unknown_108c;
-+	} spi_cmd;
-+	struct {
-+		/* 1090 */ u64 addr; // cannot be written with writeq(), must use lo_hi_writeq()
-+		/* 1098 */ u8 control;
-+		/* 1099 */ u8 _unknown_1099;
-+		/* 109a */ u8 _unknown_109a;
-+		/* 109b */ u8 num_prds;
-+		/* 109c */ u32 status; // write to clear
-+	} dma_tx;
-+	/* 10a0 */ u32 _unknown_10a0[7];
-+	/* 10bc */ u32 state; // is 0xe0000402 (dev config val 0) after CONTROL_NRESET, 0xe0000461 after first touch, 0xe0000401 after DMA_RX_CODE_RESET
-+	/* 10c0 */ u32 _unknown_10c0[8];
-+	/* 10e0 */ u32 _unknown_10e0_counters[3];
-+	/* 10ec */ u32 _unknown_10ec[5];
-+	struct {
-+		/* 1100/1200 */ u64 addr; // cannot be written with writeq(), must use lo_hi_writeq()
-+		/* 1108/1208 */ u8 num_bufs;
-+		/* 1109/1209 */ u8 num_prds;
-+		/* 110a/120a */ u16 _unknown_110a;
-+		/* 110c/120c */ u8 control;
-+		/* 110d/120d */ u8 head;
-+		/* 110e/120e */ u8 tail;
-+		/* 110f/120f */ u8 control2;
-+		/* 1110/1210 */ u32 status; // write to clear
-+		/* 1114/1214 */ u32 _unknown_1114;
-+		/* 1118/1218 */ u64 _unknown_1118_guc_addr;
-+		/* 1120/1220 */ u32 _unknown_1120_guc;
-+		/* 1124/1224 */ u32 _unknown_1124_guc;
-+		/* 1128/1228 */ u32 unknown_init_bits; // bit 2 = guc related, bit 3 = rx1 related, bit 4 = guc related
-+		/* 112c/122c */ u32 _unknown_112c;
-+		/* 1130/1230 */ u64 _unknown_1130_guc_addr;
-+		/* 1138/1238 */ u32 _unknown_1138_guc;
-+		/* 113c/123c */ u32 _unknown_113c;
-+		/* 1140/1240 */ u32 _unknown_1140_guc;
-+		/* 1144/1244 */ u32 _unknown_1144[23];
-+		/* 11a0/12a0 */ u32 _unknown_11a0_counters[6];
-+		/* 11b8/12b8 */ u32 _unknown_11b8[18];
-+	} dma_rx[2];
-+};
-+static_assert(sizeof(struct ithc_registers) == 0x1300);
-+
-+#define DEVCFG_DMA_RX_SIZE(x)          ((((x) & 0x3fff) + 1) << 6)
-+#define DEVCFG_DMA_TX_SIZE(x)          (((((x) >> 14) & 0x3ff) + 1) << 6)
-+
-+#define DEVCFG_TOUCH_MASK              0x3f
-+#define DEVCFG_TOUCH_ENABLE            BIT(0)
-+#define DEVCFG_TOUCH_UNKNOWN_1         BIT(1)
-+#define DEVCFG_TOUCH_UNKNOWN_2         BIT(2)
-+#define DEVCFG_TOUCH_UNKNOWN_3         BIT(3)
-+#define DEVCFG_TOUCH_UNKNOWN_4         BIT(4)
-+#define DEVCFG_TOUCH_UNKNOWN_5         BIT(5)
-+#define DEVCFG_TOUCH_UNKNOWN_6         BIT(6)
-+
-+#define DEVCFG_DEVICE_ID_TIC           0x43495424 // "$TIC"
-+
-+#define DEVCFG_SPI_MAX_FREQ(x)         (((x) >> 1) & 0xf) // high bit = use high speed mode?
-+#define DEVCFG_SPI_MODE(x)             (((x) >> 6) & 3)
-+#define DEVCFG_SPI_UNKNOWN_8(x)        (((x) >> 8) & 0x3f)
-+#define DEVCFG_SPI_NEEDS_HEARTBEAT     BIT(20)
-+#define DEVCFG_SPI_HEARTBEAT_INTERVAL  (((x) >> 21) & 7)
-+#define DEVCFG_SPI_UNKNOWN_25          BIT(25)
-+#define DEVCFG_SPI_UNKNOWN_26          BIT(26)
-+#define DEVCFG_SPI_UNKNOWN_27          BIT(27)
-+#define DEVCFG_SPI_DELAY               (((x) >> 28) & 7)
-+#define DEVCFG_SPI_USE_EXT_READ_CFG    BIT(31)
-+
-+struct ithc_device_config {
-+	u32 _unknown_00;      // 00 = 0xe0000402 (0xe0000401 after DMA_RX_CODE_RESET)
-+	u32 _unknown_04;      // 04 = 0x00000000
-+	u32 dma_buf_sizes;    // 08 = 0x000a00ff
-+	u32 touch_cfg;        // 0c = 0x0000001c
-+	u32 _unknown_10;      // 10 = 0x0000001c
-+	u32 device_id;        // 14 = 0x43495424 = "$TIC"
-+	u32 spi_config;       // 18 = 0xfda00a2e
-+	u16 vendor_id;        // 1c = 0x045e = Microsoft Corp.
-+	u16 product_id;       // 1e = 0x0c1a
-+	u32 revision;         // 20 = 0x00000001
-+	u32 fw_version;       // 24 = 0x05008a8b = 5.0.138.139
-+	u32 _unknown_28;      // 28 = 0x00000000
-+	u32 fw_mode;          // 2c = 0x00000000
-+	u32 _unknown_30;      // 30 = 0x00000000
-+	u32 _unknown_34;      // 34 = 0x0404035e (u8,u8,u8,u8 = version?)
-+	u32 _unknown_38;      // 38 = 0x000001c0 (0x000001c1 after DMA_RX_CODE_RESET)
-+	u32 _unknown_3c;      // 3c = 0x00000002
-+};
-+
-+void bitsl(__iomem u32 *reg, u32 mask, u32 val);
-+void bitsb(__iomem u8 *reg, u8 mask, u8 val);
-+#define bitsl_set(reg, x) bitsl(reg, x, x)
-+#define bitsb_set(reg, x) bitsb(reg, x, x)
-+int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val);
-+int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val);
-+int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode);
-+int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data);
-+
-diff --git a/drivers/hid/ithc/ithc.h b/drivers/hid/ithc/ithc.h
-new file mode 100644
-index 0000000000000..6a9b0d480bc15
---- /dev/null
-+++ b/drivers/hid/ithc/ithc.h
-@@ -0,0 +1,60 @@
-+#include <linux/module.h>
-+#include <linux/input.h>
-+#include <linux/hid.h>
-+#include <linux/dma-mapping.h>
-+#include <linux/highmem.h>
-+#include <linux/pci.h>
-+#include <linux/io-64-nonatomic-lo-hi.h>
-+#include <linux/iopoll.h>
-+#include <linux/delay.h>
-+#include <linux/kthread.h>
-+#include <linux/miscdevice.h>
-+#include <linux/debugfs.h>
-+#include <linux/poll.h>
-+#include <linux/timer.h>
-+#include <linux/pm_qos.h>
-+
-+#define DEVNAME "ithc"
-+#define DEVFULLNAME "Intel Touch Host Controller"
-+
-+#undef pr_fmt
-+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-+
-+#define CHECK(fn, ...) ({ int r = fn(__VA_ARGS__); if (r < 0) pci_err(ithc->pci, "%s: %s failed with %i\n", __func__, #fn, r); r; })
-+#define CHECK_RET(...) do { int r = CHECK(__VA_ARGS__); if (r < 0) return r; } while(0)
-+
-+#define NUM_RX_BUF 16
-+
-+struct ithc;
-+
-+#include "ithc-regs.h"
-+#include "ithc-dma.h"
-+
-+struct ithc {
-+	char phys[32];
-+	struct pci_dev *pci;
-+	int irq;
-+	struct task_struct *poll_thread;
-+	struct pm_qos_request activity_qos;
-+	struct timer_list activity_timer;
-+
-+	struct hid_device *hid;
-+	bool hid_parse_done;
-+	wait_queue_head_t wait_hid_parse;
-+	wait_queue_head_t wait_hid_get_feature;
-+	struct mutex hid_get_feature_mutex;
-+	void *hid_get_feature_buf;
-+	size_t hid_get_feature_size;
-+
-+	struct ithc_registers __iomem *regs;
-+	struct ithc_registers *prev_regs; // for debugging
-+	struct ithc_device_config config;
-+	struct ithc_dma_rx dma_rx[2];
-+	struct ithc_dma_tx dma_tx;
-+};
-+
-+int ithc_reset(struct ithc *ithc);
-+void ithc_set_active(struct ithc *ithc);
-+int ithc_debug_init(struct ithc *ithc);
-+void ithc_log_regs(struct ithc *ithc);
-+
--- 
-2.42.0
-
-From 9f8d2a0f4012644f56ed8dfd322e575b57e1c208 Mon Sep 17 00:00:00 2001
-From: quo <tuple@list.ru>
-Date: Mon, 23 Oct 2023 10:15:29 +0200
-Subject: [PATCH] Update ITHC from module repo
-
-Changes:
- - Added some comments and fixed a few checkpatch warnings
- - Improved CPU latency QoS handling
- - Retry reading the report descriptor on error / timeout
-
-Based on https://github.com/quo/ithc-linux/commit/0b8b45d9775e756d6bd3a699bfaf9f5bd7b9b10b
-
-Signed-off-by: Dorian Stoll <dorian.stoll@tmsp.io>
-Patchset: ithc
----
- drivers/hid/ithc/ithc-debug.c |  94 +++++---
- drivers/hid/ithc/ithc-dma.c   | 231 +++++++++++++-----
- drivers/hid/ithc/ithc-dma.h   |   4 +-
- drivers/hid/ithc/ithc-main.c  | 430 ++++++++++++++++++++++++----------
- drivers/hid/ithc/ithc-regs.c  |  68 ++++--
- drivers/hid/ithc/ithc-regs.h  |  19 +-
- drivers/hid/ithc/ithc.h       |  13 +-
- 7 files changed, 623 insertions(+), 236 deletions(-)
-
-diff --git a/drivers/hid/ithc/ithc-debug.c b/drivers/hid/ithc/ithc-debug.c
-index 57bf125c45bd5..1f1f1e33f2e5a 100644
---- a/drivers/hid/ithc/ithc-debug.c
-+++ b/drivers/hid/ithc/ithc-debug.c
-@@ -1,10 +1,14 @@
-+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
-+
- #include "ithc.h"
- 
--void ithc_log_regs(struct ithc *ithc) {
--	if (!ithc->prev_regs) return;
--	u32 __iomem *cur = (__iomem void*)ithc->regs;
--	u32 *prev = (void*)ithc->prev_regs;
--	for (int i = 1024; i < sizeof *ithc->regs / 4; i++) {
-+void ithc_log_regs(struct ithc *ithc)
-+{
-+	if (!ithc->prev_regs)
-+		return;
-+	u32 __iomem *cur = (__iomem void *)ithc->regs;
-+	u32 *prev = (void *)ithc->prev_regs;
-+	for (int i = 1024; i < sizeof(*ithc->regs) / 4; i++) {
- 		u32 x = readl(cur + i);
- 		if (x != prev[i]) {
- 			pci_info(ithc->pci, "reg %04x: %08x -> %08x\n", i * 4, prev[i], x);
-@@ -13,55 +17,79 @@ void ithc_log_regs(struct ithc *ithc) {
- 	}
- }
- 
--static ssize_t ithc_debugfs_cmd_write(struct file *f, const char __user *buf, size_t len, loff_t *offset) {
-+static ssize_t ithc_debugfs_cmd_write(struct file *f, const char __user *buf, size_t len,
-+	loff_t *offset)
-+{
-+	// Debug commands consist of a single letter followed by a list of numbers (decimal or
-+	// hexadecimal, space-separated).
- 	struct ithc *ithc = file_inode(f)->i_private;
- 	char cmd[256];
--	if (!ithc || !ithc->pci) return -ENODEV;
--	if (!len) return -EINVAL;
--	if (len >= sizeof cmd) return -EINVAL;
--	if (copy_from_user(cmd, buf, len)) return -EFAULT;
-+	if (!ithc || !ithc->pci)
-+		return -ENODEV;
-+	if (!len)
-+		return -EINVAL;
-+	if (len >= sizeof(cmd))
-+		return -EINVAL;
-+	if (copy_from_user(cmd, buf, len))
-+		return -EFAULT;
- 	cmd[len] = 0;
--	if (cmd[len-1] == '\n') cmd[len-1] = 0;
-+	if (cmd[len-1] == '\n')
-+		cmd[len-1] = 0;
- 	pci_info(ithc->pci, "debug command: %s\n", cmd);
-+
-+	// Parse the list of arguments into a u32 array.
- 	u32 n = 0;
- 	const char *s = cmd + 1;
- 	u32 a[32];
- 	while (*s && *s != '\n') {
--		if (n >= ARRAY_SIZE(a)) return -EINVAL;
--		if (*s++ != ' ') return -EINVAL;
-+		if (n >= ARRAY_SIZE(a))
-+			return -EINVAL;
-+		if (*s++ != ' ')
-+			return -EINVAL;
- 		char *e;
- 		a[n++] = simple_strtoul(s, &e, 0);
--		if (e == s) return -EINVAL;
-+		if (e == s)
-+			return -EINVAL;
- 		s = e;
- 	}
- 	ithc_log_regs(ithc);
--	switch(cmd[0]) {
-+
-+	// Execute the command.
-+	switch (cmd[0]) {
- 	case 'x': // reset
- 		ithc_reset(ithc);
- 		break;
- 	case 'w': // write register: offset mask value
--		if (n != 3 || (a[0] & 3)) return -EINVAL;
--		pci_info(ithc->pci, "debug write 0x%04x = 0x%08x (mask 0x%08x)\n", a[0], a[2], a[1]);
-+		if (n != 3 || (a[0] & 3))
-+			return -EINVAL;
-+		pci_info(ithc->pci, "debug write 0x%04x = 0x%08x (mask 0x%08x)\n",
-+			a[0], a[2], a[1]);
- 		bitsl(((__iomem u32 *)ithc->regs) + a[0] / 4, a[1], a[2]);
- 		break;
- 	case 'r': // read register: offset
--		if (n != 1 || (a[0] & 3)) return -EINVAL;
--		pci_info(ithc->pci, "debug read 0x%04x = 0x%08x\n", a[0], readl(((__iomem u32 *)ithc->regs) + a[0] / 4));
-+		if (n != 1 || (a[0] & 3))
-+			return -EINVAL;
-+		pci_info(ithc->pci, "debug read 0x%04x = 0x%08x\n", a[0],
-+			readl(((__iomem u32 *)ithc->regs) + a[0] / 4));
- 		break;
- 	case 's': // spi command: cmd offset len data...
- 		// read config: s 4 0 64 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 		// set touch cfg: s 6 12 4 XX
--		if (n < 3 || a[2] > (n - 3) * 4) return -EINVAL;
-+		if (n < 3 || a[2] > (n - 3) * 4)
-+			return -EINVAL;
- 		pci_info(ithc->pci, "debug spi command %u with %u bytes of data\n", a[0], a[2]);
- 		if (!CHECK(ithc_spi_command, ithc, a[0], a[1], a[2], a + 3))
--			for (u32 i = 0; i < (a[2] + 3) / 4; i++) pci_info(ithc->pci, "resp %u = 0x%08x\n", i, a[3+i]);
-+			for (u32 i = 0; i < (a[2] + 3) / 4; i++)
-+				pci_info(ithc->pci, "resp %u = 0x%08x\n", i, a[3+i]);
- 		break;
- 	case 'd': // dma command: cmd len data...
- 		// get report descriptor: d 7 8 0 0
- 		// enable multitouch: d 3 2 0x0105
--		if (n < 2 || a[1] > (n - 2) * 4) return -EINVAL;
-+		if (n < 2 || a[1] > (n - 2) * 4)
-+			return -EINVAL;
- 		pci_info(ithc->pci, "debug dma command %u with %u bytes of data\n", a[0], a[1]);
--		if (ithc_dma_tx(ithc, a[0], a[1], a + 2)) pci_err(ithc->pci, "dma tx failed\n");
-+		if (ithc_dma_tx(ithc, a[0], a[1], a + 2))
-+			pci_err(ithc->pci, "dma tx failed\n");
- 		break;
- 	default:
- 		return -EINVAL;
-@@ -75,21 +103,27 @@ static const struct file_operations ithc_debugfops_cmd = {
- 	.write = ithc_debugfs_cmd_write,
- };
- 
--static void ithc_debugfs_devres_release(struct device *dev, void *res) {
-+static void ithc_debugfs_devres_release(struct device *dev, void *res)
-+{
- 	struct dentry **dbgm = res;
--	if (*dbgm) debugfs_remove_recursive(*dbgm);
-+	if (*dbgm)
-+		debugfs_remove_recursive(*dbgm);
- }
- 
--int ithc_debug_init(struct ithc *ithc) {
--	struct dentry **dbgm = devres_alloc(ithc_debugfs_devres_release, sizeof *dbgm, GFP_KERNEL);
--	if (!dbgm) return -ENOMEM;
-+int ithc_debug_init(struct ithc *ithc)
-+{
-+	struct dentry **dbgm = devres_alloc(ithc_debugfs_devres_release, sizeof(*dbgm), GFP_KERNEL);
-+	if (!dbgm)
-+		return -ENOMEM;
- 	devres_add(&ithc->pci->dev, dbgm);
- 	struct dentry *dbg = debugfs_create_dir(DEVNAME, NULL);
--	if (IS_ERR(dbg)) return PTR_ERR(dbg);
-+	if (IS_ERR(dbg))
-+		return PTR_ERR(dbg);
- 	*dbgm = dbg;
- 
- 	struct dentry *cmd = debugfs_create_file("cmd", 0220, dbg, ithc, &ithc_debugfops_cmd);
--	if (IS_ERR(cmd)) return PTR_ERR(cmd);
-+	if (IS_ERR(cmd))
-+		return PTR_ERR(cmd);
- 
- 	return 0;
- }
-diff --git a/drivers/hid/ithc/ithc-dma.c b/drivers/hid/ithc/ithc-dma.c
-index 7e89b3496918d..ffb8689b8a780 100644
---- a/drivers/hid/ithc/ithc-dma.c
-+++ b/drivers/hid/ithc/ithc-dma.c
-@@ -1,59 +1,91 @@
-+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
-+
- #include "ithc.h"
- 
--static int ithc_dma_prd_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *p, unsigned num_buffers, unsigned num_pages, enum dma_data_direction dir) {
-+// The THC uses tables of PRDs (physical region descriptors) to describe the TX and RX data buffers.
-+// Each PRD contains the DMA address and size of a block of DMA memory, and some status flags.
-+// This allows each data buffer to consist of multiple non-contiguous blocks of memory.
-+
-+static int ithc_dma_prd_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *p,
-+	unsigned int num_buffers, unsigned int num_pages, enum dma_data_direction dir)
-+{
- 	p->num_pages = num_pages;
- 	p->dir = dir;
-+	// We allocate enough space to have one PRD per data buffer page, however if the data
-+	// buffer pages happen to be contiguous, we can describe the buffer using fewer PRDs, so
-+	// some will remain unused (which is fine).
- 	p->size = round_up(num_buffers * num_pages * sizeof(struct ithc_phys_region_desc), PAGE_SIZE);
- 	p->addr = dmam_alloc_coherent(&ithc->pci->dev, p->size, &p->dma_addr, GFP_KERNEL);
--	if (!p->addr) return -ENOMEM;
--	if (p->dma_addr & (PAGE_SIZE - 1)) return -EFAULT;
-+	if (!p->addr)
-+		return -ENOMEM;
-+	if (p->dma_addr & (PAGE_SIZE - 1))
-+		return -EFAULT;
- 	return 0;
- }
- 
-+// Devres managed sg_table wrapper.
- struct ithc_sg_table {
- 	void *addr;
- 	struct sg_table sgt;
- 	enum dma_data_direction dir;
- };
--static void ithc_dma_sgtable_free(struct sg_table *sgt) {
-+static void ithc_dma_sgtable_free(struct sg_table *sgt)
-+{
- 	struct scatterlist *sg;
- 	int i;
- 	for_each_sgtable_sg(sgt, sg, i) {
- 		struct page *p = sg_page(sg);
--		if (p) __free_page(p);
-+		if (p)
-+			__free_page(p);
- 	}
- 	sg_free_table(sgt);
- }
--static void ithc_dma_data_devres_release(struct device *dev, void *res) {
-+static void ithc_dma_data_devres_release(struct device *dev, void *res)
-+{
- 	struct ithc_sg_table *sgt = res;
--	if (sgt->addr) vunmap(sgt->addr);
-+	if (sgt->addr)
-+		vunmap(sgt->addr);
- 	dma_unmap_sgtable(dev, &sgt->sgt, sgt->dir, 0);
- 	ithc_dma_sgtable_free(&sgt->sgt);
- }
- 
--static int ithc_dma_data_alloc(struct ithc* ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b) {
--	// We don't use dma_alloc_coherent for data buffers, because they don't have to be contiguous (we can use one PRD per page) or coherent (they are unidirectional).
--	// Instead we use an sg_table of individually allocated pages (5.13 has dma_alloc_noncontiguous for this, but we'd like to support 5.10 for now).
-+static int ithc_dma_data_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *prds,
-+	struct ithc_dma_data_buffer *b)
-+{
-+	// We don't use dma_alloc_coherent() for data buffers, because they don't have to be
-+	// coherent (they are unidirectional) or contiguous (we can use one PRD per page).
-+	// We could use dma_alloc_noncontiguous(), however this still always allocates a single
-+	// DMA mapped segment, which is more restrictive than what we need.
-+	// Instead we use an sg_table of individually allocated pages.
- 	struct page *pages[16];
--	if (prds->num_pages == 0 || prds->num_pages > ARRAY_SIZE(pages)) return -EINVAL;
-+	if (prds->num_pages == 0 || prds->num_pages > ARRAY_SIZE(pages))
-+		return -EINVAL;
- 	b->active_idx = -1;
--	struct ithc_sg_table *sgt = devres_alloc(ithc_dma_data_devres_release, sizeof *sgt, GFP_KERNEL);
--	if (!sgt) return -ENOMEM;
-+	struct ithc_sg_table *sgt = devres_alloc(
-+		ithc_dma_data_devres_release, sizeof(*sgt), GFP_KERNEL);
-+	if (!sgt)
-+		return -ENOMEM;
- 	sgt->dir = prds->dir;
-+
- 	if (!sg_alloc_table(&sgt->sgt, prds->num_pages, GFP_KERNEL)) {
- 		struct scatterlist *sg;
- 		int i;
- 		bool ok = true;
- 		for_each_sgtable_sg(&sgt->sgt, sg, i) {
--			struct page *p = pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); // don't need __GFP_DMA for PCI DMA
--			if (!p) { ok = false; break; }
-+			// NOTE: don't need __GFP_DMA for PCI DMA
-+			struct page *p = pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
-+			if (!p) {
-+				ok = false;
-+				break;
-+			}
- 			sg_set_page(sg, p, PAGE_SIZE, 0);
- 		}
- 		if (ok && !dma_map_sgtable(&ithc->pci->dev, &sgt->sgt, prds->dir, 0)) {
- 			devres_add(&ithc->pci->dev, sgt);
- 			b->sgt = &sgt->sgt;
- 			b->addr = sgt->addr = vmap(pages, prds->num_pages, 0, PAGE_KERNEL);
--			if (!b->addr) return -ENOMEM;
-+			if (!b->addr)
-+				return -ENOMEM;
- 			return 0;
- 		}
- 		ithc_dma_sgtable_free(&sgt->sgt);
-@@ -62,17 +94,29 @@ static int ithc_dma_data_alloc(struct ithc* ithc, struct ithc_dma_prd_buffer *pr
- 	return -ENOMEM;
- }
- 
--static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) {
-+static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffer *prds,
-+	struct ithc_dma_data_buffer *b, unsigned int idx)
-+{
-+	// Give a buffer to the THC.
- 	struct ithc_phys_region_desc *prd = prds->addr;
- 	prd += idx * prds->num_pages;
--	if (b->active_idx >= 0) { pci_err(ithc->pci, "buffer already active\n"); return -EINVAL; }
-+	if (b->active_idx >= 0) {
-+		pci_err(ithc->pci, "buffer already active\n");
-+		return -EINVAL;
-+	}
- 	b->active_idx = idx;
- 	if (prds->dir == DMA_TO_DEVICE) {
--		if (b->data_size > PAGE_SIZE) return -EINVAL;
-+		// TX buffer: Caller should have already filled the data buffer, so just fill
-+		// the PRD and flush.
-+		// (TODO: Support multi-page TX buffers. So far no device seems to use or need
-+		// these though.)
-+		if (b->data_size > PAGE_SIZE)
-+			return -EINVAL;
- 		prd->addr = sg_dma_address(b->sgt->sgl) >> 10;
- 		prd->size = b->data_size | PRD_FLAG_END;
- 		flush_kernel_vmap_range(b->addr, b->data_size);
- 	} else if (prds->dir == DMA_FROM_DEVICE) {
-+		// RX buffer: Reset PRDs.
- 		struct scatterlist *sg;
- 		int i;
- 		for_each_sgtable_dma_sg(b->sgt, sg, i) {
-@@ -87,21 +131,34 @@ static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffe
- 	return 0;
- }
- 
--static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) {
-+static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffer *prds,
-+	struct ithc_dma_data_buffer *b, unsigned int idx)
-+{
-+	// Take a buffer from the THC.
- 	struct ithc_phys_region_desc *prd = prds->addr;
- 	prd += idx * prds->num_pages;
--	if (b->active_idx != idx) { pci_err(ithc->pci, "wrong buffer index\n"); return -EINVAL; }
-+	// This is purely a sanity check. We don't strictly need the idx parameter for this
-+	// function, because it should always be the same as active_idx, unless we have a bug.
-+	if (b->active_idx != idx) {
-+		pci_err(ithc->pci, "wrong buffer index\n");
-+		return -EINVAL;
-+	}
- 	b->active_idx = -1;
- 	if (prds->dir == DMA_FROM_DEVICE) {
-+		// RX buffer: Calculate actual received data size from PRDs.
- 		dma_rmb(); // for the prds
- 		b->data_size = 0;
- 		struct scatterlist *sg;
- 		int i;
- 		for_each_sgtable_dma_sg(b->sgt, sg, i) {
--			unsigned size = prd->size;
-+			unsigned int size = prd->size;
- 			b->data_size += size & PRD_SIZE_MASK;
--			if (size & PRD_FLAG_END) break;
--			if ((size & PRD_SIZE_MASK) != sg_dma_len(sg)) { pci_err(ithc->pci, "truncated prd\n"); break; }
-+			if (size & PRD_FLAG_END)
-+				break;
-+			if ((size & PRD_SIZE_MASK) != sg_dma_len(sg)) {
-+				pci_err(ithc->pci, "truncated prd\n");
-+				break;
-+			}
- 			prd++;
- 		}
- 		invalidate_kernel_vmap_range(b->addr, b->data_size);
-@@ -110,93 +167,139 @@ static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffe
- 	return 0;
- }
- 
--int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname) {
-+int ithc_dma_rx_init(struct ithc *ithc, u8 channel)
-+{
- 	struct ithc_dma_rx *rx = &ithc->dma_rx[channel];
- 	mutex_init(&rx->mutex);
-+
-+	// Allocate buffers.
- 	u32 buf_size = DEVCFG_DMA_RX_SIZE(ithc->config.dma_buf_sizes);
--	unsigned num_pages = (buf_size + PAGE_SIZE - 1) / PAGE_SIZE;
--	pci_dbg(ithc->pci, "allocating rx buffers: num = %u, size = %u, pages = %u\n", NUM_RX_BUF, buf_size, num_pages);
-+	unsigned int num_pages = (buf_size + PAGE_SIZE - 1) / PAGE_SIZE;
-+	pci_dbg(ithc->pci, "allocating rx buffers: num = %u, size = %u, pages = %u\n",
-+		NUM_RX_BUF, buf_size, num_pages);
- 	CHECK_RET(ithc_dma_prd_alloc, ithc, &rx->prds, NUM_RX_BUF, num_pages, DMA_FROM_DEVICE);
--	for (unsigned i = 0; i < NUM_RX_BUF; i++)
-+	for (unsigned int i = 0; i < NUM_RX_BUF; i++)
- 		CHECK_RET(ithc_dma_data_alloc, ithc, &rx->prds, &rx->bufs[i]);
-+
-+	// Init registers.
- 	writeb(DMA_RX_CONTROL2_RESET, &ithc->regs->dma_rx[channel].control2);
- 	lo_hi_writeq(rx->prds.dma_addr, &ithc->regs->dma_rx[channel].addr);
- 	writeb(NUM_RX_BUF - 1, &ithc->regs->dma_rx[channel].num_bufs);
- 	writeb(num_pages - 1, &ithc->regs->dma_rx[channel].num_prds);
- 	u8 head = readb(&ithc->regs->dma_rx[channel].head);
--	if (head) { pci_err(ithc->pci, "head is nonzero (%u)\n", head); return -EIO; }
--	for (unsigned i = 0; i < NUM_RX_BUF; i++)
-+	if (head) {
-+		pci_err(ithc->pci, "head is nonzero (%u)\n", head);
-+		return -EIO;
-+	}
-+
-+	// Init buffers.
-+	for (unsigned int i = 0; i < NUM_RX_BUF; i++)
- 		CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, &rx->bufs[i], i);
-+
- 	writeb(head ^ DMA_RX_WRAP_FLAG, &ithc->regs->dma_rx[channel].tail);
- 	return 0;
- }
--void ithc_dma_rx_enable(struct ithc *ithc, u8 channel) {
--	bitsb_set(&ithc->regs->dma_rx[channel].control, DMA_RX_CONTROL_ENABLE | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_DATA);
--	CHECK(waitl, ithc, &ithc->regs->dma_rx[1].status, DMA_RX_STATUS_ENABLED, DMA_RX_STATUS_ENABLED);
-+
-+void ithc_dma_rx_enable(struct ithc *ithc, u8 channel)
-+{
-+	bitsb_set(&ithc->regs->dma_rx[channel].control,
-+		DMA_RX_CONTROL_ENABLE | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_DATA);
-+	CHECK(waitl, ithc, &ithc->regs->dma_rx[channel].status,
-+		DMA_RX_STATUS_ENABLED, DMA_RX_STATUS_ENABLED);
- }
- 
--int ithc_dma_tx_init(struct ithc *ithc) {
-+int ithc_dma_tx_init(struct ithc *ithc)
-+{
- 	struct ithc_dma_tx *tx = &ithc->dma_tx;
- 	mutex_init(&tx->mutex);
-+
-+	// Allocate buffers.
- 	tx->max_size = DEVCFG_DMA_TX_SIZE(ithc->config.dma_buf_sizes);
--	unsigned num_pages = (tx->max_size + PAGE_SIZE - 1) / PAGE_SIZE;
--	pci_dbg(ithc->pci, "allocating tx buffers: size = %u, pages = %u\n", tx->max_size, num_pages);
-+	unsigned int num_pages = (tx->max_size + PAGE_SIZE - 1) / PAGE_SIZE;
-+	pci_dbg(ithc->pci, "allocating tx buffers: size = %u, pages = %u\n",
-+		tx->max_size, num_pages);
- 	CHECK_RET(ithc_dma_prd_alloc, ithc, &tx->prds, 1, num_pages, DMA_TO_DEVICE);
- 	CHECK_RET(ithc_dma_data_alloc, ithc, &tx->prds, &tx->buf);
-+
-+	// Init registers.
- 	lo_hi_writeq(tx->prds.dma_addr, &ithc->regs->dma_tx.addr);
- 	writeb(num_pages - 1, &ithc->regs->dma_tx.num_prds);
-+
-+	// Init buffers.
- 	CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0);
- 	return 0;
- }
- 
--static int ithc_dma_rx_process_buf(struct ithc *ithc, struct ithc_dma_data_buffer *data, u8 channel, u8 buf) {
-+static int ithc_dma_rx_process_buf(struct ithc *ithc, struct ithc_dma_data_buffer *data,
-+	u8 channel, u8 buf)
-+{
- 	if (buf >= NUM_RX_BUF) {
- 		pci_err(ithc->pci, "invalid dma ringbuffer index\n");
- 		return -EINVAL;
- 	}
--	ithc_set_active(ithc);
- 	u32 len = data->data_size;
- 	struct ithc_dma_rx_header *hdr = data->addr;
- 	u8 *hiddata = (void *)(hdr + 1);
--	if (len >= sizeof *hdr && hdr->code == DMA_RX_CODE_RESET) {
-+	if (len >= sizeof(*hdr) && hdr->code == DMA_RX_CODE_RESET) {
-+		// The THC sends a reset request when we need to reinitialize the device.
-+		// This usually only happens if we send an invalid command or put the device
-+		// in a bad state.
- 		CHECK(ithc_reset, ithc);
--	} else if (len < sizeof *hdr || len != sizeof *hdr + hdr->data_size) {
-+	} else if (len < sizeof(*hdr) || len != sizeof(*hdr) + hdr->data_size) {
- 		if (hdr->code == DMA_RX_CODE_INPUT_REPORT) {
--			// When the CPU enters a low power state during DMA, we can get truncated messages.
--			// Typically this will be a single touch HID report that is only 1 byte, or a multitouch report that is 257 bytes.
-+			// When the CPU enters a low power state during DMA, we can get truncated
-+			// messages. For Surface devices, this will typically be a single touch
-+			// report that is only 1 byte, or a multitouch report that is 257 bytes.
- 			// See also ithc_set_active().
- 		} else {
--			pci_err(ithc->pci, "invalid dma rx data! channel %u, buffer %u, size %u, code %u, data size %u\n", channel, buf, len, hdr->code, hdr->data_size);
--			print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0);
-+			pci_err(ithc->pci, "invalid dma rx data! channel %u, buffer %u, size %u, code %u, data size %u\n",
-+				channel, buf, len, hdr->code, hdr->data_size);
-+			print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1,
-+				hdr, min(len, 0x400u), 0);
- 		}
- 	} else if (hdr->code == DMA_RX_CODE_REPORT_DESCRIPTOR && hdr->data_size > 8) {
-+		// Response to a 'get report descriptor' request.
-+		// The actual descriptor is preceded by 8 nul bytes.
- 		CHECK(hid_parse_report, ithc->hid, hiddata + 8, hdr->data_size - 8);
- 		WRITE_ONCE(ithc->hid_parse_done, true);
- 		wake_up(&ithc->wait_hid_parse);
- 	} else if (hdr->code == DMA_RX_CODE_INPUT_REPORT) {
-+		// Standard HID input report containing touch data.
- 		CHECK(hid_input_report, ithc->hid, HID_INPUT_REPORT, hiddata, hdr->data_size, 1);
- 	} else if (hdr->code == DMA_RX_CODE_FEATURE_REPORT) {
-+		// Response to a 'get feature' request.
- 		bool done = false;
- 		mutex_lock(&ithc->hid_get_feature_mutex);
- 		if (ithc->hid_get_feature_buf) {
--			if (hdr->data_size < ithc->hid_get_feature_size) ithc->hid_get_feature_size = hdr->data_size;
-+			if (hdr->data_size < ithc->hid_get_feature_size)
-+				ithc->hid_get_feature_size = hdr->data_size;
- 			memcpy(ithc->hid_get_feature_buf, hiddata, ithc->hid_get_feature_size);
- 			ithc->hid_get_feature_buf = NULL;
- 			done = true;
- 		}
- 		mutex_unlock(&ithc->hid_get_feature_mutex);
--		if (done) wake_up(&ithc->wait_hid_get_feature);
--		else CHECK(hid_input_report, ithc->hid, HID_FEATURE_REPORT, hiddata, hdr->data_size, 1);
-+		if (done) {
-+			wake_up(&ithc->wait_hid_get_feature);
-+		} else {
-+			// Received data without a matching request, or the request already
-+			// timed out. (XXX What's the correct thing to do here?)
-+			CHECK(hid_input_report, ithc->hid, HID_FEATURE_REPORT,
-+				hiddata, hdr->data_size, 1);
-+		}
- 	} else {
--		pci_dbg(ithc->pci, "unhandled dma rx data! channel %u, buffer %u, size %u, code %u\n", channel, buf, len, hdr->code);
--		print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0);
-+		pci_dbg(ithc->pci, "unhandled dma rx data! channel %u, buffer %u, size %u, code %u\n",
-+			channel, buf, len, hdr->code);
-+		print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1,
-+			hdr, min(len, 0x400u), 0);
- 	}
- 	return 0;
- }
- 
--static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) {
-+static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel)
-+{
-+	// Process all filled RX buffers from the ringbuffer.
- 	struct ithc_dma_rx *rx = &ithc->dma_rx[channel];
--	unsigned n = rx->num_received;
-+	unsigned int n = rx->num_received;
- 	u8 head_wrap = readb(&ithc->regs->dma_rx[channel].head);
- 	while (1) {
- 		u8 tail = n % NUM_RX_BUF;
-@@ -204,7 +307,8 @@ static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) {
- 		writeb(tail_wrap, &ithc->regs->dma_rx[channel].tail);
- 		// ringbuffer is full if tail_wrap == head_wrap
- 		// ringbuffer is empty if tail_wrap == head_wrap ^ WRAP_FLAG
--		if (tail_wrap == (head_wrap ^ DMA_RX_WRAP_FLAG)) return 0;
-+		if (tail_wrap == (head_wrap ^ DMA_RX_WRAP_FLAG))
-+			return 0;
- 
- 		// take the buffer that the device just filled
- 		struct ithc_dma_data_buffer *b = &rx->bufs[n % NUM_RX_BUF];
-@@ -218,7 +322,8 @@ static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) {
- 		CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, b, tail);
- 	}
- }
--int ithc_dma_rx(struct ithc *ithc, u8 channel) {
-+int ithc_dma_rx(struct ithc *ithc, u8 channel)
-+{
- 	struct ithc_dma_rx *rx = &ithc->dma_rx[channel];
- 	mutex_lock(&rx->mutex);
- 	int ret = ithc_dma_rx_unlocked(ithc, channel);
-@@ -226,14 +331,21 @@ int ithc_dma_rx(struct ithc *ithc, u8 channel) {
- 	return ret;
- }
- 
--static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) {
-+static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data)
-+{
-+	ithc_set_active(ithc, 100 * USEC_PER_MSEC);
-+
-+	// Send a single TX buffer to the THC.
- 	pci_dbg(ithc->pci, "dma tx command %u, size %u\n", cmdcode, datasize);
- 	struct ithc_dma_tx_header *hdr;
-+	// Data must be padded to next 4-byte boundary.
- 	u8 padding = datasize & 3 ? 4 - (datasize & 3) : 0;
--	unsigned fullsize = sizeof *hdr + datasize + padding;
--	if (fullsize > ithc->dma_tx.max_size || fullsize > PAGE_SIZE) return -EINVAL;
-+	unsigned int fullsize = sizeof(*hdr) + datasize + padding;
-+	if (fullsize > ithc->dma_tx.max_size || fullsize > PAGE_SIZE)
-+		return -EINVAL;
- 	CHECK_RET(ithc_dma_data_buffer_get, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0);
- 
-+	// Fill the TX buffer with header and data.
- 	ithc->dma_tx.buf.data_size = fullsize;
- 	hdr = ithc->dma_tx.buf.addr;
- 	hdr->code = cmdcode;
-@@ -241,15 +353,18 @@ static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, vo
- 	u8 *dest = (void *)(hdr + 1);
- 	memcpy(dest, data, datasize);
- 	dest += datasize;
--	for (u8 p = 0; p < padding; p++) *dest++ = 0;
-+	for (u8 p = 0; p < padding; p++)
-+		*dest++ = 0;
- 	CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0);
- 
-+	// Let the THC process the buffer.
- 	bitsb_set(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND);
- 	CHECK_RET(waitb, ithc, &ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND, 0);
- 	writel(DMA_TX_STATUS_DONE, &ithc->regs->dma_tx.status);
- 	return 0;
- }
--int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) {
-+int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data)
-+{
- 	mutex_lock(&ithc->dma_tx.mutex);
- 	int ret = ithc_dma_tx_unlocked(ithc, cmdcode, datasize, data);
- 	mutex_unlock(&ithc->dma_tx.mutex);
-diff --git a/drivers/hid/ithc/ithc-dma.h b/drivers/hid/ithc/ithc-dma.h
-index d9f2c19a13f3a..93652e4476bf8 100644
---- a/drivers/hid/ithc/ithc-dma.h
-+++ b/drivers/hid/ithc/ithc-dma.h
-@@ -1,3 +1,5 @@
-+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
-+
- #define PRD_SIZE_MASK            0xffffff
- #define PRD_FLAG_END             0x1000000
- #define PRD_FLAG_SUCCESS         0x2000000
-@@ -59,7 +61,7 @@ struct ithc_dma_rx {
- 	struct ithc_dma_data_buffer bufs[NUM_RX_BUF];
- };
- 
--int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname);
-+int ithc_dma_rx_init(struct ithc *ithc, u8 channel);
- void ithc_dma_rx_enable(struct ithc *ithc, u8 channel);
- int ithc_dma_tx_init(struct ithc *ithc);
- int ithc_dma_rx(struct ithc *ithc, u8 channel);
-diff --git a/drivers/hid/ithc/ithc-main.c b/drivers/hid/ithc/ithc-main.c
-index 09512b9cb4d31..87ed4aa70fda0 100644
---- a/drivers/hid/ithc/ithc-main.c
-+++ b/drivers/hid/ithc/ithc-main.c
-@@ -1,3 +1,5 @@
-+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
-+
- #include "ithc.h"
- 
- MODULE_DESCRIPTION("Intel Touch Host Controller driver");
-@@ -42,6 +44,9 @@ static const struct pci_device_id ithc_pci_tbl[] = {
- 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT2) },
- 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT1) },
- 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT2) },
-+	// XXX So far the THC seems to be the only Intel PCI device with PCI_CLASS_INPUT_PEN,
-+	// so instead of the device list we could just do:
-+	// { .vendor = PCI_VENDOR_ID_INTEL, .device = PCI_ANY_ID, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, .class = PCI_CLASS_INPUT_PEN, .class_mask = ~0, },
- 	{}
- };
- MODULE_DEVICE_TABLE(pci, ithc_pci_tbl);
-@@ -52,6 +57,7 @@ static bool ithc_use_polling = false;
- module_param_named(poll, ithc_use_polling, bool, 0);
- MODULE_PARM_DESC(poll, "Use polling instead of interrupts");
- 
-+// Since all known devices seem to use only channel 1, by default we disable channel 0.
- static bool ithc_use_rx0 = false;
- module_param_named(rx0, ithc_use_rx0, bool, 0);
- MODULE_PARM_DESC(rx0, "Use DMA RX channel 0");
-@@ -60,37 +66,56 @@ static bool ithc_use_rx1 = true;
- module_param_named(rx1, ithc_use_rx1, bool, 0);
- MODULE_PARM_DESC(rx1, "Use DMA RX channel 1");
- 
-+// Values below 250 seem to work well on the SP7+. If this is set too high, you may observe cursor stuttering.
-+static int ithc_dma_latency_us = 200;
-+module_param_named(dma_latency_us, ithc_dma_latency_us, int, 0);
-+MODULE_PARM_DESC(dma_latency_us, "Determines the CPU latency QoS value for DMA transfers (in microseconds), -1 to disable latency QoS");
-+
-+// Values above 1700 seem to work well on the SP7+. If this is set too low, you may observe cursor stuttering.
-+static unsigned int ithc_dma_early_us = 2000;
-+module_param_named(dma_early_us, ithc_dma_early_us, uint, 0);
-+MODULE_PARM_DESC(dma_early_us, "Determines how early the CPU latency QoS value is applied before the next expected IRQ (in microseconds)");
-+
- static bool ithc_log_regs_enabled = false;
- module_param_named(logregs, ithc_log_regs_enabled, bool, 0);
- MODULE_PARM_DESC(logregs, "Log changes in register values (for debugging)");
- 
- // Sysfs attributes
- 
--static bool ithc_is_config_valid(struct ithc *ithc) {
-+static bool ithc_is_config_valid(struct ithc *ithc)
-+{
- 	return ithc->config.device_id == DEVCFG_DEVICE_ID_TIC;
- }
- 
--static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, char *buf) {
-+static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, char *buf)
-+{
- 	struct ithc *ithc = dev_get_drvdata(dev);
--	if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV;
-+	if (!ithc || !ithc_is_config_valid(ithc))
-+		return -ENODEV;
- 	return sprintf(buf, "0x%04x", ithc->config.vendor_id);
- }
- static DEVICE_ATTR_RO(vendor);
--static ssize_t product_show(struct device *dev, struct device_attribute *attr, char *buf) {
-+static ssize_t product_show(struct device *dev, struct device_attribute *attr, char *buf)
-+{
- 	struct ithc *ithc = dev_get_drvdata(dev);
--	if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV;
-+	if (!ithc || !ithc_is_config_valid(ithc))
-+		return -ENODEV;
- 	return sprintf(buf, "0x%04x", ithc->config.product_id);
- }
- static DEVICE_ATTR_RO(product);
--static ssize_t revision_show(struct device *dev, struct device_attribute *attr, char *buf) {
-+static ssize_t revision_show(struct device *dev, struct device_attribute *attr, char *buf)
-+{
- 	struct ithc *ithc = dev_get_drvdata(dev);
--	if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV;
-+	if (!ithc || !ithc_is_config_valid(ithc))
-+		return -ENODEV;
- 	return sprintf(buf, "%u", ithc->config.revision);
- }
- static DEVICE_ATTR_RO(revision);
--static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf) {
-+static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf)
-+{
- 	struct ithc *ithc = dev_get_drvdata(dev);
--	if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV;
-+	if (!ithc || !ithc_is_config_valid(ithc))
-+		return -ENODEV;
- 	u32 v = ithc->config.fw_version;
- 	return sprintf(buf, "%i.%i.%i.%i", v >> 24, v >> 16 & 0xff, v >> 8 & 0xff, v & 0xff);
- }
-@@ -117,45 +142,75 @@ static void ithc_hid_stop(struct hid_device *hdev) { }
- static int ithc_hid_open(struct hid_device *hdev) { return 0; }
- static void ithc_hid_close(struct hid_device *hdev) { }
- 
--static int ithc_hid_parse(struct hid_device *hdev) {
-+static int ithc_hid_parse(struct hid_device *hdev)
-+{
- 	struct ithc *ithc = hdev->driver_data;
- 	u64 val = 0;
- 	WRITE_ONCE(ithc->hid_parse_done, false);
--	CHECK_RET(ithc_dma_tx, ithc, DMA_TX_CODE_GET_REPORT_DESCRIPTOR, sizeof val, &val);
--	if (!wait_event_timeout(ithc->wait_hid_parse, READ_ONCE(ithc->hid_parse_done), msecs_to_jiffies(1000))) return -ETIMEDOUT;
--	return 0;
-+	for (int retries = 0; ; retries++) {
-+		CHECK_RET(ithc_dma_tx, ithc, DMA_TX_CODE_GET_REPORT_DESCRIPTOR, sizeof(val), &val);
-+		if (wait_event_timeout(ithc->wait_hid_parse, READ_ONCE(ithc->hid_parse_done),
-+				msecs_to_jiffies(200)))
-+			return 0;
-+		if (retries > 5) {
-+			pci_err(ithc->pci, "failed to read report descriptor\n");
-+			return -ETIMEDOUT;
-+		}
-+		pci_warn(ithc->pci, "failed to read report descriptor, retrying\n");
-+	}
- }
- 
--static int ithc_hid_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, unsigned char rtype, int reqtype) {
-+static int ithc_hid_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf,
-+	size_t len, unsigned char rtype, int reqtype)
-+{
- 	struct ithc *ithc = hdev->driver_data;
--	if (!buf || !len) return -EINVAL;
-+	if (!buf || !len)
-+		return -EINVAL;
- 	u32 code;
--	if (rtype == HID_OUTPUT_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_OUTPUT_REPORT;
--	else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_SET_FEATURE;
--	else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_GET_REPORT) code = DMA_TX_CODE_GET_FEATURE;
--	else {
--		pci_err(ithc->pci, "unhandled hid request %i %i for report id %i\n", rtype, reqtype, reportnum);
-+	if (rtype == HID_OUTPUT_REPORT && reqtype == HID_REQ_SET_REPORT) {
-+		code = DMA_TX_CODE_OUTPUT_REPORT;
-+	} else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_SET_REPORT) {
-+		code = DMA_TX_CODE_SET_FEATURE;
-+	} else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_GET_REPORT) {
-+		code = DMA_TX_CODE_GET_FEATURE;
-+	} else {
-+		pci_err(ithc->pci, "unhandled hid request %i %i for report id %i\n",
-+			rtype, reqtype, reportnum);
- 		return -EINVAL;
- 	}
- 	buf[0] = reportnum;
-+
- 	if (reqtype == HID_REQ_GET_REPORT) {
-+		// Prepare for response.
- 		mutex_lock(&ithc->hid_get_feature_mutex);
- 		ithc->hid_get_feature_buf = buf;
- 		ithc->hid_get_feature_size = len;
- 		mutex_unlock(&ithc->hid_get_feature_mutex);
-+
-+		// Transmit 'get feature' request.
- 		int r = CHECK(ithc_dma_tx, ithc, code, 1, buf);
- 		if (!r) {
--			r = wait_event_interruptible_timeout(ithc->wait_hid_get_feature, !ithc->hid_get_feature_buf, msecs_to_jiffies(1000));
--			if (!r) r = -ETIMEDOUT;
--			else if (r < 0) r = -EINTR;
--			else r = 0;
-+			r = wait_event_interruptible_timeout(ithc->wait_hid_get_feature,
-+				!ithc->hid_get_feature_buf, msecs_to_jiffies(1000));
-+			if (!r)
-+				r = -ETIMEDOUT;
-+			else if (r < 0)
-+				r = -EINTR;
-+			else
-+				r = 0;
- 		}
-+
-+		// If everything went ok, the buffer has been filled with the response data.
-+		// Return the response size.
- 		mutex_lock(&ithc->hid_get_feature_mutex);
- 		ithc->hid_get_feature_buf = NULL;
--		if (!r) r = ithc->hid_get_feature_size;
-+		if (!r)
-+			r = ithc->hid_get_feature_size;
- 		mutex_unlock(&ithc->hid_get_feature_mutex);
- 		return r;
- 	}
-+
-+	// 'Set feature', or 'output report'. These don't have a response.
- 	CHECK_RET(ithc_dma_tx, ithc, code, len, buf);
- 	return 0;
- }
-@@ -169,17 +224,22 @@ static struct hid_ll_driver ithc_ll_driver = {
- 	.raw_request = ithc_hid_raw_request,
- };
- 
--static void ithc_hid_devres_release(struct device *dev, void *res) {
-+static void ithc_hid_devres_release(struct device *dev, void *res)
-+{
- 	struct hid_device **hidm = res;
--	if (*hidm) hid_destroy_device(*hidm);
-+	if (*hidm)
-+		hid_destroy_device(*hidm);
- }
- 
--static int ithc_hid_init(struct ithc *ithc) {
--	struct hid_device **hidm = devres_alloc(ithc_hid_devres_release, sizeof *hidm, GFP_KERNEL);
--	if (!hidm) return -ENOMEM;
-+static int ithc_hid_init(struct ithc *ithc)
-+{
-+	struct hid_device **hidm = devres_alloc(ithc_hid_devres_release, sizeof(*hidm), GFP_KERNEL);
-+	if (!hidm)
-+		return -ENOMEM;
- 	devres_add(&ithc->pci->dev, hidm);
- 	struct hid_device *hid = hid_allocate_device();
--	if (IS_ERR(hid)) return PTR_ERR(hid);
-+	if (IS_ERR(hid))
-+		return PTR_ERR(hid);
- 	*hidm = hid;
- 
- 	strscpy(hid->name, DEVFULLNAME, sizeof(hid->name));
-@@ -198,27 +258,45 @@ static int ithc_hid_init(struct ithc *ithc) {
- 
- // Interrupts/polling
- 
--static void ithc_activity_timer_callback(struct timer_list *t) {
--	struct ithc *ithc = container_of(t, struct ithc, activity_timer);
--	cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE);
-+static enum hrtimer_restart ithc_activity_start_timer_callback(struct hrtimer *t)
-+{
-+	struct ithc *ithc = container_of(t, struct ithc, activity_start_timer);
-+	ithc_set_active(ithc, ithc_dma_early_us * 2 + USEC_PER_MSEC);
-+	return HRTIMER_NORESTART;
- }
- 
--void ithc_set_active(struct ithc *ithc) {
--	// When CPU usage is very low, the CPU can enter various low power states (C2-C10).
--	// This disrupts DMA, causing truncated DMA messages. ERROR_FLAG_DMA_UNKNOWN_12 will be set when this happens.
--	// The amount of truncated messages can become very high, resulting in user-visible effects (laggy/stuttering cursor).
--	// To avoid this, we use a CPU latency QoS request to prevent the CPU from entering low power states during touch interactions.
--	cpu_latency_qos_update_request(&ithc->activity_qos, 0);
--	mod_timer(&ithc->activity_timer, jiffies + msecs_to_jiffies(1000));
--}
--
--static int ithc_set_device_enabled(struct ithc *ithc, bool enable) {
--	u32 x = ithc->config.touch_cfg = (ithc->config.touch_cfg & ~(u32)DEVCFG_TOUCH_MASK) | DEVCFG_TOUCH_UNKNOWN_2
--		| (enable ? DEVCFG_TOUCH_ENABLE | DEVCFG_TOUCH_UNKNOWN_3 | DEVCFG_TOUCH_UNKNOWN_4 : 0);
--	return ithc_spi_command(ithc, SPI_CMD_CODE_WRITE, offsetof(struct ithc_device_config, touch_cfg), sizeof x, &x);
-+static enum hrtimer_restart ithc_activity_end_timer_callback(struct hrtimer *t)
-+{
-+	struct ithc *ithc = container_of(t, struct ithc, activity_end_timer);
-+	cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE);
-+	return HRTIMER_NORESTART;
- }
- 
--static void ithc_disable_interrupts(struct ithc *ithc) {
-+void ithc_set_active(struct ithc *ithc, unsigned int duration_us)
-+{
-+	if (ithc_dma_latency_us < 0)
-+		return;
-+	// When CPU usage is very low, the CPU can enter various low power states (C2-C10).
-+	// This disrupts DMA, causing truncated DMA messages. ERROR_FLAG_DMA_RX_TIMEOUT will be
-+	// set when this happens. The amount of truncated messages can become very high, resulting
-+	// in user-visible effects (laggy/stuttering cursor). To avoid this, we use a CPU latency
-+	// QoS request to prevent the CPU from entering low power states during touch interactions.
-+	cpu_latency_qos_update_request(&ithc->activity_qos, ithc_dma_latency_us);
-+	hrtimer_start_range_ns(&ithc->activity_end_timer,
-+		ns_to_ktime(duration_us * NSEC_PER_USEC), duration_us * NSEC_PER_USEC, HRTIMER_MODE_REL);
-+}
-+
-+static int ithc_set_device_enabled(struct ithc *ithc, bool enable)
-+{
-+	u32 x = ithc->config.touch_cfg =
-+		(ithc->config.touch_cfg & ~(u32)DEVCFG_TOUCH_MASK) | DEVCFG_TOUCH_UNKNOWN_2 |
-+		(enable ? DEVCFG_TOUCH_ENABLE | DEVCFG_TOUCH_UNKNOWN_3 | DEVCFG_TOUCH_UNKNOWN_4 : 0);
-+	return ithc_spi_command(ithc, SPI_CMD_CODE_WRITE,
-+		offsetof(struct ithc_device_config, touch_cfg), sizeof(x), &x);
-+}
-+
-+static void ithc_disable_interrupts(struct ithc *ithc)
-+{
- 	writel(0, &ithc->regs->error_control);
- 	bitsb(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_IRQ, 0);
- 	bitsb(&ithc->regs->dma_rx[0].control, DMA_RX_CONTROL_IRQ_UNKNOWN_1 | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_UNKNOWN_4 | DMA_RX_CONTROL_IRQ_DATA, 0);
-@@ -226,43 +304,85 @@ static void ithc_disable_interrupts(struct ithc *ithc) {
- 	bitsb(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_IRQ, 0);
- }
- 
--static void ithc_clear_dma_rx_interrupts(struct ithc *ithc, unsigned channel) {
--	writel(DMA_RX_STATUS_ERROR | DMA_RX_STATUS_UNKNOWN_4 | DMA_RX_STATUS_HAVE_DATA, &ithc->regs->dma_rx[channel].status);
-+static void ithc_clear_dma_rx_interrupts(struct ithc *ithc, unsigned int channel)
-+{
-+	writel(DMA_RX_STATUS_ERROR | DMA_RX_STATUS_UNKNOWN_4 | DMA_RX_STATUS_HAVE_DATA,
-+		&ithc->regs->dma_rx[channel].status);
- }
- 
--static void ithc_clear_interrupts(struct ithc *ithc) {
-+static void ithc_clear_interrupts(struct ithc *ithc)
-+{
- 	writel(0xffffffff, &ithc->regs->error_flags);
- 	writel(ERROR_STATUS_DMA | ERROR_STATUS_SPI, &ithc->regs->error_status);
- 	writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status);
- 	ithc_clear_dma_rx_interrupts(ithc, 0);
- 	ithc_clear_dma_rx_interrupts(ithc, 1);
--	writel(DMA_TX_STATUS_DONE | DMA_TX_STATUS_ERROR | DMA_TX_STATUS_UNKNOWN_2, &ithc->regs->dma_tx.status);
-+	writel(DMA_TX_STATUS_DONE | DMA_TX_STATUS_ERROR | DMA_TX_STATUS_UNKNOWN_2,
-+		&ithc->regs->dma_tx.status);
- }
- 
--static void ithc_process(struct ithc *ithc) {
-+static void ithc_process(struct ithc *ithc)
-+{
- 	ithc_log_regs(ithc);
- 
--	// read and clear error bits
-+	bool rx0 = ithc_use_rx0 && (readl(&ithc->regs->dma_rx[0].status) & (DMA_RX_STATUS_ERROR | DMA_RX_STATUS_HAVE_DATA)) != 0;
-+	bool rx1 = ithc_use_rx1 && (readl(&ithc->regs->dma_rx[1].status) & (DMA_RX_STATUS_ERROR | DMA_RX_STATUS_HAVE_DATA)) != 0;
-+
-+	// Track time between DMA rx transfers, so we can try to predict when we need to enable CPU latency QoS for the next transfer
-+	ktime_t t = ktime_get();
-+	ktime_t dt = ktime_sub(t, ithc->last_rx_time);
-+	if (rx0 || rx1) {
-+		ithc->last_rx_time = t;
-+		if (dt > ms_to_ktime(100)) {
-+			ithc->cur_rx_seq_count = 0;
-+			ithc->cur_rx_seq_errors = 0;
-+		}
-+		ithc->cur_rx_seq_count++;
-+		if (!ithc_use_polling && ithc_dma_latency_us >= 0) {
-+			// Disable QoS, since the DMA transfer has completed (we re-enable it after a delay below)
-+			cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE);
-+			hrtimer_try_to_cancel(&ithc->activity_end_timer);
-+		}
-+	}
-+
-+	// Read and clear error bits
- 	u32 err = readl(&ithc->regs->error_flags);
- 	if (err) {
--		if (err & ~ERROR_FLAG_DMA_UNKNOWN_12) pci_err(ithc->pci, "error flags: 0x%08x\n", err);
- 		writel(err, &ithc->regs->error_flags);
-+		if (err & ~ERROR_FLAG_DMA_RX_TIMEOUT)
-+			pci_err(ithc->pci, "error flags: 0x%08x\n", err);
-+		if (err & ERROR_FLAG_DMA_RX_TIMEOUT) {
-+			// Only log an error if we see a significant number of these errors.
-+			ithc->cur_rx_seq_errors++;
-+			if (ithc->cur_rx_seq_errors && ithc->cur_rx_seq_errors % 50 == 0 && ithc->cur_rx_seq_errors > ithc->cur_rx_seq_count / 10)
-+				pci_err(ithc->pci, "High number of DMA RX timeouts/errors (%u/%u, dt=%lldus). Try adjusting dma_early_us and/or dma_latency_us.\n",
-+					ithc->cur_rx_seq_errors, ithc->cur_rx_seq_count, ktime_to_us(dt));
-+		}
- 	}
- 
--	// process DMA rx
-+	// Process DMA rx
- 	if (ithc_use_rx0) {
- 		ithc_clear_dma_rx_interrupts(ithc, 0);
--		ithc_dma_rx(ithc, 0);
-+		if (rx0)
-+			ithc_dma_rx(ithc, 0);
- 	}
- 	if (ithc_use_rx1) {
- 		ithc_clear_dma_rx_interrupts(ithc, 1);
--		ithc_dma_rx(ithc, 1);
-+		if (rx1)
-+			ithc_dma_rx(ithc, 1);
-+	}
-+
-+	// Start timer to re-enable QoS for next rx, but only if we've seen an ERROR_FLAG_DMA_RX_TIMEOUT
-+	if ((rx0 || rx1) && !ithc_use_polling && ithc_dma_latency_us >= 0 && ithc->cur_rx_seq_errors > 0) {
-+		ktime_t expires = ktime_add(t, ktime_sub_us(dt, ithc_dma_early_us));
-+		hrtimer_start_range_ns(&ithc->activity_start_timer, expires, 10 * NSEC_PER_USEC, HRTIMER_MODE_ABS);
- 	}
- 
- 	ithc_log_regs(ithc);
- }
- 
--static irqreturn_t ithc_interrupt_thread(int irq, void *arg) {
-+static irqreturn_t ithc_interrupt_thread(int irq, void *arg)
-+{
- 	struct ithc *ithc = arg;
- 	pci_dbg(ithc->pci, "IRQ! err=%08x/%08x/%08x, cmd=%02x/%08x, rx0=%02x/%08x, rx1=%02x/%08x, tx=%02x/%08x\n",
- 		readl(&ithc->regs->error_control), readl(&ithc->regs->error_status), readl(&ithc->regs->error_flags),
-@@ -274,14 +394,21 @@ static irqreturn_t ithc_interrupt_thread(int irq, void *arg) {
- 	return IRQ_HANDLED;
- }
- 
--static int ithc_poll_thread(void *arg) {
-+static int ithc_poll_thread(void *arg)
-+{
- 	struct ithc *ithc = arg;
--	unsigned sleep = 100;
-+	unsigned int sleep = 100;
- 	while (!kthread_should_stop()) {
- 		u32 n = ithc->dma_rx[1].num_received;
- 		ithc_process(ithc);
--		if (n != ithc->dma_rx[1].num_received) sleep = 20;
--		else sleep = min(200u, sleep + (sleep >> 4) + 1);
-+		// Decrease polling interval to 20ms if we received data, otherwise slowly
-+		// increase it up to 200ms.
-+		if (n != ithc->dma_rx[1].num_received) {
-+			ithc_set_active(ithc, 100 * USEC_PER_MSEC);
-+			sleep = 20;
-+		} else {
-+			sleep = min(200u, sleep + (sleep >> 4) + 1);
-+		}
- 		msleep_interruptible(sleep);
- 	}
- 	return 0;
-@@ -289,7 +416,8 @@ static int ithc_poll_thread(void *arg) {
- 
- // Device initialization and shutdown
- 
--static void ithc_disable(struct ithc *ithc) {
-+static void ithc_disable(struct ithc *ithc)
-+{
- 	bitsl_set(&ithc->regs->control_bits, CONTROL_QUIESCE);
- 	CHECK(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, CONTROL_IS_QUIESCED);
- 	bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0);
-@@ -301,81 +429,112 @@ static void ithc_disable(struct ithc *ithc) {
- 	ithc_clear_interrupts(ithc);
- }
- 
--static int ithc_init_device(struct ithc *ithc) {
-+static int ithc_init_device(struct ithc *ithc)
-+{
- 	ithc_log_regs(ithc);
- 	bool was_enabled = (readl(&ithc->regs->control_bits) & CONTROL_NRESET) != 0;
- 	ithc_disable(ithc);
- 	CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_READY, CONTROL_READY);
-+
-+	// Since we don't yet know which SPI config the device wants, use default speed and mode
-+	// initially for reading config data.
- 	ithc_set_spi_config(ithc, 10, 0);
--	bitsl_set(&ithc->regs->dma_rx[0].unknown_init_bits, 0x80000000); // seems to help with reading config
- 
--	if (was_enabled) if (msleep_interruptible(100)) return -EINTR;
-+	// Setting the following bit seems to make reading the config more reliable.
-+	bitsl_set(&ithc->regs->dma_rx[0].unknown_init_bits, 0x80000000);
-+
-+	// If the device was previously enabled, wait a bit to make sure it's fully shut down.
-+	if (was_enabled)
-+		if (msleep_interruptible(100))
-+			return -EINTR;
-+
-+	// Take the touch device out of reset.
- 	bitsl(&ithc->regs->control_bits, CONTROL_QUIESCE, 0);
- 	CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, 0);
- 	for (int retries = 0; ; retries++) {
- 		ithc_log_regs(ithc);
- 		bitsl_set(&ithc->regs->control_bits, CONTROL_NRESET);
--		if (!waitl(ithc, &ithc->regs->state, 0xf, 2)) break;
-+		if (!waitl(ithc, &ithc->regs->state, 0xf, 2))
-+			break;
- 		if (retries > 5) {
--			pci_err(ithc->pci, "too many retries, failed to reset device\n");
-+			pci_err(ithc->pci, "failed to reset device, state = 0x%08x\n", readl(&ithc->regs->state));
- 			return -ETIMEDOUT;
- 		}
--		pci_err(ithc->pci, "invalid state, retrying reset\n");
-+		pci_warn(ithc->pci, "invalid state, retrying reset\n");
- 		bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0);
--		if (msleep_interruptible(1000)) return -EINTR;
-+		if (msleep_interruptible(1000))
-+			return -EINTR;
- 	}
- 	ithc_log_regs(ithc);
- 
-+	// Waiting for the following status bit makes reading config much more reliable,
-+	// however the official driver does not seem to do this...
- 	CHECK(waitl, ithc, &ithc->regs->dma_rx[0].status, DMA_RX_STATUS_UNKNOWN_4, DMA_RX_STATUS_UNKNOWN_4);
- 
--	// read config
-+	// Read configuration data.
- 	for (int retries = 0; ; retries++) {
- 		ithc_log_regs(ithc);
--		memset(&ithc->config, 0, sizeof ithc->config);
--		CHECK_RET(ithc_spi_command, ithc, SPI_CMD_CODE_READ, 0, sizeof ithc->config, &ithc->config);
-+		memset(&ithc->config, 0, sizeof(ithc->config));
-+		CHECK_RET(ithc_spi_command, ithc, SPI_CMD_CODE_READ, 0, sizeof(ithc->config), &ithc->config);
- 		u32 *p = (void *)&ithc->config;
- 		pci_info(ithc->pci, "config: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
- 			p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
--		if (ithc_is_config_valid(ithc)) break;
-+		if (ithc_is_config_valid(ithc))
-+			break;
- 		if (retries > 10) {
--			pci_err(ithc->pci, "failed to read config, unknown device ID 0x%08x\n", ithc->config.device_id);
-+			pci_err(ithc->pci, "failed to read config, unknown device ID 0x%08x\n",
-+				ithc->config.device_id);
- 			return -EIO;
- 		}
--		pci_err(ithc->pci, "failed to read config, retrying\n");
--		if (msleep_interruptible(100)) return -EINTR;
-+		pci_warn(ithc->pci, "failed to read config, retrying\n");
-+		if (msleep_interruptible(100))
-+			return -EINTR;
- 	}
- 	ithc_log_regs(ithc);
- 
--	CHECK_RET(ithc_set_spi_config, ithc, DEVCFG_SPI_MAX_FREQ(ithc->config.spi_config), DEVCFG_SPI_MODE(ithc->config.spi_config));
-+	// Apply SPI config and enable touch device.
-+	CHECK_RET(ithc_set_spi_config, ithc,
-+		DEVCFG_SPI_MAX_FREQ(ithc->config.spi_config),
-+		DEVCFG_SPI_MODE(ithc->config.spi_config));
- 	CHECK_RET(ithc_set_device_enabled, ithc, true);
- 	ithc_log_regs(ithc);
- 	return 0;
- }
- 
--int ithc_reset(struct ithc *ithc) {
--	// FIXME This should probably do devres_release_group()+ithc_start(). But because this is called during DMA
--	// processing, that would have to be done asynchronously (schedule_work()?). And with extra locking?
-+int ithc_reset(struct ithc *ithc)
-+{
-+	// FIXME This should probably do devres_release_group()+ithc_start().
-+	// But because this is called during DMA processing, that would have to be done
-+	// asynchronously (schedule_work()?). And with extra locking?
- 	pci_err(ithc->pci, "reset\n");
- 	CHECK(ithc_init_device, ithc);
--	if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0);
--	if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1);
-+	if (ithc_use_rx0)
-+		ithc_dma_rx_enable(ithc, 0);
-+	if (ithc_use_rx1)
-+		ithc_dma_rx_enable(ithc, 1);
- 	ithc_log_regs(ithc);
- 	pci_dbg(ithc->pci, "reset completed\n");
- 	return 0;
- }
- 
--static void ithc_stop(void *res) {
-+static void ithc_stop(void *res)
-+{
- 	struct ithc *ithc = res;
- 	pci_dbg(ithc->pci, "stopping\n");
- 	ithc_log_regs(ithc);
--	if (ithc->poll_thread) CHECK(kthread_stop, ithc->poll_thread);
--	if (ithc->irq >= 0) disable_irq(ithc->irq);
-+
-+	if (ithc->poll_thread)
-+		CHECK(kthread_stop, ithc->poll_thread);
-+	if (ithc->irq >= 0)
-+		disable_irq(ithc->irq);
- 	CHECK(ithc_set_device_enabled, ithc, false);
- 	ithc_disable(ithc);
--	del_timer_sync(&ithc->activity_timer);
-+	hrtimer_cancel(&ithc->activity_start_timer);
-+	hrtimer_cancel(&ithc->activity_end_timer);
- 	cpu_latency_qos_remove_request(&ithc->activity_qos);
--	// clear dma config
--	for(unsigned i = 0; i < 2; i++) {
-+
-+	// Clear DMA config.
-+	for (unsigned int i = 0; i < 2; i++) {
- 		CHECK(waitl, ithc, &ithc->regs->dma_rx[i].status, DMA_RX_STATUS_ENABLED, 0);
- 		lo_hi_writeq(0, &ithc->regs->dma_rx[i].addr);
- 		writeb(0, &ithc->regs->dma_rx[i].num_bufs);
-@@ -383,35 +542,43 @@ static void ithc_stop(void *res) {
- 	}
- 	lo_hi_writeq(0, &ithc->regs->dma_tx.addr);
- 	writeb(0, &ithc->regs->dma_tx.num_prds);
-+
- 	ithc_log_regs(ithc);
- 	pci_dbg(ithc->pci, "stopped\n");
- }
- 
--static void ithc_clear_drvdata(void *res) {
-+static void ithc_clear_drvdata(void *res)
-+{
- 	struct pci_dev *pci = res;
- 	pci_set_drvdata(pci, NULL);
- }
- 
--static int ithc_start(struct pci_dev *pci) {
-+static int ithc_start(struct pci_dev *pci)
-+{
- 	pci_dbg(pci, "starting\n");
- 	if (pci_get_drvdata(pci)) {
- 		pci_err(pci, "device already initialized\n");
- 		return -EINVAL;
- 	}
--	if (!devres_open_group(&pci->dev, ithc_start, GFP_KERNEL)) return -ENOMEM;
-+	if (!devres_open_group(&pci->dev, ithc_start, GFP_KERNEL))
-+		return -ENOMEM;
- 
--	struct ithc *ithc = devm_kzalloc(&pci->dev, sizeof *ithc, GFP_KERNEL);
--	if (!ithc) return -ENOMEM;
-+	// Allocate/init main driver struct.
-+	struct ithc *ithc = devm_kzalloc(&pci->dev, sizeof(*ithc), GFP_KERNEL);
-+	if (!ithc)
-+		return -ENOMEM;
- 	ithc->irq = -1;
- 	ithc->pci = pci;
--	snprintf(ithc->phys, sizeof ithc->phys, "pci-%s/" DEVNAME, pci_name(pci));
-+	snprintf(ithc->phys, sizeof(ithc->phys), "pci-%s/" DEVNAME, pci_name(pci));
- 	init_waitqueue_head(&ithc->wait_hid_parse);
- 	init_waitqueue_head(&ithc->wait_hid_get_feature);
- 	mutex_init(&ithc->hid_get_feature_mutex);
- 	pci_set_drvdata(pci, ithc);
- 	CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_clear_drvdata, pci);
--	if (ithc_log_regs_enabled) ithc->prev_regs = devm_kzalloc(&pci->dev, sizeof *ithc->prev_regs, GFP_KERNEL);
-+	if (ithc_log_regs_enabled)
-+		ithc->prev_regs = devm_kzalloc(&pci->dev, sizeof(*ithc->prev_regs), GFP_KERNEL);
- 
-+	// PCI initialization.
- 	CHECK_RET(pcim_enable_device, pci);
- 	pci_set_master(pci);
- 	CHECK_RET(pcim_iomap_regions, pci, BIT(0), DEVNAME " regs");
-@@ -419,29 +586,39 @@ static int ithc_start(struct pci_dev *pci) {
- 	CHECK_RET(pci_set_power_state, pci, PCI_D0);
- 	ithc->regs = pcim_iomap_table(pci)[0];
- 
-+	// Allocate IRQ.
- 	if (!ithc_use_polling) {
- 		CHECK_RET(pci_alloc_irq_vectors, pci, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX);
- 		ithc->irq = CHECK(pci_irq_vector, pci, 0);
--		if (ithc->irq < 0) return ithc->irq;
-+		if (ithc->irq < 0)
-+			return ithc->irq;
- 	}
- 
-+	// Initialize THC and touch device.
- 	CHECK_RET(ithc_init_device, ithc);
- 	CHECK(devm_device_add_groups, &pci->dev, ithc_attribute_groups);
--	if (ithc_use_rx0) CHECK_RET(ithc_dma_rx_init, ithc, 0, ithc_use_rx1 ? DEVNAME "0" : DEVNAME);
--	if (ithc_use_rx1) CHECK_RET(ithc_dma_rx_init, ithc, 1, ithc_use_rx0 ? DEVNAME "1" : DEVNAME);
-+	if (ithc_use_rx0)
-+		CHECK_RET(ithc_dma_rx_init, ithc, 0);
-+	if (ithc_use_rx1)
-+		CHECK_RET(ithc_dma_rx_init, ithc, 1);
- 	CHECK_RET(ithc_dma_tx_init, ithc);
- 
--	CHECK_RET(ithc_hid_init, ithc);
--
- 	cpu_latency_qos_add_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE);
--	timer_setup(&ithc->activity_timer, ithc_activity_timer_callback, 0);
-+	hrtimer_init(&ithc->activity_start_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-+	ithc->activity_start_timer.function = ithc_activity_start_timer_callback;
-+	hrtimer_init(&ithc->activity_end_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-+	ithc->activity_end_timer.function = ithc_activity_end_timer_callback;
- 
--	// add ithc_stop callback AFTER setting up DMA buffers, so that polling/irqs/DMA are disabled BEFORE the buffers are freed
-+	// Add ithc_stop() callback AFTER setting up DMA buffers, so that polling/irqs/DMA are
-+	// disabled BEFORE the buffers are freed.
- 	CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_stop, ithc);
- 
-+	CHECK_RET(ithc_hid_init, ithc);
-+
-+	// Start polling/IRQ.
- 	if (ithc_use_polling) {
- 		pci_info(pci, "using polling instead of irq\n");
--		// use a thread instead of simple timer because we want to be able to sleep
-+		// Use a thread instead of simple timer because we want to be able to sleep.
- 		ithc->poll_thread = kthread_run(ithc_poll_thread, ithc, DEVNAME "poll");
- 		if (IS_ERR(ithc->poll_thread)) {
- 			int err = PTR_ERR(ithc->poll_thread);
-@@ -449,13 +626,17 @@ static int ithc_start(struct pci_dev *pci) {
- 			return err;
- 		}
- 	} else {
--		CHECK_RET(devm_request_threaded_irq, &pci->dev, ithc->irq, NULL, ithc_interrupt_thread, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, DEVNAME, ithc);
-+		CHECK_RET(devm_request_threaded_irq, &pci->dev, ithc->irq, NULL,
-+			ithc_interrupt_thread, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, DEVNAME, ithc);
- 	}
- 
--	if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0);
--	if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1);
-+	if (ithc_use_rx0)
-+		ithc_dma_rx_enable(ithc, 0);
-+	if (ithc_use_rx1)
-+		ithc_dma_rx_enable(ithc, 1);
- 
--	// hid_add_device can only be called after irq/polling is started and DMA is enabled, because it calls ithc_hid_parse which reads the report descriptor via DMA
-+	// hid_add_device() can only be called after irq/polling is started and DMA is enabled,
-+	// because it calls ithc_hid_parse() which reads the report descriptor via DMA.
- 	CHECK_RET(hid_add_device, ithc->hid);
- 
- 	CHECK(ithc_debug_init, ithc);
-@@ -464,43 +645,54 @@ static int ithc_start(struct pci_dev *pci) {
- 	return 0;
- }
- 
--static int ithc_probe(struct pci_dev *pci, const struct pci_device_id *id) {
-+static int ithc_probe(struct pci_dev *pci, const struct pci_device_id *id)
-+{
- 	pci_dbg(pci, "device probe\n");
- 	return ithc_start(pci);
- }
- 
--static void ithc_remove(struct pci_dev *pci) {
-+static void ithc_remove(struct pci_dev *pci)
-+{
- 	pci_dbg(pci, "device remove\n");
- 	// all cleanup is handled by devres
- }
- 
--static int ithc_suspend(struct device *dev) {
-+// For suspend/resume, we just deinitialize and reinitialize everything.
-+// TODO It might be cleaner to keep the HID device around, however we would then have to signal
-+// to userspace that the touch device has lost state and userspace needs to e.g. resend 'set
-+// feature' requests. Hidraw does not seem to have a facility to do that.
-+static int ithc_suspend(struct device *dev)
-+{
- 	struct pci_dev *pci = to_pci_dev(dev);
- 	pci_dbg(pci, "pm suspend\n");
- 	devres_release_group(dev, ithc_start);
- 	return 0;
- }
- 
--static int ithc_resume(struct device *dev) {
-+static int ithc_resume(struct device *dev)
-+{
- 	struct pci_dev *pci = to_pci_dev(dev);
- 	pci_dbg(pci, "pm resume\n");
- 	return ithc_start(pci);
- }
- 
--static int ithc_freeze(struct device *dev) {
-+static int ithc_freeze(struct device *dev)
-+{
- 	struct pci_dev *pci = to_pci_dev(dev);
- 	pci_dbg(pci, "pm freeze\n");
- 	devres_release_group(dev, ithc_start);
- 	return 0;
- }
- 
--static int ithc_thaw(struct device *dev) {
-+static int ithc_thaw(struct device *dev)
-+{
- 	struct pci_dev *pci = to_pci_dev(dev);
- 	pci_dbg(pci, "pm thaw\n");
- 	return ithc_start(pci);
- }
- 
--static int ithc_restore(struct device *dev) {
-+static int ithc_restore(struct device *dev)
-+{
- 	struct pci_dev *pci = to_pci_dev(dev);
- 	pci_dbg(pci, "pm restore\n");
- 	return ithc_start(pci);
-@@ -521,11 +713,13 @@ static struct pci_driver ithc_driver = {
- 	//.dev_groups = ithc_attribute_groups, // could use this (since 5.14), however the attributes won't have valid values until config has been read anyway
- };
- 
--static int __init ithc_init(void) {
-+static int __init ithc_init(void)
-+{
- 	return pci_register_driver(&ithc_driver);
- }
- 
--static void __exit ithc_exit(void) {
-+static void __exit ithc_exit(void)
-+{
- 	pci_unregister_driver(&ithc_driver);
- }
- 
-diff --git a/drivers/hid/ithc/ithc-regs.c b/drivers/hid/ithc/ithc-regs.c
-index 85d567b05761f..e058721886e37 100644
---- a/drivers/hid/ithc/ithc-regs.c
-+++ b/drivers/hid/ithc/ithc-regs.c
-@@ -1,63 +1,95 @@
-+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
-+
- #include "ithc.h"
- 
- #define reg_num(r) (0x1fff & (u16)(__force u64)(r))
- 
--void bitsl(__iomem u32 *reg, u32 mask, u32 val) {
--	if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask);
-+void bitsl(__iomem u32 *reg, u32 mask, u32 val)
-+{
-+	if (val & ~mask)
-+		pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n",
-+			reg_num(reg), val, mask);
- 	writel((readl(reg) & ~mask) | (val & mask), reg);
- }
- 
--void bitsb(__iomem u8 *reg, u8 mask, u8 val) {
--	if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask);
-+void bitsb(__iomem u8 *reg, u8 mask, u8 val)
-+{
-+	if (val & ~mask)
-+		pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n",
-+			reg_num(reg), val, mask);
- 	writeb((readb(reg) & ~mask) | (val & mask), reg);
- }
- 
--int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val) {
--	pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val);
-+int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val)
-+{
-+	pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%08x val 0x%08x\n",
-+		reg_num(reg), mask, val);
- 	u32 x;
- 	if (readl_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) {
--		pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val);
-+		pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%08x val 0x%08x\n",
-+			reg_num(reg), mask, val);
- 		return -ETIMEDOUT;
- 	}
- 	pci_dbg(ithc->pci, "done waiting\n");
- 	return 0;
- }
- 
--int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val) {
--	pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val);
-+int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val)
-+{
-+	pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%02x val 0x%02x\n",
-+		reg_num(reg), mask, val);
- 	u8 x;
- 	if (readb_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) {
--		pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val);
-+		pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%02x val 0x%02x\n",
-+			reg_num(reg), mask, val);
- 		return -ETIMEDOUT;
- 	}
- 	pci_dbg(ithc->pci, "done waiting\n");
- 	return 0;
- }
- 
--int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode) {
-+int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode)
-+{
- 	pci_dbg(ithc->pci, "setting SPI speed to %i, mode %i\n", speed, mode);
--	if (mode == 3) mode = 2;
-+	if (mode == 3)
-+		mode = 2;
- 	bitsl(&ithc->regs->spi_config,
- 		SPI_CONFIG_MODE(0xff) | SPI_CONFIG_SPEED(0xff) | SPI_CONFIG_UNKNOWN_18(0xff) | SPI_CONFIG_SPEED2(0xff),
- 		SPI_CONFIG_MODE(mode) | SPI_CONFIG_SPEED(speed) | SPI_CONFIG_UNKNOWN_18(0) | SPI_CONFIG_SPEED2(speed));
- 	return 0;
- }
- 
--int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data) {
-+int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data)
-+{
- 	pci_dbg(ithc->pci, "SPI command %u, size %u, offset %u\n", command, size, offset);
--	if (size > sizeof ithc->regs->spi_cmd.data) return -EINVAL;
-+	if (size > sizeof(ithc->regs->spi_cmd.data))
-+		return -EINVAL;
-+
-+	// Wait if the device is still busy.
- 	CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0);
-+	// Clear result flags.
- 	writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status);
-+
-+	// Init SPI command data.
- 	writeb(command, &ithc->regs->spi_cmd.code);
- 	writew(size, &ithc->regs->spi_cmd.size);
- 	writel(offset, &ithc->regs->spi_cmd.offset);
- 	u32 *p = data, n = (size + 3) / 4;
--	for (u32 i = 0; i < n; i++) writel(p[i], &ithc->regs->spi_cmd.data[i]);
-+	for (u32 i = 0; i < n; i++)
-+		writel(p[i], &ithc->regs->spi_cmd.data[i]);
-+
-+	// Start transmission.
- 	bitsb_set(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_SEND);
- 	CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0);
--	if ((readl(&ithc->regs->spi_cmd.status) & (SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR)) != SPI_CMD_STATUS_DONE) return -EIO;
--	if (readw(&ithc->regs->spi_cmd.size) != size) return -EMSGSIZE;
--	for (u32 i = 0; i < n; i++) p[i] = readl(&ithc->regs->spi_cmd.data[i]);
-+
-+	// Read response.
-+	if ((readl(&ithc->regs->spi_cmd.status) & (SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR)) != SPI_CMD_STATUS_DONE)
-+		return -EIO;
-+	if (readw(&ithc->regs->spi_cmd.size) != size)
-+		return -EMSGSIZE;
-+	for (u32 i = 0; i < n; i++)
-+		p[i] = readl(&ithc->regs->spi_cmd.data[i]);
-+
- 	writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status);
- 	return 0;
- }
-diff --git a/drivers/hid/ithc/ithc-regs.h b/drivers/hid/ithc/ithc-regs.h
-index 1a96092ed7eed..d4007d9e2bacc 100644
---- a/drivers/hid/ithc/ithc-regs.h
-+++ b/drivers/hid/ithc/ithc-regs.h
-@@ -1,3 +1,5 @@
-+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
-+
- #define CONTROL_QUIESCE                     BIT(1)
- #define CONTROL_IS_QUIESCED                 BIT(2)
- #define CONTROL_NRESET                      BIT(3)
-@@ -24,7 +26,7 @@
- 
- #define ERROR_FLAG_DMA_UNKNOWN_9            BIT(9)
- #define ERROR_FLAG_DMA_UNKNOWN_10           BIT(10)
--#define ERROR_FLAG_DMA_UNKNOWN_12           BIT(12) // set when we receive a truncated DMA message
-+#define ERROR_FLAG_DMA_RX_TIMEOUT           BIT(12) // set when we receive a truncated DMA message
- #define ERROR_FLAG_DMA_UNKNOWN_13           BIT(13)
- #define ERROR_FLAG_SPI_BUS_TURNAROUND       BIT(16)
- #define ERROR_FLAG_SPI_RESPONSE_TIMEOUT     BIT(17)
-@@ -67,6 +69,7 @@
- #define DMA_RX_STATUS_HAVE_DATA             BIT(5)
- #define DMA_RX_STATUS_ENABLED               BIT(8)
- 
-+// COUNTER_RESET can be written to counter registers to reset them to zero. However, in some cases this can mess up the THC.
- #define COUNTER_RESET                       BIT(31)
- 
- struct ithc_registers {
-@@ -147,15 +150,15 @@ static_assert(sizeof(struct ithc_registers) == 0x1300);
- #define DEVCFG_SPI_MAX_FREQ(x)         (((x) >> 1) & 0xf) // high bit = use high speed mode?
- #define DEVCFG_SPI_MODE(x)             (((x) >> 6) & 3)
- #define DEVCFG_SPI_UNKNOWN_8(x)        (((x) >> 8) & 0x3f)
--#define DEVCFG_SPI_NEEDS_HEARTBEAT     BIT(20)
--#define DEVCFG_SPI_HEARTBEAT_INTERVAL  (((x) >> 21) & 7)
-+#define DEVCFG_SPI_NEEDS_HEARTBEAT     BIT(20) // TODO implement heartbeat
-+#define DEVCFG_SPI_HEARTBEAT_INTERVAL(x) (((x) >> 21) & 7)
- #define DEVCFG_SPI_UNKNOWN_25          BIT(25)
- #define DEVCFG_SPI_UNKNOWN_26          BIT(26)
- #define DEVCFG_SPI_UNKNOWN_27          BIT(27)
--#define DEVCFG_SPI_DELAY               (((x) >> 28) & 7)
--#define DEVCFG_SPI_USE_EXT_READ_CFG    BIT(31)
-+#define DEVCFG_SPI_DELAY(x)            (((x) >> 28) & 7) // TODO use this
-+#define DEVCFG_SPI_USE_EXT_READ_CFG    BIT(31) // TODO use this?
- 
--struct ithc_device_config {
-+struct ithc_device_config { // (Example values are from an SP7+.)
- 	u32 _unknown_00;      // 00 = 0xe0000402 (0xe0000401 after DMA_RX_CODE_RESET)
- 	u32 _unknown_04;      // 04 = 0x00000000
- 	u32 dma_buf_sizes;    // 08 = 0x000a00ff
-@@ -166,9 +169,9 @@ struct ithc_device_config {
- 	u16 vendor_id;        // 1c = 0x045e = Microsoft Corp.
- 	u16 product_id;       // 1e = 0x0c1a
- 	u32 revision;         // 20 = 0x00000001
--	u32 fw_version;       // 24 = 0x05008a8b = 5.0.138.139
-+	u32 fw_version;       // 24 = 0x05008a8b = 5.0.138.139 (this value looks more random on newer devices)
- 	u32 _unknown_28;      // 28 = 0x00000000
--	u32 fw_mode;          // 2c = 0x00000000
-+	u32 fw_mode;          // 2c = 0x00000000 (for fw update?)
- 	u32 _unknown_30;      // 30 = 0x00000000
- 	u32 _unknown_34;      // 34 = 0x0404035e (u8,u8,u8,u8 = version?)
- 	u32 _unknown_38;      // 38 = 0x000001c0 (0x000001c1 after DMA_RX_CODE_RESET)
-diff --git a/drivers/hid/ithc/ithc.h b/drivers/hid/ithc/ithc.h
-index 6a9b0d480bc15..028e55a4ec53e 100644
---- a/drivers/hid/ithc/ithc.h
-+++ b/drivers/hid/ithc/ithc.h
-@@ -1,3 +1,5 @@
-+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
-+
- #include <linux/module.h>
- #include <linux/input.h>
- #include <linux/hid.h>
-@@ -21,7 +23,7 @@
- #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
- 
- #define CHECK(fn, ...) ({ int r = fn(__VA_ARGS__); if (r < 0) pci_err(ithc->pci, "%s: %s failed with %i\n", __func__, #fn, r); r; })
--#define CHECK_RET(...) do { int r = CHECK(__VA_ARGS__); if (r < 0) return r; } while(0)
-+#define CHECK_RET(...) do { int r = CHECK(__VA_ARGS__); if (r < 0) return r; } while (0)
- 
- #define NUM_RX_BUF 16
- 
-@@ -35,8 +37,13 @@ struct ithc {
- 	struct pci_dev *pci;
- 	int irq;
- 	struct task_struct *poll_thread;
-+
- 	struct pm_qos_request activity_qos;
--	struct timer_list activity_timer;
-+	struct hrtimer activity_start_timer;
-+	struct hrtimer activity_end_timer;
-+	ktime_t last_rx_time;
-+	unsigned int cur_rx_seq_count;
-+	unsigned int cur_rx_seq_errors;
- 
- 	struct hid_device *hid;
- 	bool hid_parse_done;
-@@ -54,7 +61,7 @@ struct ithc {
- };
- 
- int ithc_reset(struct ithc *ithc);
--void ithc_set_active(struct ithc *ithc);
-+void ithc_set_active(struct ithc *ithc, unsigned int duration_us);
- int ithc_debug_init(struct ithc *ithc);
- void ithc_log_regs(struct ithc *ithc);
- 
--- 
-2.42.0
-
-From c4cbbcd24ea10e6558753174ae6dabcc9b54e438 Mon Sep 17 00:00:00 2001
-From: Maximilian Luz <luzmaximilian@gmail.com>
-Date: Sun, 22 Oct 2023 14:57:11 +0200
-Subject: [PATCH] platform/surface: aggregator_registry: Add support for
- Surface Laptop Go 3
-
-Add SAM client device nodes for the Surface Laptop Go 3. It seems to use
-the same SAM client devices as the Surface Laptop Go 1 and 2, so re-use
-their node group.
-
-Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
-Patchset: surface-sam
----
- drivers/platform/surface/surface_aggregator_registry.c | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c
-index 0fe5be5396525..0d8c8395c5886 100644
---- a/drivers/platform/surface/surface_aggregator_registry.c
-+++ b/drivers/platform/surface/surface_aggregator_registry.c
-@@ -367,6 +367,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = {
- 	/* Surface Laptop Go 2 */
- 	{ "MSHW0290", (unsigned long)ssam_node_group_slg1 },
- 
-+	/* Surface Laptop Go 3 */
-+	{ "MSHW0440", (unsigned long)ssam_node_group_slg1 },
-+
- 	/* Surface Laptop Studio */
- 	{ "MSHW0123", (unsigned long)ssam_node_group_sls },
- 
--- 
-2.42.0
-
-From 0bb0adce3efad7a43fc3811f6cc24148c8c75253 Mon Sep 17 00:00:00 2001
-From: Maximilian Luz <luzmaximilian@gmail.com>
-Date: Mon, 20 Nov 2023 19:47:00 +0100
-Subject: [PATCH] platform/surface: aggregator_registry: Add support for
- Surface Laptop Studio 2
-
-Add SAM client device nodes for the Surface Laptop Studio 2 (SLS2). The
-SLS2 is quite similar to the SLS1, but it does not provide the touchpad
-as a SAM-HID device. Therefore, add a new node group for the SLS2 and
-update the comments accordingly
-
-Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
-Patchset: surface-sam
----
- .../surface/surface_aggregator_registry.c     | 25 ++++++++++++++++---
- 1 file changed, 21 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c
-index 0d8c8395c5886..530db4db71aba 100644
---- a/drivers/platform/surface/surface_aggregator_registry.c
-+++ b/drivers/platform/surface/surface_aggregator_registry.c
-@@ -247,8 +247,8 @@ static const struct software_node *ssam_node_group_sl5[] = {
- 	NULL,
- };
- 
--/* Devices for Surface Laptop Studio. */
--static const struct software_node *ssam_node_group_sls[] = {
-+/* Devices for Surface Laptop Studio 1. */
-+static const struct software_node *ssam_node_group_sls1[] = {
- 	&ssam_node_root,
- 	&ssam_node_bat_ac,
- 	&ssam_node_bat_main,
-@@ -263,6 +263,20 @@ static const struct software_node *ssam_node_group_sls[] = {
- 	NULL,
- };
- 
-+/* Devices for Surface Laptop Studio 2. */
-+static const struct software_node *ssam_node_group_sls2[] = {
-+	&ssam_node_root,
-+	&ssam_node_bat_ac,
-+	&ssam_node_bat_main,
-+	&ssam_node_tmp_pprof,
-+	&ssam_node_pos_tablet_switch,
-+	&ssam_node_hid_sam_keyboard,
-+	&ssam_node_hid_sam_penstash,
-+	&ssam_node_hid_sam_sensors,
-+	&ssam_node_hid_sam_ucm_ucsi,
-+	NULL,
-+};
-+
- /* Devices for Surface Laptop Go. */
- static const struct software_node *ssam_node_group_slg1[] = {
- 	&ssam_node_root,
-@@ -370,8 +384,11 @@ static const struct acpi_device_id ssam_platform_hub_match[] = {
- 	/* Surface Laptop Go 3 */
- 	{ "MSHW0440", (unsigned long)ssam_node_group_slg1 },
- 
--	/* Surface Laptop Studio */
--	{ "MSHW0123", (unsigned long)ssam_node_group_sls },
-+	/* Surface Laptop Studio 1 */
-+	{ "MSHW0123", (unsigned long)ssam_node_group_sls1 },
-+
-+	/* Surface Laptop Studio 2 */
-+	{ "MSHW0360", (unsigned long)ssam_node_group_sls2 },
- 
- 	{ },
- };
--- 
-2.42.0
-
-From 3772b511c710c369b737fd0a111fbda63b028f1d Mon Sep 17 00:00:00 2001
-From: Maximilian Luz <luzmaximilian@gmail.com>
-Date: Sat, 25 Jul 2020 17:19:53 +0200
-Subject: [PATCH] i2c: acpi: Implement RawBytes read access
-
-Microsoft Surface Pro 4 and Book 1 devices access the MSHW0030 I2C
-device via a generic serial bus operation region and RawBytes read
-access. On the Surface Book 1, this access is required to turn on (and
-off) the discrete GPU.
-
-Multiple things are to note here:
-
-a) The RawBytes access is device/driver dependent. The ACPI
-   specification states:
-
-   > Raw accesses assume that the writer has knowledge of the bus that
-   > the access is made over and the device that is being accessed. The
-   > protocol may only ensure that the buffer is transmitted to the
-   > appropriate driver, but the driver must be able to interpret the
-   > buffer to communicate to a register.
-
-   Thus this implementation may likely not work on other devices
-   accessing I2C via the RawBytes accessor type.
-
-b) The MSHW0030 I2C device is an HID-over-I2C device which seems to
-   serve multiple functions:
-
-   1. It is the main access point for the legacy-type Surface Aggregator
-      Module (also referred to as SAM-over-HID, as opposed to the newer
-      SAM-over-SSH/UART). It has currently not been determined on how
-      support for the legacy SAM should be implemented. Likely via a
-      custom HID driver.
-
-   2. It seems to serve as the HID device for the Integrated Sensor Hub.
-      This might complicate matters with regards to implementing a
-      SAM-over-HID driver required by legacy SAM.
-
-In light of this, the simplest approach has been chosen for now.
-However, it may make more sense regarding breakage and compatibility to
-either provide functionality for replacing or enhancing the default
-operation region handler via some additional API functions, or even to
-completely blacklist MSHW0030 from the I2C core and provide a custom
-driver for it.
-
-Replacing/enhancing the default operation region handler would, however,
-either require some sort of secondary driver and access point for it,
-from which the new API functions would be called and the new handler
-(part) would be installed, or hard-coding them via some sort of
-quirk-like interface into the I2C core.
-
-Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
-Patchset: surface-sam-over-hid
----
- drivers/i2c/i2c-core-acpi.c | 35 +++++++++++++++++++++++++++++++++++
- 1 file changed, 35 insertions(+)
-
-diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
-index d6037a3286690..a290ebc77aea2 100644
---- a/drivers/i2c/i2c-core-acpi.c
-+++ b/drivers/i2c/i2c-core-acpi.c
-@@ -628,6 +628,28 @@ static int acpi_gsb_i2c_write_bytes(struct i2c_client *client,
- 	return (ret == 1) ? 0 : -EIO;
- }
- 
-+static int acpi_gsb_i2c_write_raw_bytes(struct i2c_client *client,
-+		u8 *data, u8 data_len)
-+{
-+	struct i2c_msg msgs[1];
-+	int ret = AE_OK;
-+
-+	msgs[0].addr = client->addr;
-+	msgs[0].flags = client->flags;
-+	msgs[0].len = data_len + 1;
-+	msgs[0].buf = data;
-+
-+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
-+
-+	if (ret < 0) {
-+		dev_err(&client->adapter->dev, "i2c write failed: %d\n", ret);
-+		return ret;
-+	}
-+
-+	/* 1 transfer must have completed successfully */
-+	return (ret == 1) ? 0 : -EIO;
-+}
-+
- static acpi_status
- i2c_acpi_space_handler(u32 function, acpi_physical_address command,
- 			u32 bits, u64 *value64,
-@@ -729,6 +751,19 @@ i2c_acpi_space_handler(u32 function, acpi_physical_address command,
- 		}
- 		break;
- 
-+	case ACPI_GSB_ACCESS_ATTRIB_RAW_BYTES:
-+		if (action == ACPI_READ) {
-+			dev_warn(&adapter->dev,
-+				 "protocol 0x%02x not supported for client 0x%02x\n",
-+				 accessor_type, client->addr);
-+			ret = AE_BAD_PARAMETER;
-+			goto err;
-+		} else {
-+			status = acpi_gsb_i2c_write_raw_bytes(client,
-+					gsb->data, info->access_length);
-+		}
-+		break;
-+
- 	default:
- 		dev_warn(&adapter->dev, "protocol 0x%02x not supported for client 0x%02x\n",
- 			 accessor_type, client->addr);
--- 
-2.42.0
-
-From f45a16750118da615fca44e7214204c83631ee7f Mon Sep 17 00:00:00 2001
-From: Maximilian Luz <luzmaximilian@gmail.com>
-Date: Sat, 13 Feb 2021 16:41:18 +0100
-Subject: [PATCH] platform/surface: Add driver for Surface Book 1 dGPU switch
-
-Add driver exposing the discrete GPU power-switch of the  Microsoft
-Surface Book 1 to user-space.
-
-On the Surface Book 1, the dGPU power is controlled via the Surface
-System Aggregator Module (SAM). The specific SAM-over-HID command for
-this is exposed via ACPI. This module provides a simple driver exposing
-the ACPI call via a sysfs parameter to user-space, so that users can
-easily power-on/-off the dGPU.
-
-Patchset: surface-sam-over-hid
----
- drivers/platform/surface/Kconfig              |   7 +
- drivers/platform/surface/Makefile             |   1 +
- .../surface/surfacebook1_dgpu_switch.c        | 162 ++++++++++++++++++
- 3 files changed, 170 insertions(+)
- create mode 100644 drivers/platform/surface/surfacebook1_dgpu_switch.c
-
-diff --git a/drivers/platform/surface/Kconfig b/drivers/platform/surface/Kconfig
-index b629e82af97c0..68656e8f309ed 100644
---- a/drivers/platform/surface/Kconfig
-+++ b/drivers/platform/surface/Kconfig
-@@ -149,6 +149,13 @@ config SURFACE_AGGREGATOR_TABLET_SWITCH
- 	  Select M or Y here, if you want to provide tablet-mode switch input
- 	  events on the Surface Pro 8, Surface Pro X, and Surface Laptop Studio.
- 
-+config SURFACE_BOOK1_DGPU_SWITCH
-+	tristate "Surface Book 1 dGPU Switch Driver"
-+	depends on SYSFS
-+	help
-+	  This driver provides a sysfs switch to set the power-state of the
-+	  discrete GPU found on the Microsoft Surface Book 1.
-+
- config SURFACE_DTX
- 	tristate "Surface DTX (Detachment System) Driver"
- 	depends on SURFACE_AGGREGATOR
-diff --git a/drivers/platform/surface/Makefile b/drivers/platform/surface/Makefile
-index 53344330939bf..7efcd0cdb5329 100644
---- a/drivers/platform/surface/Makefile
-+++ b/drivers/platform/surface/Makefile
-@@ -12,6 +12,7 @@ obj-$(CONFIG_SURFACE_AGGREGATOR_CDEV)	+= surface_aggregator_cdev.o
- obj-$(CONFIG_SURFACE_AGGREGATOR_HUB)	+= surface_aggregator_hub.o
- obj-$(CONFIG_SURFACE_AGGREGATOR_REGISTRY) += surface_aggregator_registry.o
- obj-$(CONFIG_SURFACE_AGGREGATOR_TABLET_SWITCH) += surface_aggregator_tabletsw.o
-+obj-$(CONFIG_SURFACE_BOOK1_DGPU_SWITCH) += surfacebook1_dgpu_switch.o
- obj-$(CONFIG_SURFACE_DTX)		+= surface_dtx.o
- obj-$(CONFIG_SURFACE_GPE)		+= surface_gpe.o
- obj-$(CONFIG_SURFACE_HOTPLUG)		+= surface_hotplug.o
-diff --git a/drivers/platform/surface/surfacebook1_dgpu_switch.c b/drivers/platform/surface/surfacebook1_dgpu_switch.c
-new file mode 100644
-index 0000000000000..8b816ed8f35c6
---- /dev/null
-+++ b/drivers/platform/surface/surfacebook1_dgpu_switch.c
-@@ -0,0 +1,162 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+
-+#include <linux/kernel.h>
-+#include <linux/module.h>
-+#include <linux/acpi.h>
-+#include <linux/platform_device.h>
-+
-+
-+#ifdef pr_fmt
-+#undef pr_fmt
-+#endif
-+#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
-+
-+
-+static const guid_t dgpu_sw_guid = GUID_INIT(0x6fd05c69, 0xcde3, 0x49f4,
-+	0x95, 0xed, 0xab, 0x16, 0x65, 0x49, 0x80, 0x35);
-+
-+#define DGPUSW_ACPI_PATH_DSM	"\\_SB_.PCI0.LPCB.EC0_.VGBI"
-+#define DGPUSW_ACPI_PATH_HGON	"\\_SB_.PCI0.RP05.HGON"
-+#define DGPUSW_ACPI_PATH_HGOF	"\\_SB_.PCI0.RP05.HGOF"
-+
-+
-+static int sb1_dgpu_sw_dsmcall(void)
-+{
-+	union acpi_object *ret;
-+	acpi_handle handle;
-+	acpi_status status;
-+
-+	status = acpi_get_handle(NULL, DGPUSW_ACPI_PATH_DSM, &handle);
-+	if (status)
-+		return -EINVAL;
-+
-+	ret = acpi_evaluate_dsm_typed(handle, &dgpu_sw_guid, 1, 1, NULL, ACPI_TYPE_BUFFER);
-+	if (!ret)
-+		return -EINVAL;
-+
-+	ACPI_FREE(ret);
-+	return 0;
-+}
-+
-+static int sb1_dgpu_sw_hgon(void)
-+{
-+	struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL};
-+	acpi_status status;
-+
-+	status = acpi_evaluate_object(NULL, DGPUSW_ACPI_PATH_HGON, NULL, &buf);
-+	if (status) {
-+		pr_err("failed to run HGON: %d\n", status);
-+		return -EINVAL;
-+	}
-+
-+	if (buf.pointer)
-+		ACPI_FREE(buf.pointer);
-+
-+	pr_info("turned-on dGPU via HGON\n");
-+	return 0;
-+}
-+
-+static int sb1_dgpu_sw_hgof(void)
-+{
-+	struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL};
-+	acpi_status status;
-+
-+	status = acpi_evaluate_object(NULL, DGPUSW_ACPI_PATH_HGOF, NULL, &buf);
-+	if (status) {
-+		pr_err("failed to run HGOF: %d\n", status);
-+		return -EINVAL;
-+	}
-+
-+	if (buf.pointer)
-+		ACPI_FREE(buf.pointer);
-+
-+	pr_info("turned-off dGPU via HGOF\n");
-+	return 0;
-+}
-+
-+
-+static ssize_t dgpu_dsmcall_store(struct device *dev, struct device_attribute *attr,
-+				  const char *buf, size_t len)
-+{
-+	int status, value;
-+
-+	status = kstrtoint(buf, 0, &value);
-+	if (status < 0)
-+		return status;
-+
-+	if (value != 1)
-+		return -EINVAL;
-+
-+	status = sb1_dgpu_sw_dsmcall();
-+
-+	return status < 0 ? status : len;
-+}
-+
-+static ssize_t dgpu_power_store(struct device *dev, struct device_attribute *attr,
-+				const char *buf, size_t len)
-+{
-+	bool power;
-+	int status;
-+
-+	status = kstrtobool(buf, &power);
-+	if (status < 0)
-+		return status;
-+
-+	if (power)
-+		status = sb1_dgpu_sw_hgon();
-+	else
-+		status = sb1_dgpu_sw_hgof();
-+
-+	return status < 0 ? status : len;
-+}
-+
-+static DEVICE_ATTR_WO(dgpu_dsmcall);
-+static DEVICE_ATTR_WO(dgpu_power);
-+
-+static struct attribute *sb1_dgpu_sw_attrs[] = {
-+	&dev_attr_dgpu_dsmcall.attr,
-+	&dev_attr_dgpu_power.attr,
-+	NULL,
-+};
-+
-+static const struct attribute_group sb1_dgpu_sw_attr_group = {
-+	.attrs = sb1_dgpu_sw_attrs,
-+};
-+
-+
-+static int sb1_dgpu_sw_probe(struct platform_device *pdev)
-+{
-+	return sysfs_create_group(&pdev->dev.kobj, &sb1_dgpu_sw_attr_group);
-+}
-+
-+static int sb1_dgpu_sw_remove(struct platform_device *pdev)
-+{
-+	sysfs_remove_group(&pdev->dev.kobj, &sb1_dgpu_sw_attr_group);
-+	return 0;
-+}
-+
-+/*
-+ * The dGPU power seems to be actually handled by MSHW0040. However, that is
-+ * also the power-/volume-button device with a mainline driver. So let's use
-+ * MSHW0041 instead for now, which seems to be the LTCH (latch/DTX) device.
-+ */
-+static const struct acpi_device_id sb1_dgpu_sw_match[] = {
-+	{ "MSHW0041", },
-+	{ },
-+};
-+MODULE_DEVICE_TABLE(acpi, sb1_dgpu_sw_match);
-+
-+static struct platform_driver sb1_dgpu_sw = {
-+	.probe = sb1_dgpu_sw_probe,
-+	.remove = sb1_dgpu_sw_remove,
-+	.driver = {
-+		.name = "surfacebook1_dgpu_switch",
-+		.acpi_match_table = sb1_dgpu_sw_match,
-+		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
-+	},
-+};
-+module_platform_driver(sb1_dgpu_sw);
-+
-+MODULE_AUTHOR("Maximilian Luz <luzmaximilian@gmail.com>");
-+MODULE_DESCRIPTION("Discrete GPU Power-Switch for Surface Book 1");
-+MODULE_LICENSE("GPL");
--- 
-2.42.0
-
-From a5d9cf4762a27e2bf7f38c0d5a223b9df8b4ba8a Mon Sep 17 00:00:00 2001
-From: Sachi King <nakato@nakato.io>
-Date: Tue, 5 Oct 2021 00:05:09 +1100
-Subject: [PATCH] Input: soc_button_array - support AMD variant Surface devices
-
-The power button on the AMD variant of the Surface Laptop uses the
-same MSHW0040 device ID as the 5th and later generation of Surface
-devices, however they report 0 for their OEM platform revision.  As the
-_DSM does not exist on the devices requiring special casing, check for
-the existance of the _DSM to determine if soc_button_array should be
-loaded.
-
-Fixes: c394159310d0 ("Input: soc_button_array - add support for newer surface devices")
-Co-developed-by: Maximilian Luz <luzmaximilian@gmail.com>
-
-Signed-off-by: Sachi King <nakato@nakato.io>
-Patchset: surface-button
----
- drivers/input/misc/soc_button_array.c | 33 +++++++--------------------
- 1 file changed, 8 insertions(+), 25 deletions(-)
-
-diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c
-index e79f5497948b8..2bddbe6e9ea4d 100644
---- a/drivers/input/misc/soc_button_array.c
-+++ b/drivers/input/misc/soc_button_array.c
-@@ -537,8 +537,8 @@ static const struct soc_device_data soc_device_MSHW0028 = {
-  * Both, the Surface Pro 4 (surfacepro3_button.c) and the above mentioned
-  * devices use MSHW0040 for power and volume buttons, however the way they
-  * have to be addressed differs. Make sure that we only load this drivers
-- * for the correct devices by checking the OEM Platform Revision provided by
-- * the _DSM method.
-+ * for the correct devices by checking if the OEM Platform Revision DSM call
-+ * exists.
-  */
- #define MSHW0040_DSM_REVISION		0x01
- #define MSHW0040_DSM_GET_OMPR		0x02	// get OEM Platform Revision
-@@ -549,31 +549,14 @@ static const guid_t MSHW0040_DSM_UUID =
- static int soc_device_check_MSHW0040(struct device *dev)
- {
- 	acpi_handle handle = ACPI_HANDLE(dev);
--	union acpi_object *result;
--	u64 oem_platform_rev = 0;	// valid revisions are nonzero
--
--	// get OEM platform revision
--	result = acpi_evaluate_dsm_typed(handle, &MSHW0040_DSM_UUID,
--					 MSHW0040_DSM_REVISION,
--					 MSHW0040_DSM_GET_OMPR, NULL,
--					 ACPI_TYPE_INTEGER);
--
--	if (result) {
--		oem_platform_rev = result->integer.value;
--		ACPI_FREE(result);
--	}
--
--	/*
--	 * If the revision is zero here, the _DSM evaluation has failed. This
--	 * indicates that we have a Pro 4 or Book 1 and this driver should not
--	 * be used.
--	 */
--	if (oem_platform_rev == 0)
--		return -ENODEV;
-+	bool exists;
- 
--	dev_dbg(dev, "OEM Platform Revision %llu\n", oem_platform_rev);
-+	// check if OEM platform revision DSM call exists
-+	exists = acpi_check_dsm(handle, &MSHW0040_DSM_UUID,
-+				MSHW0040_DSM_REVISION,
-+				BIT(MSHW0040_DSM_GET_OMPR));
- 
--	return 0;
-+	return exists ? 0 : -ENODEV;
- }
- 
- /*
--- 
-2.42.0
-
-From 66f0a34801ad81ff08cc3ae0e175e0958959c461 Mon Sep 17 00:00:00 2001
-From: Sachi King <nakato@nakato.io>
-Date: Tue, 5 Oct 2021 00:22:57 +1100
-Subject: [PATCH] platform/surface: surfacepro3_button: don't load on amd
- variant
-
-The AMD variant of the Surface Laptop report 0 for their OEM platform
-revision.  The Surface devices that require the surfacepro3_button
-driver do not have the _DSM that gets the OEM platform revision.  If the
-method does not exist, load surfacepro3_button.
-
-Fixes: 64dd243d7356 ("platform/x86: surfacepro3_button: Fix device check")
-Co-developed-by: Maximilian Luz <luzmaximilian@gmail.com>
-
-Signed-off-by: Sachi King <nakato@nakato.io>
-Patchset: surface-button
----
- drivers/platform/surface/surfacepro3_button.c | 30 ++++---------------
- 1 file changed, 6 insertions(+), 24 deletions(-)
-
-diff --git a/drivers/platform/surface/surfacepro3_button.c b/drivers/platform/surface/surfacepro3_button.c
-index 2755601f979cd..4240c98ca2265 100644
---- a/drivers/platform/surface/surfacepro3_button.c
-+++ b/drivers/platform/surface/surfacepro3_button.c
-@@ -149,7 +149,8 @@ static int surface_button_resume(struct device *dev)
- /*
-  * Surface Pro 4 and Surface Book 2 / Surface Pro 2017 use the same device
-  * ID (MSHW0040) for the power/volume buttons. Make sure this is the right
-- * device by checking for the _DSM method and OEM Platform Revision.
-+ * device by checking for the _DSM method and OEM Platform Revision DSM
-+ * function.
-  *
-  * Returns true if the driver should bind to this device, i.e. the device is
-  * either MSWH0028 (Pro 3) or MSHW0040 on a Pro 4 or Book 1.
-@@ -157,30 +158,11 @@ static int surface_button_resume(struct device *dev)
- static bool surface_button_check_MSHW0040(struct acpi_device *dev)
- {
- 	acpi_handle handle = dev->handle;
--	union acpi_object *result;
--	u64 oem_platform_rev = 0;	// valid revisions are nonzero
--
--	// get OEM platform revision
--	result = acpi_evaluate_dsm_typed(handle, &MSHW0040_DSM_UUID,
--					 MSHW0040_DSM_REVISION,
--					 MSHW0040_DSM_GET_OMPR,
--					 NULL, ACPI_TYPE_INTEGER);
--
--	/*
--	 * If evaluating the _DSM fails, the method is not present. This means
--	 * that we have either MSHW0028 or MSHW0040 on Pro 4 or Book 1, so we
--	 * should use this driver. We use revision 0 indicating it is
--	 * unavailable.
--	 */
--
--	if (result) {
--		oem_platform_rev = result->integer.value;
--		ACPI_FREE(result);
--	}
--
--	dev_dbg(&dev->dev, "OEM Platform Revision %llu\n", oem_platform_rev);
- 
--	return oem_platform_rev == 0;
-+	// make sure that OEM platform revision DSM call does not exist
-+	return !acpi_check_dsm(handle, &MSHW0040_DSM_UUID,
-+			       MSHW0040_DSM_REVISION,
-+			       BIT(MSHW0040_DSM_GET_OMPR));
- }
- 
- 
--- 
-2.42.0
-
-From a55587ce4f5065bedb604f9031082ad47612a163 Mon Sep 17 00:00:00 2001
-From: Maximilian Luz <luzmaximilian@gmail.com>
-Date: Sat, 18 Feb 2023 01:02:49 +0100
-Subject: [PATCH] USB: quirks: Add USB_QUIRK_DELAY_INIT for Surface Go 3
- Type-Cover
-
-The touchpad on the Type-Cover of the Surface Go 3 is sometimes not
-being initialized properly. Apply USB_QUIRK_DELAY_INIT to fix this
-issue.
-
-More specifically, the device in question is a fairly standard modern
-touchpad with pointer and touchpad input modes. During setup, the device
-needs to be switched from pointer- to touchpad-mode (which is done in
-hid-multitouch) to fully utilize it as intended. Unfortunately, however,
-this seems to occasionally fail silently, leaving the device in
-pointer-mode. Applying USB_QUIRK_DELAY_INIT seems to fix this.
-
-Link: https://github.com/linux-surface/linux-surface/issues/1059
-Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
-Patchset: surface-typecover
----
- drivers/usb/core/quirks.c | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
-index 15e9bd180a1d2..0d70461d01e16 100644
---- a/drivers/usb/core/quirks.c
-+++ b/drivers/usb/core/quirks.c
-@@ -220,6 +220,9 @@ static const struct usb_device_id usb_quirk_list[] = {
- 	/* Microsoft Surface Dock Ethernet (RTL8153 GigE) */
- 	{ USB_DEVICE(0x045e, 0x07c6), .driver_info = USB_QUIRK_NO_LPM },
- 
-+	/* Microsoft Surface Go 3 Type-Cover */
-+	{ USB_DEVICE(0x045e, 0x09b5), .driver_info = USB_QUIRK_DELAY_INIT },
-+
- 	/* Cherry Stream G230 2.0 (G85-231) and 3.0 (G85-232) */
- 	{ USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME },
- 
--- 
-2.42.0
-
-From 678999792d6b1c72e56c6b63fc3909b93db47b32 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= <verdre@v0yd.nl>
-Date: Thu, 5 Nov 2020 13:09:45 +0100
-Subject: [PATCH] hid/multitouch: Turn off Type Cover keyboard backlight when
- suspending
-
-The Type Cover for Microsoft Surface devices supports a special usb
-control request to disable or enable the built-in keyboard backlight.
-On Windows, this request happens when putting the device into suspend or
-resuming it, without it the backlight of the Type Cover will remain
-enabled for some time even though the computer is suspended, which looks
-weird to the user.
-
-So add support for this special usb control request to hid-multitouch,
-which is the driver that's handling the Type Cover.
-
-The reason we have to use a pm_notifier for this instead of the usual
-suspend/resume methods is that those won't get called in case the usb
-device is already autosuspended.
-
-Also, if the device is autosuspended, we have to briefly autoresume it
-in order to send the request. Doing that should be fine, the usb-core
-driver does something similar during suspend inside choose_wakeup().
-
-To make sure we don't send that request to every device but only to
-devices which support it, add a new quirk
-MT_CLS_WIN_8_MS_SURFACE_TYPE_COVER to hid-multitouch. For now this quirk
-is only enabled for the usb id of the Surface Pro 2017 Type Cover, which
-is where I confirmed that it's working.
-
-Patchset: surface-typecover
----
- drivers/hid/hid-multitouch.c | 100 ++++++++++++++++++++++++++++++++++-
- 1 file changed, 98 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
-index 8db4ae05febc8..99a5efef45258 100644
---- a/drivers/hid/hid-multitouch.c
-+++ b/drivers/hid/hid-multitouch.c
-@@ -34,7 +34,10 @@
- #include <linux/device.h>
- #include <linux/hid.h>
- #include <linux/module.h>
-+#include <linux/pm_runtime.h>
- #include <linux/slab.h>
-+#include <linux/suspend.h>
-+#include <linux/usb.h>
- #include <linux/input/mt.h>
- #include <linux/jiffies.h>
- #include <linux/string.h>
-@@ -47,6 +50,7 @@ MODULE_DESCRIPTION("HID multitouch panels");
- MODULE_LICENSE("GPL");
- 
- #include "hid-ids.h"
-+#include "usbhid/usbhid.h"
- 
- /* quirks to control the device */
- #define MT_QUIRK_NOT_SEEN_MEANS_UP	BIT(0)
-@@ -72,12 +76,15 @@ MODULE_LICENSE("GPL");
- #define MT_QUIRK_FORCE_MULTI_INPUT	BIT(20)
- #define MT_QUIRK_DISABLE_WAKEUP		BIT(21)
- #define MT_QUIRK_ORIENTATION_INVERT	BIT(22)
-+#define MT_QUIRK_HAS_TYPE_COVER_BACKLIGHT	BIT(23)
- 
- #define MT_INPUTMODE_TOUCHSCREEN	0x02
- #define MT_INPUTMODE_TOUCHPAD		0x03
- 
- #define MT_BUTTONTYPE_CLICKPAD		0
- 
-+#define MS_TYPE_COVER_FEATURE_REPORT_USAGE	0xff050086
-+
- enum latency_mode {
- 	HID_LATENCY_NORMAL = 0,
- 	HID_LATENCY_HIGH = 1,
-@@ -169,6 +176,8 @@ struct mt_device {
- 
- 	struct list_head applications;
- 	struct list_head reports;
-+
-+	struct notifier_block pm_notifier;
- };
- 
- static void mt_post_parse_default_settings(struct mt_device *td,
-@@ -213,6 +222,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app);
- #define MT_CLS_GOOGLE				0x0111
- #define MT_CLS_RAZER_BLADE_STEALTH		0x0112
- #define MT_CLS_SMART_TECH			0x0113
-+#define MT_CLS_WIN_8_MS_SURFACE_TYPE_COVER	0x0114
- 
- #define MT_DEFAULT_MAXCONTACT	10
- #define MT_MAX_MAXCONTACT	250
-@@ -397,6 +407,16 @@ static const struct mt_class mt_classes[] = {
- 			MT_QUIRK_CONTACT_CNT_ACCURATE |
- 			MT_QUIRK_SEPARATE_APP_REPORT,
- 	},
-+	{ .name = MT_CLS_WIN_8_MS_SURFACE_TYPE_COVER,
-+		.quirks = MT_QUIRK_HAS_TYPE_COVER_BACKLIGHT |
-+			MT_QUIRK_ALWAYS_VALID |
-+			MT_QUIRK_IGNORE_DUPLICATES |
-+			MT_QUIRK_HOVERING |
-+			MT_QUIRK_CONTACT_CNT_ACCURATE |
-+			MT_QUIRK_STICKY_FINGERS |
-+			MT_QUIRK_WIN8_PTP_BUTTONS,
-+		.export_all_inputs = true
-+	},
- 	{ }
- };
- 
-@@ -1721,6 +1741,69 @@ static void mt_expired_timeout(struct timer_list *t)
- 	clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags);
- }
- 
-+static void get_type_cover_backlight_field(struct hid_device *hdev,
-+					   struct hid_field **field)
-+{
-+	struct hid_report_enum *rep_enum;
-+	struct hid_report *rep;
-+	struct hid_field *cur_field;
-+	int i, j;
-+
-+	rep_enum = &hdev->report_enum[HID_FEATURE_REPORT];
-+	list_for_each_entry(rep, &rep_enum->report_list, list) {
-+		for (i = 0; i < rep->maxfield; i++) {
-+			cur_field = rep->field[i];
-+
-+			for (j = 0; j < cur_field->maxusage; j++) {
-+				if (cur_field->usage[j].hid
-+				    == MS_TYPE_COVER_FEATURE_REPORT_USAGE) {
-+					*field = cur_field;
-+					return;
-+				}
-+			}
-+		}
-+	}
-+}
-+
-+static void update_keyboard_backlight(struct hid_device *hdev, bool enabled)
-+{
-+	struct usb_device *udev = hid_to_usb_dev(hdev);
-+	struct hid_field *field = NULL;
-+
-+	/* Wake up the device in case it's already suspended */
-+	pm_runtime_get_sync(&udev->dev);
-+
-+	get_type_cover_backlight_field(hdev, &field);
-+	if (!field) {
-+		hid_err(hdev, "couldn't find backlight field\n");
-+		goto out;
-+	}
-+
-+	field->value[field->index] = enabled ? 0x01ff00ff : 0x00ff00ff;
-+	hid_hw_request(hdev, field->report, HID_REQ_SET_REPORT);
-+
-+out:
-+	pm_runtime_put_sync(&udev->dev);
-+}
-+
-+static int mt_pm_notifier(struct notifier_block *notifier,
-+			  unsigned long pm_event,
-+			  void *unused)
-+{
-+	struct mt_device *td =
-+		container_of(notifier, struct mt_device, pm_notifier);
-+	struct hid_device *hdev = td->hdev;
-+
-+	if (td->mtclass.quirks & MT_QUIRK_HAS_TYPE_COVER_BACKLIGHT) {
-+		if (pm_event == PM_SUSPEND_PREPARE)
-+			update_keyboard_backlight(hdev, 0);
-+		else if (pm_event == PM_POST_SUSPEND)
-+			update_keyboard_backlight(hdev, 1);
-+	}
-+
-+	return NOTIFY_DONE;
-+}
-+
- static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
- {
- 	int ret, i;
-@@ -1744,6 +1827,9 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
- 	td->inputmode_value = MT_INPUTMODE_TOUCHSCREEN;
- 	hid_set_drvdata(hdev, td);
- 
-+	td->pm_notifier.notifier_call = mt_pm_notifier;
-+	register_pm_notifier(&td->pm_notifier);
-+
- 	INIT_LIST_HEAD(&td->applications);
- 	INIT_LIST_HEAD(&td->reports);
- 
-@@ -1782,15 +1868,19 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
- 	timer_setup(&td->release_timer, mt_expired_timeout, 0);
- 
- 	ret = hid_parse(hdev);
--	if (ret != 0)
-+	if (ret != 0) {
-+		unregister_pm_notifier(&td->pm_notifier);
- 		return ret;
-+	}
- 
- 	if (mtclass->quirks & MT_QUIRK_FIX_CONST_CONTACT_ID)
- 		mt_fix_const_fields(hdev, HID_DG_CONTACTID);
- 
- 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
--	if (ret)
-+	if (ret) {
-+		unregister_pm_notifier(&td->pm_notifier);
- 		return ret;
-+	}
- 
- 	ret = sysfs_create_group(&hdev->dev.kobj, &mt_attribute_group);
- 	if (ret)
-@@ -1842,6 +1932,7 @@ static void mt_remove(struct hid_device *hdev)
- {
- 	struct mt_device *td = hid_get_drvdata(hdev);
- 
-+	unregister_pm_notifier(&td->pm_notifier);
- 	del_timer_sync(&td->release_timer);
- 
- 	sysfs_remove_group(&hdev->dev.kobj, &mt_attribute_group);
-@@ -2223,6 +2314,11 @@ static const struct hid_device_id mt_devices[] = {
- 		MT_USB_DEVICE(USB_VENDOR_ID_XIROKU,
- 			USB_DEVICE_ID_XIROKU_CSR2) },
- 
-+	/* Microsoft Surface type cover */
-+	{ .driver_data = MT_CLS_WIN_8_MS_SURFACE_TYPE_COVER,
-+		HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY,
-+			USB_VENDOR_ID_MICROSOFT, 0x09c0) },
-+
- 	/* Google MT devices */
- 	{ .driver_data = MT_CLS_GOOGLE,
- 		HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE,
--- 
-2.42.0
-
-From 12427f01e38ebf653ccf44faefdcb92110c43c20 Mon Sep 17 00:00:00 2001
-From: PJungkamp <p.jungkamp@gmail.com>
-Date: Fri, 25 Feb 2022 12:04:25 +0100
-Subject: [PATCH] hid/multitouch: Add support for surface pro type cover tablet
- switch
-
-The Surface Pro Type Cover has several non standard HID usages in it's
-hid report descriptor.
-I noticed that, upon folding the typecover back, a vendor specific range
-of 4 32 bit integer hid usages is transmitted.
-Only the first byte of the message seems to convey reliable information
-about the keyboard state.
-
-0x22 => Normal (keys enabled)
-0x33 => Folded back (keys disabled)
-0x53 => Rotated left/right side up (keys disabled)
-0x13 => Cover closed (keys disabled)
-0x43 => Folded back and Tablet upside down (keys disabled)
-This list may not be exhaustive.
-
-The tablet mode switch will be disabled for a value of 0x22 and enabled
-on any other value.
-
-Patchset: surface-typecover
----
- drivers/hid/hid-multitouch.c | 148 +++++++++++++++++++++++++++++------
- 1 file changed, 122 insertions(+), 26 deletions(-)
-
-diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
-index 99a5efef45258..6ae43ea90bcd5 100644
---- a/drivers/hid/hid-multitouch.c
-+++ b/drivers/hid/hid-multitouch.c
-@@ -77,6 +77,7 @@ MODULE_LICENSE("GPL");
- #define MT_QUIRK_DISABLE_WAKEUP		BIT(21)
- #define MT_QUIRK_ORIENTATION_INVERT	BIT(22)
- #define MT_QUIRK_HAS_TYPE_COVER_BACKLIGHT	BIT(23)
-+#define MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH	BIT(24)
- 
- #define MT_INPUTMODE_TOUCHSCREEN	0x02
- #define MT_INPUTMODE_TOUCHPAD		0x03
-@@ -84,6 +85,8 @@ MODULE_LICENSE("GPL");
- #define MT_BUTTONTYPE_CLICKPAD		0
- 
- #define MS_TYPE_COVER_FEATURE_REPORT_USAGE	0xff050086
-+#define MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE	0xff050072
-+#define MS_TYPE_COVER_APPLICATION	0xff050050
- 
- enum latency_mode {
- 	HID_LATENCY_NORMAL = 0,
-@@ -409,6 +412,7 @@ static const struct mt_class mt_classes[] = {
- 	},
- 	{ .name = MT_CLS_WIN_8_MS_SURFACE_TYPE_COVER,
- 		.quirks = MT_QUIRK_HAS_TYPE_COVER_BACKLIGHT |
-+			MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH |
- 			MT_QUIRK_ALWAYS_VALID |
- 			MT_QUIRK_IGNORE_DUPLICATES |
- 			MT_QUIRK_HOVERING |
-@@ -1390,6 +1394,9 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi,
- 	    field->application != HID_CP_CONSUMER_CONTROL &&
- 	    field->application != HID_GD_WIRELESS_RADIO_CTLS &&
- 	    field->application != HID_GD_SYSTEM_MULTIAXIS &&
-+	    !(field->application == MS_TYPE_COVER_APPLICATION &&
-+	      application->quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH &&
-+	      usage->hid == MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE) &&
- 	    !(field->application == HID_VD_ASUS_CUSTOM_MEDIA_KEYS &&
- 	      application->quirks & MT_QUIRK_ASUS_CUSTOM_UP))
- 		return -1;
-@@ -1417,6 +1424,21 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi,
- 		return 1;
- 	}
- 
-+	/*
-+	 * The Microsoft Surface Pro Typecover has a non-standard HID
-+	 * tablet mode switch on a vendor specific usage page with vendor
-+	 * specific usage.
-+	 */
-+	if (field->application == MS_TYPE_COVER_APPLICATION &&
-+	    application->quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH &&
-+	    usage->hid == MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE) {
-+		usage->type = EV_SW;
-+		usage->code = SW_TABLET_MODE;
-+		*max = SW_MAX;
-+		*bit = hi->input->swbit;
-+		return 1;
-+	}
-+
- 	if (rdata->is_mt_collection)
- 		return mt_touch_input_mapping(hdev, hi, field, usage, bit, max,
- 					      application);
-@@ -1438,6 +1460,7 @@ static int mt_input_mapped(struct hid_device *hdev, struct hid_input *hi,
- {
- 	struct mt_device *td = hid_get_drvdata(hdev);
- 	struct mt_report_data *rdata;
-+	struct input_dev *input;
- 
- 	rdata = mt_find_report_data(td, field->report);
- 	if (rdata && rdata->is_mt_collection) {
-@@ -1445,6 +1468,19 @@ static int mt_input_mapped(struct hid_device *hdev, struct hid_input *hi,
- 		return -1;
- 	}
- 
-+	/*
-+	 * We own an input device which acts as a tablet mode switch for
-+	 * the Surface Pro Typecover.
-+	 */
-+	if (field->application == MS_TYPE_COVER_APPLICATION &&
-+	    rdata->application->quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH &&
-+	    usage->hid == MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE) {
-+		input = hi->input;
-+		input_set_capability(input, EV_SW, SW_TABLET_MODE);
-+		input_report_switch(input, SW_TABLET_MODE, 0);
-+		return -1;
-+	}
-+
- 	/* let hid-core decide for the others */
- 	return 0;
- }
-@@ -1454,11 +1490,21 @@ static int mt_event(struct hid_device *hid, struct hid_field *field,
- {
- 	struct mt_device *td = hid_get_drvdata(hid);
- 	struct mt_report_data *rdata;
-+	struct input_dev *input;
- 
- 	rdata = mt_find_report_data(td, field->report);
- 	if (rdata && rdata->is_mt_collection)
- 		return mt_touch_event(hid, field, usage, value);
- 
-+	if (field->application == MS_TYPE_COVER_APPLICATION &&
-+	    rdata->application->quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH &&
-+	    usage->hid == MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE) {
-+		input = field->hidinput->input;
-+		input_report_switch(input, SW_TABLET_MODE, (value & 0xFF) != 0x22);
-+		input_sync(input);
-+		return 1;
-+	}
-+
- 	return 0;
- }
- 
-@@ -1611,6 +1657,42 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app)
- 		app->quirks &= ~MT_QUIRK_CONTACT_CNT_ACCURATE;
- }
- 
-+static int get_type_cover_field(struct hid_report_enum *rep_enum,
-+				struct hid_field **field, int usage)
-+{
-+	struct hid_report *rep;
-+	struct hid_field *cur_field;
-+	int i, j;
-+
-+	list_for_each_entry(rep, &rep_enum->report_list, list) {
-+		for (i = 0; i < rep->maxfield; i++) {
-+			cur_field = rep->field[i];
-+			if (cur_field->application != MS_TYPE_COVER_APPLICATION)
-+				continue;
-+			for (j = 0; j < cur_field->maxusage; j++) {
-+				if (cur_field->usage[j].hid == usage) {
-+					*field = cur_field;
-+					return true;
-+				}
-+			}
-+		}
-+	}
-+	return false;
-+}
-+
-+static void request_type_cover_tablet_mode_switch(struct hid_device *hdev)
-+{
-+	struct hid_field *field;
-+
-+	if (get_type_cover_field(&hdev->report_enum[HID_INPUT_REPORT],
-+				 &field,
-+				 MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE)) {
-+		hid_hw_request(hdev, field->report, HID_REQ_GET_REPORT);
-+	} else {
-+		hid_err(hdev, "couldn't find tablet mode field\n");
-+	}
-+}
-+
- static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi)
- {
- 	struct mt_device *td = hid_get_drvdata(hdev);
-@@ -1659,6 +1741,13 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi)
- 		/* force BTN_STYLUS to allow tablet matching in udev */
- 		__set_bit(BTN_STYLUS, hi->input->keybit);
- 		break;
-+	case MS_TYPE_COVER_APPLICATION:
-+		if (td->mtclass.quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH) {
-+			suffix = "Tablet Mode Switch";
-+			request_type_cover_tablet_mode_switch(hdev);
-+			break;
-+		}
-+		fallthrough;
- 	default:
- 		suffix = "UNKNOWN";
- 		break;
-@@ -1741,30 +1830,6 @@ static void mt_expired_timeout(struct timer_list *t)
- 	clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags);
- }
- 
--static void get_type_cover_backlight_field(struct hid_device *hdev,
--					   struct hid_field **field)
--{
--	struct hid_report_enum *rep_enum;
--	struct hid_report *rep;
--	struct hid_field *cur_field;
--	int i, j;
--
--	rep_enum = &hdev->report_enum[HID_FEATURE_REPORT];
--	list_for_each_entry(rep, &rep_enum->report_list, list) {
--		for (i = 0; i < rep->maxfield; i++) {
--			cur_field = rep->field[i];
--
--			for (j = 0; j < cur_field->maxusage; j++) {
--				if (cur_field->usage[j].hid
--				    == MS_TYPE_COVER_FEATURE_REPORT_USAGE) {
--					*field = cur_field;
--					return;
--				}
--			}
--		}
--	}
--}
--
- static void update_keyboard_backlight(struct hid_device *hdev, bool enabled)
- {
- 	struct usb_device *udev = hid_to_usb_dev(hdev);
-@@ -1773,8 +1838,9 @@ static void update_keyboard_backlight(struct hid_device *hdev, bool enabled)
- 	/* Wake up the device in case it's already suspended */
- 	pm_runtime_get_sync(&udev->dev);
- 
--	get_type_cover_backlight_field(hdev, &field);
--	if (!field) {
-+	if (!get_type_cover_field(&hdev->report_enum[HID_FEATURE_REPORT],
-+				  &field,
-+				  MS_TYPE_COVER_FEATURE_REPORT_USAGE)) {
- 		hid_err(hdev, "couldn't find backlight field\n");
- 		goto out;
- 	}
-@@ -1909,13 +1975,24 @@ static int mt_suspend(struct hid_device *hdev, pm_message_t state)
- 
- static int mt_reset_resume(struct hid_device *hdev)
- {
-+	struct mt_device *td = hid_get_drvdata(hdev);
-+
- 	mt_release_contacts(hdev);
- 	mt_set_modes(hdev, HID_LATENCY_NORMAL, true, true);
-+
-+	/* Request an update on the typecover folding state on resume
-+	 * after reset.
-+	 */
-+	if (td->mtclass.quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH)
-+		request_type_cover_tablet_mode_switch(hdev);
-+
- 	return 0;
- }
- 
- static int mt_resume(struct hid_device *hdev)
- {
-+	struct mt_device *td = hid_get_drvdata(hdev);
-+
- 	/* Some Elan legacy devices require SET_IDLE to be set on resume.
- 	 * It should be safe to send it to other devices too.
- 	 * Tested on 3M, Stantum, Cypress, Zytronic, eGalax, and Elan panels. */
-@@ -1924,6 +2001,10 @@ static int mt_resume(struct hid_device *hdev)
- 
- 	mt_set_modes(hdev, HID_LATENCY_NORMAL, true, true);
- 
-+	/* Request an update on the typecover folding state on resume. */
-+	if (td->mtclass.quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH)
-+		request_type_cover_tablet_mode_switch(hdev);
-+
- 	return 0;
- }
- #endif
-@@ -1931,6 +2012,21 @@ static int mt_resume(struct hid_device *hdev)
- static void mt_remove(struct hid_device *hdev)
- {
- 	struct mt_device *td = hid_get_drvdata(hdev);
-+	struct hid_field *field;
-+	struct input_dev *input;
-+
-+	/* Reset tablet mode switch on disconnect. */
-+	if (td->mtclass.quirks & MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH) {
-+		if (get_type_cover_field(&hdev->report_enum[HID_INPUT_REPORT],
-+					 &field,
-+					 MS_TYPE_COVER_TABLET_MODE_SWITCH_USAGE)) {
-+			input = field->hidinput->input;
-+			input_report_switch(input, SW_TABLET_MODE, 0);
-+			input_sync(input);
-+		} else {
-+			hid_err(hdev, "couldn't find tablet mode field\n");
-+		}
-+	}
- 
- 	unregister_pm_notifier(&td->pm_notifier);
- 	del_timer_sync(&td->release_timer);
--- 
-2.42.0
-
-From 151f9dba2f3d6d066d160128da109a0173a3ff4c Mon Sep 17 00:00:00 2001
-From: Maximilian Luz <luzmaximilian@gmail.com>
-Date: Sun, 19 Feb 2023 22:12:24 +0100
-Subject: [PATCH] PCI: Add quirk to prevent calling shutdown mehtod
-
-Work around buggy EFI firmware: On some Microsoft Surface devices
-(Surface Pro 9 and Surface Laptop 5) the EFI ResetSystem call with
-EFI_RESET_SHUTDOWN doesn't function properly. Instead of shutting the
-system down, it returns and the system stays on.
-
-It turns out that this only happens after PCI shutdown callbacks ran for
-specific devices. Excluding those devices from the shutdown process
-makes the ResetSystem call work as expected.
-
-TODO: Maybe we can find a better way or the root cause of this?
-
-Not-Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
-Patchset: surface-shutdown
----
- drivers/pci/pci-driver.c |  3 +++
- drivers/pci/quirks.c     | 36 ++++++++++++++++++++++++++++++++++++
- include/linux/pci.h      |  1 +
- 3 files changed, 40 insertions(+)
-
-diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
-index 51ec9e7e784f0..40554890d7211 100644
---- a/drivers/pci/pci-driver.c
-+++ b/drivers/pci/pci-driver.c
-@@ -507,6 +507,9 @@ static void pci_device_shutdown(struct device *dev)
- 	struct pci_dev *pci_dev = to_pci_dev(dev);
- 	struct pci_driver *drv = pci_dev->driver;
- 
-+	if (pci_dev->no_shutdown)
-+		return;
-+
- 	pm_runtime_resume(dev);
- 
- 	if (drv && drv->shutdown)
-diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
-index ae95d0950..7a6d76c41 100644
---- a/drivers/pci/quirks.c
-+++ b/drivers/pci/quirks.c
-@@ -6212,6 +6212,42 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5020, of_pci_make_dev_node);
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5021, of_pci_make_dev_node);
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_REDHAT, 0x0005, of_pci_make_dev_node);
-
-+static const struct dmi_system_id no_shutdown_dmi_table[] = {
-+	/*
-+	 * Systems on which some devices should not be touched during shutdown.
-+	 */
-+	{
-+		.ident = "Microsoft Surface Pro 9",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
-+			DMI_MATCH(DMI_PRODUCT_NAME, "Surface Pro 9"),
-+		},
-+	},
-+	{
-+		.ident = "Microsoft Surface Laptop 5",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
-+			DMI_MATCH(DMI_PRODUCT_NAME, "Surface Laptop 5"),
-+		},
-+	},
-+	{}
-+};
-+
-+static void quirk_no_shutdown(struct pci_dev *dev)
-+{
-+	if (!dmi_check_system(no_shutdown_dmi_table))
-+		return;
-+
-+	dev->no_shutdown = 1;
-+	pci_info(dev, "disabling shutdown ops for [%04x:%04x]\n",
-+		 dev->vendor, dev->device);
-+}
-+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x461e, quirk_no_shutdown);  // Thunderbolt 4 USB Controller
-+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x461f, quirk_no_shutdown);  // Thunderbolt 4 PCI Express Root Port
-+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x462f, quirk_no_shutdown);  // Thunderbolt 4 PCI Express Root Port
-+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x466d, quirk_no_shutdown);  // Thunderbolt 4 NHI
-+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x46a8, quirk_no_shutdown);  // GPU
-+
- /*
-  * Devices known to require a longer delay before first config space access
-  * after reset recovery or resume from D3cold:
-
-diff --git a/include/linux/pci.h b/include/linux/pci.h
-index 8c7c2c3c6c652..0c223b04dff91 100644
---- a/include/linux/pci.h
-+++ b/include/linux/pci.h
-@@ -465,6 +465,7 @@ struct pci_dev {
- 	unsigned int	no_command_memory:1;	/* No PCI_COMMAND_MEMORY */
- 	unsigned int	rom_bar_overlap:1;	/* ROM BAR disable broken */
- 	unsigned int	rom_attr_enabled:1;	/* Display of ROM attribute enabled? */
-+	unsigned int	no_shutdown:1;		/* Do not touch device on shutdown */
- 	pci_dev_flags_t dev_flags;
- 	atomic_t	enable_cnt;	/* pci_enable_device has been called */
- 
--- 
-2.42.0
-
-From 912e956823b3cadd7203d3ce94418d162ff701be Mon Sep 17 00:00:00 2001
-From: Maximilian Luz <luzmaximilian@gmail.com>
-Date: Sun, 12 Mar 2023 01:41:57 +0100
-Subject: [PATCH] platform/surface: gpe: Add support for Surface Pro 9
-
-Add the lid GPE used by the Surface Pro 9.
-
-Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
-Patchset: surface-gpe
----
- drivers/platform/surface/surface_gpe.c | 17 +++++++++++++++++
- 1 file changed, 17 insertions(+)
-
-diff --git a/drivers/platform/surface/surface_gpe.c b/drivers/platform/surface/surface_gpe.c
-index c219b840d491a..69c4352e8406b 100644
---- a/drivers/platform/surface/surface_gpe.c
-+++ b/drivers/platform/surface/surface_gpe.c
-@@ -41,6 +41,11 @@ static const struct property_entry lid_device_props_l4F[] = {
- 	{},
- };
- 
-+static const struct property_entry lid_device_props_l52[] = {
-+	PROPERTY_ENTRY_U32("gpe", 0x52),
-+	{},
-+};
-+
- static const struct property_entry lid_device_props_l57[] = {
- 	PROPERTY_ENTRY_U32("gpe", 0x57),
- 	{},
-@@ -107,6 +112,18 @@ static const struct dmi_system_id dmi_lid_device_table[] = {
- 		},
- 		.driver_data = (void *)lid_device_props_l4B,
- 	},
-+	{
-+		/*
-+		 * We match for SKU here due to product name clash with the ARM
-+		 * version.
-+		 */
-+		.ident = "Surface Pro 9",
-+		.matches = {
-+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
-+			DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_9_2038"),
-+		},
-+		.driver_data = (void *)lid_device_props_l52,
-+	},
- 	{
- 		.ident = "Surface Book 1",
- 		.matches = {
--- 
-2.42.0
-
-From df083025f8c63824279c19de8ec3339440f819c9 Mon Sep 17 00:00:00 2001
-From: Hans de Goede <hdegoede@redhat.com>
-Date: Sun, 10 Oct 2021 20:56:57 +0200
-Subject: [PATCH] ACPI: delay enumeration of devices with a _DEP pointing to an
- INT3472 device
-
-The clk and regulator frameworks expect clk/regulator consumer-devices
-to have info about the consumed clks/regulators described in the device's
-fw_node.
-
-To work around cases where this info is not present in the firmware tables,
-which is often the case on x86/ACPI devices, both frameworks allow the
-provider-driver to attach info about consumers to the clks/regulators
-when registering these.
-
-This causes problems with the probe ordering wrt drivers for consumers
-of these clks/regulators. Since the lookups are only registered when the
-provider-driver binds, trying to get these clks/regulators before then
-results in a -ENOENT error for clks and a dummy regulator for regulators.
-
-One case where we hit this issue is camera sensors such as e.g. the OV8865
-sensor found on the Microsoft Surface Go. The sensor uses clks, regulators
-and GPIOs provided by a TPS68470 PMIC which is described in an INT3472
-ACPI device. There is special platform code handling this and setting
-platform_data with the necessary consumer info on the MFD cells
-instantiated for the PMIC under: drivers/platform/x86/intel/int3472.
-
-For this to work properly the ov8865 driver must not bind to the I2C-client
-for the OV8865 sensor until after the TPS68470 PMIC gpio, regulator and
-clk MFD cells have all been fully setup.
-
-The OV8865 on the Microsoft Surface Go is just one example, all X86
-devices using the Intel IPU3 camera block found on recent Intel SoCs
-have similar issues where there is an INT3472 HID ACPI-device, which
-describes the clks and regulators, and the driver for this INT3472 device
-must be fully initialized before the sensor driver (any sensor driver)
-binds for things to work properly.
-
-On these devices the ACPI nodes describing the sensors all have a _DEP
-dependency on the matching INT3472 ACPI device (there is one per sensor).
-
-This allows solving the probe-ordering problem by delaying the enumeration
-(instantiation of the I2C-client in the ov8865 example) of ACPI-devices
-which have a _DEP dependency on an INT3472 device.
-
-The new acpi_dev_ready_for_enumeration() helper used for this is also
-exported because for devices, which have the enumeration_by_parent flag
-set, the parent-driver will do its own scan of child ACPI devices and
-it will try to enumerate those during its probe(). Code doing this such
-as e.g. the i2c-core-acpi.c code must call this new helper to ensure
-that it too delays the enumeration until all the _DEP dependencies are
-met on devices which have the new honor_deps flag set.
-
-Signed-off-by: Hans de Goede <hdegoede@redhat.com>
-Patchset: cameras
----
- drivers/acpi/scan.c | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
-index 691d4b7686ee7..9283217689279 100644
---- a/drivers/acpi/scan.c
-+++ b/drivers/acpi/scan.c
-@@ -2108,6 +2108,9 @@ static acpi_status acpi_bus_check_add_2(acpi_handle handle, u32 lvl_not_used,
- 
- static void acpi_default_enumeration(struct acpi_device *device)
- {
-+	if (!acpi_dev_ready_for_enumeration(device))
-+		return;
-+
- 	/*
- 	 * Do not enumerate devices with enumeration_by_parent flag set as
- 	 * they will be enumerated by their respective parents.
--- 
-2.42.0
-
-From 87650a001d3068a8b614fd688e21bb87c2d3a3e6 Mon Sep 17 00:00:00 2001
-From: zouxiaoh <xiaohong.zou@intel.com>
-Date: Fri, 25 Jun 2021 08:52:59 +0800
-Subject: [PATCH] iommu: intel-ipu: use IOMMU passthrough mode for Intel IPUs
-
-Intel IPU(Image Processing Unit) has its own (IO)MMU hardware,
-The IPU driver allocates its own page table that is not mapped
-via the DMA, and thus the Intel IOMMU driver blocks access giving
-this error: DMAR: DRHD: handling fault status reg 3 DMAR:
-[DMA Read] Request device [00:05.0] PASID ffffffff
-fault addr 76406000 [fault reason 06] PTE Read access is not set
-As IPU is not an external facing device which is not risky, so use
-IOMMU passthrough mode for Intel IPUs.
-
-Change-Id: I6dcccdadac308cf42e20a18e1b593381391e3e6b
-Depends-On: Iacd67578e8c6a9b9ac73285f52b4081b72fb68a6
-Tracked-On: #JIITL8-411
-Signed-off-by: Bingbu Cao <bingbu.cao@intel.com>
-Signed-off-by: zouxiaoh <xiaohong.zou@intel.com>
-Signed-off-by: Xu Chongyang <chongyang.xu@intel.com>
-Patchset: cameras
----
- drivers/iommu/intel/iommu.c | 30 ++++++++++++++++++++++++++++++
- 1 file changed, 30 insertions(+)
-
-diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
-index 5a627e081797c..da866ac6b30ba 100644
---- a/drivers/iommu/intel/iommu.c
-+++ b/drivers/iommu/intel/iommu.c
-@@ -38,6 +38,12 @@
- #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
- #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
- #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
-+#define IS_INTEL_IPU(pdev) ((pdev)->vendor == PCI_VENDOR_ID_INTEL &&	\
-+			   ((pdev)->device == 0x9a19 ||		\
-+			    (pdev)->device == 0x9a39 ||		\
-+			    (pdev)->device == 0x4e19 ||		\
-+			    (pdev)->device == 0x465d ||		\
-+			    (pdev)->device == 0x1919))
- #define IS_IPTS(pdev) ((pdev)->vendor == PCI_VENDOR_ID_INTEL &&	\
- 			    ((pdev)->device == 0x9d3e))
- #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
-@@ -295,12 +301,14 @@ EXPORT_SYMBOL_GPL(intel_iommu_enabled);
- 
- static int dmar_map_gfx = 1;
- static int dmar_map_ipts = 1;
-+static int dmar_map_ipu = 1;
- static int intel_iommu_superpage = 1;
- static int iommu_identity_mapping;
- static int iommu_skip_te_disable;
- 
- #define IDENTMAP_GFX		2
- #define IDENTMAP_AZALIA		4
-+#define IDENTMAP_IPU		8
- #define IDENTMAP_IPTS		16
- 
- const struct iommu_ops intel_iommu_ops;
-@@ -2547,6 +2555,9 @@ static int device_def_domain_type(struct device *dev)
- 		if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
- 			return IOMMU_DOMAIN_IDENTITY;
- 
-+		if ((iommu_identity_mapping & IDENTMAP_IPU) && IS_INTEL_IPU(pdev))
-+			return IOMMU_DOMAIN_IDENTITY;
-+
- 		if ((iommu_identity_mapping & IDENTMAP_IPTS) && IS_IPTS(pdev))
- 			return IOMMU_DOMAIN_IDENTITY;
- 	}
-@@ -2856,6 +2867,9 @@ static int __init init_dmars(void)
- 	if (!dmar_map_gfx)
- 		iommu_identity_mapping |= IDENTMAP_GFX;
- 
-+	if (!dmar_map_ipu)
-+		iommu_identity_mapping |= IDENTMAP_IPU;
-+
- 	if (!dmar_map_ipts)
- 		iommu_identity_mapping |= IDENTMAP_IPTS;
- 
-@@ -4838,6 +4852,18 @@ static void quirk_iommu_igfx(struct pci_dev *dev)
- 	dmar_map_gfx = 0;
- }
- 
-+static void quirk_iommu_ipu(struct pci_dev *dev)
-+{
-+	if (!IS_INTEL_IPU(dev))
-+		return;
-+
-+	if (risky_device(dev))
-+		return;
-+
-+	pci_info(dev, "Passthrough IOMMU for integrated Intel IPU\n");
-+	dmar_map_ipu = 0;
-+}
-+
- static void quirk_iommu_ipts(struct pci_dev *dev)
- {
- 	if (!IS_IPTS(dev))
-@@ -4849,6 +4875,7 @@ static void quirk_iommu_ipts(struct pci_dev *dev)
- 	pci_info(dev, "Passthrough IOMMU for IPTS\n");
- 	dmar_map_ipts = 0;
- }
-+
- /* G4x/GM45 integrated gfx dmar support is totally busted. */
- DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
- DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
-@@ -4884,6 +4911,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
- DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
- DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
- 
-+/* disable IPU dmar support */
-+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_iommu_ipu);
-+
- /* disable IPTS dmar support */
- DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9D3E, quirk_iommu_ipts);
- 
--- 
-2.42.0
-
-From 76fec27d978bf7708a60862d4aab2e1fe7ec3f27 Mon Sep 17 00:00:00 2001
-From: Daniel Scally <djrscally@gmail.com>
-Date: Sun, 10 Oct 2021 20:57:02 +0200
-Subject: [PATCH] platform/x86: int3472: Enable I2c daisy chain
-
-The TPS68470 PMIC has an I2C passthrough mode through which I2C traffic
-can be forwarded to a device connected to the PMIC as though it were
-connected directly to the system bus. Enable this mode when the chip
-is initialised.
-
-Signed-off-by: Daniel Scally <djrscally@gmail.com>
-Patchset: cameras
----
- drivers/platform/x86/intel/int3472/tps68470.c | 7 +++++++
- 1 file changed, 7 insertions(+)
-
-diff --git a/drivers/platform/x86/intel/int3472/tps68470.c b/drivers/platform/x86/intel/int3472/tps68470.c
-index 1e107fd49f828..e3e1696e7f0ee 100644
---- a/drivers/platform/x86/intel/int3472/tps68470.c
-+++ b/drivers/platform/x86/intel/int3472/tps68470.c
-@@ -46,6 +46,13 @@ static int tps68470_chip_init(struct device *dev, struct regmap *regmap)
- 		return ret;
- 	}
- 
-+	/* Enable I2C daisy chain */
-+	ret = regmap_write(regmap, TPS68470_REG_S_I2C_CTL, 0x03);
-+	if (ret) {
-+		dev_err(dev, "Failed to enable i2c daisy chain\n");
-+		return ret;
-+	}
-+
- 	dev_info(dev, "TPS68470 REVID: 0x%02x\n", version);
- 
- 	return 0;
--- 
-2.42.0
-
-From 232a0f88ecc21141c6f0d94cc74eb63c7869c217 Mon Sep 17 00:00:00 2001
-From: Daniel Scally <dan.scally@ideasonboard.com>
-Date: Thu, 2 Mar 2023 12:59:39 +0000
-Subject: [PATCH] platform/x86: int3472: Remap reset GPIO for INT347E
-
-ACPI _HID INT347E represents the OmniVision 7251 camera sensor. The
-driver for this sensor expects a single pin named "enable", but on
-some Microsoft Surface platforms the sensor is assigned a single
-GPIO who's type flag is INT3472_GPIO_TYPE_RESET.
-
-Remap the GPIO pin's function from "reset" to "enable". This is done
-outside of the existing remap table since it is a more widespread
-discrepancy than that method is designed for. Additionally swap the
-polarity of the pin to match the driver's expectation.
-
-Signed-off-by: Daniel Scally <dan.scally@ideasonboard.com>
-Patchset: cameras
----
- drivers/platform/x86/intel/int3472/discrete.c | 14 ++++++++++++++
- 1 file changed, 14 insertions(+)
-
-diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c
-index e33c2d75975cf..c0c90ae66b705 100644
---- a/drivers/platform/x86/intel/int3472/discrete.c
-+++ b/drivers/platform/x86/intel/int3472/discrete.c
-@@ -57,6 +57,9 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347
- 					  const char *func, u32 polarity)
- {
- 	char *path = agpio->resource_source.string_ptr;
-+	const struct acpi_device_id ov7251_ids[] = {
-+		{ "INT347E" },
-+	};
- 	struct gpiod_lookup *table_entry;
- 	struct acpi_device *adev;
- 	acpi_handle handle;
-@@ -67,6 +70,17 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347
- 		return -EINVAL;
- 	}
- 
-+	/*
-+	 * In addition to the function remap table we need to bulk remap the
-+	 * "reset" GPIO for the OmniVision 7251 sensor, as the driver for that
-+	 * expects its only GPIO pin to be called "enable" (and to have the
-+	 * opposite polarity).
-+	 */
-+	if (!strcmp(func, "reset") && !acpi_match_device_ids(int3472->sensor, ov7251_ids)) {
-+		func = "enable";
-+		polarity = GPIO_ACTIVE_HIGH;
-+	}
-+
- 	status = acpi_get_handle(NULL, path, &handle);
- 	if (ACPI_FAILURE(status))
- 		return -EINVAL;
--- 
-2.42.0
-
-From 0cfd5c05a675388bbb2edfa87423dc5ad931cc97 Mon Sep 17 00:00:00 2001
-From: Daniel Scally <dan.scally@ideasonboard.com>
-Date: Tue, 21 Mar 2023 13:45:26 +0000
-Subject: [PATCH] media: i2c: Clarify that gain is Analogue gain in OV7251
-
-Update the control ID for the gain control in the ov7251 driver to
-V4L2_CID_ANALOGUE_GAIN.
-
-Signed-off-by: Daniel Scally <dan.scally@ideasonboard.com>
-Patchset: cameras
----
- drivers/media/i2c/ov7251.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/media/i2c/ov7251.c b/drivers/media/i2c/ov7251.c
-index 675fb37a6feae..43b30db08c9e4 100644
---- a/drivers/media/i2c/ov7251.c
-+++ b/drivers/media/i2c/ov7251.c
-@@ -1051,7 +1051,7 @@ static int ov7251_s_ctrl(struct v4l2_ctrl *ctrl)
- 	case V4L2_CID_EXPOSURE:
- 		ret = ov7251_set_exposure(ov7251, ctrl->val);
- 		break;
--	case V4L2_CID_GAIN:
-+	case V4L2_CID_ANALOGUE_GAIN:
- 		ret = ov7251_set_gain(ov7251, ctrl->val);
- 		break;
- 	case V4L2_CID_TEST_PATTERN:
-@@ -1551,7 +1551,7 @@ static int ov7251_init_ctrls(struct ov7251 *ov7251)
- 	ov7251->exposure = v4l2_ctrl_new_std(&ov7251->ctrls, &ov7251_ctrl_ops,
- 					     V4L2_CID_EXPOSURE, 1, 32, 1, 32);
- 	ov7251->gain = v4l2_ctrl_new_std(&ov7251->ctrls, &ov7251_ctrl_ops,
--					 V4L2_CID_GAIN, 16, 1023, 1, 16);
-+					 V4L2_CID_ANALOGUE_GAIN, 16, 1023, 1, 16);
- 	v4l2_ctrl_new_std_menu_items(&ov7251->ctrls, &ov7251_ctrl_ops,
- 				     V4L2_CID_TEST_PATTERN,
- 				     ARRAY_SIZE(ov7251_test_pattern_menu) - 1,
--- 
-2.42.0
-
-From 18fa273c21f1dd86160f18242a81947392272443 Mon Sep 17 00:00:00 2001
-From: Daniel Scally <dan.scally@ideasonboard.com>
-Date: Wed, 22 Mar 2023 11:01:42 +0000
-Subject: [PATCH] media: v4l2-core: Acquire privacy led in
- v4l2_async_register_subdev()
-
-The current call to v4l2_subdev_get_privacy_led() is contained in
-v4l2_async_register_subdev_sensor(), but that function isn't used by
-all the sensor drivers. Move the acquisition of the privacy led to
-v4l2_async_register_subdev() instead.
-
-Signed-off-by: Daniel Scally <dan.scally@ideasonboard.com>
-Patchset: cameras
----
- drivers/media/v4l2-core/v4l2-async.c  | 4 ++++
- drivers/media/v4l2-core/v4l2-fwnode.c | 4 ----
- 2 files changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/media/v4l2-core/v4l2-async.c b/drivers/media/v4l2-core/v4l2-async.c
-index 091e8cf4114ba..cca10f5355844 100644
---- a/drivers/media/v4l2-core/v4l2-async.c
-+++ b/drivers/media/v4l2-core/v4l2-async.c
-@@ -796,6 +796,10 @@ int v4l2_async_register_subdev(struct v4l2_subdev *sd)
- 
- 	INIT_LIST_HEAD(&sd->asc_list);
- 
-+	ret = v4l2_subdev_get_privacy_led(sd);
-+	if (ret < 0)
-+		return ret;
-+
- 	/*
- 	 * No reference taken. The reference is held by the device (struct
- 	 * v4l2_subdev.dev), and async sub-device does not exist independently
-diff --git a/drivers/media/v4l2-core/v4l2-fwnode.c b/drivers/media/v4l2-core/v4l2-fwnode.c
-index 7f181fbbb1407..1c0347de4e216 100644
---- a/drivers/media/v4l2-core/v4l2-fwnode.c
-+++ b/drivers/media/v4l2-core/v4l2-fwnode.c
-@@ -1217,10 +1217,6 @@ int v4l2_async_register_subdev_sensor(struct v4l2_subdev *sd)
- 
- 	v4l2_async_subdev_nf_init(notifier, sd);
- 
--	ret = v4l2_subdev_get_privacy_led(sd);
--	if (ret < 0)
--		goto out_cleanup;
--
- 	ret = v4l2_async_nf_parse_fwnode_sensor(sd->dev, notifier);
- 	if (ret < 0)
- 		goto out_cleanup;
--- 
-2.42.0
-
-From 07e01113f2641afab78b155d42e9d9d399a9e164 Mon Sep 17 00:00:00 2001
-From: Kate Hsuan <hpa@redhat.com>
-Date: Tue, 21 Mar 2023 23:37:16 +0800
-Subject: [PATCH] platform: x86: int3472: Add MFD cell for tps68470 LED
-
-Add MFD cell for tps68470-led.
-
-Reviewed-by: Daniel Scally <dan.scally@ideasonboard.com>
-Signed-off-by: Kate Hsuan <hpa@redhat.com>
-Reviewed-by: Hans de Goede <hdegoede@redhat.com>
-Patchset: cameras
----
- drivers/platform/x86/intel/int3472/tps68470.c | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/platform/x86/intel/int3472/tps68470.c b/drivers/platform/x86/intel/int3472/tps68470.c
-index e3e1696e7f0ee..423dc555093f7 100644
---- a/drivers/platform/x86/intel/int3472/tps68470.c
-+++ b/drivers/platform/x86/intel/int3472/tps68470.c
-@@ -17,7 +17,7 @@
- #define DESIGNED_FOR_CHROMEOS		1
- #define DESIGNED_FOR_WINDOWS		2
- 
--#define TPS68470_WIN_MFD_CELL_COUNT	3
-+#define TPS68470_WIN_MFD_CELL_COUNT	4
- 
- static const struct mfd_cell tps68470_cros[] = {
- 	{ .name = "tps68470-gpio" },
-@@ -200,7 +200,8 @@ static int skl_int3472_tps68470_probe(struct i2c_client *client)
- 		cells[1].name = "tps68470-regulator";
- 		cells[1].platform_data = (void *)board_data->tps68470_regulator_pdata;
- 		cells[1].pdata_size = sizeof(struct tps68470_regulator_platform_data);
--		cells[2].name = "tps68470-gpio";
-+		cells[2].name = "tps68470-led";
-+		cells[3].name = "tps68470-gpio";
- 
- 		for (i = 0; i < board_data->n_gpiod_lookups; i++)
- 			gpiod_add_lookup_table(board_data->tps68470_gpio_lookup_tables[i]);
--- 
-2.42.0
-
-From a704bf822539e09b00015110b48bc997692c92ce Mon Sep 17 00:00:00 2001
-From: Kate Hsuan <hpa@redhat.com>
-Date: Tue, 21 Mar 2023 23:37:17 +0800
-Subject: [PATCH] include: mfd: tps68470: Add masks for LEDA and LEDB
-
-Add flags for both LEDA(TPS68470_ILEDCTL_ENA), LEDB
-(TPS68470_ILEDCTL_ENB), and current control mask for LEDB
-(TPS68470_ILEDCTL_CTRLB)
-
-Reviewed-by: Daniel Scally <dan.scally@ideasonboard.com>
-Reviewed-by: Hans de Goede <hdegoede@redhat.com>
-Signed-off-by: Kate Hsuan <hpa@redhat.com>
-Patchset: cameras
----
- include/linux/mfd/tps68470.h | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/include/linux/mfd/tps68470.h b/include/linux/mfd/tps68470.h
-index 7807fa329db00..2d2abb25b944f 100644
---- a/include/linux/mfd/tps68470.h
-+++ b/include/linux/mfd/tps68470.h
-@@ -34,6 +34,7 @@
- #define TPS68470_REG_SGPO		0x22
- #define TPS68470_REG_GPDI		0x26
- #define TPS68470_REG_GPDO		0x27
-+#define TPS68470_REG_ILEDCTL		0x28
- #define TPS68470_REG_VCMVAL		0x3C
- #define TPS68470_REG_VAUX1VAL		0x3D
- #define TPS68470_REG_VAUX2VAL		0x3E
-@@ -94,4 +95,8 @@
- #define TPS68470_GPIO_MODE_OUT_CMOS	2
- #define TPS68470_GPIO_MODE_OUT_ODRAIN	3
- 
-+#define TPS68470_ILEDCTL_ENA		BIT(2)
-+#define TPS68470_ILEDCTL_ENB		BIT(6)
-+#define TPS68470_ILEDCTL_CTRLB		GENMASK(5, 4)
-+
- #endif /* __LINUX_MFD_TPS68470_H */
--- 
-2.42.0
-
-From c8a6ce96be3a4dca7e9e99613b28494d10b4ade0 Mon Sep 17 00:00:00 2001
-From: Kate Hsuan <hpa@redhat.com>
-Date: Tue, 21 Mar 2023 23:37:18 +0800
-Subject: [PATCH] leds: tps68470: Add LED control for tps68470
-
-There are two LED controllers, LEDA indicator LED and LEDB flash LED for
-tps68470. LEDA can be enabled by setting TPS68470_ILEDCTL_ENA. Moreover,
-tps68470 provides four levels of power status for LEDB. If the
-properties called "ti,ledb-current" can be found, the current will be
-set according to the property values. These two LEDs can be controlled
-through the LED class of sysfs (tps68470-leda and tps68470-ledb).
-
-Signed-off-by: Kate Hsuan <hpa@redhat.com>
-Reviewed-by: Hans de Goede <hdegoede@redhat.com>
-Patchset: cameras
----
- drivers/leds/Kconfig         |  12 +++
- drivers/leds/Makefile        |   1 +
- drivers/leds/leds-tps68470.c | 185 +++++++++++++++++++++++++++++++++++
- 3 files changed, 198 insertions(+)
- create mode 100644 drivers/leds/leds-tps68470.c
-
-diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
-index b92208eccdea9..312c0c21cc5ef 100644
---- a/drivers/leds/Kconfig
-+++ b/drivers/leds/Kconfig
-@@ -873,6 +873,18 @@ config LEDS_TPS6105X
- 	  It is a single boost converter primarily for white LEDs and
- 	  audio amplifiers.
- 
-+config LEDS_TPS68470
-+	tristate "LED support for TI TPS68470"
-+	depends on LEDS_CLASS
-+	depends on INTEL_SKL_INT3472
-+	help
-+	  This driver supports TPS68470 PMIC with LED chip.
-+	  It provides two LED controllers, with the ability to drive 2
-+	  indicator LEDs and 2 flash LEDs.
-+
-+	  To compile this driver as a module, choose M and it will be
-+	  called leds-tps68470
-+
- config LEDS_IP30
- 	tristate "LED support for SGI Octane machines"
- 	depends on LEDS_CLASS
-diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
-index d7348e8bc019a..10caea4e7c614 100644
---- a/drivers/leds/Makefile
-+++ b/drivers/leds/Makefile
-@@ -84,6 +84,7 @@ obj-$(CONFIG_LEDS_TCA6507)		+= leds-tca6507.o
- obj-$(CONFIG_LEDS_TI_LMU_COMMON)	+= leds-ti-lmu-common.o
- obj-$(CONFIG_LEDS_TLC591XX)		+= leds-tlc591xx.o
- obj-$(CONFIG_LEDS_TPS6105X)		+= leds-tps6105x.o
-+obj-$(CONFIG_LEDS_TPS68470)		+= leds-tps68470.o
- obj-$(CONFIG_LEDS_TURRIS_OMNIA)		+= leds-turris-omnia.o
- obj-$(CONFIG_LEDS_WM831X_STATUS)	+= leds-wm831x-status.o
- obj-$(CONFIG_LEDS_WM8350)		+= leds-wm8350.o
-diff --git a/drivers/leds/leds-tps68470.c b/drivers/leds/leds-tps68470.c
-new file mode 100644
-index 0000000000000..35aeb5db89c8f
---- /dev/null
-+++ b/drivers/leds/leds-tps68470.c
-@@ -0,0 +1,185 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * LED driver for TPS68470 PMIC
-+ *
-+ * Copyright (C) 2023 Red Hat
-+ *
-+ * Authors:
-+ *	Kate Hsuan <hpa@redhat.com>
-+ */
-+
-+#include <linux/leds.h>
-+#include <linux/mfd/tps68470.h>
-+#include <linux/module.h>
-+#include <linux/platform_device.h>
-+#include <linux/property.h>
-+#include <linux/regmap.h>
-+
-+
-+#define lcdev_to_led(led_cdev) \
-+	container_of(led_cdev, struct tps68470_led, lcdev)
-+
-+#define led_to_tps68470(led, index) \
-+	container_of(led, struct tps68470_device, leds[index])
-+
-+enum tps68470_led_ids {
-+	TPS68470_ILED_A,
-+	TPS68470_ILED_B,
-+	TPS68470_NUM_LEDS
-+};
-+
-+static const char *tps68470_led_names[] = {
-+	[TPS68470_ILED_A] = "tps68470-iled_a",
-+	[TPS68470_ILED_B] = "tps68470-iled_b",
-+};
-+
-+struct tps68470_led {
-+	unsigned int led_id;
-+	struct led_classdev lcdev;
-+};
-+
-+struct tps68470_device {
-+	struct device *dev;
-+	struct regmap *regmap;
-+	struct tps68470_led leds[TPS68470_NUM_LEDS];
-+};
-+
-+enum ctrlb_current {
-+	CTRLB_2MA	= 0,
-+	CTRLB_4MA	= 1,
-+	CTRLB_8MA	= 2,
-+	CTRLB_16MA	= 3,
-+};
-+
-+static int tps68470_brightness_set(struct led_classdev *led_cdev, enum led_brightness brightness)
-+{
-+	struct tps68470_led *led = lcdev_to_led(led_cdev);
-+	struct tps68470_device *tps68470 = led_to_tps68470(led, led->led_id);
-+	struct regmap *regmap = tps68470->regmap;
-+
-+	switch (led->led_id) {
-+	case TPS68470_ILED_A:
-+		return regmap_update_bits(regmap, TPS68470_REG_ILEDCTL, TPS68470_ILEDCTL_ENA,
-+					  brightness ? TPS68470_ILEDCTL_ENA : 0);
-+	case TPS68470_ILED_B:
-+		return regmap_update_bits(regmap, TPS68470_REG_ILEDCTL, TPS68470_ILEDCTL_ENB,
-+					  brightness ? TPS68470_ILEDCTL_ENB : 0);
-+	}
-+	return -EINVAL;
-+}
-+
-+static enum led_brightness tps68470_brightness_get(struct led_classdev *led_cdev)
-+{
-+	struct tps68470_led *led = lcdev_to_led(led_cdev);
-+	struct tps68470_device *tps68470 = led_to_tps68470(led, led->led_id);
-+	struct regmap *regmap = tps68470->regmap;
-+	int ret = 0;
-+	int value = 0;
-+
-+	ret =  regmap_read(regmap, TPS68470_REG_ILEDCTL, &value);
-+	if (ret)
-+		return dev_err_probe(led_cdev->dev, -EINVAL, "failed on reading register\n");
-+
-+	switch (led->led_id) {
-+	case TPS68470_ILED_A:
-+		value = value & TPS68470_ILEDCTL_ENA;
-+		break;
-+	case TPS68470_ILED_B:
-+		value = value & TPS68470_ILEDCTL_ENB;
-+		break;
-+	}
-+
-+	return value ? LED_ON : LED_OFF;
-+}
-+
-+
-+static int tps68470_ledb_current_init(struct platform_device *pdev,
-+				      struct tps68470_device *tps68470)
-+{
-+	int ret = 0;
-+	unsigned int curr;
-+
-+	/* configure LEDB current if the properties can be got */
-+	if (!device_property_read_u32(&pdev->dev, "ti,ledb-current", &curr)) {
-+		if (curr > CTRLB_16MA) {
-+			dev_err(&pdev->dev,
-+				"Invalid LEDB current value: %d\n",
-+				curr);
-+			return -EINVAL;
-+		}
-+		ret = regmap_update_bits(tps68470->regmap, TPS68470_REG_ILEDCTL,
-+					 TPS68470_ILEDCTL_CTRLB, curr);
-+	}
-+	return ret;
-+}
-+
-+static int tps68470_leds_probe(struct platform_device *pdev)
-+{
-+	int i = 0;
-+	int ret = 0;
-+	struct tps68470_device *tps68470;
-+	struct tps68470_led *led;
-+	struct led_classdev *lcdev;
-+
-+	tps68470 = devm_kzalloc(&pdev->dev, sizeof(struct tps68470_device),
-+				GFP_KERNEL);
-+	if (!tps68470)
-+		return -ENOMEM;
-+
-+	tps68470->dev = &pdev->dev;
-+	tps68470->regmap = dev_get_drvdata(pdev->dev.parent);
-+
-+	for (i = 0; i < TPS68470_NUM_LEDS; i++) {
-+		led = &tps68470->leds[i];
-+		lcdev = &led->lcdev;
-+
-+		led->led_id = i;
-+
-+		lcdev->name = devm_kasprintf(tps68470->dev, GFP_KERNEL, "%s::%s",
-+					     tps68470_led_names[i], LED_FUNCTION_INDICATOR);
-+		if (!lcdev->name)
-+			return -ENOMEM;
-+
-+		lcdev->max_brightness = 1;
-+		lcdev->brightness = 0;
-+		lcdev->brightness_set_blocking = tps68470_brightness_set;
-+		lcdev->brightness_get = tps68470_brightness_get;
-+		lcdev->dev = &pdev->dev;
-+
-+		ret = devm_led_classdev_register(tps68470->dev, lcdev);
-+		if (ret) {
-+			dev_err_probe(tps68470->dev, ret,
-+				      "error registering led\n");
-+			goto err_exit;
-+		}
-+
-+		if (i == TPS68470_ILED_B) {
-+			ret = tps68470_ledb_current_init(pdev, tps68470);
-+			if (ret)
-+				goto err_exit;
-+		}
-+	}
-+
-+err_exit:
-+	if (ret) {
-+		for (i = 0; i < TPS68470_NUM_LEDS; i++) {
-+			if (tps68470->leds[i].lcdev.name)
-+				devm_led_classdev_unregister(&pdev->dev,
-+							     &tps68470->leds[i].lcdev);
-+		}
-+	}
-+
-+	return ret;
-+}
-+static struct platform_driver tps68470_led_driver = {
-+	.driver = {
-+		   .name = "tps68470-led",
-+	},
-+	.probe = tps68470_leds_probe,
-+};
-+
-+module_platform_driver(tps68470_led_driver);
-+
-+MODULE_ALIAS("platform:tps68470-led");
-+MODULE_DESCRIPTION("LED driver for TPS68470 PMIC");
-+MODULE_LICENSE("GPL v2");
--- 
-2.42.0
-
-From 82252c3764ecee6c09218077759072f15001f9ee Mon Sep 17 00:00:00 2001
-From: Sachi King <nakato@nakato.io>
-Date: Sat, 29 May 2021 17:47:38 +1000
-Subject: [PATCH] ACPI: Add quirk for Surface Laptop 4 AMD missing irq 7
- override
-
-This patch is the work of Thomas Gleixner <tglx@linutronix.de> and is
-copied from:
-https://lore.kernel.org/lkml/87lf8ddjqx.ffs@nanos.tec.linutronix.de/
-
-This patch adds a quirk to the ACPI setup to patch in the the irq 7 pin
-setup that is missing in the laptops ACPI table.
-
-This patch was used for validation of the issue, and is not a proper
-fix, but is probably a better temporary hack than continuing to probe
-the Legacy PIC and run with the PIC in an unknown state.
-
-Patchset: amd-gpio
----
- arch/x86/kernel/acpi/boot.c | 17 +++++++++++++++++
- 1 file changed, 17 insertions(+)
-
-diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
-index c55c0ef47a187..f29740cf89ff6 100644
---- a/arch/x86/kernel/acpi/boot.c
-+++ b/arch/x86/kernel/acpi/boot.c
-@@ -22,6 +22,7 @@
- #include <linux/efi-bgrt.h>
- #include <linux/serial_core.h>
- #include <linux/pgtable.h>
-+#include <linux/dmi.h>
- 
- #include <asm/e820/api.h>
- #include <asm/irqdomain.h>
-@@ -1255,6 +1256,17 @@ static void __init mp_config_acpi_legacy_irqs(void)
- 	}
- }
- 
-+static const struct dmi_system_id surface_quirk[] __initconst = {
-+	{
-+		.ident = "Microsoft Surface Laptop 4 (AMD)",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
-+			DMI_MATCH(DMI_PRODUCT_SKU, "Surface_Laptop_4_1952:1953")
-+		},
-+	},
-+	{}
-+};
-+
- /*
-  * Parse IOAPIC related entries in MADT
-  * returns 0 on success, < 0 on error
-@@ -1310,6 +1322,11 @@ static int __init acpi_parse_madt_ioapic_entries(void)
- 		acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0,
- 				      acpi_gbl_FADT.sci_interrupt);
- 
-+	if (dmi_check_system(surface_quirk)) {
-+		pr_warn("Surface hack: Override irq 7\n");
-+		mp_override_legacy_irq(7, 3, 3, 7);
-+	}
-+
- 	/* Fill in identity legacy mappings where no override */
- 	mp_config_acpi_legacy_irqs();
- 
--- 
-2.42.0
-
-From 52e3f50633128a93bf99ca5c97f98929da66a9ed Mon Sep 17 00:00:00 2001
-From: Maximilian Luz <luzmaximilian@gmail.com>
-Date: Thu, 3 Jun 2021 14:04:26 +0200
-Subject: [PATCH] ACPI: Add AMD 13" Surface Laptop 4 model to irq 7 override
- quirk
-
-The 13" version of the Surface Laptop 4 has the same problem as the 15"
-version, but uses a different SKU. Add that SKU to the quirk as well.
-
-Patchset: amd-gpio
----
- arch/x86/kernel/acpi/boot.c | 9 ++++++++-
- 1 file changed, 8 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
-index f29740cf89ff6..247d2a8bcdf4b 100644
---- a/arch/x86/kernel/acpi/boot.c
-+++ b/arch/x86/kernel/acpi/boot.c
-@@ -1258,12 +1258,19 @@ static void __init mp_config_acpi_legacy_irqs(void)
- 
- static const struct dmi_system_id surface_quirk[] __initconst = {
- 	{
--		.ident = "Microsoft Surface Laptop 4 (AMD)",
-+		.ident = "Microsoft Surface Laptop 4 (AMD 15\")",
- 		.matches = {
- 			DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
- 			DMI_MATCH(DMI_PRODUCT_SKU, "Surface_Laptop_4_1952:1953")
- 		},
- 	},
-+	{
-+		.ident = "Microsoft Surface Laptop 4 (AMD 13\")",
-+		.matches = {
-+			DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
-+			DMI_MATCH(DMI_PRODUCT_SKU, "Surface_Laptop_4_1958:1959")
-+		},
-+	},
- 	{}
- };
- 
--- 
-2.42.0
-
-From 8cd23b1bb3a8b7a3ef7cec2c37e7e46e6397a858 Mon Sep 17 00:00:00 2001
-From: "Bart Groeneveld | GPX Solutions B.V" <bart@gpxbv.nl>
-Date: Mon, 5 Dec 2022 16:08:46 +0100
-Subject: [PATCH] acpi: allow usage of acpi_tad on HW-reduced platforms
-
-The specification [1] allows so-called HW-reduced platforms,
-which do not implement everything, especially the wakeup related stuff.
-
-In that case, it is still usable as a RTC. This is helpful for [2]
-and [3], which is about a device with no other working RTC,
-but it does have an HW-reduced TAD, which can be used as a RTC instead.
-
-[1]: https://uefi.org/specs/ACPI/6.5/09_ACPI_Defined_Devices_and_Device_Specific_Objects.html#time-and-alarm-device
-[2]: https://bugzilla.kernel.org/show_bug.cgi?id=212313
-[3]: https://github.com/linux-surface/linux-surface/issues/415
-
-Signed-off-by: Bart Groeneveld | GPX Solutions B.V. <bart@gpxbv.nl>
-Patchset: rtc
----
- drivers/acpi/acpi_tad.c | 36 ++++++++++++++++++++++++------------
- 1 file changed, 24 insertions(+), 12 deletions(-)
-
-diff --git a/drivers/acpi/acpi_tad.c b/drivers/acpi/acpi_tad.c
-index 33c3b16af556b..900445d06623d 100644
---- a/drivers/acpi/acpi_tad.c
-+++ b/drivers/acpi/acpi_tad.c
-@@ -432,6 +432,14 @@ static ssize_t caps_show(struct device *dev, struct device_attribute *attr,
- 
- static DEVICE_ATTR_RO(caps);
- 
-+static struct attribute *acpi_tad_attrs[] = {
-+	&dev_attr_caps.attr,
-+	NULL,
-+};
-+static const struct attribute_group acpi_tad_attr_group = {
-+	.attrs	= acpi_tad_attrs,
-+};
-+
- static ssize_t ac_alarm_store(struct device *dev, struct device_attribute *attr,
- 			      const char *buf, size_t count)
- {
-@@ -480,15 +488,14 @@ static ssize_t ac_status_show(struct device *dev, struct device_attribute *attr,
- 
- static DEVICE_ATTR_RW(ac_status);
- 
--static struct attribute *acpi_tad_attrs[] = {
--	&dev_attr_caps.attr,
-+static struct attribute *acpi_tad_ac_attrs[] = {
- 	&dev_attr_ac_alarm.attr,
- 	&dev_attr_ac_policy.attr,
- 	&dev_attr_ac_status.attr,
- 	NULL,
- };
--static const struct attribute_group acpi_tad_attr_group = {
--	.attrs	= acpi_tad_attrs,
-+static const struct attribute_group acpi_tad_ac_attr_group = {
-+	.attrs	= acpi_tad_ac_attrs,
- };
- 
- static ssize_t dc_alarm_store(struct device *dev, struct device_attribute *attr,
-@@ -564,13 +571,18 @@ static int acpi_tad_remove(struct platform_device *pdev)
- 
- 	pm_runtime_get_sync(dev);
- 
-+	if (dd->capabilities & ACPI_TAD_AC_WAKE)
-+		sysfs_remove_group(&dev->kobj, &acpi_tad_ac_attr_group);
-+
- 	if (dd->capabilities & ACPI_TAD_DC_WAKE)
- 		sysfs_remove_group(&dev->kobj, &acpi_tad_dc_attr_group);
- 
- 	sysfs_remove_group(&dev->kobj, &acpi_tad_attr_group);
- 
--	acpi_tad_disable_timer(dev, ACPI_TAD_AC_TIMER);
--	acpi_tad_clear_status(dev, ACPI_TAD_AC_TIMER);
-+	if (dd->capabilities & ACPI_TAD_AC_WAKE) {
-+		acpi_tad_disable_timer(dev, ACPI_TAD_AC_TIMER);
-+		acpi_tad_clear_status(dev, ACPI_TAD_AC_TIMER);
-+	}
- 	if (dd->capabilities & ACPI_TAD_DC_WAKE) {
- 		acpi_tad_disable_timer(dev, ACPI_TAD_DC_TIMER);
- 		acpi_tad_clear_status(dev, ACPI_TAD_DC_TIMER);
-@@ -613,12 +625,6 @@ static int acpi_tad_probe(struct platform_device *pdev)
- 		goto remove_handler;
- 	}
- 
--	if (!acpi_has_method(handle, "_PRW")) {
--		dev_info(dev, "Missing _PRW\n");
--		ret = -ENODEV;
--		goto remove_handler;
--	}
--
- 	dd = devm_kzalloc(dev, sizeof(*dd), GFP_KERNEL);
- 	if (!dd) {
- 		ret = -ENOMEM;
-@@ -649,6 +655,12 @@ static int acpi_tad_probe(struct platform_device *pdev)
- 	if (ret)
- 		goto fail;
- 
-+	if (caps & ACPI_TAD_AC_WAKE) {
-+		ret = sysfs_create_group(&dev->kobj, &acpi_tad_ac_attr_group);
-+		if (ret)
-+			goto fail;
-+	}
-+
- 	if (caps & ACPI_TAD_DC_WAKE) {
- 		ret = sysfs_create_group(&dev->kobj, &acpi_tad_dc_attr_group);
- 		if (ret)
--- 
-2.42.0
-
diff --git a/patches/nobara/mt76:-mt7921:-Disable-powersave-features-by-default.patch b/patches/nobara/mt76:-mt7921:-Disable-powersave-features-by-default.patch
deleted file mode 100644
index a397014..0000000
--- a/patches/nobara/mt76:-mt7921:-Disable-powersave-features-by-default.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Jan200101 <sentrycraft123@gmail.com>
-Date: Mon, 27 Nov 2023 15:25:48 +0100
-Subject: [PATCH] mt76: mt7921: Disable powersave features by default
-
-This brings WiFi latency down considerably and makes latency consistent by
-disabling runtime PM and typical powersave features by default. The actual
-power consumption difference is inconsequential on desktops and laptops,
-while the performance difference is monumental. Latencies of 20+ ms are no
-longer observed after this change, and the connection is much more stable.
-
-Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
----
- drivers/net/wireless/mediatek/mt76/mt7921/init.c | 8 ++------
- 1 file changed, 2 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
-index ff63f37f67d9..840b4c606c83 100644
---- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
-+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
-@@ -220,12 +220,6 @@ int mt7921_register_device(struct mt792x_dev *dev)
- 	dev->pm.idle_timeout = MT792x_PM_TIMEOUT;
- 	dev->pm.stats.last_wake_event = jiffies;
- 	dev->pm.stats.last_doze_event = jiffies;
--	if (!mt76_is_usb(&dev->mt76)) {
--		dev->pm.enable_user = true;
--		dev->pm.enable = true;
--		dev->pm.ds_enable_user = true;
--		dev->pm.ds_enable = true;
--	}
-
- 	if (!mt76_is_mmio(&dev->mt76))
- 		hw->extra_tx_headroom += MT_SDIO_TXD_SIZE + MT_SDIO_HDR_SIZE;
-@@ -240,6 +234,8 @@ int mt7921_register_device(struct mt792x_dev *dev)
- 	if (ret)
- 		return ret;
-
-+	hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT;
-+
- 	hw->wiphy->reg_notifier = mt7921_regd_notifier;
- 	dev->mphy.sband_2g.sband.ht_cap.cap |=
- 			IEEE80211_HT_CAP_LDPC_CODING |
diff --git a/patches/nobara/set-ps4-bt-poll-rate-1000hz.patch b/patches/nobara/set-ps4-bt-poll-rate-1000hz.patch
deleted file mode 100644
index 8431cf7..0000000
--- a/patches/nobara/set-ps4-bt-poll-rate-1000hz.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-From 0f2c07ab93dca496a1f34399ad2ff8a954690a72 Mon Sep 17 00:00:00 2001
-From: GloriousEggroll <gloriouseggroll@gmail.com>
-Date: Mon, 29 May 2023 17:15:14 -0600
-Subject: [PATCH] set ds controller bluetooth pollrate to 1 ms
-
----
- drivers/hid/hid-playstation.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c
-index 8ac8f7b8e..1130663c3 100644
---- a/drivers/hid/hid-playstation.c
-+++ b/drivers/hid/hid-playstation.c
-@@ -330,8 +330,8 @@ struct dualsense_output_report {
-  * 0x3F - disabled
-  */
- #define DS4_OUTPUT_HWCTL_BT_POLL_MASK	0x3F
--/* Default to 4ms poll interval, which is same as USB (not adjustable). */
--#define DS4_BT_DEFAULT_POLL_INTERVAL_MS	4
-+/* Default to 1ms poll interval (1000Hz, lower latency). */
-+#define DS4_BT_DEFAULT_POLL_INTERVAL_MS	1
- #define DS4_OUTPUT_HWCTL_CRC32		0x40
- #define DS4_OUTPUT_HWCTL_HID		0x80
- 
--- 
-2.40.1
-
diff --git a/patches/nobara/steam-deck.patch b/patches/nobara/steam-deck.patch
deleted file mode 100644
index 9eba750..0000000
--- a/patches/nobara/steam-deck.patch
+++ /dev/null
@@ -1,2497 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andrey Smirnov <andrew.smirnov@gmail.com>
-Date: Sat, 19 Feb 2022 16:08:36 -0800
-Subject: [PATCH] mfd: Add MFD core driver for Steam Deck
-
-Add MFD core driver for Steam Deck. Doesn't really do much so far
-besides instantiating a number of MFD cells that implement all the
-interesting functionality.
-
-(cherry picked from commit 5f534c2d6ebdefccb9c024eb0f013bc1c0c622d9)
-Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
-Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
----
- drivers/mfd/Kconfig     |  11 ++++
- drivers/mfd/Makefile    |   2 +
- drivers/mfd/steamdeck.c | 127 ++++++++++++++++++++++++++++++++++++++++
- 3 files changed, 140 insertions(+)
- create mode 100644 drivers/mfd/steamdeck.c
-
-diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
-index 8b93856de432..af335d9150e9 100644
---- a/drivers/mfd/Kconfig
-+++ b/drivers/mfd/Kconfig
-@@ -2260,5 +2260,16 @@ config MFD_RSMU_SPI
- 	  Additional drivers must be enabled in order to use the functionality
- 	  of the device.
-
-+config MFD_STEAMDECK
-+	tristate "Valve Steam Deck"
-+	select MFD_CORE
-+	depends on ACPI
-+	depends on X86_64 || COMPILE_TEST
-+	help
-+	  This driver registers various MFD cells that expose aspects
-+	  of Steam Deck specific ACPI functionality.
-+
-+	  Say N here, unless you are running on Steam Deck hardware.
-+
- endmenu
- endif
-diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
-index 7ed3ef4a698c..d01254ef0106 100644
---- a/drivers/mfd/Makefile
-+++ b/drivers/mfd/Makefile
-@@ -280,3 +280,5 @@ rsmu-i2c-objs			:= rsmu_core.o rsmu_i2c.o
- rsmu-spi-objs			:= rsmu_core.o rsmu_spi.o
- obj-$(CONFIG_MFD_RSMU_I2C)	+= rsmu-i2c.o
- obj-$(CONFIG_MFD_RSMU_SPI)	+= rsmu-spi.o
-+
-+obj-$(CONFIG_MFD_STEAMDECK)	+= steamdeck.o
-diff --git a/drivers/mfd/steamdeck.c b/drivers/mfd/steamdeck.c
-new file mode 100644
-index 000000000000..0e504b3c2796
---- /dev/null
-+++ b/drivers/mfd/steamdeck.c
-@@ -0,0 +1,127 @@
-+// SPDX-License-Identifier: GPL-2.0+
-+
-+/*
-+ * Steam Deck EC MFD core driver
-+ *
-+ * Copyright (C) 2021-2022 Valve Corporation
-+ *
-+ */
-+
-+#include <linux/acpi.h>
-+#include <linux/platform_device.h>
-+#include <linux/mfd/core.h>
-+
-+#define STEAMDECK_STA_OK			\
-+	(ACPI_STA_DEVICE_ENABLED |		\
-+	 ACPI_STA_DEVICE_PRESENT |		\
-+	 ACPI_STA_DEVICE_FUNCTIONING)
-+
-+struct steamdeck {
-+	struct acpi_device *adev;
-+	struct device *dev;
-+};
-+
-+#define STEAMDECK_ATTR_RO(_name, _method)				\
-+	static ssize_t _name##_show(struct device *dev,			\
-+				    struct device_attribute *attr,	\
-+				    char *buf)				\
-+	{								\
-+		struct steamdeck *sd = dev_get_drvdata(dev);		\
-+		unsigned long long val;					\
-+									\
-+		if (ACPI_FAILURE(acpi_evaluate_integer(			\
-+					 sd->adev->handle,		\
-+					 _method, NULL, &val)))		\
-+			return -EIO;					\
-+									\
-+		return sysfs_emit(buf, "%llu\n", val);			\
-+	}								\
-+	static DEVICE_ATTR_RO(_name)
-+
-+STEAMDECK_ATTR_RO(firmware_version, "PDFW");
-+STEAMDECK_ATTR_RO(board_id, "BOID");
-+
-+static struct attribute *steamdeck_attrs[] = {
-+	&dev_attr_firmware_version.attr,
-+	&dev_attr_board_id.attr,
-+	NULL
-+};
-+
-+ATTRIBUTE_GROUPS(steamdeck);
-+
-+static const struct mfd_cell steamdeck_cells[] = {
-+	{ .name = "steamdeck-hwmon"  },
-+	{ .name = "steamdeck-leds"   },
-+	{ .name = "steamdeck-extcon" },
-+};
-+
-+static void steamdeck_remove_sysfs_groups(void *data)
-+{
-+	struct steamdeck *sd = data;
-+
-+	sysfs_remove_groups(&sd->dev->kobj, steamdeck_groups);
-+}
-+
-+static int steamdeck_probe(struct platform_device *pdev)
-+{
-+	struct device *dev = &pdev->dev;
-+	unsigned long long sta;
-+	struct steamdeck *sd;
-+	acpi_status status;
-+	int ret;
-+
-+	sd = devm_kzalloc(dev, sizeof(*sd), GFP_KERNEL);
-+	if (!sd)
-+		return -ENOMEM;
-+	sd->adev = ACPI_COMPANION(dev);
-+	sd->dev = dev;
-+	platform_set_drvdata(pdev, sd);
-+
-+	status = acpi_evaluate_integer(sd->adev->handle, "_STA",
-+				       NULL, &sta);
-+	if (ACPI_FAILURE(status)) {
-+		dev_err(dev, "Status check failed (0x%x)\n", status);
-+		return -EINVAL;
-+	}
-+
-+	if ((sta & STEAMDECK_STA_OK) != STEAMDECK_STA_OK) {
-+		dev_err(dev, "Device is not ready\n");
-+		return -EINVAL;
-+	}
-+
-+	ret = sysfs_create_groups(&dev->kobj, steamdeck_groups);
-+	if (ret) {
-+		dev_err(dev, "Failed to create sysfs group\n");
-+		return ret;
-+	}
-+
-+	ret = devm_add_action_or_reset(dev, steamdeck_remove_sysfs_groups,
-+				       sd);
-+	if (ret) {
-+		dev_err(dev, "Failed to register devres action\n");
-+		return ret;
-+	}
-+
-+	return devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE,
-+				    steamdeck_cells, ARRAY_SIZE(steamdeck_cells),
-+				    NULL, 0, NULL);
-+}
-+
-+static const struct acpi_device_id steamdeck_device_ids[] = {
-+	{ "VLV0100", 0 },
-+	{ "", 0 },
-+};
-+MODULE_DEVICE_TABLE(acpi, steamdeck_device_ids);
-+
-+static struct platform_driver steamdeck_driver = {
-+	.probe = steamdeck_probe,
-+	.driver = {
-+		.name = "steamdeck",
-+		.acpi_match_table = steamdeck_device_ids,
-+	},
-+};
-+module_platform_driver(steamdeck_driver);
-+
-+MODULE_AUTHOR("Andrey Smirnov <andrew.smirnov@gmail.com>");
-+MODULE_DESCRIPTION("Steam Deck EC MFD core driver");
-+MODULE_LICENSE("GPL");
-
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andrey Smirnov <andrew.smirnov@gmail.com>
-Date: Sat, 19 Feb 2022 16:09:45 -0800
-Subject: [PATCH] hwmon: Add driver for Steam Deck's EC sensors
-
-Add driver for sensors exposed by EC firmware on Steam Deck hardware.
-
-(cherry picked from commit 6917aac77bee6185ae3920b936cdbe7876118c0b)
-Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
-Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
----
- drivers/hwmon/Kconfig           |  11 ++
- drivers/hwmon/Makefile          |   1 +
- drivers/hwmon/steamdeck-hwmon.c | 224 ++++++++++++++++++++++++++++++++
- 3 files changed, 236 insertions(+)
- create mode 100644 drivers/hwmon/steamdeck-hwmon.c
-
-diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
-index 7ac3daaf59ce..d784c78417cf 100644
---- a/drivers/hwmon/Kconfig
-+++ b/drivers/hwmon/Kconfig
-@@ -1900,6 +1900,17 @@ config SENSORS_SCH5636
- 	  This driver can also be built as a module. If so, the module
- 	  will be called sch5636.
-
-+config SENSORS_STEAMDECK
-+	tristate "Steam Deck EC sensors"
-+	depends on MFD_STEAMDECK
-+	help
-+	  If you say yes here you get support for the hardware
-+	  monitoring features exposed by EC firmware on Steam Deck
-+	  devices
-+
-+	  This driver can also be built as a module. If so, the module
-+	  will be called steamdeck-hwmon.
-+
- config SENSORS_STTS751
- 	tristate "ST Microelectronics STTS751"
- 	depends on I2C
-diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
-index 11d076cad8a2..d03c1e1d339f 100644
---- a/drivers/hwmon/Makefile
-+++ b/drivers/hwmon/Makefile
-@@ -191,6 +191,7 @@ obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o
- obj-$(CONFIG_SENSORS_SMSC47M1)	+= smsc47m1.o
- obj-$(CONFIG_SENSORS_SMSC47M192)+= smsc47m192.o
- obj-$(CONFIG_SENSORS_SPARX5)	+= sparx5-temp.o
-+obj-$(CONFIG_SENSORS_STEAMDECK) += steamdeck-hwmon.o
- obj-$(CONFIG_SENSORS_STTS751)	+= stts751.o
- obj-$(CONFIG_SENSORS_SY7636A)	+= sy7636a-hwmon.o
- obj-$(CONFIG_SENSORS_AMC6821)	+= amc6821.o
-diff --git a/drivers/hwmon/steamdeck-hwmon.c b/drivers/hwmon/steamdeck-hwmon.c
-new file mode 100644
-index 000000000000..fab9e9460bd4
---- /dev/null
-+++ b/drivers/hwmon/steamdeck-hwmon.c
-@@ -0,0 +1,224 @@
-+// SPDX-License-Identifier: GPL-2.0+
-+/*
-+ * Steam Deck EC sensors driver
-+ *
-+ * Copyright (C) 2021-2022 Valve Corporation
-+ */
-+
-+#include <linux/acpi.h>
-+#include <linux/hwmon.h>
-+#include <linux/platform_device.h>
-+
-+#define STEAMDECK_HWMON_NAME	"steamdeck-hwmon"
-+
-+struct steamdeck_hwmon {
-+	struct acpi_device *adev;
-+};
-+
-+static long
-+steamdeck_hwmon_get(struct steamdeck_hwmon *sd, const char *method)
-+{
-+	unsigned long long val;
-+	if (ACPI_FAILURE(acpi_evaluate_integer(sd->adev->handle,
-+					       (char *)method, NULL, &val)))
-+		return -EIO;
-+
-+	return val;
-+}
-+
-+static int
-+steamdeck_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
-+		     u32 attr, int channel, long *out)
-+{
-+	struct steamdeck_hwmon *sd = dev_get_drvdata(dev);
-+
-+	switch (type) {
-+	case hwmon_curr:
-+		if (attr != hwmon_curr_input)
-+			return -EOPNOTSUPP;
-+
-+		*out = steamdeck_hwmon_get(sd, "PDAM");
-+		if (*out < 0)
-+			return *out;
-+		break;
-+	case hwmon_in:
-+		if (attr != hwmon_in_input)
-+			return -EOPNOTSUPP;
-+
-+		*out = steamdeck_hwmon_get(sd, "PDVL");
-+		if (*out < 0)
-+			return *out;
-+		break;
-+	case hwmon_temp:
-+		if (attr != hwmon_temp_input)
-+			return -EOPNOTSUPP;
-+
-+		*out = steamdeck_hwmon_get(sd, "BATT");
-+		if (*out < 0)
-+			return *out;
-+		/*
-+		 * Assuming BATT returns deg C we need to mutiply it
-+		 * by 1000 to convert to mC
-+		 */
-+		*out *= 1000;
-+		break;
-+	case hwmon_fan:
-+		switch (attr) {
-+		case hwmon_fan_input:
-+			*out = steamdeck_hwmon_get(sd, "FANR");
-+			if (*out < 0)
-+				return *out;
-+			break;
-+		case hwmon_fan_target:
-+			*out = steamdeck_hwmon_get(sd, "FSSR");
-+			if (*out < 0)
-+				return *out;
-+			break;
-+		case hwmon_fan_fault:
-+			*out = steamdeck_hwmon_get(sd, "FANC");
-+			if (*out < 0)
-+				return *out;
-+			/*
-+			 * FANC (Fan check):
-+			 * 0: Abnormal
-+			 * 1: Normal
-+			 */
-+			*out = !*out;
-+			break;
-+		default:
-+			return -EOPNOTSUPP;
-+		}
-+		break;
-+	default:
-+		return -EOPNOTSUPP;
-+	}
-+
-+	return 0;
-+}
-+
-+static int
-+steamdeck_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type,
-+			    u32 attr, int channel, const char **str)
-+{
-+	switch (type) {
-+		/*
-+		 * These two aren't, strictly speaking, measured. EC
-+		 * firmware just reports what PD negotiation resulted
-+		 * in.
-+		 */
-+	case hwmon_curr:
-+		*str = "PD Contract Current";
-+		break;
-+	case hwmon_in:
-+		*str = "PD Contract Voltage";
-+		break;
-+	case hwmon_temp:
-+		*str = "Battery Temp";
-+		break;
-+	case hwmon_fan:
-+		*str = "System Fan";
-+		break;
-+	default:
-+		return -EOPNOTSUPP;
-+	}
-+
-+	return 0;
-+}
-+
-+static int
-+steamdeck_hwmon_write(struct device *dev, enum hwmon_sensor_types type,
-+		      u32 attr, int channel, long val)
-+{
-+	struct steamdeck_hwmon *sd = dev_get_drvdata(dev);
-+
-+	if (type != hwmon_fan ||
-+	    attr != hwmon_fan_target)
-+		return -EOPNOTSUPP;
-+
-+	val = clamp_val(val, 0, 7300);
-+
-+	if (ACPI_FAILURE(acpi_execute_simple_method(sd->adev->handle,
-+						    "FANS", val)))
-+		return -EIO;
-+
-+	return 0;
-+}
-+
-+static umode_t
-+steamdeck_hwmon_is_visible(const void *data, enum hwmon_sensor_types type,
-+			   u32 attr, int channel)
-+{
-+	if (type == hwmon_fan &&
-+	    attr == hwmon_fan_target)
-+		return 0644;
-+
-+	return 0444;
-+}
-+
-+static const struct hwmon_channel_info *steamdeck_hwmon_info[] = {
-+	HWMON_CHANNEL_INFO(in,
-+			   HWMON_I_INPUT | HWMON_I_LABEL),
-+	HWMON_CHANNEL_INFO(curr,
-+			   HWMON_C_INPUT | HWMON_C_LABEL),
-+	HWMON_CHANNEL_INFO(temp,
-+			   HWMON_T_INPUT | HWMON_T_LABEL),
-+	HWMON_CHANNEL_INFO(fan,
-+			   HWMON_F_INPUT | HWMON_F_LABEL |
-+			   HWMON_F_TARGET | HWMON_F_FAULT),
-+	NULL
-+};
-+
-+static const struct hwmon_ops steamdeck_hwmon_ops = {
-+	.is_visible = steamdeck_hwmon_is_visible,
-+	.read = steamdeck_hwmon_read,
-+	.read_string = steamdeck_hwmon_read_string,
-+	.write = steamdeck_hwmon_write,
-+};
-+
-+static const struct hwmon_chip_info steamdeck_hwmon_chip_info = {
-+	.ops = &steamdeck_hwmon_ops,
-+	.info = steamdeck_hwmon_info,
-+};
-+
-+static int steamdeck_hwmon_probe(struct platform_device *pdev)
-+{
-+	struct device *dev = &pdev->dev;
-+	struct steamdeck_hwmon *sd;
-+	struct device *hwmon;
-+
-+	sd = devm_kzalloc(dev, sizeof(*sd), GFP_KERNEL);
-+	if (!sd)
-+		return -ENOMEM;
-+
-+	sd->adev = ACPI_COMPANION(dev->parent);
-+	hwmon = devm_hwmon_device_register_with_info(dev,
-+						     "steamdeck_hwmon",
-+						     sd,
-+						     &steamdeck_hwmon_chip_info,
-+						     NULL);
-+	if (IS_ERR(hwmon)) {
-+		dev_err(dev, "Failed to register HWMON device");
-+		return PTR_ERR(hwmon);
-+	}
-+
-+	return 0;
-+}
-+
-+static const struct platform_device_id steamdeck_hwmon_id_table[] = {
-+	{ .name = STEAMDECK_HWMON_NAME },
-+	{}
-+};
-+MODULE_DEVICE_TABLE(platform, steamdeck_hwmon_id_table);
-+
-+static struct platform_driver steamdeck_hwmon_driver = {
-+	.probe = steamdeck_hwmon_probe,
-+	.driver = {
-+		.name = STEAMDECK_HWMON_NAME,
-+	},
-+	.id_table = steamdeck_hwmon_id_table,
-+};
-+module_platform_driver(steamdeck_hwmon_driver);
-+
-+MODULE_AUTHOR("Andrey Smirnov <andrew.smirnov@gmail.com>");
-+MODULE_DESCRIPTION("Steam Deck EC sensors driver");
-+MODULE_LICENSE("GPL");
-
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andrey Smirnov <andrew.smirnov@gmail.com>
-Date: Sun, 27 Feb 2022 12:58:05 -0800
-Subject: [PATCH] leds: steamdeck: Add support for Steam Deck LED
-
-(cherry picked from commit 85a86d19aa7022ff0555023d53aef78323a42d0c)
-Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
-Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
----
- drivers/leds/Kconfig          |  7 ++++
- drivers/leds/Makefile         |  1 +
- drivers/leds/leds-steamdeck.c | 74 +++++++++++++++++++++++++++++++++++
- 3 files changed, 82 insertions(+)
- create mode 100644 drivers/leds/leds-steamdeck.c
-
-diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
-index 499d0f215a8b..d1d761695cd6 100644
---- a/drivers/leds/Kconfig
-+++ b/drivers/leds/Kconfig
-@@ -864,6 +864,13 @@ config LEDS_ACER_A500
- 	  This option enables support for the Power Button LED of
- 	  Acer Iconia Tab A500.
-
-+config LEDS_STEAMDECK
-+	tristate "LED support for Steam Deck"
-+	depends on LEDS_CLASS && MFD_STEAMDECK
-+	help
-+	  This option enabled support for the status LED (next to the
-+	  power button) on Steam Deck
-+
- source "drivers/leds/blink/Kconfig"
-
- comment "Flash and Torch LED drivers"
-diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
-index 4fd2f92cd198..130a1c175dde 100644
---- a/drivers/leds/Makefile
-+++ b/drivers/leds/Makefile
-@@ -75,6 +75,7 @@
- obj-$(CONFIG_LEDS_PWM)			+= leds-pwm.o
- obj-$(CONFIG_LEDS_REGULATOR)		+= leds-regulator.o
- obj-$(CONFIG_LEDS_SC27XX_BLTC)		+= leds-sc27xx-bltc.o
-+obj-$(CONFIG_LEDS_STEAMDECK)		+= leds-steamdeck.o
- obj-$(CONFIG_LEDS_SUNFIRE)		+= leds-sunfire.o
- obj-$(CONFIG_LEDS_SYSCON)		+= leds-syscon.o
- obj-$(CONFIG_LEDS_TCA6507)		+= leds-tca6507.o
-diff --git a/drivers/leds/leds-steamdeck.c b/drivers/leds/leds-steamdeck.c
-new file mode 100644
-index 000000000000..686500b8de73
---- /dev/null
-+++ b/drivers/leds/leds-steamdeck.c
-@@ -0,0 +1,74 @@
-+// SPDX-License-Identifier: GPL-2.0+
-+
-+/*
-+ * Steam Deck EC MFD LED cell driver
-+ *
-+ * Copyright (C) 2021-2022 Valve Corporation
-+ *
-+ */
-+
-+#include <linux/acpi.h>
-+#include <linux/leds.h>
-+#include <linux/platform_device.h>
-+
-+struct steamdeck_led {
-+	struct acpi_device *adev;
-+	struct led_classdev cdev;
-+};
-+
-+static int steamdeck_leds_brightness_set(struct led_classdev *cdev,
-+					 enum led_brightness value)
-+{
-+	struct steamdeck_led *sd = container_of(cdev, struct steamdeck_led,
-+						cdev);
-+
-+	if (ACPI_FAILURE(acpi_execute_simple_method(sd->adev->handle,
-+						    "CHBV", value)))
-+		return -EIO;
-+
-+	return 0;
-+}
-+
-+static int steamdeck_leds_probe(struct platform_device *pdev)
-+{
-+  	struct device *dev = &pdev->dev;
-+	struct steamdeck_led *sd;
-+	int ret;
-+
-+	sd = devm_kzalloc(dev, sizeof(*sd), GFP_KERNEL);
-+	if (!sd)
-+		return -ENOMEM;
-+
-+	sd->adev = ACPI_COMPANION(dev->parent);
-+
-+	sd->cdev.name = "status:white";
-+	sd->cdev.brightness_set_blocking = steamdeck_leds_brightness_set;
-+	sd->cdev.max_brightness = 100;
-+
-+	ret = devm_led_classdev_register(dev, &sd->cdev);
-+	if (ret) {
-+		dev_err(dev, "Failed to register LEDs device: %d\n", ret);
-+		return ret;
-+	}
-+
-+	return 0;
-+}
-+
-+static const struct platform_device_id steamdeck_leds_id_table[] = {
-+	{ .name = "steamdeck-leds" },
-+	{}
-+};
-+MODULE_DEVICE_TABLE(platform, steamdeck_leds_id_table);
-+
-+static struct platform_driver steamdeck_leds_driver = {
-+	.probe = steamdeck_leds_probe,
-+	.driver = {
-+		.name = "steamdeck-leds",
-+	},
-+	.id_table = steamdeck_leds_id_table,
-+};
-+module_platform_driver(steamdeck_leds_driver);
-+
-+MODULE_AUTHOR("Andrey Smirnov <andrew.smirnov@gmail.com>");
-+MODULE_DESCRIPTION("Steam Deck LEDs driver");
-+MODULE_LICENSE("GPL");
-
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andrey Smirnov <andrew.smirnov@gmail.com>
-Date: Sun, 27 Feb 2022 14:46:08 -0800
-Subject: [PATCH] extcon: Add driver for Steam Deck
-
-(cherry picked from commit f9f2eddae582ae39d5f89c1218448fc259b90aa8)
-Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
-Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
----
- drivers/extcon/Kconfig            |   7 ++
- drivers/extcon/Makefile           |   1 +
- drivers/extcon/extcon-steamdeck.c | 180 ++++++++++++++++++++++++++++++
- 3 files changed, 188 insertions(+)
- create mode 100644 drivers/extcon/extcon-steamdeck.c
-
-diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig
-index 290186e44e6b..4d444a9e2c1f 100644
---- a/drivers/extcon/Kconfig
-+++ b/drivers/extcon/Kconfig
-@@ -189,4 +189,11 @@ config EXTCON_USBC_TUSB320
- 	  Say Y here to enable support for USB Type C cable detection extcon
- 	  support using a TUSB320.
-
-+config EXTCON_STEAMDECK
-+	tristate "Steam Deck extcon support"
-+	depends on MFD_STEAMDECK
-+	help
-+	  Say Y here to enable support of USB Type C cable detection extcon
-+	  support on Steam Deck devices
-+
- endif
-diff --git a/drivers/extcon/Makefile b/drivers/extcon/Makefile
-index 1b390d934ca9..1c7e217f29e4 100644
---- a/drivers/extcon/Makefile
-+++ b/drivers/extcon/Makefile
-@@ -25,3 +25,4 @@ obj-$(CONFIG_EXTCON_SM5502)	+= extcon-sm5502.o
- obj-$(CONFIG_EXTCON_USB_GPIO)	+= extcon-usb-gpio.o
- obj-$(CONFIG_EXTCON_USBC_CROS_EC) += extcon-usbc-cros-ec.o
- obj-$(CONFIG_EXTCON_USBC_TUSB320) += extcon-usbc-tusb320.o
-+obj-$(CONFIG_EXTCON_STEAMDECK)  += extcon-steamdeck.o
-diff --git a/drivers/extcon/extcon-steamdeck.c b/drivers/extcon/extcon-steamdeck.c
-new file mode 100644
-index 000000000000..74f190adc8ea
---- /dev/null
-+++ b/drivers/extcon/extcon-steamdeck.c
-@@ -0,0 +1,180 @@
-+
-+#include <linux/acpi.h>
-+#include <linux/platform_device.h>
-+#include <linux/extcon-provider.h>
-+
-+#define ACPI_STEAMDECK_NOTIFY_STATUS	0x80
-+
-+/* 0 - port connected, 1 -port disconnected */
-+#define ACPI_STEAMDECK_PORT_CONNECT	BIT(0)
-+/* 0 - Upstream Facing Port, 1 - Downdstream Facing Port */
-+#define ACPI_STEAMDECK_CUR_DATA_ROLE	BIT(3)
-+/*
-+ * Debouncing delay to allow negotiation process to settle. 2s value
-+ * was arrived at via trial and error.
-+ */
-+#define STEAMDECK_ROLE_SWITCH_DELAY	(msecs_to_jiffies(2000))
-+
-+struct steamdeck_extcon {
-+	struct acpi_device *adev;
-+	struct delayed_work role_work;
-+	struct extcon_dev *edev;
-+	struct device *dev;
-+};
-+
-+static int steamdeck_read_pdcs(struct steamdeck_extcon *sd, unsigned long long *pdcs)
-+{
-+	acpi_status status;
-+
-+	status = acpi_evaluate_integer(sd->adev->handle, "PDCS", NULL, pdcs);
-+	if (ACPI_FAILURE(status)) {
-+		dev_err(sd->dev, "PDCS evaluation failed: %s\n",
-+			acpi_format_exception(status));
-+		return -EIO;
-+	}
-+
-+	return 0;
-+}
-+
-+static void steamdeck_usb_role_work(struct work_struct *work)
-+{
-+	struct steamdeck_extcon *sd =
-+		container_of(work, struct steamdeck_extcon, role_work.work);
-+	unsigned long long pdcs;
-+	bool usb_host;
-+
-+	if (steamdeck_read_pdcs(sd, &pdcs))
-+		return;
-+
-+	/*
-+	 * We only care about these two
-+	 */
-+	pdcs &= ACPI_STEAMDECK_PORT_CONNECT | ACPI_STEAMDECK_CUR_DATA_ROLE;
-+
-+	/*
-+	 * For "connect" events our role is determined by a bit in
-+	 * PDCS, for "disconnect" we switch to being a gadget
-+	 * unconditionally. The thinking for the latter is we don't
-+	 * want to start acting as a USB host until we get
-+	 * confirmation from the firmware that we are a USB host
-+	 */
-+	usb_host = (pdcs & ACPI_STEAMDECK_PORT_CONNECT) ?
-+		pdcs & ACPI_STEAMDECK_CUR_DATA_ROLE : false;
-+
-+	dev_dbg(sd->dev, "USB role is %s\n", usb_host ? "host" : "device");
-+	WARN_ON(extcon_set_state_sync(sd->edev, EXTCON_USB_HOST,
-+				      usb_host));
-+
-+}
-+
-+static void steamdeck_notify(acpi_handle handle, u32 event, void *context)
-+{
-+	struct device *dev = context;
-+	struct steamdeck_extcon *sd = dev_get_drvdata(dev);
-+	unsigned long long pdcs;
-+	unsigned long delay;
-+
-+	switch (event) {
-+	case ACPI_STEAMDECK_NOTIFY_STATUS:
-+		if (steamdeck_read_pdcs(sd, &pdcs))
-+			return;
-+		/*
-+		 * We process "disconnect" events immediately and
-+		 * "connect" events with a delay to give the HW time
-+		 * to settle. For example attaching USB hub (at least
-+		 * for HW used for testing) will generate intermediary
-+		 * event with "host" bit not set, followed by the one
-+		 * that does have it set.
-+		 */
-+		delay = (pdcs & ACPI_STEAMDECK_PORT_CONNECT) ?
-+			STEAMDECK_ROLE_SWITCH_DELAY : 0;
-+
-+		queue_delayed_work(system_long_wq, &sd->role_work, delay);
-+		break;
-+	default:
-+		dev_warn(dev, "Unsupported event [0x%x]\n", event);
-+	}
-+}
-+
-+static void steamdeck_remove_notify_handler(void *data)
-+{
-+	struct steamdeck_extcon *sd = data;
-+
-+	acpi_remove_notify_handler(sd->adev->handle, ACPI_DEVICE_NOTIFY,
-+				   steamdeck_notify);
-+	cancel_delayed_work_sync(&sd->role_work);
-+}
-+
-+static const unsigned int steamdeck_extcon_cable[] = {
-+	EXTCON_USB,
-+	EXTCON_USB_HOST,
-+	EXTCON_CHG_USB_SDP,
-+	EXTCON_CHG_USB_CDP,
-+	EXTCON_CHG_USB_DCP,
-+	EXTCON_CHG_USB_ACA,
-+	EXTCON_NONE,
-+};
-+
-+static int steamdeck_extcon_probe(struct platform_device *pdev)
-+{
-+  	struct device *dev = &pdev->dev;
-+	struct steamdeck_extcon *sd;
-+	acpi_status status;
-+	int ret;
-+
-+	sd = devm_kzalloc(dev, sizeof(*sd), GFP_KERNEL);
-+	if (!sd)
-+		return -ENOMEM;
-+
-+	INIT_DELAYED_WORK(&sd->role_work, steamdeck_usb_role_work);
-+	platform_set_drvdata(pdev, sd);
-+	sd->adev = ACPI_COMPANION(dev->parent);
-+	sd->dev  = dev;
-+	sd->edev = devm_extcon_dev_allocate(dev, steamdeck_extcon_cable);
-+	if (IS_ERR(sd->edev))
-+		return PTR_ERR(sd->edev);
-+
-+	ret = devm_extcon_dev_register(dev, sd->edev);
-+	if (ret < 0) {
-+		dev_err(dev, "Failed to register extcon device: %d\n", ret);
-+		return ret;
-+	}
-+
-+	/*
-+	 * Set initial role value
-+	 */
-+	queue_delayed_work(system_long_wq, &sd->role_work, 0);
-+	flush_delayed_work(&sd->role_work);
-+
-+	status = acpi_install_notify_handler(sd->adev->handle,
-+					     ACPI_DEVICE_NOTIFY,
-+					     steamdeck_notify,
-+					     dev);
-+	if (ACPI_FAILURE(status)) {
-+		dev_err(dev, "Error installing ACPI notify handler\n");
-+		return -EIO;
-+	}
-+
-+	ret = devm_add_action_or_reset(dev, steamdeck_remove_notify_handler,
-+				       sd);
-+	return ret;
-+}
-+
-+static const struct platform_device_id steamdeck_extcon_id_table[] = {
-+	{ .name = "steamdeck-extcon" },
-+	{}
-+};
-+MODULE_DEVICE_TABLE(platform, steamdeck_extcon_id_table);
-+
-+static struct platform_driver steamdeck_extcon_driver = {
-+	.probe = steamdeck_extcon_probe,
-+	.driver = {
-+		.name = "steamdeck-extcon",
-+	},
-+	.id_table = steamdeck_extcon_id_table,
-+};
-+module_platform_driver(steamdeck_extcon_driver);
-+
-+MODULE_AUTHOR("Andrey Smirnov <andrew.smirnov@gmail.com>");
-+MODULE_DESCRIPTION("Steam Deck extcon driver");
-+MODULE_LICENSE("GPL");
-
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andrey Smirnov <andrew.smirnov@gmail.com>
-Date: Sat, 15 Jul 2023 12:58:54 -0700
-Subject: [PATCH] hwmon: steamdeck-hwmon: Add support for max battery
- level/rate
-
-Add support for max battery level/charge rate attributes.
-
-Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
-(cherry picked from commit 50af83e8fd75dc52221edd3fb6fd7a7f70c4d8a4)
-Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
-Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
----
- drivers/hwmon/steamdeck-hwmon.c | 72 ++++++++++++++++++++++++++++++++-
- 1 file changed, 71 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/hwmon/steamdeck-hwmon.c b/drivers/hwmon/steamdeck-hwmon.c
-index fab9e9460bd4..9d0a5471b181 100644
---- a/drivers/hwmon/steamdeck-hwmon.c
-+++ b/drivers/hwmon/steamdeck-hwmon.c
-@@ -180,6 +180,76 @@ static const struct hwmon_chip_info steamdeck_hwmon_chip_info = {
- 	.info = steamdeck_hwmon_info,
- };
-
-+
-+static ssize_t
-+steamdeck_hwmon_simple_store(struct device *dev, const char *buf, size_t count,
-+			     const char *method,
-+			     unsigned long upper_limit)
-+{
-+	struct steamdeck_hwmon *sd = dev_get_drvdata(dev);
-+	unsigned long value;
-+
-+	if (kstrtoul(buf, 10, &value) || value >= upper_limit)
-+		return -EINVAL;
-+
-+	if (ACPI_FAILURE(acpi_execute_simple_method(sd->adev->handle,
-+						    (char *)method, value)))
-+		return -EIO;
-+
-+	return count;
-+}
-+
-+static ssize_t
-+steamdeck_hwmon_simple_show(struct device *dev, char *buf,
-+			    const char *method)
-+{
-+	struct steamdeck_hwmon *sd = dev_get_drvdata(dev);
-+	unsigned long value;
-+
-+	value = steamdeck_hwmon_get(sd, method);
-+	if (value < 0)
-+		return value;
-+
-+	return sprintf(buf, "%ld\n", value);
-+}
-+
-+#define STEAMDECK_HWMON_ATTR_RW(_name, _set_method, _get_method,	\
-+				_upper_limit)				\
-+	static ssize_t _name##_show(struct device *dev,			\
-+				    struct device_attribute *attr,	\
-+				    char *buf)				\
-+	{								\
-+		return steamdeck_hwmon_simple_show(dev, buf,		\
-+						   _get_method);	\
-+	}								\
-+	static ssize_t _name##_store(struct device *dev,		\
-+				     struct device_attribute *attr,	\
-+				     const char *buf, size_t count)	\
-+	{								\
-+		return steamdeck_hwmon_simple_store(dev, buf, count,	\
-+						    _set_method,	\
-+						    _upper_limit);	\
-+	}								\
-+	static DEVICE_ATTR_RW(_name)
-+
-+STEAMDECK_HWMON_ATTR_RW(max_battery_charge_level, "FCBL", "SFBL", 101);
-+STEAMDECK_HWMON_ATTR_RW(max_battery_charge_rate,  "CHGR", "GCHR", 101);
-+
-+static struct attribute *steamdeck_hwmon_attributes[] = {
-+	&dev_attr_max_battery_charge_level.attr,
-+	&dev_attr_max_battery_charge_rate.attr,
-+	NULL
-+};
-+
-+static const struct attribute_group steamdeck_hwmon_group = {
-+	.attrs = steamdeck_hwmon_attributes,
-+};
-+
-+static const struct attribute_group *steamdeck_hwmon_groups[] = {
-+	&steamdeck_hwmon_group,
-+	NULL
-+};
-+
- static int steamdeck_hwmon_probe(struct platform_device *pdev)
- {
- 	struct device *dev = &pdev->dev;
-@@ -195,7 +265,7 @@ static int steamdeck_hwmon_probe(struct platform_device *pdev)
- 						     "steamdeck_hwmon",
- 						     sd,
- 						     &steamdeck_hwmon_chip_info,
--						     NULL);
-+						     steamdeck_hwmon_groups);
- 	if (IS_ERR(hwmon)) {
- 		dev_err(dev, "Failed to register HWMON device");
- 		return PTR_ERR(hwmon);
-
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andrey Smirnov <andrew.smirnov@gmail.com>
-Date: Sun, 24 Sep 2023 15:02:33 -0700
-Subject: [PATCH] mfd: steamdeck: Expose controller board power in sysfs
-
-As of version 118 Deck's BIOS implements "SCBP" method that allows
-gating power of the controller board (VBUS). Add a basic WO method to
-our root MFD device to allow toggling that.
-
-Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
-(cherry picked from commit f97f32718acc10cbb51fef925842392e80904d74)
-Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
-Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
----
- drivers/mfd/steamdeck.c | 20 ++++++++++++++++++++
- 1 file changed, 20 insertions(+)
-
-diff --git a/drivers/mfd/steamdeck.c b/drivers/mfd/steamdeck.c
-index 0e504b3c2796..a60fa7db9141 100644
---- a/drivers/mfd/steamdeck.c
-+++ b/drivers/mfd/steamdeck.c
-@@ -41,9 +41,29 @@ struct steamdeck {
- STEAMDECK_ATTR_RO(firmware_version, "PDFW");
- STEAMDECK_ATTR_RO(board_id, "BOID");
-
-+static ssize_t controller_board_power_store(struct device *dev,
-+					    struct device_attribute *attr,
-+					    const char *buf, size_t count)
-+{
-+	struct steamdeck *sd = dev_get_drvdata(dev);
-+	bool enabled;
-+	ssize_t ret = kstrtobool(buf, &enabled);
-+
-+	if (ret)
-+		return ret;
-+
-+	if (ACPI_FAILURE(acpi_execute_simple_method(sd->adev->handle,
-+						    "SCBP", enabled)))
-+		return -EIO;
-+
-+	return count;
-+}
-+static DEVICE_ATTR_WO(controller_board_power);
-+
- static struct attribute *steamdeck_attrs[] = {
- 	&dev_attr_firmware_version.attr,
- 	&dev_attr_board_id.attr,
-+	&dev_attr_controller_board_power.attr,
- 	NULL
- };
-
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Vicki Pfau <vi@endrift.com>
-Date: Thu, 30 Jun 2022 18:42:10 -0700
-Subject: [PATCH 01/10] USB: gadget: f_hid: Add Get-Feature report
-
-While the HID gadget implementation has been sufficient for devices that only
-use INTERRUPT transfers, the USB HID standard includes provisions for Set- and
-Get-Feature report CONTROL transfers that go over endpoint 0. These were
-previously impossible with the existing implementation, and would either send
-an empty reply, or stall out.
-
-As the feature is a standard part of USB HID, it stands to reason that devices
-would use it, and that the HID gadget should support it. This patch adds
-support for (polled) device-to-host Get-Feature reports through a new ioctl
-interface to the hidg class dev nodes.
-
-Signed-off-by: Vicki Pfau <vi@endrift.com>
-(cherry picked from commit 8437fa3861c7198a3e286f393c8637c4fc08d2bc)
-Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
----
- drivers/usb/gadget/function/f_hid.c | 121 ++++++++++++++++++++++++++--
- include/uapi/linux/usb/g_hid.h      |  38 +++++++++
- include/uapi/linux/usb/gadgetfs.h   |   2 +-
- 3 files changed, 154 insertions(+), 7 deletions(-)
- create mode 100644 include/uapi/linux/usb/g_hid.h
-
-diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
-index ea85e2c701a15..6fec92b5a0bd9 100644
---- a/drivers/usb/gadget/function/f_hid.c
-+++ b/drivers/usb/gadget/function/f_hid.c
-@@ -16,6 +16,7 @@
- #include <linux/wait.h>
- #include <linux/sched.h>
- #include <linux/usb/g_hid.h>
-+#include <uapi/linux/usb/g_hid.h>
-
- #include "u_f.h"
- #include "u_hid.h"
-@@ -75,6 +76,13 @@ struct f_hidg {
- 	wait_queue_head_t		write_queue;
- 	struct usb_request		*req;
-
-+	/* get report */
-+	struct usb_request		*get_req;
-+	struct usb_hidg_report		get_report;
-+	spinlock_t			get_spinlock;
-+	bool				get_pending;
-+	wait_queue_head_t		get_queue;
-+
- 	struct device			dev;
- 	struct cdev			cdev;
- 	struct usb_function		func;
-@@ -523,6 +531,64 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer,
- 	return status;
- }
-
-+
-+static int f_hidg_get_report(struct file *file, struct usb_hidg_report __user *buffer)
-+{
-+	struct f_hidg			*hidg = file->private_data;
-+	struct usb_composite_dev	*cdev = hidg->func.config->cdev;
-+
-+	int		status = 0;
-+	unsigned long	flags;
-+
-+	spin_lock_irqsave(&hidg->get_spinlock, flags);
-+
-+#define GET_REPORT_COND (!hidg->get_pending)
-+
-+	while (!GET_REPORT_COND) {
-+		spin_unlock_irqrestore(&hidg->get_spinlock, flags);
-+
-+		if (file->f_flags & O_NONBLOCK)
-+			return -EAGAIN;
-+
-+		if (wait_event_interruptible_exclusive(hidg->get_queue,
-+						       GET_REPORT_COND))
-+			return -ERESTARTSYS;
-+
-+		spin_lock_irqsave(&hidg->get_spinlock, flags);
-+		if (!hidg->get_pending) {
-+			spin_unlock_irqrestore(&hidg->get_spinlock, flags);
-+			return -EINVAL;
-+		}
-+	}
-+
-+	hidg->get_pending = true;
-+	spin_unlock_irqrestore(&hidg->get_spinlock, flags);
-+
-+	status = copy_from_user(&hidg->get_report, buffer,
-+				sizeof(struct usb_hidg_report));
-+	if (status != 0) {
-+		ERROR(cdev, "copy_from_user error\n");
-+		status = -EINVAL;
-+	}
-+
-+	spin_lock_irqsave(&hidg->get_spinlock, flags);
-+	hidg->get_pending = false;
-+	spin_unlock_irqrestore(&hidg->get_spinlock, flags);
-+
-+	wake_up(&hidg->get_queue);
-+	return status;
-+}
-+
-+static long f_hidg_ioctl(struct file *file, unsigned int code, unsigned long arg)
-+{
-+	switch (code) {
-+	case GADGET_HID_WRITE_GET_REPORT:
-+		return f_hidg_get_report(file, (struct usb_hidg_report __user *)arg);
-+	default:
-+		return -ENOTTY;
-+	}
-+}
-+
- static __poll_t f_hidg_poll(struct file *file, poll_table *wait)
- {
- 	struct f_hidg	*hidg  = file->private_data;
-@@ -548,6 +614,7 @@ static __poll_t f_hidg_poll(struct file *file, poll_table *wait)
- #undef WRITE_COND
- #undef READ_COND_SSREPORT
- #undef READ_COND_INTOUT
-+#undef GET_REPORT_COND
-
- static int f_hidg_release(struct inode *inode, struct file *fd)
- {
-@@ -640,6 +707,10 @@ static void hidg_ssreport_complete(struct usb_ep *ep, struct usb_request *req)
- 	wake_up(&hidg->read_queue);
- }
-
-+static void hidg_get_report_complete(struct usb_ep *ep, struct usb_request *req)
-+{
-+}
-+
- static int hidg_setup(struct usb_function *f,
- 		const struct usb_ctrlrequest *ctrl)
- {
-@@ -647,6 +718,8 @@ static int hidg_setup(struct usb_function *f,
- 	struct usb_composite_dev	*cdev = f->config->cdev;
- 	struct usb_request		*req  = cdev->req;
- 	int status = 0;
-+	unsigned long flags;
-+	bool do_wake = false;
- 	__u16 value, length;
-
- 	value	= __le16_to_cpu(ctrl->wValue);
-@@ -659,14 +732,29 @@ static int hidg_setup(struct usb_function *f,
- 	switch ((ctrl->bRequestType << 8) | ctrl->bRequest) {
- 	case ((USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8
- 		  | HID_REQ_GET_REPORT):
--		VDBG(cdev, "get_report\n");
-+		VDBG(cdev, "get_report | wLength=%d\n", ctrl->wLength);
-
--		/* send an empty report */
--		length = min_t(unsigned, length, hidg->report_length);
--		memset(req->buf, 0x0, length);
-+		req = hidg->get_req;
-+		req->zero = 0;
-+		req->length = min_t(unsigned, length, hidg->report_length);
-+		status = usb_ep_queue(cdev->gadget->ep0, req, GFP_ATOMIC);
-+		if (status < 0) {
-+			ERROR(cdev, "usb_ep_queue error on get_report %d\n",
-+			      status);
-
--		goto respond;
--		break;
-+			spin_lock_irqsave(&hidg->get_spinlock, flags);
-+			if (hidg->get_pending) {
-+				hidg->get_pending = false;
-+				do_wake = true;
-+			}
-+			spin_unlock_irqrestore(&hidg->get_spinlock, flags);
-+
-+			if (do_wake) {
-+				wake_up(&hidg->get_queue);
-+			}
-+		}
-+
-+		return status;
-
- 	case ((USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8
- 		  | HID_REQ_GET_PROTOCOL):
-@@ -800,6 +888,14 @@ static void hidg_disable(struct usb_function *f)
-
- 	hidg->req = NULL;
- 	spin_unlock_irqrestore(&hidg->write_spinlock, flags);
-+
-+	spin_lock_irqsave(&hidg->get_spinlock, flags);
-+	if (!hidg->get_pending) {
-+		usb_ep_free_request(f->config->cdev->gadget->ep0, hidg->get_req);
-+		hidg->get_pending = true;
-+	}
-+	hidg->get_req = NULL;
-+	spin_unlock_irqrestore(&hidg->get_spinlock, flags);
- }
-
- static int hidg_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
-@@ -908,6 +1004,7 @@ static const struct file_operations f_hidg_fops = {
- 	.write		= f_hidg_write,
- 	.read		= f_hidg_read,
- 	.poll		= f_hidg_poll,
-+	.unlocked_ioctl	= f_hidg_ioctl,
- 	.llseek		= noop_llseek,
- };
-
-@@ -918,6 +1015,14 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f)
- 	struct usb_string	*us;
- 	int			status;
-
-+	hidg->get_req = usb_ep_alloc_request(c->cdev->gadget->ep0, GFP_ATOMIC);
-+	if (!hidg->get_req)
-+		return -ENOMEM;
-+	hidg->get_req->buf = hidg->get_report.data;
-+	hidg->get_req->zero = 0;
-+	hidg->get_req->complete = hidg_get_report_complete;
-+	hidg->get_req->context = hidg;
-+
- 	/* maybe allocate device-global string IDs, and patch descriptors */
- 	us = usb_gstrings_attach(c->cdev, ct_func_strings,
- 				 ARRAY_SIZE(ct_func_string_defs));
-@@ -1003,8 +1108,10 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f)
- 	hidg->write_pending = 1;
- 	hidg->req = NULL;
- 	spin_lock_init(&hidg->read_spinlock);
-+	spin_lock_init(&hidg->get_spinlock);
- 	init_waitqueue_head(&hidg->write_queue);
- 	init_waitqueue_head(&hidg->read_queue);
-+	init_waitqueue_head(&hidg->get_queue);
- 	INIT_LIST_HEAD(&hidg->completed_out_req);
-
- 	/* create char device */
-@@ -1021,6 +1128,8 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f)
- 	if (hidg->req != NULL)
- 		free_ep_req(hidg->in_ep, hidg->req);
-
-+	usb_ep_free_request(c->cdev->gadget->ep0, hidg->get_req);
-+
- 	return status;
- }
-
-diff --git a/include/uapi/linux/usb/g_hid.h b/include/uapi/linux/usb/g_hid.h
-new file mode 100644
-index 0000000000000..c6068b4863543
---- /dev/null
-+++ b/include/uapi/linux/usb/g_hid.h
-@@ -0,0 +1,38 @@
-+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
-+/*
-+ * g_hid.h -- Header file for USB HID gadget driver
-+ *
-+ * Copyright (C) 2022 Valve Software
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-+ */
-+
-+#ifndef __UAPI_LINUX_USB_G_HID_H
-+#define __UAPI_LINUX_USB_G_HID_H
-+
-+#include <linux/types.h>
-+
-+struct usb_hidg_report {
-+	__u16 length;
-+	__u8 data[512];
-+};
-+
-+/* The 'g' code is also used by gadgetfs and hid gadget ioctl requests.
-+ * Don't add any colliding codes to either driver, and keep
-+ * them in unique ranges (size 0x20 for now).
-+ */
-+#define GADGET_HID_WRITE_GET_REPORT	_IOW('g', 0x42, struct usb_hidg_report)
-+
-+#endif /* __UAPI_LINUX_USB_G_HID_H */
-diff --git a/include/uapi/linux/usb/gadgetfs.h b/include/uapi/linux/usb/gadgetfs.h
-index 835473910a498..9754822b2a409 100644
---- a/include/uapi/linux/usb/gadgetfs.h
-+++ b/include/uapi/linux/usb/gadgetfs.h
-@@ -62,7 +62,7 @@ struct usb_gadgetfs_event {
- };
-
-
--/* The 'g' code is also used by printer gadget ioctl requests.
-+/* The 'g' code is also used by printer and hid gadget ioctl requests.
-  * Don't add any colliding codes to either driver, and keep
-  * them in unique ranges (size 0x20 for now).
-  */
---
-2.41.0
-
-
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Vicki Pfau <vi@endrift.com>
-Date: Thu, 30 Jun 2022 18:43:10 -0700
-Subject: [PATCH 02/10] USB: gadget: f_hid: Add Set-Feature report
-
-While the HID gadget implementation has been sufficient for devices that only
-use INTERRUPT transfers, the USB HID standard includes provisions for Set- and
-Get-Feature report CONTROL transfers that go over endpoint 0. These were
-previously impossible with the existing implementation, and would either send
-an empty reply, or stall out.
-
-As the feature is a standard part of USB HID, it stands to reason that devices
-would use it, and that the HID gadget should support it. This patch adds
-support for host-to-device Set-Feature reports through a new ioctl
-interface to the hidg class dev nodes.
-
-Signed-off-by: Vicki Pfau <vi@endrift.com>
-(cherry picked from commit 3d82be0ec3aa3b947d9c927d7b06c433de15be8b)
-Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
----
- drivers/usb/gadget/function/f_hid.c | 110 ++++++++++++++++++++++++++--
- include/uapi/linux/usb/g_hid.h      |  24 +-----
- 2 files changed, 106 insertions(+), 28 deletions(-)
-
-diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
-index 6fec92b5a0bd9..172cba91aded1 100644
---- a/drivers/usb/gadget/function/f_hid.c
-+++ b/drivers/usb/gadget/function/f_hid.c
-@@ -76,6 +76,11 @@ struct f_hidg {
- 	wait_queue_head_t		write_queue;
- 	struct usb_request		*req;
-
-+	/* set report */
-+	struct list_head		completed_set_req;
-+	spinlock_t			set_spinlock;
-+	wait_queue_head_t		set_queue;
-+
- 	/* get report */
- 	struct usb_request		*get_req;
- 	struct usb_hidg_report		get_report;
-@@ -531,6 +536,54 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer,
- 	return status;
- }
-
-+static int f_hidg_set_report(struct file *file, struct usb_hidg_report __user *buffer)
-+{
-+	struct f_hidg		*hidg = file->private_data;
-+	struct f_hidg_req_list	*list;
-+	struct usb_request	*req;
-+	unsigned long		flags;
-+	unsigned short		length;
-+	int			status;
-+
-+	spin_lock_irqsave(&hidg->set_spinlock, flags);
-+
-+#define SET_REPORT_COND (!list_empty(&hidg->completed_set_req))
-+
-+	/* wait for at least one buffer to complete */
-+	while (!SET_REPORT_COND) {
-+		spin_unlock_irqrestore(&hidg->set_spinlock, flags);
-+		if (file->f_flags & O_NONBLOCK)
-+			return -EAGAIN;
-+
-+		if (wait_event_interruptible(hidg->set_queue, SET_REPORT_COND))
-+			return -ERESTARTSYS;
-+
-+		spin_lock_irqsave(&hidg->set_spinlock, flags);
-+	}
-+
-+	/* pick the first one */
-+	list = list_first_entry(&hidg->completed_set_req,
-+				struct f_hidg_req_list, list);
-+
-+	/*
-+	 * Remove this from list to protect it from being free()
-+	 * while host disables our function
-+	 */
-+	list_del(&list->list);
-+
-+	req = list->req;
-+	spin_unlock_irqrestore(&hidg->set_spinlock, flags);
-+
-+	/* copy to user outside spinlock */
-+	length = min_t(unsigned short, sizeof(buffer->data), req->actual);
-+	status = copy_to_user(&buffer->length, &length, sizeof(buffer->length));
-+	if (!status) {
-+	    status = copy_to_user(&buffer->data, req->buf, length);
-+	}
-+	kfree(list);
-+	free_ep_req(hidg->func.config->cdev->gadget->ep0, req);
-+	return status;
-+}
-
- static int f_hidg_get_report(struct file *file, struct usb_hidg_report __user *buffer)
- {
-@@ -582,6 +635,8 @@ static int f_hidg_get_report(struct file *file, struct usb_hidg_report __user *b
- static long f_hidg_ioctl(struct file *file, unsigned int code, unsigned long arg)
- {
- 	switch (code) {
-+	case GADGET_HID_READ_SET_REPORT:
-+		return f_hidg_set_report(file, (struct usb_hidg_report __user *)arg);
- 	case GADGET_HID_WRITE_GET_REPORT:
- 		return f_hidg_get_report(file, (struct usb_hidg_report __user *)arg);
- 	default:
-@@ -596,6 +651,7 @@ static __poll_t f_hidg_poll(struct file *file, poll_table *wait)
-
- 	poll_wait(file, &hidg->read_queue, wait);
- 	poll_wait(file, &hidg->write_queue, wait);
-+	poll_wait(file, &hidg->set_queue, wait);
-
- 	if (WRITE_COND)
- 		ret |= EPOLLOUT | EPOLLWRNORM;
-@@ -608,12 +664,16 @@ static __poll_t f_hidg_poll(struct file *file, poll_table *wait)
- 			ret |= EPOLLIN | EPOLLRDNORM;
- 	}
-
-+	if (SET_REPORT_COND)
-+		ret |= EPOLLPRI;
-+
- 	return ret;
- }
-
- #undef WRITE_COND
- #undef READ_COND_SSREPORT
- #undef READ_COND_INTOUT
-+#undef SET_REPORT_COND
- #undef GET_REPORT_COND
-
- static int f_hidg_release(struct inode *inode, struct file *fd)
-@@ -658,11 +718,19 @@ static void hidg_intout_complete(struct usb_ep *ep, struct usb_request *req)
-
- 		req_list->req = req;
-
--		spin_lock_irqsave(&hidg->read_spinlock, flags);
--		list_add_tail(&req_list->list, &hidg->completed_out_req);
--		spin_unlock_irqrestore(&hidg->read_spinlock, flags);
-+		if (ep == cdev->gadget->ep0) {
-+			spin_lock_irqsave(&hidg->set_spinlock, flags);
-+			list_add_tail(&req_list->list, &hidg->completed_set_req);
-+			spin_unlock_irqrestore(&hidg->set_spinlock, flags);
-
--		wake_up(&hidg->read_queue);
-+			wake_up(&hidg->set_queue);
-+		} else {
-+			spin_lock_irqsave(&hidg->read_spinlock, flags);
-+			list_add_tail(&req_list->list, &hidg->completed_out_req);
-+			spin_unlock_irqrestore(&hidg->read_spinlock, flags);
-+
-+			wake_up(&hidg->read_queue);
-+		}
- 		break;
- 	default:
- 		ERROR(cdev, "Set report failed %d\n", req->status);
-@@ -775,12 +843,27 @@ static int hidg_setup(struct usb_function *f,
- 	case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8
- 		  | HID_REQ_SET_REPORT):
- 		VDBG(cdev, "set_report | wLength=%d\n", ctrl->wLength);
--		if (hidg->use_out_ep)
-+		if (!hidg->use_out_ep) {
-+			req->complete = hidg_ssreport_complete;
-+			req->context  = hidg;
-+			goto respond;
-+		}
-+		if (!length)
- 			goto stall;
--		req->complete = hidg_ssreport_complete;
-+		req = alloc_ep_req(cdev->gadget->ep0, GFP_ATOMIC);
-+		if (!req)
-+			return -ENOMEM;
-+		req->complete = hidg_intout_complete;
- 		req->context  = hidg;
--		goto respond;
--		break;
-+		req->zero = 0;
-+		req->length = length;
-+		status = usb_ep_queue(cdev->gadget->ep0, req, GFP_ATOMIC);
-+		if (status < 0) {
-+			ERROR(cdev, "usb_ep_queue error on set_report %d\n", status);
-+			free_ep_req(cdev->gadget->ep0, req);
-+		}
-+
-+		return status;
-
- 	case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8
- 		  | HID_REQ_SET_PROTOCOL):
-@@ -880,6 +963,14 @@ static void hidg_disable(struct usb_function *f)
- 		spin_unlock_irqrestore(&hidg->read_spinlock, flags);
- 	}
-
-+	spin_lock_irqsave(&hidg->set_spinlock, flags);
-+	list_for_each_entry_safe(list, next, &hidg->completed_set_req, list) {
-+		free_ep_req(f->config->cdev->gadget->ep0, list->req);
-+		list_del(&list->list);
-+		kfree(list);
-+	}
-+	spin_unlock_irqrestore(&hidg->set_spinlock, flags);
-+
- 	spin_lock_irqsave(&hidg->write_spinlock, flags);
- 	if (!hidg->write_pending) {
- 		free_ep_req(hidg->in_ep, hidg->req);
-@@ -1108,11 +1199,14 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f)
- 	hidg->write_pending = 1;
- 	hidg->req = NULL;
- 	spin_lock_init(&hidg->read_spinlock);
-+	spin_lock_init(&hidg->set_spinlock);
- 	spin_lock_init(&hidg->get_spinlock);
- 	init_waitqueue_head(&hidg->write_queue);
- 	init_waitqueue_head(&hidg->read_queue);
-+	init_waitqueue_head(&hidg->set_queue);
- 	init_waitqueue_head(&hidg->get_queue);
- 	INIT_LIST_HEAD(&hidg->completed_out_req);
-+	INIT_LIST_HEAD(&hidg->completed_set_req);
-
- 	/* create char device */
- 	cdev_init(&hidg->cdev, &f_hidg_fops);
-diff --git a/include/uapi/linux/usb/g_hid.h b/include/uapi/linux/usb/g_hid.h
-index c6068b4863543..54814c2c68d60 100644
---- a/include/uapi/linux/usb/g_hid.h
-+++ b/include/uapi/linux/usb/g_hid.h
-@@ -1,38 +1,22 @@
- /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
--/*
-- * g_hid.h -- Header file for USB HID gadget driver
-- *
-- * Copyright (C) 2022 Valve Software
-- *
-- * This program is free software; you can redistribute it and/or modify
-- * it under the terms of the GNU General Public License as published by
-- * the Free Software Foundation; either version 2 of the License, or
-- * (at your option) any later version.
-- *
-- * This program is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with this program; if not, write to the Free Software
-- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-- */
-
- #ifndef __UAPI_LINUX_USB_G_HID_H
- #define __UAPI_LINUX_USB_G_HID_H
-
- #include <linux/types.h>
-
-+#define HIDG_REPORT_SIZE_MAX 64
-+
- struct usb_hidg_report {
- 	__u16 length;
--	__u8 data[512];
-+	__u8 data[HIDG_REPORT_SIZE_MAX];
- };
-
- /* The 'g' code is also used by gadgetfs and hid gadget ioctl requests.
-  * Don't add any colliding codes to either driver, and keep
-  * them in unique ranges (size 0x20 for now).
-  */
-+#define GADGET_HID_READ_SET_REPORT	_IOR('g', 0x41, struct usb_hidg_report)
- #define GADGET_HID_WRITE_GET_REPORT	_IOW('g', 0x42, struct usb_hidg_report)
-
- #endif /* __UAPI_LINUX_USB_G_HID_H */
---
-2.41.0
-
-
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Vicki Pfau <vi@endrift.com>
-Date: Tue, 29 Nov 2022 18:32:58 -0800
-Subject: [PATCH 03/10] HID: hid-steam: Update list of identifiers from SDL
-
-SDL includes a list of settings (registers), reports (cmds), and various other
-identifiers that were provided by Valve. This commit imports a significant
-chunk of that list as well as updating the guessed names and replacing a
-handful of magic constants. It also replaces bitmask definitions that used hex
-with the BIT macro.
-
-Signed-off-by: Vicki Pfau <vi@endrift.com>
----
- drivers/hid/hid-steam.c | 156 +++++++++++++++++++++++++++++++---------
- 1 file changed, 121 insertions(+), 35 deletions(-)
-
-diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c
-index b110818fc9458..39a9bf3b7f77d 100644
---- a/drivers/hid/hid-steam.c
-+++ b/drivers/hid/hid-steam.c
-@@ -71,7 +71,7 @@ static LIST_HEAD(steam_devices);
-
- /*
-  * Commands that can be sent in a feature report.
-- * Thanks to Valve for some valuable hints.
-+ * Thanks to Valve and SDL for some valuable hints.
-  */
- #define STEAM_CMD_SET_MAPPINGS		0x80
- #define STEAM_CMD_CLEAR_MAPPINGS	0x81
-@@ -80,27 +80,98 @@ static LIST_HEAD(steam_devices);
- #define STEAM_CMD_GET_ATTRIB_LABEL	0x84
- #define STEAM_CMD_DEFAULT_MAPPINGS	0x85
- #define STEAM_CMD_FACTORY_RESET		0x86
--#define STEAM_CMD_WRITE_REGISTER	0x87
-+#define STEAM_CMD_SET_REGISTER		0x87
- #define STEAM_CMD_CLEAR_REGISTER	0x88
--#define STEAM_CMD_READ_REGISTER		0x89
-+#define STEAM_CMD_GET_REGISTER		0x89
- #define STEAM_CMD_GET_REGISTER_LABEL	0x8a
- #define STEAM_CMD_GET_REGISTER_MAX	0x8b
- #define STEAM_CMD_GET_REGISTER_DEFAULT	0x8c
- #define STEAM_CMD_SET_MODE		0x8d
--#define STEAM_CMD_DEFAULT_MOUSE		0x8e
--#define STEAM_CMD_FORCEFEEDBAK		0x8f
--#define STEAM_CMD_REQUEST_COMM_STATUS	0xb4
--#define STEAM_CMD_GET_SERIAL		0xae
-+#define STEAM_CMD_DEFAULT_REGISTER	0x8e
-+#define STEAM_CMD_HAPTIC_PULSE		0x8f
-+#define STEAM_CMD_TURN_OFF_CONTROLLER	0x9f
-+#define STEAM_CMD_GET_DEVICE_IFNO	0xa1
-+#define STEAM_CMD_CALIBRATE_TRACKPADS	0xa7
-+#define STEAM_CMD_SET_SERIAL		0xa9
-+#define STEAM_CMD_GET_TRACKPAD_CALIB	0xaa
-+#define STEAM_CMD_GET_TRACKPAD_FACTORY_CALIB	0xab
-+#define STEAM_CMD_GET_TRACKPAD_RAW_DATA	0xac
-+#define STEAM_CMD_ENABLE_PAIRING	0xad
-+#define STEAM_CMD_GET_STRING_ATTRIB	0xae
-+#define STEAM_CMD_RADIO_ERASE_RECORDS	0xaf
-+#define STEAM_CMD_RADIO_WRITE_RECORD	0xb0
-+#define STEAM_CMD_SET_DONGLE_SETTING	0xb1
-+#define STEAM_CMD_DONGLE_DISCONNECT_DEV	0xb2
-+#define STEAM_CMD_DONGLE_COMMIT_DEV	0xb3
-+#define STEAM_CMD_DONGLE_GET_STATE	0xb4
-+#define STEAM_CMD_CALIBRATE_GYRO	0xb5
-+#define STEAM_CMD_PLAY_AUDIO		0xb6
-+#define STEAM_CMD_AUDIO_UPDATE_START	0xb7
-+#define STEAM_CMD_AUDIO_UPDATE_DATA	0xb8
-+#define STEAM_CMD_AUDIO_UPDATE_COMPLETE	0xb9
-+#define STEAM_CMD_GET_CHIPID		0xba
-+#define STEAM_CMD_CALIBRATE_JOYSTICK	0xbf
-+#define STEAM_CMD_CALIBRATE_TRIGGERS	0xc0
-+#define STEAM_CMD_SET_AUDIO_MAPPING	0xc1
-+#define STEAM_CMD_CHECK_GYRO_FW_LOAD	0xc2
-+#define STEAM_CMD_CALIBRATE_ANALOG	0xc3
-+#define STEAM_CMD_DONGLE_GET_CONN_SLOTS	0xc4
-+#define STEAM_CMD_HAPTIC_CMD		0xea
- #define STEAM_CMD_HAPTIC_RUMBLE		0xeb
-
- /* Some useful register ids */
--#define STEAM_REG_LPAD_MODE		0x07
--#define STEAM_REG_RPAD_MODE		0x08
--#define STEAM_REG_RPAD_MARGIN		0x18
--#define STEAM_REG_LED			0x2d
--#define STEAM_REG_GYRO_MODE		0x30
--#define STEAM_REG_LPAD_CLICK_PRESSURE	0x34
--#define STEAM_REG_RPAD_CLICK_PRESSURE	0x35
-+#define STEAM_REG_MOUSE_SENSITIVITY		0x00
-+#define STEAM_REG_MOUSE_ACCELERATION		0x01
-+#define STEAM_REG_TRACKBALL_ROTATION_ANGLE	0x02
-+#define STEAM_REG_HAPTIC_INTENSITY		0x03
-+#define STEAM_REG_LEFT_GAMEPAD_STICK_ENABLED	0x04
-+#define STEAM_REG_RIGHT_GAMEPAD_STICK_ENABLED	0x05
-+#define STEAM_REG_USB_DEBUG_MODE		0x06
-+#define STEAM_REG_LEFT_TRACKPAD_MODE		0x07
-+#define STEAM_REG_RIGHT_TRACKPAD_MODE		0x08
-+#define STEAM_REG_MOUSE_POINTER_ENABLED		0x09
-+#define STEAM_REG_DPAD_DEADZONE			0x0a
-+#define STEAM_REG_MINIMUM_MOMENTUM_VEL		0x0b
-+#define STEAM_REG_MOMENTUM_DECAY_AMOUNT		0x0c
-+#define STEAM_REG_PAD_REL_MODE_TICKS_PER_PIXEL	0x0d
-+#define STEAM_REG_HAPTIC_INCREMENT		0x0e
-+#define STEAM_REG_DPAD_ANGLE_SIN		0x0f
-+#define STEAM_REG_DPAD_ANGLE_COS		0x10
-+#define STEAM_REG_MOMENTUM_VERTICAL_DIVISOR	0x11
-+#define STEAM_REG_MOMENTUM_MAXIMUM_VELOCITY	0x12
-+#define STEAM_REG_TRACKPAD_Z_ON			0x13
-+#define STEAM_REG_TRACKPAD_Z_OFF		0x14
-+#define STEAM_REG_SENSITIVY_SCALE_AMOUNT	0x15
-+#define STEAM_REG_LEFT_TRACKPAD_SECONDARY_MODE	0x16
-+#define STEAM_REG_RIGHT_TRACKPAD_SECONDARY_MODE	0x17
-+#define STEAM_REG_SMOOTH_ABSOLUTE_MOUSE		0x18
-+#define STEAM_REG_STEAMBUTTON_POWEROFF_TIME	0x19
-+#define STEAM_REG_TRACKPAD_OUTER_RADIUS		0x1b
-+#define STEAM_REG_TRACKPAD_Z_ON_LEFT		0x1c
-+#define STEAM_REG_TRACKPAD_Z_OFF_LEFT		0x1d
-+#define STEAM_REG_TRACKPAD_OUTER_SPIN_VEL	0x1e
-+#define STEAM_REG_TRACKPAD_OUTER_SPIN_RADIUS	0x1f
-+#define STEAM_REG_TRACKPAD_OUTER_SPIN_HORIZONTAL_ONLY	0x20
-+#define STEAM_REG_TRACKPAD_RELATIVE_MODE_DEADZONE	0x21
-+#define STEAM_REG_TRACKPAD_RELATIVE_MODE_MAX_VEL	0x22
-+#define STEAM_REG_TRACKPAD_RELATIVE_MODE_INVERT_Y	0x23
-+#define STEAM_REG_TRACKPAD_DOUBLE_TAP_BEEP_ENABLED	0x24
-+#define STEAM_REG_TRACKPAD_DOUBLE_TAP_BEEP_PERIOD	0x25
-+#define STEAM_REG_TRACKPAD_DOUBLE_TAP_BEEP_COUNT	0x26
-+#define STEAM_REG_TRACKPAD_OUTER_RADIUS_RELEASE_ON_TRANSITION 0x27
-+#define STEAM_REG_RADIAL_MODE_ANGLE		0x28
-+#define STEAM_REG_HAPTIC_INTENSITY_MOUSE_MODE	0x29
-+#define STEAM_REG_LEFT_DPAD_REQUIRES_CLICK	0x2a
-+#define STEAM_REG_RIGHT_DPAD_REQUIRES_CLICK	0x2b
-+#define STEAM_REG_LED_BASELINE_BRIGHTNESS	0x2c
-+#define STEAM_REG_LED_USER_BRIGHTNESS		0x2d
-+#define STEAM_REG_ENABLE_RAW_JOYSTICK		0x2e
-+#define STEAM_REG_ENABLE_FAST_SCAN		0x2f
-+#define STEAM_REG_GYRO_MODE			0x30
-+#define STEAM_REG_WIRELESS_PACKET_VERSION	0x31
-+#define STEAM_REG_SLEEP_INACTIVITY_TIMEOUT	0x32
-+#define STEAM_REG_LEFT_TRACKPAD_CLICK_PRESSURE	0x34
-+#define STEAM_REG_RIGHT_TRACKPAD_CLICK_PRESSURE	0x35
-
- /* Raw event identifiers */
- #define STEAM_EV_INPUT_DATA		0x01
-@@ -108,13 +179,28 @@ static LIST_HEAD(steam_devices);
- #define STEAM_EV_BATTERY		0x04
- #define STEAM_EV_DECK_INPUT_DATA	0x09
-
-+/* String attribute idenitifiers */
-+#define STEAM_ATTRIB_STR_BOARD_SERIAL	0x00
-+#define STEAM_ATTRIB_STR_UNIT_SERIAL	0x01
-+
- /* Values for GYRO_MODE (bitmask) */
--#define STEAM_GYRO_MODE_OFF		0x0000
--#define STEAM_GYRO_MODE_STEERING	0x0001
--#define STEAM_GYRO_MODE_TILT		0x0002
--#define STEAM_GYRO_MODE_SEND_ORIENTATION	0x0004
--#define STEAM_GYRO_MODE_SEND_RAW_ACCEL		0x0008
--#define STEAM_GYRO_MODE_SEND_RAW_GYRO		0x0010
-+#define STEAM_GYRO_MODE_OFF		0
-+#define STEAM_GYRO_MODE_STEERING	BIT(0)
-+#define STEAM_GYRO_MODE_TILT		BIT(1)
-+#define STEAM_GYRO_MODE_SEND_ORIENTATION	BIT(2)
-+#define STEAM_GYRO_MODE_SEND_RAW_ACCEL		BIT(3)
-+#define STEAM_GYRO_MODE_SEND_RAW_GYRO		BIT(4)
-+
-+/* Trackpad modes */
-+#define STEAM_TRACKPAD_ABSOLUTE_MOUSE		0x00
-+#define STEAM_TRACKPAD_RELATIVE_MOUSE		0x01
-+#define STEAM_TRACKPAD_DPAD_FOUR_WAY_DISCRETE	0x02
-+#define STEAM_TRACKPAD_DPAD_FOUR_WAY_OVERLAP	0x03
-+#define STEAM_TRACKPAD_DPAD_EIGHT_WAY		0x04
-+#define STEAM_TRACKPAD_RADIAL_MODE		0x05
-+#define STEAM_TRACKPAD_ABSOLUTE_DPAD		0x06
-+#define STEAM_TRACKPAD_NONE			0x07
-+#define STEAM_TRACKPAD_GESTURE_KEYBOARD		0x08
-
- /* Other random constants */
- #define STEAM_SERIAL_LEN 10
-@@ -232,7 +318,7 @@ static int steam_write_registers(struct steam_device *steam,
- 	/* Send: 0x87 len (reg valLo valHi)* */
- 	u8 reg;
- 	u16 val;
--	u8 cmd[64] = {STEAM_CMD_WRITE_REGISTER, 0x00};
-+	u8 cmd[64] = {STEAM_CMD_SET_REGISTER, 0x00};
- 	int ret;
- 	va_list args;
-
-@@ -268,7 +354,7 @@ static int steam_get_serial(struct steam_device *steam)
- 	 * Recv: 0xae 0x15 0x01 serialnumber (10 chars)
- 	 */
- 	int ret;
--	u8 cmd[] = {STEAM_CMD_GET_SERIAL, 0x15, 0x01};
-+	u8 cmd[] = {STEAM_CMD_GET_STRING_ATTRIB, 0x15, STEAM_ATTRIB_STR_UNIT_SERIAL};
- 	u8 reply[3 + STEAM_SERIAL_LEN + 1];
-
- 	ret = steam_send_report(steam, cmd, sizeof(cmd));
-@@ -277,7 +363,7 @@ static int steam_get_serial(struct steam_device *steam)
- 	ret = steam_recv_report(steam, reply, sizeof(reply));
- 	if (ret < 0)
- 		return ret;
--	if (reply[0] != 0xae || reply[1] != 0x15 || reply[2] != 0x01)
-+	if (reply[0] != 0xae || reply[1] != 0x15 || reply[2] != STEAM_ATTRIB_STR_UNIT_SERIAL)
- 		return -EIO;
- 	reply[3 + STEAM_SERIAL_LEN] = 0;
- 	strscpy(steam->serial_no, reply + 3, sizeof(steam->serial_no));
-@@ -291,7 +377,7 @@ static int steam_get_serial(struct steam_device *steam)
-  */
- static inline int steam_request_conn_status(struct steam_device *steam)
- {
--	return steam_send_report_byte(steam, STEAM_CMD_REQUEST_COMM_STATUS);
-+	return steam_send_report_byte(steam, STEAM_CMD_DONGLE_GET_STATE);
- }
-
- static inline int steam_haptic_rumble(struct steam_device *steam,
-@@ -339,9 +425,9 @@ static void steam_set_lizard_mode(struct steam_device *steam, bool enable)
- 		/* enable esc, enter, cursors */
- 		steam_send_report_byte(steam, STEAM_CMD_DEFAULT_MAPPINGS);
- 		/* enable mouse */
--		steam_send_report_byte(steam, STEAM_CMD_DEFAULT_MOUSE);
-+		steam_send_report_byte(steam, STEAM_CMD_DEFAULT_REGISTER);
- 		steam_write_registers(steam,
--			STEAM_REG_RPAD_MARGIN, 0x01, /* enable margin */
-+			STEAM_REG_SMOOTH_ABSOLUTE_MOUSE, 0x01, /* enable smooth */
- 			0);
-
- 		cancel_delayed_work_sync(&steam->heartbeat);
-@@ -351,11 +437,11 @@ static void steam_set_lizard_mode(struct steam_device *steam, bool enable)
-
- 		if (steam->quirks & STEAM_QUIRK_DECK) {
- 			steam_write_registers(steam,
--				STEAM_REG_RPAD_MARGIN, 0x00, /* disable margin */
--				STEAM_REG_LPAD_MODE, 0x07, /* disable mouse */
--				STEAM_REG_RPAD_MODE, 0x07, /* disable mouse */
--				STEAM_REG_LPAD_CLICK_PRESSURE, 0xFFFF, /* disable clicky pad */
--				STEAM_REG_RPAD_CLICK_PRESSURE, 0xFFFF, /* disable clicky pad */
-+				STEAM_REG_SMOOTH_ABSOLUTE_MOUSE, 0x00, /* disable smooth */
-+				STEAM_REG_LEFT_TRACKPAD_MODE, STEAM_TRACKPAD_NONE, /* disable mouse */
-+				STEAM_REG_RIGHT_TRACKPAD_MODE, STEAM_TRACKPAD_NONE, /* disable mouse */
-+				STEAM_REG_LEFT_TRACKPAD_CLICK_PRESSURE, 0xFFFF, /* disable clicky pad */
-+				STEAM_REG_RIGHT_TRACKPAD_CLICK_PRESSURE, 0xFFFF, /* disable clicky pad */
- 				0);
- 			/*
- 			 * The Steam Deck has a watchdog that automatically enables
-@@ -365,9 +451,9 @@ static void steam_set_lizard_mode(struct steam_device *steam, bool enable)
- 				schedule_delayed_work(&steam->heartbeat, 5 * HZ);
- 		} else {
- 			steam_write_registers(steam,
--				STEAM_REG_RPAD_MARGIN, 0x00, /* disable margin */
--				STEAM_REG_LPAD_MODE, 0x07, /* disable mouse */
--				STEAM_REG_RPAD_MODE, 0x07, /* disable mouse */
-+				STEAM_REG_SMOOTH_ABSOLUTE_MOUSE, 0x00, /* disable smooth */
-+				STEAM_REG_LEFT_TRACKPAD_MODE, STEAM_TRACKPAD_NONE, /* disable mouse */
-+				STEAM_REG_RIGHT_TRACKPAD_MODE, STEAM_TRACKPAD_NONE, /* disable mouse */
- 				0);
- 		}
- 	}
-@@ -747,7 +833,7 @@ static void steam_lizard_mode_heartbeat(struct work_struct *work)
- 	if (!steam->client_opened && steam->client_hdev) {
- 		steam_send_report_byte(steam, STEAM_CMD_CLEAR_MAPPINGS);
- 		steam_write_registers(steam,
--			STEAM_REG_RPAD_MODE, 0x07, /* disable mouse */
-+			STEAM_REG_RIGHT_TRACKPAD_MODE, STEAM_TRACKPAD_NONE, /* disable mouse */
- 			0);
- 		schedule_delayed_work(&steam->heartbeat, 5 * HZ);
- 	}
---
-2.41.0
-
-
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Vicki Pfau <vi@endrift.com>
-Date: Wed, 16 Nov 2022 19:54:26 -0800
-Subject: [PATCH 04/10] HID: hid-steam: Add gamepad-only mode switched to by
- holding options
-
-Signed-off-by: Vicki Pfau <vi@endrift.com>
----
- drivers/hid/hid-steam.c | 72 +++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 72 insertions(+)
-
-diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c
-index 39a9bf3b7f77d..0620046b142ef 100644
---- a/drivers/hid/hid-steam.c
-+++ b/drivers/hid/hid-steam.c
-@@ -202,6 +202,11 @@ static LIST_HEAD(steam_devices);
- #define STEAM_TRACKPAD_NONE			0x07
- #define STEAM_TRACKPAD_GESTURE_KEYBOARD		0x08
-
-+/* Pad identifiers for the deck */
-+#define STEAM_PAD_LEFT 0
-+#define STEAM_PAD_RIGHT 1
-+#define STEAM_PAD_BOTH 2
-+
- /* Other random constants */
- #define STEAM_SERIAL_LEN 10
-
-@@ -221,6 +226,9 @@ struct steam_device {
- 	u8 battery_charge;
- 	u16 voltage;
- 	struct delayed_work heartbeat;
-+	struct delayed_work mode_switch;
-+	bool did_mode_switch;
-+	bool gamepad_mode;
- 	struct work_struct rumble_work;
- 	u16 rumble_left;
- 	u16 rumble_right;
-@@ -380,6 +388,33 @@ static inline int steam_request_conn_status(struct steam_device *steam)
- 	return steam_send_report_byte(steam, STEAM_CMD_DONGLE_GET_STATE);
- }
-
-+/*
-+ * Send a haptic pulse to the trackpads
-+ * Duration and interval are measured in microseconds, count is the number
-+ * of pulses to send for duration time with interval microseconds between them
-+ * and gain is measured in decibels, ranging from -24 to +6
-+ */
-+static inline int steam_haptic_pulse(struct steam_device *steam, u8 pad,
-+				u16 duration, u16 interval, u16 count, u8 gain)
-+{
-+	u8 report[10] = {STEAM_CMD_HAPTIC_PULSE, 8};
-+
-+	/* Left and right are swapped on this report for legacy reasons */
-+	if (pad < STEAM_PAD_BOTH)
-+		pad ^= 1;
-+
-+	report[2] = pad;
-+	report[3] = duration & 0xFF;
-+	report[4] = duration >> 8;
-+	report[5] = interval & 0xFF;
-+	report[6] = interval >> 8;
-+	report[7] = count & 0xFF;
-+	report[8] = count >> 8;
-+	report[9] = gain;
-+
-+	return steam_send_report(steam, report, sizeof(report));
-+}
-+
- static inline int steam_haptic_rumble(struct steam_device *steam,
- 				u16 intensity, u16 left_speed, u16 right_speed,
- 				u8 left_gain, u8 right_gain)
-@@ -421,6 +456,9 @@ static int steam_play_effect(struct input_dev *dev, void *data,
-
- static void steam_set_lizard_mode(struct steam_device *steam, bool enable)
- {
-+	if (steam->gamepad_mode)
-+		enable = false;
-+
- 	if (enable) {
- 		/* enable esc, enter, cursors */
- 		steam_send_report_byte(steam, STEAM_CMD_DEFAULT_MAPPINGS);
-@@ -805,6 +843,29 @@ static void steam_work_connect_cb(struct work_struct *work)
- 	}
- }
-
-+static void steam_mode_switch_cb(struct work_struct *work)
-+{
-+	struct steam_device *steam = container_of(to_delayed_work(work),
-+							struct steam_device, mode_switch);
-+	steam->gamepad_mode = !steam->gamepad_mode;
-+	if (!lizard_mode)
-+		return;
-+
-+	mutex_lock(&steam->mutex);
-+	if (steam->gamepad_mode)
-+		steam_set_lizard_mode(steam, false);
-+	else if (!steam->client_opened)
-+		steam_set_lizard_mode(steam, lizard_mode);
-+	mutex_unlock(&steam->mutex);
-+
-+	steam_haptic_pulse(steam, STEAM_PAD_RIGHT, 0x190, 0, 1, 0);
-+	if (steam->gamepad_mode) {
-+		steam_haptic_pulse(steam, STEAM_PAD_LEFT, 0x14D, 0x14D, 0x2D, 0);
-+	} else {
-+		steam_haptic_pulse(steam, STEAM_PAD_LEFT, 0x1F4, 0x1F4, 0x1E, 0);
-+	}
-+}
-+
- static bool steam_is_valve_interface(struct hid_device *hdev)
- {
- 	struct hid_report_enum *rep_enum;
-@@ -977,6 +1038,7 @@ static int steam_probe(struct hid_device *hdev,
- 	mutex_init(&steam->mutex);
- 	steam->quirks = id->driver_data;
- 	INIT_WORK(&steam->work_connect, steam_work_connect_cb);
-+	INIT_DELAYED_WORK(&steam->mode_switch, steam_mode_switch_cb);
- 	INIT_LIST_HEAD(&steam->list);
- 	INIT_DEFERRABLE_WORK(&steam->heartbeat, steam_lizard_mode_heartbeat);
- 	INIT_WORK(&steam->rumble_work, steam_haptic_rumble_cb);
-@@ -1036,6 +1098,7 @@ static int steam_probe(struct hid_device *hdev,
- client_hdev_fail:
- 	cancel_work_sync(&steam->work_connect);
- 	cancel_delayed_work_sync(&steam->heartbeat);
-+	cancel_delayed_work_sync(&steam->mode_switch);
- 	cancel_work_sync(&steam->rumble_work);
- steam_alloc_fail:
- 	hid_err(hdev, "%s: failed with error %d\n",
-@@ -1059,6 +1122,7 @@ static void steam_remove(struct hid_device *hdev)
- 	cancel_delayed_work_sync(&steam->heartbeat);
- 	mutex_unlock(&steam->mutex);
- 	cancel_work_sync(&steam->work_connect);
-+	cancel_delayed_work_sync(&steam->mode_switch);
- 	if (steam->quirks & STEAM_QUIRK_WIRELESS) {
- 		hid_info(hdev, "Steam wireless receiver disconnected");
- 	}
-@@ -1393,6 +1457,14 @@ static void steam_do_deck_input_event(struct steam_device *steam,
- 	input_event(input, EV_KEY, BTN_BASE, !!(b14 & BIT(2)));
-
- 	input_sync(input);
-+
-+	if (!(b9 & BIT(6)) && steam->did_mode_switch) {
-+		steam->did_mode_switch = false;
-+		cancel_delayed_work_sync(&steam->mode_switch);
-+	} else if (!steam->client_opened && (b9 & BIT(6)) && !steam->did_mode_switch) {
-+		steam->did_mode_switch = true;
-+		schedule_delayed_work(&steam->mode_switch, 45 * HZ / 100);
-+	}
- }
-
- /*
---
-2.41.0
-
-
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Vicki Pfau <vi@endrift.com>
-Date: Mon, 8 May 2023 20:24:56 -0700
-Subject: [PATCH 05/10] HID: hid-steam: Clean up locking
-
-This cleans up the locking logic so that the spinlock is consistently used for
-access to a small handful of struct variables, and the mutex is exclusively and
-consistently used for ensuring that mutliple threads aren't trying to
-send/receive reports at the same time. Previously, only some report
-transactions were guarded by this mutex, potentially breaking atomicity. The
-mutex has been renamed to reflect this usage.
-
-Signed-off-by: Vicki Pfau <vi@endrift.com>
----
- drivers/hid/hid-steam.c | 148 ++++++++++++++++++++++++----------------
- 1 file changed, 90 insertions(+), 58 deletions(-)
-
-diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c
-index 0620046b142ef..845ca71b8bd3a 100644
---- a/drivers/hid/hid-steam.c
-+++ b/drivers/hid/hid-steam.c
-@@ -214,7 +214,7 @@ struct steam_device {
- 	struct list_head list;
- 	spinlock_t lock;
- 	struct hid_device *hdev, *client_hdev;
--	struct mutex mutex;
-+	struct mutex report_mutex;
- 	bool client_opened;
- 	struct input_dev __rcu *input;
- 	unsigned long quirks;
-@@ -361,21 +361,26 @@ static int steam_get_serial(struct steam_device *steam)
- 	 * Send: 0xae 0x15 0x01
- 	 * Recv: 0xae 0x15 0x01 serialnumber (10 chars)
- 	 */
--	int ret;
-+	int ret = 0;
- 	u8 cmd[] = {STEAM_CMD_GET_STRING_ATTRIB, 0x15, STEAM_ATTRIB_STR_UNIT_SERIAL};
- 	u8 reply[3 + STEAM_SERIAL_LEN + 1];
-
-+	mutex_lock(&steam->report_mutex);
- 	ret = steam_send_report(steam, cmd, sizeof(cmd));
- 	if (ret < 0)
--		return ret;
-+		goto out;
- 	ret = steam_recv_report(steam, reply, sizeof(reply));
- 	if (ret < 0)
--		return ret;
--	if (reply[0] != 0xae || reply[1] != 0x15 || reply[2] != STEAM_ATTRIB_STR_UNIT_SERIAL)
--		return -EIO;
-+		goto out;
-+	if (reply[0] != 0xae || reply[1] != 0x15 || reply[2] != STEAM_ATTRIB_STR_UNIT_SERIAL) {
-+		ret = -EIO;
-+		goto out;
-+	}
- 	reply[3 + STEAM_SERIAL_LEN] = 0;
- 	strscpy(steam->serial_no, reply + 3, sizeof(steam->serial_no));
--	return 0;
-+out:
-+	mutex_unlock(&steam->report_mutex);
-+	return ret;
- }
-
- /*
-@@ -385,7 +390,11 @@ static int steam_get_serial(struct steam_device *steam)
-  */
- static inline int steam_request_conn_status(struct steam_device *steam)
- {
--	return steam_send_report_byte(steam, STEAM_CMD_DONGLE_GET_STATE);
-+	int ret;
-+	mutex_lock(&steam->report_mutex);
-+	ret = steam_send_report_byte(steam, STEAM_CMD_DONGLE_GET_STATE);
-+	mutex_unlock(&steam->report_mutex);
-+	return ret;
- }
-
- /*
-@@ -397,6 +406,7 @@ static inline int steam_request_conn_status(struct steam_device *steam)
- static inline int steam_haptic_pulse(struct steam_device *steam, u8 pad,
- 				u16 duration, u16 interval, u16 count, u8 gain)
- {
-+	int ret;
- 	u8 report[10] = {STEAM_CMD_HAPTIC_PULSE, 8};
-
- 	/* Left and right are swapped on this report for legacy reasons */
-@@ -412,13 +422,17 @@ static inline int steam_haptic_pulse(struct steam_device *steam, u8 pad,
- 	report[8] = count >> 8;
- 	report[9] = gain;
-
--	return steam_send_report(steam, report, sizeof(report));
-+	mutex_lock(&steam->report_mutex);
-+	ret = steam_send_report(steam, report, sizeof(report));
-+	mutex_unlock(&steam->report_mutex);
-+	return ret;
- }
-
- static inline int steam_haptic_rumble(struct steam_device *steam,
- 				u16 intensity, u16 left_speed, u16 right_speed,
- 				u8 left_gain, u8 right_gain)
- {
-+	int ret;
- 	u8 report[11] = {STEAM_CMD_HAPTIC_RUMBLE, 9};
-
- 	report[3] = intensity & 0xFF;
-@@ -430,7 +444,10 @@ static inline int steam_haptic_rumble(struct steam_device *steam,
- 	report[9] = left_gain;
- 	report[10] = right_gain;
-
--	return steam_send_report(steam, report, sizeof(report));
-+	mutex_lock(&steam->report_mutex);
-+	ret = steam_send_report(steam, report, sizeof(report));
-+	mutex_unlock(&steam->report_mutex);
-+	return ret;
- }
-
- static void steam_haptic_rumble_cb(struct work_struct *work)
-@@ -460,6 +477,7 @@ static void steam_set_lizard_mode(struct steam_device *steam, bool enable)
- 		enable = false;
-
- 	if (enable) {
-+		mutex_lock(&steam->report_mutex);
- 		/* enable esc, enter, cursors */
- 		steam_send_report_byte(steam, STEAM_CMD_DEFAULT_MAPPINGS);
- 		/* enable mouse */
-@@ -467,9 +485,11 @@ static void steam_set_lizard_mode(struct steam_device *steam, bool enable)
- 		steam_write_registers(steam,
- 			STEAM_REG_SMOOTH_ABSOLUTE_MOUSE, 0x01, /* enable smooth */
- 			0);
-+		mutex_unlock(&steam->report_mutex);
-
- 		cancel_delayed_work_sync(&steam->heartbeat);
- 	} else {
-+		mutex_lock(&steam->report_mutex);
- 		/* disable esc, enter, cursor */
- 		steam_send_report_byte(steam, STEAM_CMD_CLEAR_MAPPINGS);
-
-@@ -481,18 +501,19 @@ static void steam_set_lizard_mode(struct steam_device *steam, bool enable)
- 				STEAM_REG_LEFT_TRACKPAD_CLICK_PRESSURE, 0xFFFF, /* disable clicky pad */
- 				STEAM_REG_RIGHT_TRACKPAD_CLICK_PRESSURE, 0xFFFF, /* disable clicky pad */
- 				0);
-+			mutex_unlock(&steam->report_mutex);
- 			/*
- 			 * The Steam Deck has a watchdog that automatically enables
- 			 * lizard mode if it doesn't see any traffic for too long
- 			 */
--			if (!work_busy(&steam->heartbeat.work))
--				schedule_delayed_work(&steam->heartbeat, 5 * HZ);
-+			schedule_delayed_work(&steam->heartbeat, 5 * HZ);
- 		} else {
- 			steam_write_registers(steam,
- 				STEAM_REG_SMOOTH_ABSOLUTE_MOUSE, 0x00, /* disable smooth */
- 				STEAM_REG_LEFT_TRACKPAD_MODE, STEAM_TRACKPAD_NONE, /* disable mouse */
- 				STEAM_REG_RIGHT_TRACKPAD_MODE, STEAM_TRACKPAD_NONE, /* disable mouse */
- 				0);
-+			mutex_unlock(&steam->report_mutex);
- 		}
- 	}
- }
-@@ -500,22 +521,29 @@ static void steam_set_lizard_mode(struct steam_device *steam, bool enable)
- static int steam_input_open(struct input_dev *dev)
- {
- 	struct steam_device *steam = input_get_drvdata(dev);
-+	unsigned long flags;
-+	bool set_lizard_mode;
-
--	mutex_lock(&steam->mutex);
--	if (!steam->client_opened && lizard_mode)
-+	spin_lock_irqsave(&steam->lock, flags);
-+	set_lizard_mode = !steam->client_opened && lizard_mode;
-+	spin_unlock_irqrestore(&steam->lock, flags);
-+	if (set_lizard_mode)
- 		steam_set_lizard_mode(steam, false);
--	mutex_unlock(&steam->mutex);
-+
- 	return 0;
- }
-
- static void steam_input_close(struct input_dev *dev)
- {
- 	struct steam_device *steam = input_get_drvdata(dev);
-+	unsigned long flags;
-+	bool set_lizard_mode;
-
--	mutex_lock(&steam->mutex);
--	if (!steam->client_opened && lizard_mode)
-+	spin_lock_irqsave(&steam->lock, flags);
-+	set_lizard_mode = !steam->client_opened && lizard_mode;
-+	spin_unlock_irqrestore(&steam->lock, flags);
-+	if (set_lizard_mode)
- 		steam_set_lizard_mode(steam, true);
--	mutex_unlock(&steam->mutex);
- }
-
- static enum power_supply_property steam_battery_props[] = {
-@@ -760,6 +788,7 @@ static int steam_register(struct steam_device *steam)
- {
- 	int ret;
- 	bool client_opened;
-+	unsigned long flags;
-
- 	/*
- 	 * This function can be called several times in a row with the
-@@ -772,11 +801,9 @@ static int steam_register(struct steam_device *steam)
- 		 * Unlikely, but getting the serial could fail, and it is not so
- 		 * important, so make up a serial number and go on.
- 		 */
--		mutex_lock(&steam->mutex);
- 		if (steam_get_serial(steam) < 0)
- 			strscpy(steam->serial_no, "XXXXXXXXXX",
- 					sizeof(steam->serial_no));
--		mutex_unlock(&steam->mutex);
-
- 		hid_info(steam->hdev, "Steam Controller '%s' connected",
- 				steam->serial_no);
-@@ -791,11 +818,11 @@ static int steam_register(struct steam_device *steam)
- 		mutex_unlock(&steam_devices_lock);
- 	}
-
--	mutex_lock(&steam->mutex);
-+	spin_lock_irqsave(&steam->lock, flags);
- 	client_opened = steam->client_opened;
-+	spin_unlock_irqrestore(&steam->lock, flags);
- 	if (!client_opened)
- 		steam_set_lizard_mode(steam, lizard_mode);
--	mutex_unlock(&steam->mutex);
-
- 	if (!client_opened)
- 		ret = steam_input_register(steam);
-@@ -847,16 +874,21 @@ static void steam_mode_switch_cb(struct work_struct *work)
- {
- 	struct steam_device *steam = container_of(to_delayed_work(work),
- 							struct steam_device, mode_switch);
-+	unsigned long flags;
-+	bool client_opened;
- 	steam->gamepad_mode = !steam->gamepad_mode;
- 	if (!lizard_mode)
- 		return;
-
--	mutex_lock(&steam->mutex);
- 	if (steam->gamepad_mode)
- 		steam_set_lizard_mode(steam, false);
--	else if (!steam->client_opened)
--		steam_set_lizard_mode(steam, lizard_mode);
--	mutex_unlock(&steam->mutex);
-+	else {
-+		spin_lock_irqsave(&steam->lock, flags);
-+		client_opened = steam->client_opened;
-+		spin_unlock_irqrestore(&steam->lock, flags);
-+		if (!client_opened)
-+			steam_set_lizard_mode(steam, lizard_mode);
-+	}
-
- 	steam_haptic_pulse(steam, STEAM_PAD_RIGHT, 0x190, 0, 1, 0);
- 	if (steam->gamepad_mode) {
-@@ -889,16 +921,21 @@ static void steam_lizard_mode_heartbeat(struct work_struct *work)
- {
- 	struct steam_device *steam = container_of(work, struct steam_device,
- 							heartbeat.work);
-+	bool client_opened;
-+	unsigned long flags;
-
--	mutex_lock(&steam->mutex);
--	if (!steam->client_opened && steam->client_hdev) {
-+	spin_lock_irqsave(&steam->lock, flags);
-+	client_opened = steam->client_opened;
-+	spin_unlock_irqrestore(&steam->lock, flags);
-+	if (!client_opened) {
-+		mutex_lock(&steam->report_mutex);
- 		steam_send_report_byte(steam, STEAM_CMD_CLEAR_MAPPINGS);
- 		steam_write_registers(steam,
- 			STEAM_REG_RIGHT_TRACKPAD_MODE, STEAM_TRACKPAD_NONE, /* disable mouse */
- 			0);
-+		mutex_unlock(&steam->report_mutex);
- 		schedule_delayed_work(&steam->heartbeat, 5 * HZ);
- 	}
--	mutex_unlock(&steam->mutex);
- }
-
- static int steam_client_ll_parse(struct hid_device *hdev)
-@@ -921,10 +958,11 @@ static void steam_client_ll_stop(struct hid_device *hdev)
- static int steam_client_ll_open(struct hid_device *hdev)
- {
- 	struct steam_device *steam = hdev->driver_data;
-+	unsigned long flags;
-
--	mutex_lock(&steam->mutex);
-+	spin_lock_irqsave(&steam->lock, flags);
- 	steam->client_opened = true;
--	mutex_unlock(&steam->mutex);
-+	spin_unlock_irqrestore(&steam->lock, flags);
-
- 	steam_input_unregister(steam);
-
-@@ -939,14 +977,12 @@ static void steam_client_ll_close(struct hid_device *hdev)
- 	bool connected;
-
- 	spin_lock_irqsave(&steam->lock, flags);
--	connected = steam->connected;
-+	steam->client_opened = false;
-+	connected = steam->connected && !steam->client_opened;
- 	spin_unlock_irqrestore(&steam->lock, flags);
-
--	mutex_lock(&steam->mutex);
--	steam->client_opened = false;
- 	if (connected)
- 		steam_set_lizard_mode(steam, lizard_mode);
--	mutex_unlock(&steam->mutex);
-
- 	if (connected)
- 		steam_input_register(steam);
-@@ -1035,7 +1071,7 @@ static int steam_probe(struct hid_device *hdev,
- 	steam->hdev = hdev;
- 	hid_set_drvdata(hdev, steam);
- 	spin_lock_init(&steam->lock);
--	mutex_init(&steam->mutex);
-+	mutex_init(&steam->report_mutex);
- 	steam->quirks = id->driver_data;
- 	INIT_WORK(&steam->work_connect, steam_work_connect_cb);
- 	INIT_DELAYED_WORK(&steam->mode_switch, steam_mode_switch_cb);
-@@ -1043,13 +1079,6 @@ static int steam_probe(struct hid_device *hdev,
- 	INIT_DEFERRABLE_WORK(&steam->heartbeat, steam_lizard_mode_heartbeat);
- 	INIT_WORK(&steam->rumble_work, steam_haptic_rumble_cb);
-
--	steam->client_hdev = steam_create_client_hid(hdev);
--	if (IS_ERR(steam->client_hdev)) {
--		ret = PTR_ERR(steam->client_hdev);
--		goto client_hdev_fail;
--	}
--	steam->client_hdev->driver_data = steam;
--
- 	/*
- 	 * With the real steam controller interface, do not connect hidraw.
- 	 * Instead, create the client_hid and connect that.
-@@ -1058,10 +1087,6 @@ static int steam_probe(struct hid_device *hdev,
- 	if (ret)
- 		goto hid_hw_start_fail;
-
--	ret = hid_add_device(steam->client_hdev);
--	if (ret)
--		goto client_hdev_add_fail;
--
- 	ret = hid_hw_open(hdev);
- 	if (ret) {
- 		hid_err(hdev,
-@@ -1087,15 +1112,26 @@ static int steam_probe(struct hid_device *hdev,
- 		}
- 	}
-
-+	steam->client_hdev = steam_create_client_hid(hdev);
-+	if (IS_ERR(steam->client_hdev)) {
-+		ret = PTR_ERR(steam->client_hdev);
-+		goto client_hdev_fail;
-+	}
-+	steam->client_hdev->driver_data = steam;
-+
-+	ret = hid_add_device(steam->client_hdev);
-+	if (ret)
-+		goto client_hdev_add_fail;
-+
- 	return 0;
-
--input_register_fail:
--hid_hw_open_fail:
- client_hdev_add_fail:
- 	hid_hw_stop(hdev);
--hid_hw_start_fail:
--	hid_destroy_device(steam->client_hdev);
- client_hdev_fail:
-+	hid_destroy_device(steam->client_hdev);
-+input_register_fail:
-+hid_hw_open_fail:
-+hid_hw_start_fail:
- 	cancel_work_sync(&steam->work_connect);
- 	cancel_delayed_work_sync(&steam->heartbeat);
- 	cancel_delayed_work_sync(&steam->mode_switch);
-@@ -1115,14 +1151,12 @@ static void steam_remove(struct hid_device *hdev)
- 		return;
- 	}
-
-+	cancel_delayed_work_sync(&steam->heartbeat);
-+	cancel_delayed_work_sync(&steam->mode_switch);
-+	cancel_work_sync(&steam->work_connect);
- 	hid_destroy_device(steam->client_hdev);
--	mutex_lock(&steam->mutex);
- 	steam->client_hdev = NULL;
- 	steam->client_opened = false;
--	cancel_delayed_work_sync(&steam->heartbeat);
--	mutex_unlock(&steam->mutex);
--	cancel_work_sync(&steam->work_connect);
--	cancel_delayed_work_sync(&steam->mode_switch);
- 	if (steam->quirks & STEAM_QUIRK_WIRELESS) {
- 		hid_info(hdev, "Steam wireless receiver disconnected");
- 	}
-@@ -1597,10 +1631,8 @@ static int steam_param_set_lizard_mode(const char *val,
-
- 	mutex_lock(&steam_devices_lock);
- 	list_for_each_entry(steam, &steam_devices, list) {
--		mutex_lock(&steam->mutex);
- 		if (!steam->client_opened)
- 			steam_set_lizard_mode(steam, lizard_mode);
--		mutex_unlock(&steam->mutex);
- 	}
- 	mutex_unlock(&steam_devices_lock);
- 	return 0;
---
-2.41.0
-
-
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Vicki Pfau <vi@endrift.com>
-Date: Wed, 10 May 2023 17:27:12 -0700
-Subject: [PATCH 06/10] HID: hid-steam: Make client_opened a counter
-
-The client_opened variable was used to track if the hidraw was opened by any
-clients to silence keyboard/mouse events while opened. However, there was no
-counting of how many clients were opened, so opening two at the same time and
-then closing one would fool the driver into thinking it had no remaining opened
-clients.
-
-Signed-off-by: Vicki Pfau <vi@endrift.com>
----
- drivers/hid/hid-steam.c | 10 +++++-----
- 1 file changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c
-index 845ca71b8bd3a..0c2fe51b29bc1 100644
---- a/drivers/hid/hid-steam.c
-+++ b/drivers/hid/hid-steam.c
-@@ -215,7 +215,7 @@ struct steam_device {
- 	spinlock_t lock;
- 	struct hid_device *hdev, *client_hdev;
- 	struct mutex report_mutex;
--	bool client_opened;
-+	unsigned long client_opened;
- 	struct input_dev __rcu *input;
- 	unsigned long quirks;
- 	struct work_struct work_connect;
-@@ -787,7 +787,7 @@ static void steam_battery_unregister(struct steam_device *steam)
- static int steam_register(struct steam_device *steam)
- {
- 	int ret;
--	bool client_opened;
-+	unsigned long client_opened;
- 	unsigned long flags;
-
- 	/*
-@@ -961,7 +961,7 @@ static int steam_client_ll_open(struct hid_device *hdev)
- 	unsigned long flags;
-
- 	spin_lock_irqsave(&steam->lock, flags);
--	steam->client_opened = true;
-+	steam->client_opened++;
- 	spin_unlock_irqrestore(&steam->lock, flags);
-
- 	steam_input_unregister(steam);
-@@ -977,7 +977,7 @@ static void steam_client_ll_close(struct hid_device *hdev)
- 	bool connected;
-
- 	spin_lock_irqsave(&steam->lock, flags);
--	steam->client_opened = false;
-+	steam->client_opened--;
- 	connected = steam->connected && !steam->client_opened;
- 	spin_unlock_irqrestore(&steam->lock, flags);
-
-@@ -1156,7 +1156,7 @@ static void steam_remove(struct hid_device *hdev)
- 	cancel_work_sync(&steam->work_connect);
- 	hid_destroy_device(steam->client_hdev);
- 	steam->client_hdev = NULL;
--	steam->client_opened = false;
-+	steam->client_opened = 0;
- 	if (steam->quirks & STEAM_QUIRK_WIRELESS) {
- 		hid_info(hdev, "Steam wireless receiver disconnected");
- 	}
---
-2.41.0
-
-
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Vicki Pfau <vi@endrift.com>
-Date: Thu, 18 May 2023 18:00:35 -0700
-Subject: [PATCH 07/10] HID: hid-steam: Better handling of serial number length
-
-The second byte of the GET_STRING_ATTRIB report is a length, so we should set
-the size of the buffer to be the size we're actually requesting, and only
-reject the reply if the length out is nonsensical.
-
-Signed-off-by: Vicki Pfau <vi@endrift.com>
----
- drivers/hid/hid-steam.c | 10 +++++-----
- 1 file changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c
-index 0c2fe51b29bc1..92e3e1052fa42 100644
---- a/drivers/hid/hid-steam.c
-+++ b/drivers/hid/hid-steam.c
-@@ -208,7 +208,7 @@ static LIST_HEAD(steam_devices);
- #define STEAM_PAD_BOTH 2
-
- /* Other random constants */
--#define STEAM_SERIAL_LEN 10
-+#define STEAM_SERIAL_LEN 0x15
-
- struct steam_device {
- 	struct list_head list;
-@@ -359,10 +359,10 @@ static int steam_get_serial(struct steam_device *steam)
- {
- 	/*
- 	 * Send: 0xae 0x15 0x01
--	 * Recv: 0xae 0x15 0x01 serialnumber (10 chars)
-+	 * Recv: 0xae 0x15 0x01 serialnumber
- 	 */
- 	int ret = 0;
--	u8 cmd[] = {STEAM_CMD_GET_STRING_ATTRIB, 0x15, STEAM_ATTRIB_STR_UNIT_SERIAL};
-+	u8 cmd[] = {STEAM_CMD_GET_STRING_ATTRIB, sizeof(steam->serial_no), STEAM_ATTRIB_STR_UNIT_SERIAL};
- 	u8 reply[3 + STEAM_SERIAL_LEN + 1];
-
- 	mutex_lock(&steam->report_mutex);
-@@ -372,12 +372,12 @@ static int steam_get_serial(struct steam_device *steam)
- 	ret = steam_recv_report(steam, reply, sizeof(reply));
- 	if (ret < 0)
- 		goto out;
--	if (reply[0] != 0xae || reply[1] != 0x15 || reply[2] != STEAM_ATTRIB_STR_UNIT_SERIAL) {
-+	if (reply[0] != 0xae || reply[1] < 1 || reply[1] > sizeof(steam->serial_no) || reply[2] != STEAM_ATTRIB_STR_UNIT_SERIAL) {
- 		ret = -EIO;
- 		goto out;
- 	}
- 	reply[3 + STEAM_SERIAL_LEN] = 0;
--	strscpy(steam->serial_no, reply + 3, sizeof(steam->serial_no));
-+	strscpy(steam->serial_no, reply + 3, reply[1]);
- out:
- 	mutex_unlock(&steam->report_mutex);
- 	return ret;
---
-2.41.0
diff --git a/patches/nobara/uinput.patch b/patches/nobara/uinput.patch
deleted file mode 100644
index c5666a8..0000000
--- a/patches/nobara/uinput.patch
+++ /dev/null
@@ -1,133 +0,0 @@
----
- drivers/input/misc/uinput.c | 48 +++++++++++++++++++++++++------------
- include/uapi/linux/uinput.h |  5 ++++
- 2 files changed, 38 insertions(+), 15 deletions(-)
-
-
-diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
-index 84051f20b18a..2c3180370a02 100644
---- a/drivers/input/misc/uinput.c
-+++ b/drivers/input/misc/uinput.c
-@@ -20,6 +20,7 @@ 
-  */
- #include <uapi/linux/uinput.h>
- #include <linux/poll.h>
-+#include <linux/printk.h>
- #include <linux/sched.h>
- #include <linux/slab.h>
- #include <linux/module.h>
-@@ -280,7 +281,7 @@  static int uinput_dev_flush(struct input_dev *dev, struct file *file)
- 
- static void uinput_destroy_device(struct uinput_device *udev)
- {
--	const char *name, *phys;
-+	const char *name, *phys, *uniq;
- 	struct input_dev *dev = udev->dev;
- 	enum uinput_state old_state = udev->state;
- 
-@@ -289,6 +290,7 @@  static void uinput_destroy_device(struct uinput_device *udev)
- 	if (dev) {
- 		name = dev->name;
- 		phys = dev->phys;
-+		uniq = dev->uniq;
- 		if (old_state == UIST_CREATED) {
- 			uinput_flush_requests(udev);
- 			input_unregister_device(dev);
-@@ -297,6 +299,7 @@  static void uinput_destroy_device(struct uinput_device *udev)
- 		}
- 		kfree(name);
- 		kfree(phys);
-+		kfree(uniq);
- 		udev->dev = NULL;
- 	}
- }
-@@ -831,6 +834,24 @@  static int uinput_str_to_user(void __user *dest, const char *str,
- 	return ret ? -EFAULT : len;
- }
- 
-+static int uinput_get_user_str(struct uinput_device *udev, const char **kptr,
-+			       const char *uptr, unsigned int size)
-+{
-+	char *tmp;
-+
-+	if (udev->state == UIST_CREATED)
-+		return -EINVAL;
-+
-+	tmp = strndup_user(uptr, size);
-+	if (IS_ERR(tmp))
-+		return PTR_ERR(tmp);
-+
-+	kfree(*kptr);
-+	*kptr = tmp;
-+
-+	return 0;
-+}
-+
- static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
- 				 unsigned long arg, void __user *p)
- {
-@@ -839,7 +860,6 @@  static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
- 	struct uinput_ff_upload ff_up;
- 	struct uinput_ff_erase  ff_erase;
- 	struct uinput_request   *req;
--	char			*phys;
- 	const char		*name;
- 	unsigned int		size;
- 
-@@ -916,19 +936,8 @@  static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
- 		goto out;
- 
- 	case UI_SET_PHYS:
--		if (udev->state == UIST_CREATED) {
--			retval = -EINVAL;
--			goto out;
--		}
--
--		phys = strndup_user(p, 1024);
--		if (IS_ERR(phys)) {
--			retval = PTR_ERR(phys);
--			goto out;
--		}
--
--		kfree(udev->dev->phys);
--		udev->dev->phys = phys;
-+		pr_warn_once("uinput: UI_SET_PHYS is deprecated. Use UI_SET_PHYS_STR");
-+		retval = uinput_get_user_str(udev, &udev->dev->phys, p, 1024);
- 		goto out;
- 
- 	case UI_BEGIN_FF_UPLOAD:
-@@ -1023,6 +1032,15 @@  static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
- 	case UI_ABS_SETUP & ~IOCSIZE_MASK:
- 		retval = uinput_abs_setup(udev, p, size);
- 		goto out;
-+
-+	case UI_SET_PHYS_STR(0):
-+		retval = uinput_get_user_str(udev, &udev->dev->phys, p, size);
-+		goto out;
-+
-+	case UI_SET_UNIQ_STR(0):
-+		retval = uinput_get_user_str(udev, &udev->dev->uniq, p, size);
-+		goto out;
-+
- 	}
- 
- 	retval = -EINVAL;
-diff --git a/include/uapi/linux/uinput.h b/include/uapi/linux/uinput.h
-index c9e677e3af1d..84d4fa142830 100644
---- a/include/uapi/linux/uinput.h
-+++ b/include/uapi/linux/uinput.h
-@@ -142,9 +142,14 @@  struct uinput_abs_setup {
- #define UI_SET_LEDBIT		_IOW(UINPUT_IOCTL_BASE, 105, int)
- #define UI_SET_SNDBIT		_IOW(UINPUT_IOCTL_BASE, 106, int)
- #define UI_SET_FFBIT		_IOW(UINPUT_IOCTL_BASE, 107, int)
-+
-+/* DEPRECATED: Data size is ambiguous. Use UI_SET_PHYS_STR instead. */
- #define UI_SET_PHYS		_IOW(UINPUT_IOCTL_BASE, 108, char*)
-+
- #define UI_SET_SWBIT		_IOW(UINPUT_IOCTL_BASE, 109, int)
- #define UI_SET_PROPBIT		_IOW(UINPUT_IOCTL_BASE, 110, int)
-+#define UI_SET_PHYS_STR(len)	_IOC(_IOC_WRITE, UINPUT_IOCTL_BASE, 111, len)
-+#define UI_SET_UNIQ_STR(len)	_IOC(_IOC_WRITE, UINPUT_IOCTL_BASE, 112, len)
- 
- #define UI_BEGIN_FF_UPLOAD	_IOWR(UINPUT_IOCTL_BASE, 200, struct uinput_ff_upload)
- #define UI_END_FF_UPLOAD	_IOW(UINPUT_IOCTL_BASE, 201, struct uinput_ff_upload)
diff --git a/patches/series b/patches/series
deleted file mode 100644
index 6d09e36..0000000
--- a/patches/series
+++ /dev/null
@@ -1,15 +0,0 @@
-cachyos/0001-cachyos-base-all.patch
-cachyos/0001-bore-cachy.patch
-# nobara/0001-Allow-to-set-custom-USB-pollrate-for-specific-device.patch
-# nobara/0001-Revert-PCI-Add-a-REBAR-size-quirk-for-Sapphire-RX-56.patch
-# nobara/0001-Revert-nvme-pci-drop-redundant-pci_enable_pcie_error.patch
-# nobara/0001-Set-amdgpu.ppfeaturemask-0xffffffff-as-default.patch
-# nobara/0001-acpi-proc-idle-skip-dummy-wait.patch
-# nobara/0001-add-acpi_call.patch
-# nobara/amdgpu-si-cik-default.patch
-# nobara/lenovo-legion-laptop.patch
-# asuslinux/0001-platform-x86-asus-wmi-add-support-for-2024-ROG-Mini-.patch
-# asuslinux/0002-platform-x86-asus-wmi-add-support-for-Vivobook-GPU-M.patch
-# asuslinux/0003-platform-x86-asus-wmi-add-support-variant-of-TUF-RGB.patch
-# asuslinux/0004-platform-x86-asus-wmi-support-toggling-POST-sound.patch
-# asuslinux/0005-platform-x86-asus-wmi-store-a-min-default-for-ppt-op.patch
diff --git a/release.sh b/release.sh
index f38fc8e..1e45a35 100755
--- a/release.sh
+++ b/release.sh
@@ -1,2 +1,2 @@
 # send debs to server
-rsync -azP --include './' --include '*.deb' --exclude '*' ./output/ ferreo@direct.pika-os.com:/srv/www/cockatiel-incoming/
\ No newline at end of file
+rsync -azP --include './' --include '*.deb' --exclude '*' ./output/ ferreo@direct.pika-os.com:/srv/www/cockatiel-incoming/
\ No newline at end of file
diff --git a/scripts/build.sh b/scripts-v3/build.sh
similarity index 100%
rename from scripts/build.sh
rename to scripts-v3/build.sh
diff --git a/config b/scripts-v3/config
similarity index 100%
rename from config
rename to scripts-v3/config
diff --git a/scripts-v3/config.sh b/scripts-v3/config.sh
new file mode 100755
index 0000000..31aec28
--- /dev/null
+++ b/scripts-v3/config.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# Applies the Pika kernel configuration switches below, then runs "make prepare".
+echo "Pika Kernel - Applying configuration"
+# scripts/config is the kernel tree's own helper; this repo's scripts-v3/config is the renamed base config data file, not a program.
+scripts/config -k -d CONFIG_GENERIC_CPU
+scripts/config -k -e CONFIG_GENERIC_CPU3
+scripts/config -e CACHY
+scripts/config -e SCHED_BORE
+
+scripts/config -e HZ_300 --set-val HZ 750
+scripts/config -d HZ_PERIODIC -d NO_HZ_IDLE -d CONTEXT_TRACKING_FORCE -e NO_HZ_FULL_NODEF -e NO_HZ_FULL -e NO_HZ -e NO_HZ_COMMON -e CONTEXT_TRACKING
+scripts/config -e PREEMPT_BUILD -d PREEMPT_NONE -d PREEMPT_VOLUNTARY -e PREEMPT -e PREEMPT_COUNT -e PREEMPTION -e PREEMPT_DYNAMIC
+
+scripts/config -d CC_OPTIMIZE_FOR_PERFORMANCE \
+            -e CC_OPTIMIZE_FOR_PERFORMANCE_O3
+# TCP congestion control: CUBIC becomes a module, BBR is enabled and set as the default.
+scripts/config -m TCP_CONG_CUBIC \
+            -d DEFAULT_CUBIC \
+            -e TCP_CONG_BBR \
+            -e DEFAULT_BBR \
+            --set-str DEFAULT_TCP_CONG bbr
+# Packet scheduler: fq_codel becomes a module, fq is enabled and set as the default.
+scripts/config -m NET_SCH_FQ_CODEL \
+            -e NET_SCH_FQ \
+            -d DEFAULT_FQ_CODEL \
+            -e DEFAULT_FQ \
+            --set-str DEFAULT_NET_SCH fq
+
+scripts/config -e LRU_GEN -e LRU_GEN_ENABLED -d LRU_GEN_STATS
+
+scripts/config -d TRANSPARENT_HUGEPAGE_MADVISE -e TRANSPARENT_HUGEPAGE_ALWAYS
+
+scripts/config -e PER_VMA_LOCK -d PER_VMA_LOCK_STATS
+
+scripts/config -e DAMON \
+            -e DAMON_VADDR \
+            -e DAMON_DBGFS \
+            -e DAMON_SYSFS \
+            -e DAMON_PADDR \
+            -e DAMON_RECLAIM \
+            -e DAMON_LRU_SORT
+
+scripts/config --set-val MODULE_COMPRESS_ZSTD_LEVEL 19 -e MODULE_COMPRESS_ZSTD_ULTRA --set-val MODULE_COMPRESS_ZSTD_LEVEL_ULTRA 22 --set-val ZSTD_COMP_VAL 22
+
+scripts/config -e EFI_HANDOVER_PROTOCOL
+
+scripts/config -e USER_NS
+# NOTE(review): presumably run from the kernel source directory (scripts/config and make both need it) - confirm in the caller.
+make prepare
diff --git a/scripts/output.sh b/scripts-v3/output.sh
similarity index 100%
rename from scripts/output.sh
rename to scripts-v3/output.sh
diff --git a/scripts-v3/patch.sh b/scripts-v3/patch.sh
new file mode 100755
index 0000000..fdfa7af
--- /dev/null
+++ b/scripts-v3/patch.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Apply every patch listed in ../patches/series (lines starting with '#' are skipped); stop with exit status 2 on the first failure.
+echo "Pika Kernel - Applying patches"
+
+if [ -f ../patches/series ]
+then
+    for i in $(grep -v '^#' ../patches/series) ; do echo "Applying Patch: $i"; patch -Np1 -i "../patches/$i" || { echo "Applying Patch $i Failed!" >&2; exit 2; }; done
+fi
\ No newline at end of file
diff --git a/scripts/source.sh b/scripts-v3/source.sh
similarity index 100%
rename from scripts/source.sh
rename to scripts-v3/source.sh
diff --git a/scripts/config.sh b/scripts/config.sh
deleted file mode 100755
index b1719da..0000000
--- a/scripts/config.sh
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/bin/bash
-
-echo "Pika Kernel - Applying configuration"
-
-cp ../config .config
-
-scripts/config -k -d CONFIG_GENERIC_CPU
-scripts/config -k -e CONFIG_GENERIC_CPU3
-scripts/config -e CACHY
-scripts/config -e SCHED_BORE
-
-scripts/config -e HZ_300 --set-val HZ 750
-scripts/config -d HZ_PERIODIC -d NO_HZ_IDLE -d CONTEXT_TRACKING_FORCE -e NO_HZ_FULL_NODEF -e NO_HZ_FULL -e NO_HZ -e NO_HZ_COMMON -e CONTEXT_TRACKING
-scripts/config -e PREEMPT_BUILD -d PREEMPT_NONE -d PREEMPT_VOLUNTARY -e PREEMPT -e PREEMPT_COUNT -e PREEMPTION -e PREEMPT_DYNAMIC
-
-scripts/config -d CC_OPTIMIZE_FOR_PERFORMANCE \
-            -e CC_OPTIMIZE_FOR_PERFORMANCE_O3
-
-scripts/config -m TCP_CONG_CUBIC \
-            -d DEFAULT_CUBIC \
-            -e TCP_CONG_BBR \
-            -e DEFAULT_BBR \
-            --set-str DEFAULT_TCP_CONG bbr
-
-scripts/config -m NET_SCH_FQ_CODEL \
-            -e NET_SCH_FQ \
-            -d DEFAULT_FQ_CODEL \
-            -e DEFAULT_FQ \
-            --set-str DEFAULT_NET_SCH fq
-
-scripts/config -e LRU_GEN -e LRU_GEN_ENABLED -d LRU_GEN_STATS
-
-scripts/config -d TRANSPARENT_HUGEPAGE_MADVISE -e TRANSPARENT_HUGEPAGE_ALWAYS
-
-scripts/config -e PER_VMA_LOCK -d PER_VMA_LOCK_STATS
-
-scripts/config -e DAMON \
-            -e DAMON_VADDR \
-            -e DAMON_DBGFS \
-            -e DAMON_SYSFS \
-            -e DAMON_PADDR \
-            -e DAMON_RECLAIM \
-            -e DAMON_LRU_SORT
-
-scripts/config --set-val MODULE_COMPRESS_ZSTD_LEVEL 19 -e MODULE_COMPRESS_ZSTD_ULTRA --set-val MODULE_COMPRESS_ZSTD_LEVEL_ULTRA 22 --set-val ZSTD_COMP_VAL 22
-
-scripts/config -e EFI_HANDOVER_PROTOCOL
-
-scripts/config -e USER_NS
-
-make prepare
diff --git a/scripts/patch.sh b/scripts/patch.sh
deleted file mode 100755
index a54d3e4..0000000
--- a/scripts/patch.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/bash
-
-echo "Pika Kernel - Applying patches"
-
-for i in $(cat ../patches/series | grep -v '^#') ; do echo "Applying Patch: $i" && patch -Np1 -i ../patches/$i || bash -c "echo "Applying Patch $i Failed!" && exit 2"; done