From fc878d9d3b7685b5508da9fa4f13e80ecce3b62c Mon Sep 17 00:00:00 2001
From: Kernel Packages Group <>
Date: Mon, 4 Nov 2024 21:25:11 +0100
Subject: [PATCH] Initial commit

---
 .github/build-canary-v3                |     1 +
 .github/build-nest-v3                  |     1 +
 .github/release-canary-v3              |     1 +
 .github/release-nest-v3                |     1 +
 .github/workflows/build-canaryv3.yml   |    34 +
 .github/workflows/build-nestv3.yml     |    34 +
 .github/workflows/release-canaryv3.yml |    37 +
 .github/workflows/release-nestv3.yml   |    37 +
 LICENSE.md                             |    24 +
 README.md                              |     8 +
 VERSION                                |     1 +
 config                                 | 12032 ++++++
 main.sh                                |    10 +
 patches/0001-cachyos-base-all.patch    | 48099 +++++++++++++++++++++++
 patches/0002-sched-ext.patch           | 17925 +++++++++
 patches/0003-bore-cachy-ext.patch      |  1020 +
 patches/series                         |     3 +
 release.sh                             |     2 +
 scripts/build.sh                       |     5 +
 scripts/config.sh                      |    47 +
 scripts/output.sh                      |    11 +
 scripts/patch.sh                       |     8 +
 scripts/source.sh                      |     8 +
 23 files changed, 79349 insertions(+)
 create mode 100644 .github/build-canary-v3
 create mode 100644 .github/build-nest-v3
 create mode 100644 .github/release-canary-v3
 create mode 100644 .github/release-nest-v3
 create mode 100644 .github/workflows/build-canaryv3.yml
 create mode 100644 .github/workflows/build-nestv3.yml
 create mode 100644 .github/workflows/release-canaryv3.yml
 create mode 100644 .github/workflows/release-nestv3.yml
 create mode 100644 LICENSE.md
 create mode 100644 README.md
 create mode 100644 VERSION
 create mode 100644 config
 create mode 100755 main.sh
 create mode 100644 patches/0001-cachyos-base-all.patch
 create mode 100644 patches/0002-sched-ext.patch
 create mode 100644 patches/0003-bore-cachy-ext.patch
 create mode 100644 patches/series
 create mode 100755 release.sh
 create mode 100755 scripts/build.sh
 create mode 100755 scripts/config.sh
 create mode 100755 scripts/output.sh
 create mode 100755 scripts/patch.sh
 create mode 100755 scripts/source.sh

diff --git a/.github/build-canary-v3 b/.github/build-canary-v3
new file mode 100644
index 0000000..56a6051
--- /dev/null
+++ b/.github/build-canary-v3
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/.github/build-nest-v3 b/.github/build-nest-v3
new file mode 100644
index 0000000..56a6051
--- /dev/null
+++ b/.github/build-nest-v3
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/.github/release-canary-v3 b/.github/release-canary-v3
new file mode 100644
index 0000000..56a6051
--- /dev/null
+++ b/.github/release-canary-v3
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/.github/release-nest-v3 b/.github/release-nest-v3
new file mode 100644
index 0000000..56a6051
--- /dev/null
+++ b/.github/release-nest-v3
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/.github/workflows/build-canaryv3.yml b/.github/workflows/build-canaryv3.yml
new file mode 100644
index 0000000..1e70945
--- /dev/null
+++ b/.github/workflows/build-canaryv3.yml
@@ -0,0 +1,34 @@
+name: PikaOS Package Build Only (Canary) (amd64-v3)
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - '.github/build-canary-v3'  # only pushes that change this marker file start a run
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    container:  # every step below executes inside this builder image
+      image: ghcr.io/pikaos-linux/pikaos-builder:canaryv3
+      volumes:
+        - /proc:/proc  # bind-mounts the runner's /proc into the container
+      options: --privileged -it
+
+    steps:
+    - uses: actions/checkout@v4  # v3 targets the deprecated Node 16 action runtime
+
+    - name: Install SSH key  # NOTE(review): presumably consumed by main.sh; confirm
+      uses: shimataro/ssh-key-action@v2
+      with:
+        key: ${{ secrets.SSH_KEY }}
+        name: id_rsa
+        known_hosts: ${{ secrets.KNOWN_HOSTS }}
+        if_key_exists: replace
+
+    - name: Update APT Cache
+      run: apt-get update -y
+
+    - name: Build Package
+      run: ./main.sh
diff --git a/.github/workflows/build-nestv3.yml b/.github/workflows/build-nestv3.yml
new file mode 100644
index 0000000..cdd416e
--- /dev/null
+++ b/.github/workflows/build-nestv3.yml
@@ -0,0 +1,34 @@
+name: PikaOS Package Build Only (amd64-v3)
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - '.github/build-nest-v3'  # only pushes that change this marker file start a run
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    container:  # every step below executes inside this builder image
+      image: ghcr.io/pikaos-linux/pikaos-builder:nestv3
+      volumes:
+        - /proc:/proc  # bind-mounts the runner's /proc into the container
+      options: --privileged -it
+
+    steps:
+    - uses: actions/checkout@v4  # v3 targets the deprecated Node 16 action runtime
+
+    - name: Install SSH key  # NOTE(review): presumably consumed by main.sh; confirm
+      uses: shimataro/ssh-key-action@v2
+      with:
+        key: ${{ secrets.SSH_KEY }}
+        name: id_rsa
+        known_hosts: ${{ secrets.KNOWN_HOSTS }}
+        if_key_exists: replace
+
+    - name: Update APT Cache
+      run: apt-get update -y
+
+    - name: Build Package
+      run: ./main.sh
diff --git a/.github/workflows/release-canaryv3.yml b/.github/workflows/release-canaryv3.yml
new file mode 100644
index 0000000..ed29aeb
--- /dev/null
+++ b/.github/workflows/release-canaryv3.yml
@@ -0,0 +1,37 @@
+name: PikaOS Package Build & Release (Canary) (amd64-v3)
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - '.github/release-canary-v3'  # only pushes that change this marker file start a run
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    container:  # every step below executes inside this builder image
+      image: ghcr.io/pikaos-linux/pikaos-builder:canaryv3
+      volumes:
+        - /proc:/proc  # bind-mounts the runner's /proc into the container
+      options: --privileged -it
+
+    steps:
+    - uses: actions/checkout@v4  # v3 targets the deprecated Node 16 action runtime
+
+    - name: Install SSH key  # NOTE(review): presumably consumed by release.sh; confirm
+      uses: shimataro/ssh-key-action@v2
+      with:
+        key: ${{ secrets.SSH_KEY }}
+        name: id_rsa
+        known_hosts: ${{ secrets.KNOWN_HOSTS }}
+        if_key_exists: replace
+
+    - name: Update APT Cache
+      run: apt-get update -y
+
+    - name: Build Package
+      run: ./main.sh
+
+    - name: Release Package  # runs only if the build step above succeeded
+      run: ./release.sh
diff --git a/.github/workflows/release-nestv3.yml b/.github/workflows/release-nestv3.yml
new file mode 100644
index 0000000..674b5f1
--- /dev/null
+++ b/.github/workflows/release-nestv3.yml
@@ -0,0 +1,37 @@
+name: PikaOS Package Build & Release (amd64-v3)
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - '.github/release-nest-v3'  # only pushes that change this marker file start a run
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    container:  # every step below executes inside this builder image
+      image: ghcr.io/pikaos-linux/pikaos-builder:nestv3
+      volumes:
+        - /proc:/proc  # bind-mounts the runner's /proc into the container
+      options: --privileged -it
+
+    steps:
+    - uses: actions/checkout@v4  # v3 targets the deprecated Node 16 action runtime
+
+    - name: Install SSH key  # NOTE(review): presumably consumed by release.sh; confirm
+      uses: shimataro/ssh-key-action@v2
+      with:
+        key: ${{ secrets.SSH_KEY }}
+        name: id_rsa
+        known_hosts: ${{ secrets.KNOWN_HOSTS }}
+        if_key_exists: replace
+
+    - name: Update APT Cache
+      run: apt-get update -y
+
+    - name: Build Package
+      run: ./main.sh
+
+    - name: Release Package  # runs only if the build step above succeeded
+      run: ./release.sh
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..16ff30b
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,24 @@
+MIT License (With DPKG packaging compatibility)
+
+Copyright (c) 2024 PikaOS
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+Notes:
+The files covered by this license are any files and directories in the root of this repository (including but not limited to: `main.sh`, `release.sh`, and `.github`), with the exception of the `debian` directory and its contents if `debian/copyright` exists, and declares any files or directories as a different LICENSE/COPYRIGHT.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..da0e3bc
--- /dev/null
+++ b/README.md
@@ -0,0 +1,8 @@
+# pika-kernel-builder
+Scripts for building and releasing the Pika kernel, based on the CachyOS kernel patches - <3 them
+
+To add our kernel APT repository, run:
+
+wget -q -O - https://ppa.pika-os.com/key.gpg | sudo apt-key add -
+
+sudo add-apt-repository https://ppa.pika-os.com
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000..a003776
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+6.11.5
diff --git a/config b/config
new file mode 100644
index 0000000..d68066a
--- /dev/null
+++ b/config
@@ -0,0 +1,12032 @@
+#
+# Automatically generated file; DO NOT EDIT.
+# Linux/x86 6.11.4 Kernel Configuration
+#
+CONFIG_CC_VERSION_TEXT="gcc (GCC) 14.2.1 20240910"
+CONFIG_CC_IS_GCC=y
+CONFIG_GCC_VERSION=140201
+CONFIG_CLANG_VERSION=0
+CONFIG_AS_IS_GNU=y
+CONFIG_AS_VERSION=24300
+CONFIG_LD_IS_BFD=y
+CONFIG_LD_VERSION=24300
+CONFIG_LLD_VERSION=0
+CONFIG_RUST_IS_AVAILABLE=y
+CONFIG_CC_CAN_LINK=y
+CONFIG_CC_CAN_LINK_STATIC=y
+CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y
+CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y
+CONFIG_TOOLS_SUPPORT_RELR=y
+CONFIG_CC_HAS_ASM_INLINE=y
+CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y
+CONFIG_PAHOLE_VERSION=127
+CONFIG_IRQ_WORK=y
+CONFIG_BUILDTIME_TABLE_SORT=y
+CONFIG_THREAD_INFO_IN_TASK=y
+# CONFIG_ECHO_SCHED is not set
+
+#
+# General setup
+#
+# CONFIG_CACHY is not set
+CONFIG_INIT_ENV_ARG_LIMIT=32
+# CONFIG_COMPILE_TEST is not set
+# CONFIG_WERROR is not set
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_BUILD_SALT=""
+CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_BZIP2=y
+CONFIG_HAVE_KERNEL_LZMA=y
+CONFIG_HAVE_KERNEL_XZ=y
+CONFIG_HAVE_KERNEL_LZO=y
+CONFIG_HAVE_KERNEL_LZ4=y
+CONFIG_HAVE_KERNEL_ZSTD=y
+# CONFIG_KERNEL_GZIP is not set
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
+# CONFIG_KERNEL_XZ is not set
+# CONFIG_KERNEL_LZO is not set
+# CONFIG_KERNEL_LZ4 is not set
+CONFIG_KERNEL_ZSTD=y
+CONFIG_DEFAULT_INIT=""
+CONFIG_DEFAULT_HOSTNAME="pikaos"
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+CONFIG_SYSVIPC_COMPAT=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
+CONFIG_WATCH_QUEUE=y
+CONFIG_CROSS_MEMORY_ATTACH=y
+# CONFIG_USELIB is not set
+CONFIG_AUDIT=y
+CONFIG_HAVE_ARCH_AUDITSYSCALL=y
+CONFIG_AUDITSYSCALL=y
+
+#
+# IRQ subsystem
+#
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_IRQ_SHOW=y
+CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y
+CONFIG_GENERIC_PENDING_IRQ=y
+CONFIG_GENERIC_IRQ_MIGRATION=y
+CONFIG_GENERIC_IRQ_INJECTION=y
+CONFIG_HARDIRQS_SW_RESEND=y
+CONFIG_GENERIC_IRQ_CHIP=y
+CONFIG_IRQ_DOMAIN=y
+CONFIG_IRQ_SIM=y
+CONFIG_IRQ_DOMAIN_HIERARCHY=y
+CONFIG_GENERIC_MSI_IRQ=y
+CONFIG_IRQ_MSI_IOMMU=y
+CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR=y
+CONFIG_GENERIC_IRQ_RESERVATION_MODE=y
+CONFIG_GENERIC_IRQ_STAT_SNAPSHOT=y
+CONFIG_IRQ_FORCED_THREADING=y
+CONFIG_SPARSE_IRQ=y
+# CONFIG_GENERIC_IRQ_DEBUGFS is not set
+# end of IRQ subsystem
+
+CONFIG_CLOCKSOURCE_WATCHDOG=y
+CONFIG_ARCH_CLOCKSOURCE_INIT=y
+CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y
+CONFIG_GENERIC_TIME_VSYSCALL=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
+CONFIG_GENERIC_CLOCKEVENTS_BROADCAST_IDLE=y
+CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y
+CONFIG_GENERIC_CMOS_UPDATE=y
+CONFIG_HAVE_POSIX_CPU_TIMERS_TASK_WORK=y
+CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y
+CONFIG_CONTEXT_TRACKING=y
+CONFIG_CONTEXT_TRACKING_IDLE=y
+
+#
+# Timers subsystem
+#
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ_COMMON=y
+# CONFIG_HZ_PERIODIC is not set
+# CONFIG_NO_HZ_IDLE is not set
+CONFIG_NO_HZ_FULL=y
+CONFIG_CONTEXT_TRACKING_USER=y
+# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US=100
+# end of Timers subsystem
+
+CONFIG_BPF=y
+CONFIG_HAVE_EBPF_JIT=y
+CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
+
+#
+# BPF subsystem
+#
+CONFIG_BPF_SYSCALL=y
+CONFIG_BPF_JIT=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_JIT_DEFAULT_ON=y
+CONFIG_BPF_UNPRIV_DEFAULT_OFF=y
+# CONFIG_BPF_PRELOAD is not set
+CONFIG_BPF_LSM=y
+# end of BPF subsystem
+
+CONFIG_PREEMPT_BUILD=y
+# CONFIG_PREEMPT_NONE is not set
+# CONFIG_PREEMPT_VOLUNTARY is not set
+CONFIG_PREEMPT=y
+CONFIG_PREEMPT_COUNT=y
+CONFIG_PREEMPTION=y
+CONFIG_PREEMPT_DYNAMIC=y
+CONFIG_SCHED_CORE=y
+# CONFIG_SCHED_CLASS_EXT is not set
+
+#
+# CPU/Task time and stats accounting
+#
+CONFIG_VIRT_CPU_ACCOUNTING=y
+CONFIG_VIRT_CPU_ACCOUNTING_GEN=y
+CONFIG_IRQ_TIME_ACCOUNTING=y
+CONFIG_HAVE_SCHED_AVG_IRQ=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_PSI=y
+# CONFIG_PSI_DEFAULT_DISABLED is not set
+# end of CPU/Task time and stats accounting
+
+CONFIG_CPU_ISOLATION=y
+
+#
+# RCU Subsystem
+#
+CONFIG_TREE_RCU=y
+CONFIG_PREEMPT_RCU=y
+CONFIG_RCU_EXPERT=y
+CONFIG_TREE_SRCU=y
+CONFIG_TASKS_RCU_GENERIC=y
+# CONFIG_FORCE_TASKS_RCU is not set
+CONFIG_NEED_TASKS_RCU=y
+CONFIG_TASKS_RCU=y
+# CONFIG_FORCE_TASKS_RUDE_RCU is not set
+CONFIG_TASKS_RUDE_RCU=y
+# CONFIG_FORCE_TASKS_TRACE_RCU is not set
+CONFIG_TASKS_TRACE_RCU=y
+CONFIG_RCU_STALL_COMMON=y
+CONFIG_RCU_NEED_SEGCBLIST=y
+CONFIG_RCU_FANOUT=64
+CONFIG_RCU_FANOUT_LEAF=16
+CONFIG_RCU_BOOST=y
+CONFIG_RCU_BOOST_DELAY=500
+# CONFIG_RCU_EXP_KTHREAD is not set
+CONFIG_RCU_NOCB_CPU=y
+# CONFIG_RCU_NOCB_CPU_DEFAULT_ALL is not set
+# CONFIG_RCU_NOCB_CPU_CB_BOOST is not set
+# CONFIG_TASKS_TRACE_RCU_READ_MB is not set
+CONFIG_RCU_LAZY=y
+CONFIG_RCU_LAZY_DEFAULT_OFF=y
+CONFIG_RCU_DOUBLE_CHECK_CB_TIME=y
+# end of RCU Subsystem
+
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_IKHEADERS=m
+CONFIG_LOG_BUF_SHIFT=17
+CONFIG_LOG_CPU_MAX_BUF_SHIFT=12
+CONFIG_PRINTK_INDEX=y
+CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
+
+#
+# Scheduler features
+#
+CONFIG_UCLAMP_TASK=y
+CONFIG_UCLAMP_BUCKETS_COUNT=5
+# CONFIG_SCHED_ALT is not set
+# CONFIG_SCHED_BMQ is not set
+# CONFIG_SCHED_PDS is not set
+# end of Scheduler features
+
+CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y
+CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y
+CONFIG_CC_HAS_INT128=y
+CONFIG_CC_IMPLICIT_FALLTHROUGH="-Wimplicit-fallthrough=5"
+CONFIG_GCC10_NO_ARRAY_BOUNDS=y
+CONFIG_CC_NO_ARRAY_BOUNDS=y
+CONFIG_GCC_NO_STRINGOP_OVERFLOW=y
+CONFIG_CC_NO_STRINGOP_OVERFLOW=y
+CONFIG_ARCH_SUPPORTS_INT128=y
+CONFIG_NUMA_BALANCING=y
+CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y
+CONFIG_SLAB_OBJ_EXT=y
+CONFIG_CGROUPS=y
+CONFIG_PAGE_COUNTER=y
+# CONFIG_CGROUP_FAVOR_DYNMODS is not set
+CONFIG_MEMCG=y
+# CONFIG_MEMCG_V1 is not set
+CONFIG_BLK_CGROUP=y
+CONFIG_CGROUP_WRITEBACK=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_GROUP_SCHED_WEIGHT=y
+CONFIG_FAIR_GROUP_SCHED=y
+CONFIG_CFS_BANDWIDTH=y
+# CONFIG_RT_GROUP_SCHED is not set
+CONFIG_EXT_GROUP_SCHED=y
+CONFIG_SCHED_MM_CID=y
+CONFIG_UCLAMP_TASK_GROUP=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_RDMA=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CPUSETS=y
+CONFIG_PROC_PID_CPUSET=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_BPF=y
+CONFIG_CGROUP_MISC=y
+# CONFIG_CGROUP_DEBUG is not set
+CONFIG_SOCK_CGROUP_DATA=y
+CONFIG_NAMESPACES=y
+CONFIG_UTS_NS=y
+CONFIG_TIME_NS=y
+CONFIG_IPC_NS=y
+CONFIG_USER_NS=y
+CONFIG_USER_NS_UNPRIVILEGED=y
+CONFIG_PID_NS=y
+CONFIG_NET_NS=y
+CONFIG_CHECKPOINT_RESTORE=y
+# CONFIG_SCHED_BORE is not set
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
+CONFIG_RD_XZ=y
+CONFIG_RD_LZO=y
+CONFIG_RD_LZ4=y
+CONFIG_RD_ZSTD=y
+CONFIG_BOOT_CONFIG=y
+# CONFIG_BOOT_CONFIG_FORCE is not set
+# CONFIG_BOOT_CONFIG_EMBED is not set
+CONFIG_INITRAMFS_PRESERVE_MTIME=y
+# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set
+CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_LD_ORPHAN_WARN=y
+CONFIG_LD_ORPHAN_WARN_LEVEL="warn"
+CONFIG_SYSCTL=y
+CONFIG_HAVE_UID16=y
+CONFIG_SYSCTL_EXCEPTION_TRACE=y
+CONFIG_HAVE_PCSPKR_PLATFORM=y
+# CONFIG_EXPERT is not set
+CONFIG_UID16=y
+CONFIG_MULTIUSER=y
+CONFIG_SGETMASK_SYSCALL=y
+CONFIG_SYSFS_SYSCALL=y
+CONFIG_FHANDLE=y
+CONFIG_POSIX_TIMERS=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_PCSPKR_PLATFORM=y
+CONFIG_FUTEX=y
+CONFIG_FUTEX_PI=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_IO_URING=y
+CONFIG_ADVISE_SYSCALLS=y
+CONFIG_MEMBARRIER=y
+CONFIG_KCMP=y
+CONFIG_RSEQ=y
+CONFIG_CACHESTAT_SYSCALL=y
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_SELFTEST is not set
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y
+CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y
+CONFIG_HAVE_PERF_EVENTS=y
+CONFIG_GUEST_PERF_EVENTS=y
+
+#
+# Kernel Performance Events And Counters
+#
+CONFIG_PERF_EVENTS=y
+# CONFIG_DEBUG_PERF_USE_VMALLOC is not set
+# end of Kernel Performance Events And Counters
+
+CONFIG_SYSTEM_DATA_VERIFICATION=y
+CONFIG_PROFILING=y
+CONFIG_TRACEPOINTS=y
+
+#
+# Kexec and crash features
+#
+CONFIG_CRASH_RESERVE=y
+CONFIG_VMCORE_INFO=y
+CONFIG_KEXEC_CORE=y
+CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_SIG=y
+# CONFIG_KEXEC_SIG_FORCE is not set
+CONFIG_KEXEC_BZIMAGE_VERIFY_SIG=y
+CONFIG_KEXEC_JUMP=y
+CONFIG_CRASH_DUMP=y
+CONFIG_CRASH_HOTPLUG=y
+CONFIG_CRASH_MAX_MEMORY_RANGES=8192
+# end of Kexec and crash features
+# end of General setup
+
+CONFIG_64BIT=y
+CONFIG_X86_64=y
+CONFIG_X86=y
+CONFIG_INSTRUCTION_DECODER=y
+CONFIG_OUTPUT_FORMAT="elf64-x86-64"
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_MMU=y
+CONFIG_ARCH_MMAP_RND_BITS_MIN=28
+CONFIG_ARCH_MMAP_RND_BITS_MAX=32
+CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8
+CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16
+CONFIG_GENERIC_ISA_DMA=y
+CONFIG_GENERIC_BUG=y
+CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_ARCH_HAS_CPU_RELAX=y
+CONFIG_ARCH_HIBERNATION_POSSIBLE=y
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+CONFIG_AUDIT_ARCH=y
+CONFIG_HAVE_INTEL_TXT=y
+CONFIG_X86_64_SMP=y
+CONFIG_ARCH_SUPPORTS_UPROBES=y
+CONFIG_FIX_EARLYCON_MEM=y
+CONFIG_DYNAMIC_PHYSICAL_MASK=y
+CONFIG_PGTABLE_LEVELS=5
+CONFIG_CC_HAS_SANE_STACKPROTECTOR=y
+
+#
+# Processor type and features
+#
+CONFIG_SMP=y
+CONFIG_X86_X2APIC=y
+CONFIG_X86_POSTED_MSI=y
+CONFIG_X86_MPPARSE=y
+CONFIG_X86_CPU_RESCTRL=y
+CONFIG_X86_FRED=y
+# CONFIG_X86_EXTENDED_PLATFORM is not set
+CONFIG_X86_INTEL_LPSS=y
+CONFIG_X86_AMD_PLATFORM_DEVICE=y
+CONFIG_IOSF_MBI=y
+# CONFIG_IOSF_MBI_DEBUG is not set
+CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y
+CONFIG_SCHED_OMIT_FRAME_POINTER=y
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PARAVIRT=y
+CONFIG_PARAVIRT_XXL=y
+# CONFIG_PARAVIRT_DEBUG is not set
+CONFIG_PARAVIRT_SPINLOCKS=y
+CONFIG_X86_HV_CALLBACK_VECTOR=y
+CONFIG_XEN=y
+CONFIG_XEN_PV=y
+CONFIG_XEN_512GB=y
+CONFIG_XEN_PV_SMP=y
+CONFIG_XEN_PV_DOM0=y
+CONFIG_XEN_PVHVM=y
+CONFIG_XEN_PVHVM_SMP=y
+CONFIG_XEN_PVHVM_GUEST=y
+CONFIG_XEN_SAVE_RESTORE=y
+# CONFIG_XEN_DEBUG_FS is not set
+CONFIG_XEN_PVH=y
+CONFIG_XEN_DOM0=y
+CONFIG_XEN_PV_MSR_SAFE=y
+CONFIG_KVM_GUEST=y
+CONFIG_ARCH_CPUIDLE_HALTPOLL=y
+CONFIG_PVH=y
+CONFIG_PARAVIRT_TIME_ACCOUNTING=y
+CONFIG_PARAVIRT_CLOCK=y
+CONFIG_JAILHOUSE_GUEST=y
+CONFIG_ACRN_GUEST=y
+CONFIG_INTEL_TDX_GUEST=y
+# CONFIG_MK8 is not set
+# CONFIG_MK8SSE3 is not set
+# CONFIG_MK10 is not set
+# CONFIG_MBARCELONA is not set
+# CONFIG_MBOBCAT is not set
+# CONFIG_MJAGUAR is not set
+# CONFIG_MBULLDOZER is not set
+# CONFIG_MPILEDRIVER is not set
+# CONFIG_MSTEAMROLLER is not set
+# CONFIG_MEXCAVATOR is not set
+# CONFIG_MZEN is not set
+# CONFIG_MZEN2 is not set
+# CONFIG_MZEN3 is not set
+# CONFIG_MZEN4 is not set
+# CONFIG_MZEN5 is not set
+# CONFIG_MPSC is not set
+# CONFIG_MATOM is not set
+# CONFIG_MCORE2 is not set
+# CONFIG_MNEHALEM is not set
+# CONFIG_MWESTMERE is not set
+# CONFIG_MSILVERMONT is not set
+# CONFIG_MGOLDMONT is not set
+# CONFIG_MGOLDMONTPLUS is not set
+# CONFIG_MSANDYBRIDGE is not set
+# CONFIG_MIVYBRIDGE is not set
+# CONFIG_MHASWELL is not set
+# CONFIG_MBROADWELL is not set
+# CONFIG_MSKYLAKE is not set
+# CONFIG_MSKYLAKEX is not set
+# CONFIG_MCANNONLAKE is not set
+# CONFIG_MICELAKE is not set
+# CONFIG_MCASCADELAKE is not set
+# CONFIG_MCOOPERLAKE is not set
+# CONFIG_MTIGERLAKE is not set
+# CONFIG_MSAPPHIRERAPIDS is not set
+# CONFIG_MROCKETLAKE is not set
+# CONFIG_MALDERLAKE is not set
+# CONFIG_MRAPTORLAKE is not set
+# CONFIG_MMETEORLAKE is not set
+# CONFIG_MEMERALDRAPIDS is not set
+CONFIG_GENERIC_CPU=y
+# CONFIG_MNATIVE_INTEL is not set
+# CONFIG_MNATIVE_AMD is not set
+CONFIG_SUPPORT_MARCH_CODEVERS=y
+CONFIG_X86_64_VERSION=1
+CONFIG_X86_INTERNODE_CACHE_SHIFT=6
+CONFIG_X86_L1_CACHE_SHIFT=6
+CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
+CONFIG_X86_HAVE_PAE=y
+CONFIG_X86_CMPXCHG64=y
+CONFIG_X86_CMOV=y
+CONFIG_X86_MINIMUM_CPU_FAMILY=64
+CONFIG_X86_DEBUGCTLMSR=y
+CONFIG_IA32_FEAT_CTL=y
+CONFIG_X86_VMX_FEATURE_NAMES=y
+CONFIG_CPU_SUP_INTEL=y
+CONFIG_CPU_SUP_AMD=y
+CONFIG_CPU_SUP_HYGON=y
+CONFIG_CPU_SUP_CENTAUR=y
+CONFIG_CPU_SUP_ZHAOXIN=y
+CONFIG_HPET_TIMER=y
+CONFIG_HPET_EMULATE_RTC=y
+CONFIG_DMI=y
+# CONFIG_GART_IOMMU is not set
+CONFIG_BOOT_VESA_SUPPORT=y
+# CONFIG_MAXSMP is not set
+CONFIG_NR_CPUS_RANGE_BEGIN=2
+CONFIG_NR_CPUS_RANGE_END=512
+CONFIG_NR_CPUS_DEFAULT=64
+CONFIG_NR_CPUS=320
+CONFIG_SCHED_CLUSTER=y
+CONFIG_SCHED_SMT=y
+CONFIG_SCHED_MC=y
+CONFIG_SCHED_MC_PRIO=y
+CONFIG_X86_LOCAL_APIC=y
+CONFIG_ACPI_MADT_WAKEUP=y
+CONFIG_X86_IO_APIC=y
+CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
+CONFIG_X86_MCE=y
+# CONFIG_X86_MCELOG_LEGACY is not set
+CONFIG_X86_MCE_INTEL=y
+CONFIG_X86_MCE_AMD=y
+CONFIG_X86_MCE_THRESHOLD=y
+CONFIG_X86_MCE_INJECT=m
+
+#
+# Performance monitoring
+#
+CONFIG_PERF_EVENTS_INTEL_UNCORE=m
+CONFIG_PERF_EVENTS_INTEL_RAPL=m
+CONFIG_PERF_EVENTS_INTEL_CSTATE=m
+CONFIG_PERF_EVENTS_AMD_POWER=m
+CONFIG_PERF_EVENTS_AMD_UNCORE=m
+CONFIG_PERF_EVENTS_AMD_BRS=y
+# end of Performance monitoring
+
+CONFIG_X86_16BIT=y
+CONFIG_X86_ESPFIX64=y
+CONFIG_X86_VSYSCALL_EMULATION=y
+CONFIG_X86_IOPL_IOPERM=y
+CONFIG_MICROCODE=y
+# CONFIG_MICROCODE_LATE_LOADING is not set
+CONFIG_X86_MSR=y
+CONFIG_X86_CPUID=y
+CONFIG_X86_5LEVEL=y
+CONFIG_X86_DIRECT_GBPAGES=y
+CONFIG_X86_CPA_STATISTICS=y
+CONFIG_X86_MEM_ENCRYPT=y
+CONFIG_AMD_MEM_ENCRYPT=y
+CONFIG_NUMA=y
+CONFIG_AMD_NUMA=y
+CONFIG_X86_64_ACPI_NUMA=y
+# CONFIG_NUMA_EMU is not set
+CONFIG_NODES_SHIFT=5
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_DEFAULT=y
+# CONFIG_ARCH_MEMORY_PROBE is not set
+CONFIG_ARCH_PROC_KCORE_TEXT=y
+CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000
+CONFIG_X86_PMEM_LEGACY_DEVICE=y
+CONFIG_X86_PMEM_LEGACY=m
+CONFIG_X86_CHECK_BIOS_CORRUPTION=y
+CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y
+CONFIG_MTRR=y
+CONFIG_MTRR_SANITIZER=y
+CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=1
+CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=0
+CONFIG_X86_PAT=y
+CONFIG_ARCH_USES_PG_UNCACHED=y
+CONFIG_X86_UMIP=y
+CONFIG_CC_HAS_IBT=y
+CONFIG_X86_CET=y
+CONFIG_X86_KERNEL_IBT=y
+CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y
+# CONFIG_X86_INTEL_TSX_MODE_OFF is not set
+# CONFIG_X86_INTEL_TSX_MODE_ON is not set
+CONFIG_X86_INTEL_TSX_MODE_AUTO=y
+CONFIG_X86_SGX=y
+CONFIG_X86_USER_SHADOW_STACK=y
+CONFIG_EFI=y
+CONFIG_EFI_STUB=y
+CONFIG_EFI_HANDOVER_PROTOCOL=y
+CONFIG_EFI_MIXED=y
+CONFIG_EFI_RUNTIME_MAP=y
+# CONFIG_HZ_100 is not set
+# CONFIG_HZ_250 is not set
+CONFIG_HZ_300=y
+# CONFIG_HZ_500 is not set
+# CONFIG_HZ_600 is not set
+# CONFIG_HZ_750 is not set
+# CONFIG_HZ_625 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=300
+CONFIG_MIN_BASE_SLICE_NS=2000000
+CONFIG_SCHED_HRTICK=y
+CONFIG_ARCH_SUPPORTS_KEXEC=y
+CONFIG_ARCH_SUPPORTS_KEXEC_FILE=y
+CONFIG_ARCH_SELECTS_KEXEC_FILE=y
+CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY=y
+CONFIG_ARCH_SUPPORTS_KEXEC_SIG=y
+CONFIG_ARCH_SUPPORTS_KEXEC_SIG_FORCE=y
+CONFIG_ARCH_SUPPORTS_KEXEC_BZIMAGE_VERIFY_SIG=y
+CONFIG_ARCH_SUPPORTS_KEXEC_JUMP=y
+CONFIG_ARCH_SUPPORTS_CRASH_DUMP=y
+CONFIG_ARCH_SUPPORTS_CRASH_HOTPLUG=y
+CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION=y
+CONFIG_PHYSICAL_START=0x1000000
+CONFIG_RELOCATABLE=y
+CONFIG_RANDOMIZE_BASE=y
+CONFIG_X86_NEED_RELOCS=y
+CONFIG_PHYSICAL_ALIGN=0x200000
+CONFIG_DYNAMIC_MEMORY_LAYOUT=y
+CONFIG_RANDOMIZE_MEMORY=y
+CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING=0xa
+CONFIG_ADDRESS_MASKING=y
+CONFIG_HOTPLUG_CPU=y
+# CONFIG_COMPAT_VDSO is not set
+CONFIG_LEGACY_VSYSCALL_XONLY=y
+# CONFIG_LEGACY_VSYSCALL_NONE is not set
+# CONFIG_CMDLINE_BOOL is not set
+CONFIG_MODIFY_LDT_SYSCALL=y
+# CONFIG_STRICT_SIGALTSTACK_SIZE is not set
+CONFIG_HAVE_LIVEPATCH=y
+# CONFIG_LIVEPATCH is not set
+# end of Processor type and features
+
+CONFIG_CC_HAS_NAMED_AS=y
+CONFIG_CC_HAS_NAMED_AS_FIXED_SANITIZERS=y
+CONFIG_USE_X86_SEG_SUPPORT=y
+CONFIG_CC_HAS_SLS=y
+CONFIG_CC_HAS_RETURN_THUNK=y
+CONFIG_CC_HAS_ENTRY_PADDING=y
+CONFIG_FUNCTION_PADDING_CFI=11
+CONFIG_FUNCTION_PADDING_BYTES=16
+CONFIG_CALL_PADDING=y
+CONFIG_HAVE_CALL_THUNKS=y
+CONFIG_CALL_THUNKS=y
+CONFIG_PREFIX_SYMBOLS=y
+CONFIG_CPU_MITIGATIONS=y
+CONFIG_MITIGATION_PAGE_TABLE_ISOLATION=y
+CONFIG_MITIGATION_RETPOLINE=y
+CONFIG_MITIGATION_RETHUNK=y
+CONFIG_MITIGATION_UNRET_ENTRY=y
+CONFIG_MITIGATION_CALL_DEPTH_TRACKING=y
+# CONFIG_CALL_THUNKS_DEBUG is not set
+CONFIG_MITIGATION_IBPB_ENTRY=y
+CONFIG_MITIGATION_IBRS_ENTRY=y
+CONFIG_MITIGATION_SRSO=y
+CONFIG_MITIGATION_SLS=y
+# CONFIG_MITIGATION_GDS_FORCE is not set
+CONFIG_MITIGATION_RFDS=y
+CONFIG_MITIGATION_SPECTRE_BHI=y
+CONFIG_ARCH_HAS_ADD_PAGES=y
+
+#
+# Power management and ACPI options
+#
+CONFIG_ARCH_HIBERNATION_HEADER=y
+CONFIG_SUSPEND=y
+CONFIG_SUSPEND_FREEZER=y
+CONFIG_HIBERNATE_CALLBACKS=y
+CONFIG_HIBERNATION=y
+CONFIG_HIBERNATION_SNAPSHOT_DEV=y
+CONFIG_HIBERNATION_COMP_LZO=y
+# CONFIG_HIBERNATION_COMP_LZ4 is not set
+CONFIG_HIBERNATION_DEF_COMP="lzo"
+CONFIG_PM_STD_PARTITION=""
+CONFIG_PM_SLEEP=y
+CONFIG_PM_SLEEP_SMP=y
+# CONFIG_PM_AUTOSLEEP is not set
+# CONFIG_PM_USERSPACE_AUTOSLEEP is not set
+# CONFIG_PM_WAKELOCKS is not set
+CONFIG_PM=y
+CONFIG_PM_DEBUG=y
+# CONFIG_PM_ADVANCED_DEBUG is not set
+# CONFIG_PM_TEST_SUSPEND is not set
+CONFIG_PM_SLEEP_DEBUG=y
+CONFIG_PM_TRACE=y
+CONFIG_PM_TRACE_RTC=y
+CONFIG_PM_CLK=y
+CONFIG_PM_GENERIC_DOMAINS=y
+CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y
+CONFIG_PM_GENERIC_DOMAINS_SLEEP=y
+CONFIG_ENERGY_MODEL=y
+CONFIG_ARCH_SUPPORTS_ACPI=y
+CONFIG_ACPI=y
+CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y
+CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y
+CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y
+CONFIG_ACPI_TABLE_LIB=y
+CONFIG_ACPI_THERMAL_LIB=y
+# CONFIG_ACPI_DEBUGGER is not set
+CONFIG_ACPI_SPCR_TABLE=y
+CONFIG_ACPI_FPDT=y
+CONFIG_ACPI_LPIT=y
+CONFIG_ACPI_SLEEP=y
+CONFIG_ACPI_REV_OVERRIDE_POSSIBLE=y
+CONFIG_ACPI_EC_DEBUGFS=m
+CONFIG_ACPI_AC=y
+CONFIG_ACPI_BATTERY=y
+CONFIG_ACPI_BUTTON=y
+CONFIG_ACPI_VIDEO=m
+CONFIG_ACPI_FAN=y
+CONFIG_ACPI_TAD=m
+CONFIG_ACPI_DOCK=y
+CONFIG_ACPI_CPU_FREQ_PSS=y
+CONFIG_ACPI_PROCESSOR_CSTATE=y
+CONFIG_ACPI_PROCESSOR_IDLE=y
+CONFIG_ACPI_CPPC_LIB=y
+CONFIG_ACPI_PROCESSOR=y
+CONFIG_ACPI_IPMI=m
+CONFIG_ACPI_HOTPLUG_CPU=y
+CONFIG_ACPI_PROCESSOR_AGGREGATOR=m
+CONFIG_ACPI_THERMAL=y
+CONFIG_ACPI_PLATFORM_PROFILE=m
+CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y
+CONFIG_ACPI_TABLE_UPGRADE=y
+CONFIG_ACPI_DEBUG=y
+CONFIG_ACPI_PCI_SLOT=y
+CONFIG_ACPI_CONTAINER=y
+CONFIG_ACPI_HOTPLUG_MEMORY=y
+CONFIG_ACPI_HOTPLUG_IOAPIC=y
+CONFIG_ACPI_SBS=m
+CONFIG_ACPI_HED=y
+CONFIG_ACPI_BGRT=y
+CONFIG_ACPI_NHLT=y
+CONFIG_ACPI_NFIT=m
+# CONFIG_NFIT_SECURITY_DEBUG is not set
+CONFIG_ACPI_NUMA=y
+CONFIG_ACPI_HMAT=y
+CONFIG_HAVE_ACPI_APEI=y
+CONFIG_HAVE_ACPI_APEI_NMI=y
+CONFIG_ACPI_APEI=y
+CONFIG_ACPI_APEI_GHES=y
+CONFIG_ACPI_APEI_PCIEAER=y
+CONFIG_ACPI_APEI_MEMORY_FAILURE=y
+CONFIG_ACPI_APEI_EINJ=m
+CONFIG_ACPI_APEI_EINJ_CXL=y
+CONFIG_ACPI_APEI_ERST_DEBUG=m
+CONFIG_ACPI_DPTF=y
+CONFIG_DPTF_POWER=m
+CONFIG_DPTF_PCH_FIVR=m
+CONFIG_ACPI_WATCHDOG=y
+CONFIG_ACPI_EXTLOG=m
+CONFIG_ACPI_ADXL=y
+CONFIG_ACPI_CONFIGFS=m
+CONFIG_ACPI_PFRUT=m
+CONFIG_ACPI_PCC=y
+CONFIG_ACPI_FFH=y
+CONFIG_PMIC_OPREGION=y
+CONFIG_BYTCRC_PMIC_OPREGION=y
+CONFIG_CHTCRC_PMIC_OPREGION=y
+CONFIG_XPOWER_PMIC_OPREGION=y
+CONFIG_BXT_WC_PMIC_OPREGION=y
+CONFIG_CHT_WC_PMIC_OPREGION=y
+CONFIG_CHT_DC_TI_PMIC_OPREGION=y
+CONFIG_TPS68470_PMIC_OPREGION=y
+CONFIG_ACPI_VIOT=y
+CONFIG_ACPI_PRMT=y
+CONFIG_X86_PM_TIMER=y
+
+#
+# CPU Frequency scaling
+#
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_GOV_ATTR_SET=y
+CONFIG_CPU_FREQ_GOV_COMMON=y
+CONFIG_CPU_FREQ_STAT=y
+# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
+
+#
+# CPU frequency scaling drivers
+#
+CONFIG_X86_INTEL_PSTATE=y
+CONFIG_X86_PCC_CPUFREQ=m
+CONFIG_X86_AMD_PSTATE=y
+CONFIG_X86_AMD_PSTATE_DEFAULT_MODE=3
+CONFIG_X86_AMD_PSTATE_UT=m
+CONFIG_X86_ACPI_CPUFREQ=m
+CONFIG_X86_ACPI_CPUFREQ_CPB=y
+CONFIG_X86_POWERNOW_K8=m
+CONFIG_X86_AMD_FREQ_SENSITIVITY=m
+# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
+CONFIG_X86_P4_CLOCKMOD=m
+
+#
+# shared options
+#
+CONFIG_X86_SPEEDSTEP_LIB=m
+# end of CPU Frequency scaling
+
+#
+# CPU Idle
+#
+CONFIG_CPU_IDLE=y
+CONFIG_CPU_IDLE_GOV_LADDER=y
+CONFIG_CPU_IDLE_GOV_MENU=y
+CONFIG_CPU_IDLE_GOV_TEO=y
+CONFIG_CPU_IDLE_GOV_HALTPOLL=y
+CONFIG_HALTPOLL_CPUIDLE=m
+# end of CPU Idle
+
+CONFIG_INTEL_IDLE=y
+# end of Power management and ACPI options
+
+#
+# Bus options (PCI etc.)
+#
+CONFIG_PCI_DIRECT=y
+CONFIG_PCI_MMCONFIG=y
+CONFIG_PCI_XEN=y
+CONFIG_MMCONF_FAM10H=y
+CONFIG_ISA_DMA_API=y
+CONFIG_AMD_NB=y
+# end of Bus options (PCI etc.)
+
+#
+# Binary Emulations
+#
+CONFIG_IA32_EMULATION=y
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
+CONFIG_X86_X32_ABI=y
+CONFIG_COMPAT_32=y
+CONFIG_COMPAT=y
+CONFIG_COMPAT_FOR_U64_ALIGNMENT=y
+# end of Binary Emulations
+
+CONFIG_KVM_COMMON=y
+CONFIG_HAVE_KVM_PFNCACHE=y
+CONFIG_HAVE_KVM_IRQCHIP=y
+CONFIG_HAVE_KVM_IRQ_ROUTING=y
+CONFIG_HAVE_KVM_DIRTY_RING=y
+CONFIG_HAVE_KVM_DIRTY_RING_TSO=y
+CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL=y
+CONFIG_KVM_MMIO=y
+CONFIG_KVM_ASYNC_PF=y
+CONFIG_HAVE_KVM_MSI=y
+CONFIG_HAVE_KVM_READONLY_MEM=y
+CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y
+CONFIG_KVM_VFIO=y
+CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y
+CONFIG_KVM_GENERIC_PRE_FAULT_MEMORY=y
+CONFIG_KVM_COMPAT=y
+CONFIG_HAVE_KVM_IRQ_BYPASS=y
+CONFIG_HAVE_KVM_NO_POLL=y
+CONFIG_KVM_XFER_TO_GUEST_WORK=y
+CONFIG_HAVE_KVM_PM_NOTIFIER=y
+CONFIG_KVM_GENERIC_HARDWARE_ENABLING=y
+CONFIG_KVM_GENERIC_MMU_NOTIFIER=y
+CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES=y
+CONFIG_KVM_PRIVATE_MEM=y
+CONFIG_KVM_GENERIC_PRIVATE_MEM=y
+CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE=y
+CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=m
+CONFIG_KVM_INTEL=m
+CONFIG_X86_SGX_KVM=y
+CONFIG_KVM_AMD=m
+CONFIG_KVM_AMD_SEV=y
+CONFIG_KVM_SMM=y
+CONFIG_KVM_HYPERV=y
+CONFIG_KVM_XEN=y
+CONFIG_KVM_EXTERNAL_WRITE_TRACKING=y
+CONFIG_KVM_MAX_NR_VCPUS=1024
+CONFIG_AS_AVX512=y
+CONFIG_AS_SHA1_NI=y
+CONFIG_AS_SHA256_NI=y
+CONFIG_AS_TPAUSE=y
+CONFIG_AS_GFNI=y
+CONFIG_AS_VAES=y
+CONFIG_AS_VPCLMULQDQ=y
+CONFIG_AS_WRUSS=y
+CONFIG_ARCH_CONFIGURES_CPU_MITIGATIONS=y
+
+#
+# General architecture-dependent options
+#
+CONFIG_HOTPLUG_SMT=y
+CONFIG_HOTPLUG_CORE_SYNC=y
+CONFIG_HOTPLUG_CORE_SYNC_DEAD=y
+CONFIG_HOTPLUG_CORE_SYNC_FULL=y
+CONFIG_HOTPLUG_SPLIT_STARTUP=y
+CONFIG_HOTPLUG_PARALLEL=y
+CONFIG_GENERIC_ENTRY=y
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+# CONFIG_STATIC_KEYS_SELFTEST is not set
+# CONFIG_STATIC_CALL_SELFTEST is not set
+CONFIG_OPTPROBES=y
+CONFIG_KPROBES_ON_FTRACE=y
+CONFIG_UPROBES=y
+CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
+CONFIG_ARCH_USE_BUILTIN_BSWAP=y
+CONFIG_KRETPROBES=y
+CONFIG_KRETPROBE_ON_RETHOOK=y
+CONFIG_USER_RETURN_NOTIFIER=y
+CONFIG_HAVE_IOREMAP_PROT=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_OPTPROBES=y
+CONFIG_HAVE_KPROBES_ON_FTRACE=y
+CONFIG_ARCH_CORRECT_STACKTRACE_ON_KRETPROBE=y
+CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y
+CONFIG_HAVE_NMI=y
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y
+CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_DMA_CONTIGUOUS=y
+CONFIG_GENERIC_SMP_IDLE_THREAD=y
+CONFIG_ARCH_HAS_FORTIFY_SOURCE=y
+CONFIG_ARCH_HAS_SET_MEMORY=y
+CONFIG_ARCH_HAS_SET_DIRECT_MAP=y
+CONFIG_ARCH_HAS_CPU_FINALIZE_INIT=y
+CONFIG_ARCH_HAS_CPU_PASID=y
+CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y
+CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y
+CONFIG_ARCH_WANTS_NO_INSTR=y
+CONFIG_HAVE_ASM_MODVERSIONS=y
+CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y
+CONFIG_HAVE_RSEQ=y
+CONFIG_HAVE_RUST=y
+CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y
+CONFIG_HAVE_HW_BREAKPOINT=y
+CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y
+CONFIG_HAVE_USER_RETURN_NOTIFIER=y
+CONFIG_HAVE_PERF_EVENTS_NMI=y
+CONFIG_HAVE_HARDLOCKUP_DETECTOR_PERF=y
+CONFIG_HAVE_PERF_REGS=y
+CONFIG_HAVE_PERF_USER_STACK_DUMP=y
+CONFIG_HAVE_ARCH_JUMP_LABEL=y
+CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y
+CONFIG_MMU_GATHER_TABLE_FREE=y
+CONFIG_MMU_GATHER_RCU_TABLE_FREE=y
+CONFIG_MMU_GATHER_MERGE_VMAS=y
+CONFIG_MMU_LAZY_TLB_REFCOUNT=y
+CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y
+CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS=y
+CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y
+CONFIG_HAVE_CMPXCHG_LOCAL=y
+CONFIG_HAVE_CMPXCHG_DOUBLE=y
+CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y
+CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y
+CONFIG_HAVE_ARCH_SECCOMP=y
+CONFIG_HAVE_ARCH_SECCOMP_FILTER=y
+CONFIG_SECCOMP=y
+CONFIG_SECCOMP_FILTER=y
+# CONFIG_SECCOMP_CACHE_DEBUG is not set
+CONFIG_HAVE_ARCH_STACKLEAK=y
+CONFIG_HAVE_STACKPROTECTOR=y
+CONFIG_STACKPROTECTOR=y
+CONFIG_STACKPROTECTOR_STRONG=y
+CONFIG_LTO=y
+CONFIG_LTO_CLANG=y
+CONFIG_ARCH_SUPPORTS_LTO_CLANG=y
+CONFIG_ARCH_SUPPORTS_LTO_CLANG_THIN=y
+CONFIG_HAS_LTO_CLANG=y
+CONFIG_LTO_NONE=y
+# CONFIG_LTO_CLANG_FULL is not set
+# CONFIG_LTO_CLANG_THIN is not set
+CONFIG_ARCH_SUPPORTS_CFI_CLANG=y
+CONFIG_ARCH_USES_CFI_TRAPS=y
+# CONFIG_CFI_CLANG is not set
+# CONFIG_CFI_PERMISSIVE is not set
+CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES=y
+CONFIG_HAVE_CONTEXT_TRACKING_USER=y
+CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK=y
+CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y
+CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y
+CONFIG_HAVE_MOVE_PUD=y
+CONFIG_HAVE_MOVE_PMD=y
+CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y
+CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=y
+CONFIG_HAVE_ARCH_HUGE_VMAP=y
+CONFIG_HAVE_ARCH_HUGE_VMALLOC=y
+CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y
+CONFIG_ARCH_WANT_PMD_MKWRITE=y
+CONFIG_HAVE_ARCH_SOFT_DIRTY=y
+CONFIG_HAVE_MOD_ARCH_SPECIFIC=y
+CONFIG_MODULES_USE_ELF_RELA=y
+CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y
+CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y
+CONFIG_SOFTIRQ_ON_OWN_STACK=y
+CONFIG_ARCH_HAS_ELF_RANDOMIZE=y
+CONFIG_HAVE_ARCH_MMAP_RND_BITS=y
+CONFIG_HAVE_EXIT_THREAD=y
+CONFIG_ARCH_MMAP_RND_BITS=32
+CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y
+CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16
+CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES=y
+CONFIG_HAVE_PAGE_SIZE_4KB=y
+CONFIG_PAGE_SIZE_4KB=y
+CONFIG_PAGE_SIZE_LESS_THAN_64KB=y
+CONFIG_PAGE_SIZE_LESS_THAN_256KB=y
+CONFIG_PAGE_SHIFT=12
+CONFIG_HAVE_OBJTOOL=y
+CONFIG_HAVE_JUMP_LABEL_HACK=y
+CONFIG_HAVE_NOINSTR_HACK=y
+CONFIG_HAVE_NOINSTR_VALIDATION=y
+CONFIG_HAVE_UACCESS_VALIDATION=y
+CONFIG_HAVE_STACK_VALIDATION=y
+CONFIG_HAVE_RELIABLE_STACKTRACE=y
+CONFIG_ISA_BUS_API=y
+CONFIG_OLD_SIGSUSPEND3=y
+CONFIG_COMPAT_OLD_SIGACTION=y
+CONFIG_COMPAT_32BIT_TIME=y
+CONFIG_HAVE_ARCH_VMAP_STACK=y
+CONFIG_VMAP_STACK=y
+CONFIG_HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET=y
+CONFIG_RANDOMIZE_KSTACK_OFFSET=y
+CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
+CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y
+CONFIG_STRICT_KERNEL_RWX=y
+CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y
+CONFIG_STRICT_MODULE_RWX=y
+CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y
+CONFIG_ARCH_USE_MEMREMAP_PROT=y
+CONFIG_LOCK_EVENT_COUNTS=y
+CONFIG_ARCH_HAS_MEM_ENCRYPT=y
+CONFIG_ARCH_HAS_CC_PLATFORM=y
+CONFIG_HAVE_STATIC_CALL=y
+CONFIG_HAVE_STATIC_CALL_INLINE=y
+CONFIG_HAVE_PREEMPT_DYNAMIC=y
+CONFIG_HAVE_PREEMPT_DYNAMIC_CALL=y
+CONFIG_ARCH_WANT_LD_ORPHAN_WARN=y
+CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
+CONFIG_ARCH_SUPPORTS_PAGE_TABLE_CHECK=y
+CONFIG_ARCH_HAS_ELFCORE_COMPAT=y
+CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH=y
+CONFIG_DYNAMIC_SIGFRAME=y
+CONFIG_HAVE_ARCH_NODE_DEV_GROUP=y
+CONFIG_ARCH_HAS_HW_PTE_YOUNG=y
+CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y
+CONFIG_ARCH_HAS_KERNEL_FPU_SUPPORT=y
+
+#
+# GCOV-based kernel profiling
+#
+# CONFIG_GCOV_KERNEL is not set
+CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y
+# end of GCOV-based kernel profiling
+
+CONFIG_HAVE_GCC_PLUGINS=y
+CONFIG_GCC_PLUGINS=y
+# CONFIG_GCC_PLUGIN_LATENT_ENTROPY is not set
+CONFIG_FUNCTION_ALIGNMENT_4B=y
+CONFIG_FUNCTION_ALIGNMENT_16B=y
+CONFIG_FUNCTION_ALIGNMENT=16
+CONFIG_CC_HAS_MIN_FUNCTION_ALIGNMENT=y
+CONFIG_CC_HAS_SANE_FUNCTION_ALIGNMENT=y
+# end of General architecture-dependent options
+
+CONFIG_RT_MUTEXES=y
+CONFIG_MODULE_SIG_FORMAT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_DEBUGFS=y
+# CONFIG_MODULE_DEBUG is not set
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
+# CONFIG_MODVERSIONS is not set
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_MODULE_SIG=y
+# CONFIG_MODULE_SIG_FORCE is not set
+CONFIG_MODULE_SIG_ALL=y
+# CONFIG_MODULE_SIG_SHA1 is not set
+# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA384 is not set
+CONFIG_MODULE_SIG_SHA512=y
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
+CONFIG_MODULE_SIG_HASH="sha512"
+# CONFIG_MODULE_COMPRESS_NONE is not set
+# CONFIG_MODULE_COMPRESS_GZIP is not set
+# CONFIG_MODULE_COMPRESS_XZ is not set
+CONFIG_MODULE_COMPRESS_ZSTD=y
+CONFIG_MODULE_DECOMPRESS=y
+CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS=y
+CONFIG_MODPROBE_PATH="/sbin/modprobe"
+# CONFIG_TRIM_UNUSED_KSYMS is not set
+CONFIG_MODULES_TREE_LOOKUP=y
+CONFIG_BLOCK=y
+CONFIG_BLOCK_LEGACY_AUTOLOAD=y
+CONFIG_BLK_RQ_ALLOC_TIME=y
+CONFIG_BLK_CGROUP_RWSTAT=y
+CONFIG_BLK_CGROUP_PUNT_BIO=y
+CONFIG_BLK_DEV_BSG_COMMON=y
+CONFIG_BLK_ICQ=y
+CONFIG_BLK_DEV_BSGLIB=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_BLK_DEV_WRITE_MOUNTED=y
+CONFIG_BLK_DEV_ZONED=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_MQ=y
+CONFIG_BLK_CGROUP_IOLATENCY=y
+CONFIG_BLK_CGROUP_FC_APPID=y
+CONFIG_BLK_CGROUP_IOCOST=y
+CONFIG_BLK_CGROUP_IOPRIO=y
+CONFIG_BLK_DEBUG_FS=y
+CONFIG_BLK_SED_OPAL=y
+CONFIG_BLK_INLINE_ENCRYPTION=y
+CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+CONFIG_AIX_PARTITION=y
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+CONFIG_MAC_PARTITION=y
+CONFIG_MSDOS_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+# CONFIG_UNIXWARE_DISKLABEL is not set
+CONFIG_LDM_PARTITION=y
+# CONFIG_LDM_DEBUG is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+CONFIG_KARMA_PARTITION=y
+CONFIG_EFI_PARTITION=y
+# CONFIG_SYSV68_PARTITION is not set
+# CONFIG_CMDLINE_PARTITION is not set
+# end of Partition Types
+
+CONFIG_BLK_MQ_PCI=y
+CONFIG_BLK_MQ_VIRTIO=y
+CONFIG_BLK_PM=y
+CONFIG_BLOCK_HOLDER_DEPRECATED=y
+CONFIG_BLK_MQ_STACKING=y
+
+#
+# IO Schedulers
+#
+CONFIG_MQ_IOSCHED_DEADLINE=y
+CONFIG_MQ_IOSCHED_KYBER=y
+CONFIG_IOSCHED_BFQ=y
+CONFIG_BFQ_GROUP_IOSCHED=y
+# CONFIG_BFQ_CGROUP_DEBUG is not set
+# end of IO Schedulers
+
+CONFIG_PREEMPT_NOTIFIERS=y
+CONFIG_PADATA=y
+CONFIG_ASN1=y
+CONFIG_UNINLINE_SPIN_UNLOCK=y
+CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y
+CONFIG_MUTEX_SPIN_ON_OWNER=y
+CONFIG_RWSEM_SPIN_ON_OWNER=y
+CONFIG_LOCK_SPIN_ON_OWNER=y
+CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y
+CONFIG_QUEUED_SPINLOCKS=y
+CONFIG_ARCH_USE_QUEUED_RWLOCKS=y
+CONFIG_QUEUED_RWLOCKS=y
+CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y
+CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE=y
+CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y
+CONFIG_FREEZER=y
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+CONFIG_COMPAT_BINFMT_ELF=y
+CONFIG_ELFCORE=y
+CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
+CONFIG_BINFMT_SCRIPT=y
+CONFIG_BINFMT_MISC=y
+CONFIG_COREDUMP=y
+# end of Executable file formats
+
+#
+# Memory Management options
+#
+CONFIG_ZPOOL=y
+CONFIG_SWAP=y
+CONFIG_ZSWAP=y
+CONFIG_ZSWAP_DEFAULT_ON=y
+CONFIG_ZSWAP_SHRINKER_DEFAULT_ON=y
+# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set
+# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO is not set
+# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set
+# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4 is not set
+# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4HC is not set
+CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y
+CONFIG_ZSWAP_COMPRESSOR_DEFAULT="zstd"
+# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD is not set
+# CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD_DEPRECATED is not set
+CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC=y
+CONFIG_ZSWAP_ZPOOL_DEFAULT="zsmalloc"
+CONFIG_ZBUD=y
+# CONFIG_Z3FOLD_DEPRECATED is not set
+CONFIG_HAVE_ZSMALLOC=y
+CONFIG_ZSMALLOC=y
+CONFIG_ZSMALLOC_STAT=y
+CONFIG_ZSMALLOC_CHAIN_SIZE=8
+
+#
+# Slab allocator options
+#
+CONFIG_SLUB=y
+CONFIG_SLAB_MERGE_DEFAULT=y
+CONFIG_SLAB_FREELIST_RANDOM=y
+CONFIG_SLAB_FREELIST_HARDENED=y
+CONFIG_SLAB_BUCKETS=y
+# CONFIG_SLUB_STATS is not set
+CONFIG_SLUB_CPU_PARTIAL=y
+# CONFIG_RANDOM_KMALLOC_CACHES is not set
+# end of Slab allocator options
+
+CONFIG_SHUFFLE_PAGE_ALLOCATOR=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SPARSEMEM=y
+CONFIG_SPARSEMEM_EXTREME=y
+CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
+CONFIG_SPARSEMEM_VMEMMAP=y
+CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=y
+CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y
+CONFIG_HAVE_GUP_FAST=y
+CONFIG_NUMA_KEEP_MEMINFO=y
+CONFIG_MEMORY_ISOLATION=y
+CONFIG_EXCLUSIVE_SYSTEM_RAM=y
+CONFIG_HAVE_BOOTMEM_INFO_NODE=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_MHP_MEMMAP_ON_MEMORY=y
+CONFIG_ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y
+CONFIG_MEMORY_BALLOON=y
+CONFIG_BALLOON_COMPACTION=y
+CONFIG_COMPACTION=y
+CONFIG_COMPACT_UNEVICTABLE_DEFAULT=0
+CONFIG_PAGE_REPORTING=y
+CONFIG_MIGRATION=y
+CONFIG_DEVICE_MIGRATION=y
+CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y
+CONFIG_ARCH_ENABLE_THP_MIGRATION=y
+CONFIG_CONTIG_ALLOC=y
+CONFIG_PCP_BATCH_SCALE_MAX=5
+CONFIG_PHYS_ADDR_T_64BIT=y
+CONFIG_MMU_NOTIFIER=y
+CONFIG_KSM=y
+CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
+CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y
+CONFIG_MEMORY_FAILURE=y
+CONFIG_HWPOISON_INJECT=m
+CONFIG_ARCH_WANT_GENERAL_HUGETLB=y
+CONFIG_ARCH_WANTS_THP_SWAP=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
+# CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set
+# CONFIG_TRANSPARENT_HUGEPAGE_NEVER is not set
+CONFIG_THP_SWAP=y
+CONFIG_READ_ONLY_THP_FOR_FS=y
+CONFIG_PGTABLE_HAS_HUGE_LEAVES=y
+CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
+CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
+CONFIG_USE_PERCPU_NUMA_NODE_ID=y
+CONFIG_HAVE_SETUP_PER_CPU_AREA=y
+CONFIG_CMA=y
+CONFIG_CMA_DEBUGFS=y
+CONFIG_CMA_SYSFS=y
+CONFIG_CMA_AREAS=7
+CONFIG_MEM_SOFT_DIRTY=y
+CONFIG_GENERIC_EARLY_IOREMAP=y
+# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set
+CONFIG_PAGE_IDLE_FLAG=y
+CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
+CONFIG_ARCH_HAS_CURRENT_STACK_POINTER=y
+CONFIG_ARCH_HAS_PTE_DEVMAP=y
+CONFIG_ZONE_DMA=y
+CONFIG_ZONE_DMA32=y
+CONFIG_ZONE_DEVICE=y
+CONFIG_HMM_MIRROR=y
+CONFIG_GET_FREE_REGION=y
+CONFIG_DEVICE_PRIVATE=y
+CONFIG_VMAP_PFN=y
+CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y
+CONFIG_ARCH_HAS_PKEYS=y
+CONFIG_VM_EVENT_COUNTERS=y
+# CONFIG_PERCPU_STATS is not set
+# CONFIG_GUP_TEST is not set
+# CONFIG_DMAPOOL_TEST is not set
+CONFIG_ARCH_HAS_PTE_SPECIAL=y
+CONFIG_MAPPING_DIRTY_HELPERS=y
+CONFIG_MEMFD_CREATE=y
+CONFIG_SECRETMEM=y
+CONFIG_ANON_VMA_NAME=y
+CONFIG_HAVE_ARCH_USERFAULTFD_WP=y
+CONFIG_HAVE_ARCH_USERFAULTFD_MINOR=y
+CONFIG_USERFAULTFD=y
+CONFIG_PTE_MARKER_UFFD_WP=y
+CONFIG_LRU_GEN=y
+CONFIG_LRU_GEN_ENABLED=y
+# CONFIG_LRU_GEN_STATS is not set
+CONFIG_LRU_GEN_WALKS_MMU=y
+CONFIG_ARCH_SUPPORTS_PER_VMA_LOCK=y
+CONFIG_PER_VMA_LOCK=y
+CONFIG_LOCK_MM_AND_FIND_VMA=y
+CONFIG_IOMMU_MM_DATA=y
+CONFIG_EXECMEM=y
+
+#
+# Data Access Monitoring
+#
+CONFIG_DAMON=y
+CONFIG_DAMON_VADDR=y
+CONFIG_DAMON_PADDR=y
+CONFIG_DAMON_SYSFS=y
+# CONFIG_DAMON_DBGFS_DEPRECATED is not set
+CONFIG_DAMON_RECLAIM=y
+CONFIG_DAMON_LRU_SORT=y
+# end of Data Access Monitoring
+# end of Memory Management options
+
+CONFIG_NET=y
+CONFIG_COMPAT_NETLINK_MESSAGES=y
+CONFIG_NET_INGRESS=y
+CONFIG_NET_EGRESS=y
+CONFIG_NET_XGRESS=y
+CONFIG_NET_REDIRECT=y
+CONFIG_SKB_DECRYPTED=y
+CONFIG_SKB_EXTENSIONS=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=m
+CONFIG_UNIX=y
+CONFIG_AF_UNIX_OOB=y
+CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
+CONFIG_TLS_DEVICE=y
+# CONFIG_TLS_TOE is not set
+CONFIG_XFRM=y
+CONFIG_XFRM_OFFLOAD=y
+CONFIG_XFRM_ALGO=m
+CONFIG_XFRM_USER=m
+# CONFIG_XFRM_USER_COMPAT is not set
+CONFIG_XFRM_INTERFACE=m
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_XFRM_MIGRATE=y
+CONFIG_XFRM_STATISTICS=y
+CONFIG_XFRM_AH=m
+CONFIG_XFRM_ESP=m
+CONFIG_XFRM_IPCOMP=m
+CONFIG_NET_KEY=m
+CONFIG_NET_KEY_MIGRATE=y
+CONFIG_XFRM_ESPINTCP=y
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
+CONFIG_SMC_LO=y
+CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
+CONFIG_NET_HANDSHAKE=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_FIB_TRIE_STATS=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_ROUTE_CLASSID=y
+# CONFIG_IP_PNP is not set
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IP_TUNNEL=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE_COMMON=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=m
+CONFIG_NET_UDP_TUNNEL=m
+CONFIG_NET_FOU=m
+CONFIG_NET_FOU_IP_TUNNELS=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_ESP_OFFLOAD=m
+CONFIG_INET_ESPINTCP=y
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_TABLE_PERTURB_ORDER=16
+CONFIG_INET_XFRM_TUNNEL=m
+CONFIG_INET_TUNNEL=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_TCP_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_INET_RAW_DIAG=m
+CONFIG_INET_DIAG_DESTROY=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_BIC=m
+CONFIG_TCP_CONG_CUBIC=m
+CONFIG_TCP_CONG_WESTWOOD=m
+CONFIG_TCP_CONG_HTCP=m
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_VEGAS=m
+CONFIG_TCP_CONG_NV=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_TCP_CONG_DCTCP=m
+CONFIG_TCP_CONG_CDG=m
+CONFIG_TCP_CONG_BBR=y
+CONFIG_DEFAULT_BBR=y
+# CONFIG_DEFAULT_RENO is not set
+CONFIG_DEFAULT_TCP_CONG="bbr"
+CONFIG_TCP_SIGPOOL=y
+CONFIG_TCP_AO=y
+CONFIG_TCP_MD5SIG=y
+CONFIG_IPV6=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_ESP_OFFLOAD=m
+CONFIG_INET6_ESPINTCP=y
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_IPV6_ILA=m
+CONFIG_INET6_XFRM_TUNNEL=m
+CONFIG_INET6_TUNNEL=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_SIT_6RD=y
+CONFIG_IPV6_NDISC_NODETYPE=y
+CONFIG_IPV6_TUNNEL=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_FOU=m
+CONFIG_IPV6_FOU_TUNNEL=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IPV6_PIMSM_V2=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPV6_SEG6_HMAC=y
+CONFIG_IPV6_SEG6_BPF=y
+CONFIG_IPV6_RPL_LWTUNNEL=y
+CONFIG_IPV6_IOAM6_LWTUNNEL=y
+CONFIG_NETLABEL=y
+CONFIG_MPTCP=y
+CONFIG_INET_MPTCP_DIAG=m
+CONFIG_MPTCP_IPV6=y
+CONFIG_NETWORK_SECMARK=y
+CONFIG_NET_PTP_CLASSIFY=y
+CONFIG_NETWORK_PHY_TIMESTAMPING=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_BRIDGE_NETFILTER=m
+
+#
+# Core Netfilter Configuration
+#
+CONFIG_NETFILTER_INGRESS=y
+CONFIG_NETFILTER_EGRESS=y
+CONFIG_NETFILTER_SKIP_EGRESS=y
+CONFIG_NETFILTER_NETLINK=m
+CONFIG_NETFILTER_FAMILY_BRIDGE=y
+CONFIG_NETFILTER_FAMILY_ARP=y
+CONFIG_NETFILTER_BPF_LINK=y
+CONFIG_NETFILTER_NETLINK_HOOK=m
+CONFIG_NETFILTER_NETLINK_ACCT=m
+CONFIG_NETFILTER_NETLINK_QUEUE=m
+CONFIG_NETFILTER_NETLINK_LOG=m
+CONFIG_NETFILTER_NETLINK_OSF=m
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_LOG_SYSLOG=m
+CONFIG_NETFILTER_CONNCOUNT=m
+CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
+CONFIG_NF_CONNTRACK_PROCFS=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CONNTRACK_LABELS=y
+CONFIG_NF_CONNTRACK_OVS=y
+CONFIG_NF_CT_PROTO_DCCP=y
+CONFIG_NF_CT_PROTO_GRE=y
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_CT_PROTO_UDPLITE=y
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_BROADCAST=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_CT_NETLINK_HELPER=m
+CONFIG_NETFILTER_NETLINK_GLUE_CT=y
+CONFIG_NF_NAT=m
+CONFIG_NF_NAT_AMANDA=m
+CONFIG_NF_NAT_FTP=m
+CONFIG_NF_NAT_IRC=m
+CONFIG_NF_NAT_SIP=m
+CONFIG_NF_NAT_TFTP=m
+CONFIG_NF_NAT_REDIRECT=y
+CONFIG_NF_NAT_MASQUERADE=y
+CONFIG_NF_NAT_OVS=y
+CONFIG_NETFILTER_SYNPROXY=m
+CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
+CONFIG_NFT_NUMGEN=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_FLOW_OFFLOAD=m
+CONFIG_NFT_CONNLIMIT=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_MASQ=m
+CONFIG_NFT_REDIR=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_TUNNEL=m
+CONFIG_NFT_QUEUE=m
+CONFIG_NFT_QUOTA=m
+CONFIG_NFT_REJECT=m
+CONFIG_NFT_REJECT_INET=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_FIB=m
+CONFIG_NFT_FIB_INET=m
+CONFIG_NFT_XFRM=m
+CONFIG_NFT_SOCKET=m
+CONFIG_NFT_OSF=m
+CONFIG_NFT_TPROXY=m
+CONFIG_NFT_SYNPROXY=m
+CONFIG_NF_DUP_NETDEV=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
+CONFIG_NFT_FIB_NETDEV=m
+CONFIG_NFT_REJECT_NETDEV=m
+CONFIG_NF_FLOW_TABLE_INET=m
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_FLOW_TABLE_PROCFS=y
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XTABLES_COMPAT=y
+
+#
+# Xtables combined modules
+#
+CONFIG_NETFILTER_XT_MARK=m
+CONFIG_NETFILTER_XT_CONNMARK=m
+CONFIG_NETFILTER_XT_SET=m
+
+#
+# Xtables targets
+#
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HL=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LED=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_NAT=m
+CONFIG_NETFILTER_XT_TARGET_NETMAP=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
+CONFIG_NETFILTER_XT_TARGET_RATEEST=m
+CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
+CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+
+#
+# Xtables matches
+#
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CGROUP=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ECN=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_HL=m
+CONFIG_NETFILTER_XT_MATCH_IPCOMP=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_L2TP=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SCTP=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+# end of Core Netfilter Configuration
+
+CONFIG_IP_SET=m
+CONFIG_IP_SET_MAX=256
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPMARK=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_IPMAC=m
+CONFIG_IP_SET_HASH_MAC=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_IPV6=y
+# CONFIG_IP_VS_DEBUG is not set
+CONFIG_IP_VS_TAB_BITS=15
+
+#
+# IPVS transport protocol load balancing support
+#
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_AH_ESP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_PROTO_SCTP=y
+
+#
+# IPVS scheduler
+#
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_FO=m
+CONFIG_IP_VS_OVF=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_MH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_TWOS=m
+
+#
+# IPVS SH scheduler
+#
+CONFIG_IP_VS_SH_TAB_BITS=8
+
+#
+# IPVS MH scheduler
+#
+CONFIG_IP_VS_MH_TAB_INDEX=12
+
+#
+# IPVS application helper
+#
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_NFCT=y
+CONFIG_IP_VS_PE_SIP=m
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_NF_DEFRAG_IPV4=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
+CONFIG_NF_SOCKET_IPV4=m
+CONFIG_NF_TPROXY_IPV4=m
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NFT_REJECT_IPV4=m
+CONFIG_NFT_DUP_IPV4=m
+CONFIG_NFT_FIB_IPV4=m
+CONFIG_NF_TABLES_ARP=y
+CONFIG_NF_DUP_IPV4=m
+CONFIG_NF_LOG_ARP=m
+CONFIG_NF_LOG_IPV4=m
+CONFIG_NF_REJECT_IPV4=m
+CONFIG_NF_NAT_SNMP_BASIC=m
+CONFIG_NF_NAT_PPTP=m
+CONFIG_NF_NAT_H323=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_SYNPROXY=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_NFT_COMPAT_ARP=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+# end of IP: Netfilter Configuration
+
+#
+# IPv6: Netfilter Configuration
+#
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
+CONFIG_NF_SOCKET_IPV6=m
+CONFIG_NF_TPROXY_IPV6=m
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NFT_REJECT_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_DUP_IPV6=m
+CONFIG_NF_REJECT_IPV6=m
+CONFIG_NF_LOG_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_MATCH_SRH=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_SYNPROXY=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_IP6_NF_TARGET_NPT=m
+# end of IPv6: Netfilter Configuration
+
+CONFIG_NF_DEFRAG_IPV6=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NFT_BRIDGE_META=m
+CONFIG_NFT_BRIDGE_REJECT=m
+CONFIG_NF_CONNTRACK_BRIDGE=m
+CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_802_3=m
+CONFIG_BRIDGE_EBT_AMONG=m
+CONFIG_BRIDGE_EBT_ARP=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_IP6=m
+CONFIG_BRIDGE_EBT_LIMIT=m
+CONFIG_BRIDGE_EBT_MARK=m
+CONFIG_BRIDGE_EBT_PKTTYPE=m
+CONFIG_BRIDGE_EBT_STP=m
+CONFIG_BRIDGE_EBT_VLAN=m
+CONFIG_BRIDGE_EBT_ARPREPLY=m
+CONFIG_BRIDGE_EBT_DNAT=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_SNAT=m
+CONFIG_BRIDGE_EBT_LOG=m
+CONFIG_BRIDGE_EBT_NFLOG=m
+# CONFIG_IP_DCCP is not set
+CONFIG_IP_SCTP=m
+# CONFIG_SCTP_DBG_OBJCNT is not set
+# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5 is not set
+CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y
+# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_NONE is not set
+CONFIG_SCTP_COOKIE_HMAC_MD5=y
+CONFIG_SCTP_COOKIE_HMAC_SHA1=y
+CONFIG_INET_SCTP_DIAG=m
+CONFIG_RDS=m
+CONFIG_RDS_RDMA=m
+CONFIG_RDS_TCP=m
+# CONFIG_RDS_DEBUG is not set
+CONFIG_TIPC=m
+CONFIG_TIPC_MEDIA_IB=y
+CONFIG_TIPC_MEDIA_UDP=y
+CONFIG_TIPC_CRYPTO=y
+CONFIG_TIPC_DIAG=m
+CONFIG_ATM=m
+CONFIG_ATM_CLIP=m
+# CONFIG_ATM_CLIP_NO_ICMP is not set
+CONFIG_ATM_LANE=m
+CONFIG_ATM_MPOA=m
+CONFIG_ATM_BR2684=m
+# CONFIG_ATM_BR2684_IPFILTER is not set
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_STP=m
+CONFIG_GARP=m
+CONFIG_MRP=m
+CONFIG_BRIDGE=m
+CONFIG_BRIDGE_IGMP_SNOOPING=y
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_BRIDGE_MRP=y
+CONFIG_BRIDGE_CFM=y
+CONFIG_NET_DSA=m
+CONFIG_NET_DSA_TAG_NONE=m
+CONFIG_NET_DSA_TAG_AR9331=m
+CONFIG_NET_DSA_TAG_BRCM_COMMON=m
+CONFIG_NET_DSA_TAG_BRCM=m
+CONFIG_NET_DSA_TAG_BRCM_LEGACY=m
+CONFIG_NET_DSA_TAG_BRCM_PREPEND=m
+CONFIG_NET_DSA_TAG_HELLCREEK=m
+CONFIG_NET_DSA_TAG_GSWIP=m
+CONFIG_NET_DSA_TAG_DSA_COMMON=m
+CONFIG_NET_DSA_TAG_DSA=m
+CONFIG_NET_DSA_TAG_EDSA=m
+CONFIG_NET_DSA_TAG_MTK=m
+CONFIG_NET_DSA_TAG_KSZ=m
+CONFIG_NET_DSA_TAG_OCELOT=m
+CONFIG_NET_DSA_TAG_OCELOT_8021Q=m
+CONFIG_NET_DSA_TAG_QCA=m
+CONFIG_NET_DSA_TAG_RTL4_A=m
+CONFIG_NET_DSA_TAG_RTL8_4=m
+CONFIG_NET_DSA_TAG_RZN1_A5PSW=m
+CONFIG_NET_DSA_TAG_LAN9303=m
+CONFIG_NET_DSA_TAG_SJA1105=m
+CONFIG_NET_DSA_TAG_TRAILER=m
+CONFIG_NET_DSA_TAG_VSC73XX_8021Q=m
+CONFIG_NET_DSA_TAG_XRS700X=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_VLAN_8021Q_MVRP=y
+CONFIG_LLC=m
+CONFIG_LLC2=m
+CONFIG_ATALK=m
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+CONFIG_PHONET=m
+CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_DEBUGFS=y
+CONFIG_6LOWPAN_NHC=m
+CONFIG_6LOWPAN_NHC_DEST=m
+CONFIG_6LOWPAN_NHC_FRAGMENT=m
+CONFIG_6LOWPAN_NHC_HOP=m
+CONFIG_6LOWPAN_NHC_IPV6=m
+CONFIG_6LOWPAN_NHC_MOBILITY=m
+CONFIG_6LOWPAN_NHC_ROUTING=m
+CONFIG_6LOWPAN_NHC_UDP=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
+CONFIG_IEEE802154=m
+CONFIG_IEEE802154_NL802154_EXPERIMENTAL=y
+CONFIG_IEEE802154_SOCKET=m
+CONFIG_IEEE802154_6LOWPAN=m
+CONFIG_MAC802154=m
+CONFIG_NET_SCHED=y
+
+#
+# Queueing/Scheduling
+#
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_CBS=m
+CONFIG_NET_SCH_ETF=m
+CONFIG_NET_SCH_MQPRIO_LIB=m
+CONFIG_NET_SCH_TAPRIO=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_SKBPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=y
+CONFIG_NET_SCH_CAKE=m
+CONFIG_NET_SCH_FQ=m
+CONFIG_NET_SCH_HHF=m
+CONFIG_NET_SCH_PIE=m
+CONFIG_NET_SCH_FQ_PIE=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_SCH_ETS=m
+CONFIG_NET_SCH_DEFAULT=y
+# CONFIG_DEFAULT_FQ is not set
+# CONFIG_DEFAULT_CODEL is not set
+CONFIG_DEFAULT_FQ_CODEL=y
+# CONFIG_DEFAULT_FQ_PIE is not set
+# CONFIG_DEFAULT_SFQ is not set
+# CONFIG_DEFAULT_PFIFO_FAST is not set
+CONFIG_DEFAULT_NET_SCH="fq_codel"
+
+#
+# Classification
+#
+CONFIG_NET_CLS=y
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=m
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_STACK=32
+CONFIG_NET_EMATCH_CMP=m
+CONFIG_NET_EMATCH_NBYTE=m
+CONFIG_NET_EMATCH_U32=m
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_EMATCH_TEXT=m
+CONFIG_NET_EMATCH_CANID=m
+CONFIG_NET_EMATCH_IPSET=m
+CONFIG_NET_EMATCH_IPT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_SAMPLE=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_MPLS=m
+CONFIG_NET_ACT_VLAN=m
+CONFIG_NET_ACT_BPF=m
+CONFIG_NET_ACT_CONNMARK=m
+CONFIG_NET_ACT_CTINFO=m
+CONFIG_NET_ACT_SKBMOD=m
+CONFIG_NET_ACT_IFE=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_CT=m
+CONFIG_NET_ACT_GATE=m
+CONFIG_NET_IFE_SKBMARK=m
+CONFIG_NET_IFE_SKBPRIO=m
+CONFIG_NET_IFE_SKBTCINDEX=m
+CONFIG_NET_TC_SKB_EXT=y
+CONFIG_NET_SCH_FIFO=y
+CONFIG_DCB=y
+CONFIG_DNS_RESOLVER=m
+CONFIG_BATMAN_ADV=m
+CONFIG_BATMAN_ADV_BATMAN_V=y
+CONFIG_BATMAN_ADV_BLA=y
+CONFIG_BATMAN_ADV_DAT=y
+CONFIG_BATMAN_ADV_NC=y
+CONFIG_BATMAN_ADV_MCAST=y
+# CONFIG_BATMAN_ADV_DEBUG is not set
+# CONFIG_BATMAN_ADV_TRACING is not set
+CONFIG_OPENVSWITCH=m
+CONFIG_OPENVSWITCH_GRE=m
+CONFIG_OPENVSWITCH_VXLAN=m
+CONFIG_OPENVSWITCH_GENEVE=m
+CONFIG_VSOCKETS=m
+CONFIG_VSOCKETS_DIAG=m
+CONFIG_VSOCKETS_LOOPBACK=m
+CONFIG_VMWARE_VMCI_VSOCKETS=m
+CONFIG_VIRTIO_VSOCKETS=m
+CONFIG_VIRTIO_VSOCKETS_COMMON=m
+CONFIG_HYPERV_VSOCKETS=m
+CONFIG_NETLINK_DIAG=m
+CONFIG_MPLS=y
+CONFIG_NET_MPLS_GSO=m
+CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
+CONFIG_NET_NSH=m
+CONFIG_HSR=m
+CONFIG_NET_SWITCHDEV=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_QRTR=m
+CONFIG_QRTR_SMD=m
+CONFIG_QRTR_TUN=m
+CONFIG_QRTR_MHI=m
+CONFIG_NET_NCSI=y
+CONFIG_NCSI_OEM_CMD_GET_MAC=y
+CONFIG_NCSI_OEM_CMD_KEEP_PHY=y
+CONFIG_PCPU_DEV_REFCNT=y
+CONFIG_MAX_SKB_FRAGS=17
+CONFIG_RPS=y
+CONFIG_RFS_ACCEL=y
+CONFIG_SOCK_RX_QUEUE_MAPPING=y
+CONFIG_XPS=y
+CONFIG_CGROUP_NET_PRIO=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_NET_RX_BUSY_POLL=y
+CONFIG_BQL=y
+CONFIG_BPF_STREAM_PARSER=y
+CONFIG_NET_FLOW_LIMIT=y
+
+#
+# Network testing
+#
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_DROP_MONITOR=y
+# end of Network testing
+# end of Networking options
+
+CONFIG_HAMRADIO=y
+
+#
+# Packet Radio protocols
+#
+CONFIG_AX25=m
+CONFIG_AX25_DAMA_SLAVE=y
+CONFIG_NETROM=m
+CONFIG_ROSE=m
+
+#
+# AX.25 network device drivers
+#
+CONFIG_MKISS=m
+CONFIG_6PACK=m
+CONFIG_BPQETHER=m
+CONFIG_BAYCOM_SER_FDX=m
+CONFIG_BAYCOM_SER_HDX=m
+CONFIG_BAYCOM_PAR=m
+CONFIG_YAM=m
+# end of AX.25 network device drivers
+
+CONFIG_CAN=m
+CONFIG_CAN_RAW=m
+CONFIG_CAN_BCM=m
+CONFIG_CAN_GW=m
+CONFIG_CAN_J1939=m
+CONFIG_CAN_ISOTP=m
+CONFIG_BT=m
+CONFIG_BT_BREDR=y
+CONFIG_BT_RFCOMM=m
+CONFIG_BT_RFCOMM_TTY=y
+CONFIG_BT_BNEP=m
+CONFIG_BT_BNEP_MC_FILTER=y
+CONFIG_BT_BNEP_PROTO_FILTER=y
+CONFIG_BT_CMTP=m
+CONFIG_BT_HIDP=m
+CONFIG_BT_LE=y
+CONFIG_BT_LE_L2CAP_ECRED=y
+CONFIG_BT_6LOWPAN=m
+CONFIG_BT_LEDS=y
+CONFIG_BT_MSFTEXT=y
+CONFIG_BT_AOSPEXT=y
+CONFIG_BT_DEBUGFS=y
+# CONFIG_BT_SELFTEST is not set
+
+#
+# Bluetooth device drivers
+#
+CONFIG_BT_INTEL=m
+CONFIG_BT_BCM=m
+CONFIG_BT_RTL=m
+CONFIG_BT_QCA=m
+CONFIG_BT_MTK=m
+CONFIG_BT_HCIBTUSB=m
+CONFIG_BT_HCIBTUSB_AUTOSUSPEND=y
+CONFIG_BT_HCIBTUSB_POLL_SYNC=y
+CONFIG_BT_HCIBTUSB_BCM=y
+CONFIG_BT_HCIBTUSB_MTK=y
+CONFIG_BT_HCIBTUSB_RTL=y
+CONFIG_BT_HCIBTSDIO=m
+CONFIG_BT_HCIUART=m
+CONFIG_BT_HCIUART_SERDEV=y
+CONFIG_BT_HCIUART_H4=y
+CONFIG_BT_HCIUART_NOKIA=m
+CONFIG_BT_HCIUART_BCSP=y
+CONFIG_BT_HCIUART_ATH3K=y
+CONFIG_BT_HCIUART_LL=y
+CONFIG_BT_HCIUART_3WIRE=y
+CONFIG_BT_HCIUART_INTEL=y
+CONFIG_BT_HCIUART_BCM=y
+CONFIG_BT_HCIUART_RTL=y
+CONFIG_BT_HCIUART_QCA=y
+CONFIG_BT_HCIUART_AG6XX=y
+CONFIG_BT_HCIUART_MRVL=y
+CONFIG_BT_HCIBCM203X=m
+CONFIG_BT_HCIBCM4377=m
+CONFIG_BT_HCIBPA10X=m
+CONFIG_BT_HCIBFUSB=m
+CONFIG_BT_HCIDTL1=m
+CONFIG_BT_HCIBT3C=m
+CONFIG_BT_HCIBLUECARD=m
+CONFIG_BT_HCIVHCI=m
+CONFIG_BT_MRVL=m
+CONFIG_BT_MRVL_SDIO=m
+CONFIG_BT_ATH3K=m
+CONFIG_BT_MTKSDIO=m
+CONFIG_BT_MTKUART=m
+CONFIG_BT_HCIRSI=m
+CONFIG_BT_VIRTIO=m
+CONFIG_BT_NXPUART=m
+CONFIG_BT_INTEL_PCIE=m
+# end of Bluetooth device drivers
+
+CONFIG_AF_RXRPC=m
+CONFIG_AF_RXRPC_IPV6=y
+# CONFIG_AF_RXRPC_INJECT_LOSS is not set
+# CONFIG_AF_RXRPC_INJECT_RX_DELAY is not set
+CONFIG_AF_RXRPC_DEBUG=y
+CONFIG_RXKAD=y
+# CONFIG_RXPERF is not set
+CONFIG_AF_KCM=m
+CONFIG_STREAM_PARSER=y
+CONFIG_MCTP=y
+CONFIG_MCTP_FLOWS=y
+CONFIG_FIB_RULES=y
+CONFIG_WIRELESS=y
+CONFIG_WIRELESS_EXT=y
+CONFIG_WEXT_CORE=y
+CONFIG_WEXT_PROC=y
+CONFIG_WEXT_SPY=y
+CONFIG_WEXT_PRIV=y
+CONFIG_CFG80211=m
+# CONFIG_NL80211_TESTMODE is not set
+# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_REQUIRE_SIGNED_REGDB=y
+CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS=y
+CONFIG_CFG80211_DEFAULT_PS=y
+CONFIG_CFG80211_DEBUGFS=y
+CONFIG_CFG80211_CRDA_SUPPORT=y
+CONFIG_CFG80211_WEXT=y
+CONFIG_CFG80211_WEXT_EXPORT=y
+CONFIG_LIB80211=m
+CONFIG_LIB80211_CRYPT_WEP=m
+CONFIG_LIB80211_CRYPT_CCMP=m
+CONFIG_LIB80211_CRYPT_TKIP=m
+# CONFIG_LIB80211_DEBUG is not set
+CONFIG_MAC80211=m
+CONFIG_MAC80211_HAS_RC=y
+CONFIG_MAC80211_RC_MINSTREL=y
+CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y
+CONFIG_MAC80211_RC_DEFAULT="minstrel_ht"
+CONFIG_MAC80211_MESH=y
+CONFIG_MAC80211_LEDS=y
+CONFIG_MAC80211_DEBUGFS=y
+# CONFIG_MAC80211_MESSAGE_TRACING is not set
+# CONFIG_MAC80211_DEBUG_MENU is not set
+CONFIG_MAC80211_STA_HASH_MAX_SIZE=0
+CONFIG_RFKILL=m
+CONFIG_RFKILL_LEDS=y
+CONFIG_RFKILL_INPUT=y
+CONFIG_RFKILL_GPIO=m
+CONFIG_NET_9P=m
+CONFIG_NET_9P_FD=m
+CONFIG_NET_9P_VIRTIO=m
+CONFIG_NET_9P_XEN=m
+CONFIG_NET_9P_RDMA=m
+# CONFIG_NET_9P_DEBUG is not set
+# CONFIG_CAIF is not set
+CONFIG_CEPH_LIB=m
+CONFIG_CEPH_LIB_PRETTYDEBUG=y
+CONFIG_CEPH_LIB_USE_DNS_RESOLVER=y
+CONFIG_NFC=m
+CONFIG_NFC_DIGITAL=m
+CONFIG_NFC_NCI=m
+CONFIG_NFC_NCI_SPI=m
+CONFIG_NFC_NCI_UART=m
+CONFIG_NFC_HCI=m
+CONFIG_NFC_SHDLC=y
+
+#
+# Near Field Communication (NFC) devices
+#
+CONFIG_NFC_TRF7970A=m
+CONFIG_NFC_MEI_PHY=m
+CONFIG_NFC_SIM=m
+CONFIG_NFC_PORT100=m
+CONFIG_NFC_VIRTUAL_NCI=m
+CONFIG_NFC_FDP=m
+CONFIG_NFC_FDP_I2C=m
+CONFIG_NFC_PN544=m
+CONFIG_NFC_PN544_I2C=m
+CONFIG_NFC_PN544_MEI=m
+CONFIG_NFC_PN533=m
+CONFIG_NFC_PN533_USB=m
+CONFIG_NFC_PN533_I2C=m
+CONFIG_NFC_PN532_UART=m
+CONFIG_NFC_MICROREAD=m
+CONFIG_NFC_MICROREAD_I2C=m
+CONFIG_NFC_MICROREAD_MEI=m
+CONFIG_NFC_MRVL=m
+CONFIG_NFC_MRVL_USB=m
+CONFIG_NFC_MRVL_UART=m
+CONFIG_NFC_MRVL_I2C=m
+CONFIG_NFC_MRVL_SPI=m
+CONFIG_NFC_ST21NFCA=m
+CONFIG_NFC_ST21NFCA_I2C=m
+CONFIG_NFC_ST_NCI=m
+CONFIG_NFC_ST_NCI_I2C=m
+CONFIG_NFC_ST_NCI_SPI=m
+CONFIG_NFC_NXP_NCI=m
+CONFIG_NFC_NXP_NCI_I2C=m
+CONFIG_NFC_S3FWRN5=m
+CONFIG_NFC_S3FWRN5_I2C=m
+CONFIG_NFC_S3FWRN82_UART=m
+CONFIG_NFC_ST95HF=m
+# end of Near Field Communication (NFC) devices
+
+CONFIG_PSAMPLE=m
+CONFIG_NET_IFE=m
+CONFIG_LWTUNNEL=y
+CONFIG_LWTUNNEL_BPF=y
+CONFIG_DST_CACHE=y
+CONFIG_GRO_CELLS=y
+CONFIG_SOCK_VALIDATE_XMIT=y
+CONFIG_NET_IEEE8021Q_HELPERS=y
+CONFIG_NET_SELFTESTS=m
+CONFIG_NET_SOCK_MSG=y
+CONFIG_NET_DEVLINK=y
+CONFIG_PAGE_POOL=y
+CONFIG_PAGE_POOL_STATS=y
+CONFIG_FAILOVER=m
+CONFIG_ETHTOOL_NETLINK=y
+
+#
+# Device Drivers
+#
+CONFIG_HAVE_EISA=y
+# CONFIG_EISA is not set
+CONFIG_HAVE_PCI=y
+CONFIG_GENERIC_PCI_IOMAP=y
+CONFIG_PCI=y
+CONFIG_PCI_DOMAINS=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_HOTPLUG_PCI_PCIE=y
+CONFIG_PCIEAER=y
+CONFIG_PCIEAER_INJECT=m
+CONFIG_PCIEAER_CXL=y
+CONFIG_PCIE_ECRC=y
+CONFIG_PCIEASPM=y
+CONFIG_PCIEASPM_DEFAULT=y
+# CONFIG_PCIEASPM_POWERSAVE is not set
+# CONFIG_PCIEASPM_POWER_SUPERSAVE is not set
+# CONFIG_PCIEASPM_PERFORMANCE is not set
+CONFIG_PCIE_PME=y
+CONFIG_PCIE_DPC=y
+CONFIG_PCIE_PTM=y
+CONFIG_PCIE_EDR=y
+CONFIG_PCI_MSI=y
+CONFIG_PCI_QUIRKS=y
+# CONFIG_PCI_DEBUG is not set
+# CONFIG_PCI_REALLOC_ENABLE_AUTO is not set
+CONFIG_PCI_STUB=y
+CONFIG_PCI_PF_STUB=m
+CONFIG_XEN_PCIDEV_FRONTEND=m
+CONFIG_PCI_ATS=y
+CONFIG_PCI_DOE=y
+CONFIG_PCI_LOCKLESS_CONFIG=y
+CONFIG_PCI_IOV=y
+CONFIG_PCI_PRI=y
+CONFIG_PCI_PASID=y
+CONFIG_PCI_P2PDMA=y
+CONFIG_PCI_LABEL=y
+CONFIG_PCI_HYPERV=m
+CONFIG_VGA_ARB=y
+CONFIG_VGA_ARB_MAX_GPUS=10
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_ACPI=y
+CONFIG_HOTPLUG_PCI_ACPI_IBM=m
+CONFIG_HOTPLUG_PCI_CPCI=y
+CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m
+CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m
+CONFIG_HOTPLUG_PCI_SHPC=y
+
+#
+# PCI controller drivers
+#
+CONFIG_VMD=m
+CONFIG_PCI_HYPERV_INTERFACE=m
+
+#
+# Cadence-based PCIe controllers
+#
+# end of Cadence-based PCIe controllers
+
+#
+# DesignWare-based PCIe controllers
+#
+CONFIG_PCIE_DW=y
+CONFIG_PCIE_DW_HOST=y
+CONFIG_PCI_MESON=m
+CONFIG_PCIE_DW_PLAT=y
+CONFIG_PCIE_DW_PLAT_HOST=y
+# end of DesignWare-based PCIe controllers
+
+#
+# Mobiveil-based PCIe controllers
+#
+# end of Mobiveil-based PCIe controllers
+
+#
+# PLDA-based PCIe controllers
+#
+# end of PLDA-based PCIe controllers
+# end of PCI controller drivers
+
+#
+# PCI Endpoint
+#
+# CONFIG_PCI_ENDPOINT is not set
+# end of PCI Endpoint
+
+#
+# PCI switch controller drivers
+#
+CONFIG_PCI_SW_SWITCHTEC=m
+# end of PCI switch controller drivers
+
+CONFIG_CXL_BUS=m
+CONFIG_CXL_PCI=m
+# CONFIG_CXL_MEM_RAW_COMMANDS is not set
+CONFIG_CXL_ACPI=m
+CONFIG_CXL_PMEM=m
+CONFIG_CXL_MEM=m
+CONFIG_CXL_PORT=m
+CONFIG_CXL_SUSPEND=y
+CONFIG_CXL_REGION=y
+# CONFIG_CXL_REGION_INVALIDATION_TEST is not set
+CONFIG_PCCARD=m
+CONFIG_PCMCIA=m
+CONFIG_PCMCIA_LOAD_CIS=y
+CONFIG_CARDBUS=y
+
+#
+# PC-card bridges
+#
+CONFIG_YENTA=m
+CONFIG_YENTA_O2=y
+CONFIG_YENTA_RICOH=y
+CONFIG_YENTA_TI=y
+CONFIG_YENTA_ENE_TUNE=y
+CONFIG_YENTA_TOSHIBA=y
+CONFIG_PD6729=m
+CONFIG_I82092=m
+CONFIG_PCCARD_NONSTATIC=y
+# CONFIG_RAPIDIO is not set
+
+#
+# Generic Driver Options
+#
+CONFIG_AUXILIARY_BUS=y
+# CONFIG_UEVENT_HELPER is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DEVTMPFS_SAFE=y
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+
+#
+# Firmware loader
+#
+CONFIG_FW_LOADER=y
+CONFIG_FW_LOADER_DEBUG=y
+CONFIG_FW_LOADER_PAGED_BUF=y
+CONFIG_FW_LOADER_SYSFS=y
+CONFIG_EXTRA_FIRMWARE=""
+# CONFIG_FW_LOADER_USER_HELPER is not set
+CONFIG_FW_LOADER_COMPRESS=y
+CONFIG_FW_LOADER_COMPRESS_XZ=y
+CONFIG_FW_LOADER_COMPRESS_ZSTD=y
+CONFIG_FW_CACHE=y
+CONFIG_FW_UPLOAD=y
+# end of Firmware loader
+
+CONFIG_WANT_DEV_COREDUMP=y
+CONFIG_ALLOW_DEV_COREDUMP=y
+CONFIG_DEV_COREDUMP=y
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set
+CONFIG_HMEM_REPORTING=y
+# CONFIG_TEST_ASYNC_DRIVER_PROBE is not set
+CONFIG_SYS_HYPERVISOR=y
+CONFIG_GENERIC_CPU_DEVICES=y
+CONFIG_GENERIC_CPU_AUTOPROBE=y
+CONFIG_GENERIC_CPU_VULNERABILITIES=y
+CONFIG_SOC_BUS=y
+CONFIG_REGMAP=y
+CONFIG_REGMAP_I2C=y
+CONFIG_REGMAP_SLIMBUS=m
+CONFIG_REGMAP_SPI=y
+CONFIG_REGMAP_W1=m
+CONFIG_REGMAP_MMIO=y
+CONFIG_REGMAP_IRQ=y
+CONFIG_REGMAP_SOUNDWIRE=m
+CONFIG_REGMAP_SOUNDWIRE_MBQ=m
+CONFIG_REGMAP_SCCB=m
+CONFIG_REGMAP_SPI_AVMM=m
+CONFIG_DMA_SHARED_BUFFER=y
+# CONFIG_DMA_FENCE_TRACE is not set
+# CONFIG_FW_DEVLINK_SYNC_STATE_TIMEOUT is not set
+# end of Generic Driver Options
+
+#
+# Bus devices
+#
+CONFIG_MHI_BUS=m
+# CONFIG_MHI_BUS_DEBUG is not set
+CONFIG_MHI_BUS_PCI_GENERIC=m
+CONFIG_MHI_BUS_EP=m
+# end of Bus devices
+
+#
+# Cache Drivers
+#
+# end of Cache Drivers
+
+CONFIG_CONNECTOR=y
+CONFIG_PROC_EVENTS=y
+
+#
+# Firmware Drivers
+#
+
+#
+# ARM System Control and Management Interface Protocol
+#
+# end of ARM System Control and Management Interface Protocol
+
+CONFIG_EDD=m
+# CONFIG_EDD_OFF is not set
+CONFIG_FIRMWARE_MEMMAP=y
+CONFIG_DMIID=y
+CONFIG_DMI_SYSFS=y
+CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y
+CONFIG_ISCSI_IBFT_FIND=y
+CONFIG_ISCSI_IBFT=m
+CONFIG_FW_CFG_SYSFS=m
+# CONFIG_FW_CFG_SYSFS_CMDLINE is not set
+CONFIG_SYSFB=y
+# CONFIG_SYSFB_SIMPLEFB is not set
+CONFIG_FW_CS_DSP=m
+CONFIG_GOOGLE_FIRMWARE=y
+# CONFIG_GOOGLE_SMI is not set
+CONFIG_GOOGLE_CBMEM=m
+CONFIG_GOOGLE_COREBOOT_TABLE=m
+CONFIG_GOOGLE_MEMCONSOLE=m
+# CONFIG_GOOGLE_MEMCONSOLE_X86_LEGACY is not set
+CONFIG_GOOGLE_FRAMEBUFFER_COREBOOT=m
+CONFIG_GOOGLE_MEMCONSOLE_COREBOOT=m
+CONFIG_GOOGLE_VPD=m
+
+#
+# EFI (Extensible Firmware Interface) Support
+#
+CONFIG_EFI_ESRT=y
+CONFIG_EFI_VARS_PSTORE=y
+CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE=y
+CONFIG_EFI_SOFT_RESERVE=y
+CONFIG_EFI_DXE_MEM_ATTRIBUTES=y
+CONFIG_EFI_RUNTIME_WRAPPERS=y
+CONFIG_EFI_BOOTLOADER_CONTROL=m
+CONFIG_EFI_CAPSULE_LOADER=m
+# CONFIG_EFI_TEST is not set
+CONFIG_EFI_DEV_PATH_PARSER=y
+CONFIG_APPLE_PROPERTIES=y
+# CONFIG_RESET_ATTACK_MITIGATION is not set
+CONFIG_EFI_RCI2_TABLE=y
+# CONFIG_EFI_DISABLE_PCI_DMA is not set
+CONFIG_EFI_EARLYCON=y
+CONFIG_EFI_CUSTOM_SSDT_OVERLAYS=y
+# CONFIG_EFI_DISABLE_RUNTIME is not set
+CONFIG_EFI_COCO_SECRET=y
+CONFIG_UNACCEPTED_MEMORY=y
+CONFIG_EFI_EMBEDDED_FIRMWARE=y
+# end of EFI (Extensible Firmware Interface) Support
+
+CONFIG_UEFI_CPER=y
+CONFIG_UEFI_CPER_X86=y
+
+#
+# Qualcomm firmware drivers
+#
+# end of Qualcomm firmware drivers
+
+#
+# Tegra firmware driver
+#
+# end of Tegra firmware driver
+# end of Firmware Drivers
+
+CONFIG_GNSS=m
+CONFIG_GNSS_SERIAL=m
+CONFIG_GNSS_MTK_SERIAL=m
+CONFIG_GNSS_SIRF_SERIAL=m
+CONFIG_GNSS_UBX_SERIAL=m
+CONFIG_GNSS_USB=m
+CONFIG_MTD=m
+# CONFIG_MTD_TESTS is not set
+
+#
+# Partition parsers
+#
+# CONFIG_MTD_CMDLINE_PARTS is not set
+# CONFIG_MTD_REDBOOT_PARTS is not set
+# end of Partition parsers
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_BLKDEVS=m
+CONFIG_MTD_BLOCK=m
+# CONFIG_MTD_BLOCK_RO is not set
+
+#
+# Note that in some cases UBI block is preferred. See MTD_UBI_BLOCK.
+#
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_SM_FTL is not set
+# CONFIG_MTD_OOPS is not set
+CONFIG_MTD_PSTORE=m
+# CONFIG_MTD_SWAP is not set
+CONFIG_MTD_PARTITIONED_MASTER=y
+
+#
+# RAM/ROM/Flash chip drivers
+#
+# CONFIG_MTD_CFI is not set
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_RAM is not set
+CONFIG_MTD_ROM=m
+# CONFIG_MTD_ABSENT is not set
+# end of RAM/ROM/Flash chip drivers
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+# CONFIG_MTD_PHYSMAP is not set
+# CONFIG_MTD_PLATRAM is not set
+# end of Mapping drivers for chip access
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_PMC551 is not set
+# CONFIG_MTD_DATAFLASH is not set
+# CONFIG_MTD_MCHP23K256 is not set
+# CONFIG_MTD_MCHP48L640 is not set
+# CONFIG_MTD_SST25L is not set
+# CONFIG_MTD_SLRAM is not set
+CONFIG_MTD_PHRAM=m
+CONFIG_MTD_MTDRAM=m
+CONFIG_MTDRAM_TOTAL_SIZE=4096
+CONFIG_MTDRAM_ERASE_SIZE=128
+CONFIG_MTD_BLOCK2MTD=m
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOCG3 is not set
+# end of Self-contained MTD device drivers
+
+#
+# NAND
+#
+CONFIG_MTD_NAND_CORE=m
+# CONFIG_MTD_ONENAND is not set
+CONFIG_MTD_RAW_NAND=m
+
+#
+# Raw/parallel NAND flash controllers
+#
+# CONFIG_MTD_NAND_DENALI_PCI is not set
+# CONFIG_MTD_NAND_CAFE is not set
+# CONFIG_MTD_NAND_MXIC is not set
+# CONFIG_MTD_NAND_GPIO is not set
+# CONFIG_MTD_NAND_PLATFORM is not set
+# CONFIG_MTD_NAND_ARASAN is not set
+
+#
+# Misc
+#
+CONFIG_MTD_NAND_NANDSIM=m
+# CONFIG_MTD_NAND_RICOH is not set
+# CONFIG_MTD_NAND_DISKONCHIP is not set
+# CONFIG_MTD_SPI_NAND is not set
+
+#
+# ECC engine support
+#
+CONFIG_MTD_NAND_ECC=y
+CONFIG_MTD_NAND_ECC_SW_HAMMING=y
+CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y
+CONFIG_MTD_NAND_ECC_SW_BCH=y
+CONFIG_MTD_NAND_ECC_MXIC=y
+# end of ECC engine support
+# end of NAND
+
+#
+# LPDDR & LPDDR2 PCM memory drivers
+#
+# CONFIG_MTD_LPDDR is not set
+# end of LPDDR & LPDDR2 PCM memory drivers
+
+CONFIG_MTD_SPI_NOR=m
+CONFIG_MTD_SPI_NOR_USE_4K_SECTORS=y
+# CONFIG_MTD_SPI_NOR_SWP_DISABLE is not set
+CONFIG_MTD_SPI_NOR_SWP_DISABLE_ON_VOLATILE=y
+# CONFIG_MTD_SPI_NOR_SWP_KEEP is not set
+CONFIG_MTD_UBI=m
+CONFIG_MTD_UBI_WL_THRESHOLD=4096
+CONFIG_MTD_UBI_BEB_LIMIT=20
+# CONFIG_MTD_UBI_FASTMAP is not set
+# CONFIG_MTD_UBI_GLUEBI is not set
+# CONFIG_MTD_UBI_BLOCK is not set
+CONFIG_MTD_UBI_NVMEM=m
+# CONFIG_MTD_HYPERBUS is not set
+# CONFIG_OF is not set
+CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y
+CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
+CONFIG_PARPORT_SERIAL=m
+CONFIG_PARPORT_PC_FIFO=y
+CONFIG_PARPORT_PC_SUPERIO=y
+CONFIG_PARPORT_PC_PCMCIA=m
+CONFIG_PARPORT_1284=y
+CONFIG_PARPORT_NOT_PC=y
+CONFIG_PNP=y
+CONFIG_PNP_DEBUG_MESSAGES=y
+
+#
+# Protocols
+#
+CONFIG_PNPACPI=y
+CONFIG_BLK_DEV=y
+CONFIG_BLK_DEV_NULL_BLK=m
+CONFIG_BLK_DEV_FD=m
+# CONFIG_BLK_DEV_FD_RAWCMD is not set
+CONFIG_CDROM=m
+CONFIG_BLK_DEV_PCIESSD_MTIP32XX=m
+CONFIG_ZRAM=m
+# CONFIG_ZRAM_DEF_COMP_LZORLE is not set
+CONFIG_ZRAM_DEF_COMP_ZSTD=y
+# CONFIG_ZRAM_DEF_COMP_LZ4 is not set
+# CONFIG_ZRAM_DEF_COMP_LZO is not set
+# CONFIG_ZRAM_DEF_COMP_LZ4HC is not set
+# CONFIG_ZRAM_DEF_COMP_842 is not set
+CONFIG_ZRAM_DEF_COMP="zstd"
+CONFIG_ZRAM_WRITEBACK=y
+CONFIG_ZRAM_TRACK_ENTRY_ACTIME=y
+CONFIG_ZRAM_MEMORY_TRACKING=y
+CONFIG_ZRAM_MULTI_COMP=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_LOOP_MIN_COUNT=0
+CONFIG_BLK_DEV_DRBD=m
+# CONFIG_DRBD_FAULT_INJECTION is not set
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=m
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_CDROM_PKTCDVD=m
+CONFIG_CDROM_PKTCDVD_BUFFERS=8
+# CONFIG_CDROM_PKTCDVD_WCACHE is not set
+CONFIG_ATA_OVER_ETH=m
+CONFIG_XEN_BLKDEV_FRONTEND=m
+CONFIG_XEN_BLKDEV_BACKEND=m
+CONFIG_VIRTIO_BLK=m
+CONFIG_BLK_DEV_RBD=m
+CONFIG_BLK_DEV_UBLK=m
+CONFIG_BLKDEV_UBLK_LEGACY_OPCODES=y
+CONFIG_BLK_DEV_RNBD=y
+CONFIG_BLK_DEV_RNBD_CLIENT=m
+CONFIG_BLK_DEV_RNBD_SERVER=m
+
+#
+# NVME Support
+#
+CONFIG_NVME_KEYRING=m
+CONFIG_NVME_AUTH=m
+CONFIG_NVME_CORE=m
+CONFIG_BLK_DEV_NVME=m
+CONFIG_NVME_MULTIPATH=y
+CONFIG_NVME_VERBOSE_ERRORS=y
+CONFIG_NVME_HWMON=y
+CONFIG_NVME_FABRICS=m
+CONFIG_NVME_RDMA=m
+CONFIG_NVME_FC=m
+CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
+CONFIG_NVME_HOST_AUTH=y
+CONFIG_NVME_TARGET=m
+CONFIG_NVME_TARGET_DEBUGFS=y
+CONFIG_NVME_TARGET_PASSTHRU=y
+CONFIG_NVME_TARGET_LOOP=m
+CONFIG_NVME_TARGET_RDMA=m
+CONFIG_NVME_TARGET_FC=m
+CONFIG_NVME_TARGET_FCLOOP=m
+CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
+CONFIG_NVME_TARGET_AUTH=y
+# end of NVME Support
+
+#
+# Misc devices
+#
+CONFIG_SENSORS_LIS3LV02D=m
+CONFIG_AD525X_DPOT=m
+CONFIG_AD525X_DPOT_I2C=m
+CONFIG_AD525X_DPOT_SPI=m
+# CONFIG_DUMMY_IRQ is not set
+CONFIG_IBM_ASM=m
+CONFIG_PHANTOM=m
+CONFIG_TIFM_CORE=m
+CONFIG_TIFM_7XX1=m
+CONFIG_ICS932S401=m
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_SMPRO_ERRMON=m
+CONFIG_SMPRO_MISC=m
+CONFIG_HP_ILO=m
+CONFIG_APDS9802ALS=m
+CONFIG_ISL29003=m
+CONFIG_ISL29020=m
+CONFIG_SENSORS_TSL2550=m
+CONFIG_SENSORS_BH1770=m
+CONFIG_SENSORS_APDS990X=m
+CONFIG_HMC6352=m
+CONFIG_DS1682=m
+CONFIG_VMWARE_BALLOON=m
+CONFIG_LATTICE_ECP3_CONFIG=m
+# CONFIG_SRAM is not set
+CONFIG_DW_XDATA_PCIE=m
+CONFIG_PCI_ENDPOINT_TEST=m
+CONFIG_XILINX_SDFEC=m
+CONFIG_MISC_RTSX=m
+CONFIG_NTSYNC=y
+CONFIG_TPS6594_ESM=m
+CONFIG_TPS6594_PFSM=m
+CONFIG_NSM=m
+CONFIG_C2PORT=m
+CONFIG_C2PORT_DURAMAR_2150=m
+
+#
+# EEPROM support
+#
+CONFIG_EEPROM_AT24=m
+# CONFIG_EEPROM_AT25 is not set
+CONFIG_EEPROM_MAX6875=m
+CONFIG_EEPROM_93CX6=m
+# CONFIG_EEPROM_93XX46 is not set
+CONFIG_EEPROM_IDT_89HPESX=m
+CONFIG_EEPROM_EE1004=m
+# end of EEPROM support
+
+CONFIG_CB710_CORE=m
+# CONFIG_CB710_DEBUG is not set
+CONFIG_CB710_DEBUG_ASSUMPTIONS=y
+
+#
+# Texas Instruments shared transport line discipline
+#
+CONFIG_TI_ST=m
+# end of Texas Instruments shared transport line discipline
+
+CONFIG_SENSORS_LIS3_I2C=m
+CONFIG_ALTERA_STAPL=m
+CONFIG_INTEL_MEI=m
+CONFIG_INTEL_MEI_ME=m
+CONFIG_INTEL_MEI_TXE=m
+CONFIG_INTEL_MEI_GSC=m
+CONFIG_INTEL_MEI_VSC_HW=m
+CONFIG_INTEL_MEI_VSC=m
+CONFIG_INTEL_MEI_HDCP=m
+CONFIG_INTEL_MEI_PXP=m
+CONFIG_INTEL_MEI_GSC_PROXY=m
+CONFIG_VMWARE_VMCI=m
+CONFIG_GENWQE=m
+CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY=0
+CONFIG_ECHO=m
+CONFIG_BCM_VK=m
+CONFIG_BCM_VK_TTY=y
+CONFIG_MISC_ALCOR_PCI=m
+CONFIG_MISC_RTSX_PCI=m
+CONFIG_MISC_RTSX_USB=m
+CONFIG_UACCE=m
+CONFIG_PVPANIC=y
+CONFIG_PVPANIC_MMIO=m
+CONFIG_PVPANIC_PCI=m
+CONFIG_GP_PCI1XXXX=m
+CONFIG_KEBA_CP500=m
+# end of Misc devices
+
+#
+# SCSI device support
+#
+CONFIG_SCSI_MOD=y
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI_COMMON=y
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+CONFIG_SCSI_NETLINK=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=m
+CONFIG_BLK_DEV_BSG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SCAN_ASYNC=y
+
+#
+# SCSI Transports
+#
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_FC_ATTRS=m
+CONFIG_SCSI_ISCSI_ATTRS=m
+CONFIG_SCSI_SAS_ATTRS=m
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SAS_ATA=y
+CONFIG_SCSI_SAS_HOST_SMP=y
+CONFIG_SCSI_SRP_ATTRS=m
+# end of SCSI Transports
+
+CONFIG_SCSI_LOWLEVEL=y
+CONFIG_ISCSI_TCP=m
+CONFIG_ISCSI_BOOT_SYSFS=m
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_SCSI_BNX2X_FCOE=m
+CONFIG_BE2ISCSI=m
+CONFIG_BLK_DEV_3W_XXXX_RAID=m
+CONFIG_SCSI_HPSA=m
+CONFIG_SCSI_3W_9XXX=m
+CONFIG_SCSI_3W_SAS=m
+CONFIG_SCSI_ACARD=m
+CONFIG_SCSI_AACRAID=m
+CONFIG_SCSI_AIC7XXX=m
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_RESET_DELAY_MS=15000
+CONFIG_AIC7XXX_DEBUG_ENABLE=y
+CONFIG_AIC7XXX_DEBUG_MASK=0
+CONFIG_AIC7XXX_REG_PRETTY_PRINT=y
+CONFIG_SCSI_AIC79XX=m
+CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_RESET_DELAY_MS=15000
+CONFIG_AIC79XX_DEBUG_ENABLE=y
+CONFIG_AIC79XX_DEBUG_MASK=0
+CONFIG_AIC79XX_REG_PRETTY_PRINT=y
+CONFIG_SCSI_AIC94XX=m
+CONFIG_AIC94XX_DEBUG=y
+CONFIG_SCSI_MVSAS=m
+CONFIG_SCSI_MVSAS_DEBUG=y
+CONFIG_SCSI_MVSAS_TASKLET=y
+CONFIG_SCSI_MVUMI=m
+CONFIG_SCSI_ADVANSYS=m
+CONFIG_SCSI_ARCMSR=m
+CONFIG_SCSI_ESAS2R=m
+CONFIG_MEGARAID_NEWGEN=y
+CONFIG_MEGARAID_MM=m
+CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_LEGACY=m
+CONFIG_MEGARAID_SAS=m
+CONFIG_SCSI_MPT3SAS=m
+CONFIG_SCSI_MPT2SAS_MAX_SGE=128
+CONFIG_SCSI_MPT3SAS_MAX_SGE=128
+CONFIG_SCSI_MPT2SAS=m
+CONFIG_SCSI_MPI3MR=m
+CONFIG_SCSI_SMARTPQI=m
+CONFIG_SCSI_HPTIOP=m
+CONFIG_SCSI_BUSLOGIC=m
+CONFIG_SCSI_FLASHPOINT=y
+CONFIG_SCSI_MYRB=m
+CONFIG_SCSI_MYRS=m
+CONFIG_VMWARE_PVSCSI=m
+CONFIG_XEN_SCSI_FRONTEND=m
+CONFIG_HYPERV_STORAGE=m
+CONFIG_LIBFC=m
+CONFIG_LIBFCOE=m
+CONFIG_FCOE=m
+CONFIG_FCOE_FNIC=m
+CONFIG_SCSI_SNIC=m
+# CONFIG_SCSI_SNIC_DEBUG_FS is not set
+CONFIG_SCSI_DMX3191D=m
+CONFIG_SCSI_FDOMAIN=m
+CONFIG_SCSI_FDOMAIN_PCI=m
+CONFIG_SCSI_ISCI=m
+CONFIG_SCSI_IPS=m
+CONFIG_SCSI_INITIO=m
+CONFIG_SCSI_INIA100=m
+CONFIG_SCSI_PPA=m
+CONFIG_SCSI_IMM=m
+# CONFIG_SCSI_IZIP_SLOW_CTR is not set
+CONFIG_SCSI_STEX=m
+CONFIG_SCSI_SYM53C8XX_2=m
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
+CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
+CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
+CONFIG_SCSI_SYM53C8XX_MMIO=y
+CONFIG_SCSI_IPR=m
+CONFIG_SCSI_IPR_TRACE=y
+CONFIG_SCSI_IPR_DUMP=y
+CONFIG_SCSI_QLOGIC_1280=m
+CONFIG_SCSI_QLA_FC=m
+CONFIG_TCM_QLA2XXX=m
+# CONFIG_TCM_QLA2XXX_DEBUG is not set
+CONFIG_SCSI_QLA_ISCSI=m
+CONFIG_QEDI=m
+CONFIG_QEDF=m
+CONFIG_SCSI_LPFC=m
+# CONFIG_SCSI_LPFC_DEBUG_FS is not set
+CONFIG_SCSI_EFCT=m
+CONFIG_SCSI_DC395x=m
+CONFIG_SCSI_AM53C974=m
+CONFIG_SCSI_WD719X=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_SCSI_PMCRAID=m
+CONFIG_SCSI_PM8001=m
+CONFIG_SCSI_BFA_FC=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_CHELSIO_FCOE=m
+CONFIG_SCSI_LOWLEVEL_PCMCIA=y
+CONFIG_PCMCIA_AHA152X=m
+CONFIG_PCMCIA_FDOMAIN=m
+CONFIG_PCMCIA_QLOGIC=m
+CONFIG_PCMCIA_SYM53C500=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+# end of SCSI device support
+
+CONFIG_ATA=y
+CONFIG_SATA_HOST=y
+CONFIG_PATA_TIMINGS=y
+CONFIG_ATA_VERBOSE_ERROR=y
+CONFIG_ATA_FORCE=y
+CONFIG_ATA_ACPI=y
+CONFIG_SATA_ZPODD=y
+CONFIG_SATA_PMP=y
+
+#
+# Controllers with non-SFF native interface
+#
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_MOBILE_LPM_POLICY=3
+CONFIG_SATA_AHCI_PLATFORM=m
+CONFIG_AHCI_DWC=m
+CONFIG_SATA_INIC162X=m
+CONFIG_SATA_ACARD_AHCI=m
+CONFIG_SATA_SIL24=m
+CONFIG_ATA_SFF=y
+
+#
+# SFF controllers with custom DMA interface
+#
+CONFIG_PDC_ADMA=m
+CONFIG_SATA_QSTOR=m
+CONFIG_SATA_SX4=m
+CONFIG_ATA_BMDMA=y
+
+#
+# SATA SFF controllers with BMDMA
+#
+CONFIG_ATA_PIIX=m
+CONFIG_SATA_DWC=m
+# CONFIG_SATA_DWC_OLD_DMA is not set
+CONFIG_SATA_MV=m
+CONFIG_SATA_NV=m
+CONFIG_SATA_PROMISE=m
+CONFIG_SATA_SIL=m
+CONFIG_SATA_SIS=m
+CONFIG_SATA_SVW=m
+CONFIG_SATA_ULI=m
+CONFIG_SATA_VIA=m
+CONFIG_SATA_VITESSE=m
+
+#
+# PATA SFF controllers with BMDMA
+#
+CONFIG_PATA_ALI=m
+CONFIG_PATA_AMD=m
+CONFIG_PATA_ARTOP=m
+CONFIG_PATA_ATIIXP=m
+CONFIG_PATA_ATP867X=m
+CONFIG_PATA_CMD64X=m
+CONFIG_PATA_CYPRESS=m
+CONFIG_PATA_EFAR=m
+CONFIG_PATA_HPT366=m
+CONFIG_PATA_HPT37X=m
+CONFIG_PATA_HPT3X2N=m
+CONFIG_PATA_HPT3X3=m
+CONFIG_PATA_HPT3X3_DMA=y
+CONFIG_PATA_IT8213=m
+CONFIG_PATA_IT821X=m
+CONFIG_PATA_JMICRON=m
+CONFIG_PATA_MARVELL=m
+CONFIG_PATA_NETCELL=m
+CONFIG_PATA_NINJA32=m
+CONFIG_PATA_NS87415=m
+CONFIG_PATA_OLDPIIX=m
+CONFIG_PATA_OPTIDMA=m
+CONFIG_PATA_PDC2027X=m
+CONFIG_PATA_PDC_OLD=m
+CONFIG_PATA_RADISYS=m
+CONFIG_PATA_RDC=m
+CONFIG_PATA_SCH=m
+CONFIG_PATA_SERVERWORKS=m
+CONFIG_PATA_SIL680=m
+CONFIG_PATA_SIS=m
+CONFIG_PATA_TOSHIBA=m
+CONFIG_PATA_TRIFLEX=m
+CONFIG_PATA_VIA=m
+CONFIG_PATA_WINBOND=m
+
+#
+# PIO-only SFF controllers
+#
+CONFIG_PATA_CMD640_PCI=m
+CONFIG_PATA_MPIIX=m
+CONFIG_PATA_NS87410=m
+CONFIG_PATA_OPTI=m
+CONFIG_PATA_PCMCIA=m
+CONFIG_PATA_RZ1000=m
+CONFIG_PATA_PARPORT=m
+
+#
+# Parallel IDE protocol modules
+#
+CONFIG_PATA_PARPORT_ATEN=m
+CONFIG_PATA_PARPORT_BPCK=m
+CONFIG_PATA_PARPORT_BPCK6=m
+CONFIG_PATA_PARPORT_COMM=m
+CONFIG_PATA_PARPORT_DSTR=m
+CONFIG_PATA_PARPORT_FIT2=m
+CONFIG_PATA_PARPORT_FIT3=m
+CONFIG_PATA_PARPORT_EPAT=m
+CONFIG_PATA_PARPORT_EPATC8=y
+CONFIG_PATA_PARPORT_EPIA=m
+CONFIG_PATA_PARPORT_FRIQ=m
+CONFIG_PATA_PARPORT_FRPW=m
+CONFIG_PATA_PARPORT_KBIC=m
+CONFIG_PATA_PARPORT_KTTI=m
+CONFIG_PATA_PARPORT_ON20=m
+CONFIG_PATA_PARPORT_ON26=m
+
+#
+# Generic fallback / legacy drivers
+#
+CONFIG_PATA_ACPI=m
+CONFIG_ATA_GENERIC=m
+CONFIG_PATA_LEGACY=m
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_BITMAP_FILE=y
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MD_CLUSTER=m
+CONFIG_BCACHE=m
+# CONFIG_BCACHE_DEBUG is not set
+CONFIG_BCACHE_ASYNC_REGISTRATION=y
+CONFIG_BLK_DEV_DM_BUILTIN=y
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_DEBUG=y
+CONFIG_DM_BUFIO=m
+CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING=y
+# CONFIG_DM_DEBUG_BLOCK_STACK_TRACING is not set
+CONFIG_DM_BIO_PRISON=m
+CONFIG_DM_PERSISTENT_DATA=m
+CONFIG_DM_UNSTRIPED=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_CACHE=m
+CONFIG_DM_CACHE_SMQ=m
+CONFIG_DM_WRITECACHE=m
+CONFIG_DM_EBS=m
+CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_MULTIPATH_HST=m
+CONFIG_DM_MULTIPATH_IOA=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_DUST=m
+CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=m
+CONFIG_DM_VERITY=m
+CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
+CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING=y
+CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING=y
+CONFIG_DM_VERITY_FEC=y
+CONFIG_DM_SWITCH=m
+CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
+CONFIG_DM_ZONED=m
+CONFIG_DM_AUDIT=y
+CONFIG_DM_VDO=m
+CONFIG_TARGET_CORE=m
+CONFIG_TCM_IBLOCK=m
+CONFIG_TCM_FILEIO=m
+CONFIG_TCM_PSCSI=m
+CONFIG_TCM_USER2=m
+CONFIG_LOOPBACK_TARGET=m
+CONFIG_TCM_FC=m
+CONFIG_ISCSI_TARGET=m
+CONFIG_ISCSI_TARGET_CXGB4=m
+CONFIG_SBP_TARGET=m
+CONFIG_REMOTE_TARGET=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
+CONFIG_FUSION_MAX_SGE=128
+CONFIG_FUSION_CTL=m
+CONFIG_FUSION_LAN=m
+# CONFIG_FUSION_LOGGING is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+CONFIG_FIREWIRE=m
+CONFIG_FIREWIRE_OHCI=m
+CONFIG_FIREWIRE_SBP2=m
+CONFIG_FIREWIRE_NET=m
+CONFIG_FIREWIRE_NOSY=m
+# end of IEEE 1394 (FireWire) support
+
+CONFIG_MACINTOSH_DRIVERS=y
+CONFIG_MAC_EMUMOUSEBTN=m
+CONFIG_NETDEVICES=y
+CONFIG_MII=m
+CONFIG_NET_CORE=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_WIREGUARD=m
+# CONFIG_WIREGUARD_DEBUG is not set
+CONFIG_EQUALIZER=m
+CONFIG_NET_FC=y
+CONFIG_IFB=m
+CONFIG_NET_TEAM=m
+CONFIG_NET_TEAM_MODE_BROADCAST=m
+CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEAM_MODE_RANDOM=m
+CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m
+CONFIG_NET_TEAM_MODE_LOADBALANCE=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_IPVLAN_L3S=y
+CONFIG_IPVLAN=m
+CONFIG_IPVTAP=m
+CONFIG_VXLAN=m
+CONFIG_GENEVE=m
+CONFIG_BAREUDP=m
+CONFIG_GTP=m
+CONFIG_PFCP=m
+CONFIG_AMT=m
+CONFIG_MACSEC=m
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+# CONFIG_NETCONSOLE_EXTENDED_LOG is not set
+CONFIG_NETPOLL=y
+CONFIG_NET_POLL_CONTROLLER=y
+CONFIG_NTB_NETDEV=m
+CONFIG_TUN=m
+CONFIG_TAP=m
+# CONFIG_TUN_VNET_CROSS_LE is not set
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_NLMON=m
+CONFIG_NETKIT=y
+CONFIG_NET_VRF=m
+CONFIG_VSOCKMON=m
+CONFIG_MHI_NET=m
+CONFIG_SUNGEM_PHY=m
+# CONFIG_ARCNET is not set
+CONFIG_ATM_DRIVERS=y
+# CONFIG_ATM_DUMMY is not set
+CONFIG_ATM_TCP=m
+CONFIG_ATM_LANAI=m
+CONFIG_ATM_ENI=m
+# CONFIG_ATM_ENI_DEBUG is not set
+# CONFIG_ATM_ENI_TUNE_BURST is not set
+CONFIG_ATM_NICSTAR=m
+# CONFIG_ATM_NICSTAR_USE_SUNI is not set
+# CONFIG_ATM_NICSTAR_USE_IDT77105 is not set
+CONFIG_ATM_IDT77252=m
+# CONFIG_ATM_IDT77252_DEBUG is not set
+# CONFIG_ATM_IDT77252_RCV_ALL is not set
+CONFIG_ATM_IDT77252_USE_SUNI=y
+CONFIG_ATM_IA=m
+# CONFIG_ATM_IA_DEBUG is not set
+CONFIG_ATM_FORE200E=m
+CONFIG_ATM_FORE200E_USE_TASKLET=y
+CONFIG_ATM_FORE200E_TX_RETRY=16
+CONFIG_ATM_FORE200E_DEBUG=0
+CONFIG_ATM_HE=m
+CONFIG_ATM_HE_USE_SUNI=y
+CONFIG_ATM_SOLOS=m
+
+#
+# Distributed Switch Architecture drivers
+#
+CONFIG_B53=m
+CONFIG_B53_SPI_DRIVER=m
+CONFIG_B53_MDIO_DRIVER=m
+CONFIG_B53_MMAP_DRIVER=m
+CONFIG_B53_SRAB_DRIVER=m
+CONFIG_B53_SERDES=m
+CONFIG_NET_DSA_BCM_SF2=m
+CONFIG_NET_DSA_LOOP=m
+CONFIG_NET_DSA_HIRSCHMANN_HELLCREEK=m
+# CONFIG_NET_DSA_LANTIQ_GSWIP is not set
+CONFIG_NET_DSA_MT7530=m
+CONFIG_NET_DSA_MT7530_MDIO=m
+CONFIG_NET_DSA_MT7530_MMIO=m
+CONFIG_NET_DSA_MV88E6060=m
+CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON=m
+CONFIG_NET_DSA_MICROCHIP_KSZ9477_I2C=m
+CONFIG_NET_DSA_MICROCHIP_KSZ_SPI=m
+CONFIG_NET_DSA_MICROCHIP_KSZ_PTP=y
+CONFIG_NET_DSA_MICROCHIP_KSZ8863_SMI=m
+CONFIG_NET_DSA_MV88E6XXX=m
+CONFIG_NET_DSA_MV88E6XXX_PTP=y
+CONFIG_NET_DSA_MSCC_FELIX_DSA_LIB=m
+CONFIG_NET_DSA_MSCC_OCELOT_EXT=m
+CONFIG_NET_DSA_MSCC_SEVILLE=m
+CONFIG_NET_DSA_AR9331=m
+CONFIG_NET_DSA_QCA8K=m
+CONFIG_NET_DSA_QCA8K_LEDS_SUPPORT=y
+CONFIG_NET_DSA_SJA1105=m
+CONFIG_NET_DSA_SJA1105_PTP=y
+CONFIG_NET_DSA_SJA1105_TAS=y
+CONFIG_NET_DSA_SJA1105_VL=y
+CONFIG_NET_DSA_XRS700X=m
+CONFIG_NET_DSA_XRS700X_I2C=m
+CONFIG_NET_DSA_XRS700X_MDIO=m
+CONFIG_NET_DSA_REALTEK=m
+CONFIG_NET_DSA_SMSC_LAN9303=m
+CONFIG_NET_DSA_SMSC_LAN9303_I2C=m
+CONFIG_NET_DSA_SMSC_LAN9303_MDIO=m
+CONFIG_NET_DSA_VITESSE_VSC73XX=m
+CONFIG_NET_DSA_VITESSE_VSC73XX_SPI=m
+CONFIG_NET_DSA_VITESSE_VSC73XX_PLATFORM=m
+# end of Distributed Switch Architecture drivers
+
+CONFIG_ETHERNET=y
+CONFIG_MDIO=m
+CONFIG_NET_VENDOR_3COM=y
+CONFIG_PCMCIA_3C574=m
+CONFIG_PCMCIA_3C589=m
+CONFIG_VORTEX=m
+CONFIG_TYPHOON=m
+CONFIG_NET_VENDOR_ADAPTEC=y
+CONFIG_ADAPTEC_STARFIRE=m
+CONFIG_NET_VENDOR_AGERE=y
+CONFIG_ET131X=m
+CONFIG_NET_VENDOR_ALACRITECH=y
+CONFIG_SLICOSS=m
+CONFIG_NET_VENDOR_ALTEON=y
+CONFIG_ACENIC=m
+# CONFIG_ACENIC_OMIT_TIGON_I is not set
+CONFIG_ALTERA_TSE=m
+CONFIG_NET_VENDOR_AMAZON=y
+CONFIG_ENA_ETHERNET=m
+CONFIG_NET_VENDOR_AMD=y
+CONFIG_AMD8111_ETH=m
+CONFIG_PCNET32=m
+CONFIG_PCMCIA_NMCLAN=m
+CONFIG_AMD_XGBE=m
+CONFIG_AMD_XGBE_DCB=y
+CONFIG_AMD_XGBE_HAVE_ECC=y
+CONFIG_PDS_CORE=m
+CONFIG_NET_VENDOR_AQUANTIA=y
+CONFIG_AQTION=m
+CONFIG_NET_VENDOR_ARC=y
+CONFIG_NET_VENDOR_ASIX=y
+CONFIG_SPI_AX88796C=m
+CONFIG_SPI_AX88796C_COMPRESSION=y
+CONFIG_NET_VENDOR_ATHEROS=y
+CONFIG_ATL2=m
+CONFIG_ATL1=m
+CONFIG_ATL1E=m
+CONFIG_ATL1C=m
+CONFIG_ALX=m
+CONFIG_CX_ECAT=m
+CONFIG_NET_VENDOR_BROADCOM=y
+CONFIG_B44=m
+CONFIG_B44_PCI_AUTOSELECT=y
+CONFIG_B44_PCICORE_AUTOSELECT=y
+CONFIG_B44_PCI=y
+CONFIG_BCMGENET=m
+CONFIG_BNX2=m
+CONFIG_CNIC=m
+CONFIG_TIGON3=m
+CONFIG_TIGON3_HWMON=y
+CONFIG_BNX2X=m
+CONFIG_BNX2X_SRIOV=y
+CONFIG_SYSTEMPORT=m
+CONFIG_BNXT=m
+CONFIG_BNXT_SRIOV=y
+CONFIG_BNXT_FLOWER_OFFLOAD=y
+CONFIG_BNXT_DCB=y
+CONFIG_BNXT_HWMON=y
+CONFIG_NET_VENDOR_CADENCE=y
+CONFIG_MACB=m
+CONFIG_MACB_USE_HWSTAMP=y
+CONFIG_MACB_PCI=m
+CONFIG_NET_VENDOR_CAVIUM=y
+CONFIG_THUNDER_NIC_PF=m
+CONFIG_THUNDER_NIC_VF=m
+CONFIG_THUNDER_NIC_BGX=m
+CONFIG_THUNDER_NIC_RGX=m
+CONFIG_CAVIUM_PTP=m
+CONFIG_LIQUIDIO_CORE=m
+CONFIG_LIQUIDIO=m
+CONFIG_LIQUIDIO_VF=m
+CONFIG_NET_VENDOR_CHELSIO=y
+CONFIG_CHELSIO_T1=m
+CONFIG_CHELSIO_T1_1G=y
+CONFIG_CHELSIO_T3=m
+CONFIG_CHELSIO_T4=m
+CONFIG_CHELSIO_T4_DCB=y
+CONFIG_CHELSIO_T4_FCOE=y
+CONFIG_CHELSIO_T4VF=m
+CONFIG_CHELSIO_LIB=m
+CONFIG_CHELSIO_INLINE_CRYPTO=y
+CONFIG_CHELSIO_IPSEC_INLINE=m
+CONFIG_CHELSIO_TLS_DEVICE=m
+CONFIG_NET_VENDOR_CISCO=y
+CONFIG_ENIC=m
+CONFIG_NET_VENDOR_CORTINA=y
+CONFIG_NET_VENDOR_DAVICOM=y
+CONFIG_DM9051=m
+CONFIG_DNET=m
+CONFIG_NET_VENDOR_DEC=y
+CONFIG_NET_TULIP=y
+CONFIG_DE2104X=m
+CONFIG_DE2104X_DSL=0
+CONFIG_TULIP=m
+CONFIG_TULIP_MWI=y
+CONFIG_TULIP_MMIO=y
+CONFIG_TULIP_NAPI=y
+CONFIG_TULIP_NAPI_HW_MITIGATION=y
+CONFIG_WINBOND_840=m
+CONFIG_DM9102=m
+CONFIG_ULI526X=m
+CONFIG_PCMCIA_XIRCOM=m
+CONFIG_NET_VENDOR_DLINK=y
+CONFIG_DL2K=m
+CONFIG_SUNDANCE=m
+# CONFIG_SUNDANCE_MMIO is not set
+CONFIG_NET_VENDOR_EMULEX=y
+CONFIG_BE2NET=m
+CONFIG_BE2NET_HWMON=y
+CONFIG_BE2NET_BE2=y
+CONFIG_BE2NET_BE3=y
+CONFIG_BE2NET_LANCER=y
+CONFIG_BE2NET_SKYHAWK=y
+CONFIG_NET_VENDOR_ENGLEDER=y
+CONFIG_TSNEP=m
+# CONFIG_TSNEP_SELFTESTS is not set
+CONFIG_NET_VENDOR_EZCHIP=y
+CONFIG_NET_VENDOR_FUJITSU=y
+CONFIG_PCMCIA_FMVJ18X=m
+CONFIG_NET_VENDOR_FUNGIBLE=y
+CONFIG_FUN_CORE=m
+CONFIG_FUN_ETH=m
+CONFIG_NET_VENDOR_GOOGLE=y
+CONFIG_GVE=m
+CONFIG_NET_VENDOR_HUAWEI=y
+CONFIG_HINIC=m
+CONFIG_NET_VENDOR_I825XX=y
+CONFIG_NET_VENDOR_INTEL=y
+CONFIG_LIBETH=m
+CONFIG_LIBIE=m
+CONFIG_E100=m
+CONFIG_E1000=m
+CONFIG_E1000E=m
+CONFIG_E1000E_HWTS=y
+CONFIG_IGB=m
+CONFIG_IGB_HWMON=y
+CONFIG_IGB_DCA=y
+CONFIG_IGBVF=m
+CONFIG_IXGBE=m
+CONFIG_IXGBE_HWMON=y
+CONFIG_IXGBE_DCA=y
+CONFIG_IXGBE_DCB=y
+# CONFIG_IXGBE_IPSEC is not set
+CONFIG_IXGBEVF=m
+CONFIG_IXGBEVF_IPSEC=y
+CONFIG_I40E=m
+CONFIG_I40E_DCB=y
+CONFIG_IAVF=m
+CONFIG_I40EVF=m
+CONFIG_ICE=m
+CONFIG_ICE_HWMON=y
+CONFIG_ICE_SWITCHDEV=y
+CONFIG_ICE_HWTS=y
+CONFIG_FM10K=m
+CONFIG_IGC=m
+CONFIG_IGC_LEDS=y
+CONFIG_IDPF=m
+# CONFIG_IDPF_SINGLEQ is not set
+CONFIG_JME=m
+CONFIG_NET_VENDOR_ADI=y
+CONFIG_ADIN1110=m
+CONFIG_NET_VENDOR_LITEX=y
+CONFIG_NET_VENDOR_MARVELL=y
+CONFIG_MVMDIO=m
+CONFIG_SKGE=m
+# CONFIG_SKGE_DEBUG is not set
+CONFIG_SKGE_GENESIS=y
+CONFIG_SKY2=m
+# CONFIG_SKY2_DEBUG is not set
+CONFIG_OCTEON_EP=m
+CONFIG_OCTEON_EP_VF=m
+CONFIG_PRESTERA=m
+CONFIG_PRESTERA_PCI=m
+CONFIG_NET_VENDOR_MELLANOX=y
+CONFIG_MLX4_EN=m
+CONFIG_MLX4_EN_DCB=y
+CONFIG_MLX4_CORE=m
+CONFIG_MLX4_DEBUG=y
+CONFIG_MLX4_CORE_GEN2=y
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_FPGA=y
+CONFIG_MLX5_CORE_EN=y
+CONFIG_MLX5_EN_ARFS=y
+CONFIG_MLX5_EN_RXNFC=y
+CONFIG_MLX5_MPFS=y
+CONFIG_MLX5_ESWITCH=y
+CONFIG_MLX5_BRIDGE=y
+CONFIG_MLX5_CLS_ACT=y
+CONFIG_MLX5_TC_CT=y
+CONFIG_MLX5_TC_SAMPLE=y
+CONFIG_MLX5_CORE_EN_DCB=y
+CONFIG_MLX5_CORE_IPOIB=y
+CONFIG_MLX5_MACSEC=y
+CONFIG_MLX5_EN_IPSEC=y
+CONFIG_MLX5_EN_TLS=y
+CONFIG_MLX5_SW_STEERING=y
+CONFIG_MLX5_SF=y
+CONFIG_MLX5_SF_MANAGER=y
+CONFIG_MLX5_DPLL=m
+CONFIG_MLXSW_CORE=m
+CONFIG_MLXSW_CORE_HWMON=y
+CONFIG_MLXSW_CORE_THERMAL=y
+CONFIG_MLXSW_PCI=m
+CONFIG_MLXSW_I2C=m
+CONFIG_MLXSW_SPECTRUM=m
+CONFIG_MLXSW_SPECTRUM_DCB=y
+CONFIG_MLXSW_MINIMAL=m
+CONFIG_MLXFW=m
+CONFIG_NET_VENDOR_META=y
+CONFIG_FBNIC=m
+CONFIG_NET_VENDOR_MICREL=y
+CONFIG_KS8842=m
+CONFIG_KS8851=m
+CONFIG_KS8851_MLL=m
+CONFIG_KSZ884X_PCI=m
+CONFIG_NET_VENDOR_MICROCHIP=y
+CONFIG_ENC28J60=m
+# CONFIG_ENC28J60_WRITEVERIFY is not set
+CONFIG_ENCX24J600=m
+CONFIG_LAN743X=m
+CONFIG_VCAP=y
+CONFIG_NET_VENDOR_MICROSEMI=y
+CONFIG_MSCC_OCELOT_SWITCH_LIB=m
+CONFIG_NET_VENDOR_MICROSOFT=y
+CONFIG_MICROSOFT_MANA=m
+CONFIG_NET_VENDOR_MYRI=y
+CONFIG_MYRI10GE=m
+CONFIG_MYRI10GE_DCA=y
+CONFIG_FEALNX=m
+CONFIG_NET_VENDOR_NI=y
+CONFIG_NI_XGE_MANAGEMENT_ENET=m
+CONFIG_NET_VENDOR_NATSEMI=y
+CONFIG_NATSEMI=m
+CONFIG_NS83820=m
+CONFIG_NET_VENDOR_NETERION=y
+CONFIG_S2IO=m
+CONFIG_NET_VENDOR_NETRONOME=y
+CONFIG_NFP=m
+CONFIG_NFP_APP_FLOWER=y
+CONFIG_NFP_APP_ABM_NIC=y
+CONFIG_NFP_NET_IPSEC=y
+# CONFIG_NFP_DEBUG is not set
+CONFIG_NET_VENDOR_8390=y
+CONFIG_PCMCIA_AXNET=m
+CONFIG_NE2K_PCI=m
+CONFIG_PCMCIA_PCNET=m
+CONFIG_NET_VENDOR_NVIDIA=y
+CONFIG_FORCEDETH=m
+CONFIG_NET_VENDOR_OKI=y
+CONFIG_ETHOC=m
+CONFIG_NET_VENDOR_PACKET_ENGINES=y
+CONFIG_HAMACHI=m
+CONFIG_YELLOWFIN=m
+CONFIG_NET_VENDOR_PENSANDO=y
+CONFIG_IONIC=m
+CONFIG_NET_VENDOR_QLOGIC=y
+CONFIG_QLA3XXX=m
+CONFIG_QLCNIC=m
+CONFIG_QLCNIC_SRIOV=y
+CONFIG_QLCNIC_DCB=y
+CONFIG_QLCNIC_HWMON=y
+CONFIG_NETXEN_NIC=m
+CONFIG_QED=m
+CONFIG_QED_LL2=y
+CONFIG_QED_SRIOV=y
+CONFIG_QEDE=m
+CONFIG_QED_RDMA=y
+CONFIG_QED_ISCSI=y
+CONFIG_QED_FCOE=y
+CONFIG_QED_OOO=y
+CONFIG_NET_VENDOR_BROCADE=y
+CONFIG_BNA=m
+CONFIG_NET_VENDOR_QUALCOMM=y
+CONFIG_QCOM_EMAC=m
+CONFIG_RMNET=m
+CONFIG_NET_VENDOR_RDC=y
+CONFIG_R6040=m
+CONFIG_NET_VENDOR_REALTEK=y
+CONFIG_ATP=m
+CONFIG_8139CP=m
+CONFIG_8139TOO=m
+# CONFIG_8139TOO_PIO is not set
+CONFIG_8139TOO_TUNE_TWISTER=y
+CONFIG_8139TOO_8129=y
+# CONFIG_8139_OLD_RX_RESET is not set
+CONFIG_R8169=m
+CONFIG_R8169_LEDS=y
+CONFIG_NET_VENDOR_RENESAS=y
+CONFIG_NET_VENDOR_ROCKER=y
+CONFIG_ROCKER=m
+CONFIG_NET_VENDOR_SAMSUNG=y
+CONFIG_SXGBE_ETH=m
+CONFIG_NET_VENDOR_SEEQ=y
+CONFIG_NET_VENDOR_SILAN=y
+CONFIG_SC92031=m
+CONFIG_NET_VENDOR_SIS=y
+CONFIG_SIS900=m
+CONFIG_SIS190=m
+CONFIG_NET_VENDOR_SOLARFLARE=y
+CONFIG_SFC=m
+CONFIG_SFC_MTD=y
+CONFIG_SFC_MCDI_MON=y
+CONFIG_SFC_SRIOV=y
+CONFIG_SFC_MCDI_LOGGING=y
+CONFIG_SFC_FALCON=m
+CONFIG_SFC_FALCON_MTD=y
+CONFIG_SFC_SIENA=m
+CONFIG_SFC_SIENA_MTD=y
+CONFIG_SFC_SIENA_MCDI_MON=y
+CONFIG_SFC_SIENA_SRIOV=y
+CONFIG_SFC_SIENA_MCDI_LOGGING=y
+CONFIG_NET_VENDOR_SMSC=y
+CONFIG_PCMCIA_SMC91C92=m
+CONFIG_EPIC100=m
+CONFIG_SMSC911X=m
+CONFIG_SMSC9420=m
+CONFIG_NET_VENDOR_SOCIONEXT=y
+CONFIG_NET_VENDOR_STMICRO=y
+CONFIG_STMMAC_ETH=m
+# CONFIG_STMMAC_SELFTESTS is not set
+CONFIG_STMMAC_PLATFORM=m
+CONFIG_DWMAC_GENERIC=m
+CONFIG_DWMAC_INTEL=m
+CONFIG_STMMAC_PCI=m
+CONFIG_NET_VENDOR_SUN=y
+CONFIG_HAPPYMEAL=m
+CONFIG_SUNGEM=m
+CONFIG_CASSINI=m
+CONFIG_NIU=m
+CONFIG_NET_VENDOR_SYNOPSYS=y
+CONFIG_DWC_XLGMAC=m
+CONFIG_DWC_XLGMAC_PCI=m
+CONFIG_NET_VENDOR_TEHUTI=y
+CONFIG_TEHUTI=m
+CONFIG_TEHUTI_TN40=m
+CONFIG_NET_VENDOR_TI=y
+# CONFIG_TI_CPSW_PHY_SEL is not set
+CONFIG_TLAN=m
+CONFIG_NET_VENDOR_VERTEXCOM=y
+CONFIG_MSE102X=m
+CONFIG_NET_VENDOR_VIA=y
+CONFIG_VIA_RHINE=m
+CONFIG_VIA_RHINE_MMIO=y
+CONFIG_VIA_VELOCITY=m
+CONFIG_NET_VENDOR_WANGXUN=y
+CONFIG_LIBWX=m
+CONFIG_NGBE=m
+CONFIG_TXGBE=m
+CONFIG_NET_VENDOR_WIZNET=y
+CONFIG_WIZNET_W5100=m
+CONFIG_WIZNET_W5300=m
+# CONFIG_WIZNET_BUS_DIRECT is not set
+# CONFIG_WIZNET_BUS_INDIRECT is not set
+CONFIG_WIZNET_BUS_ANY=y
+CONFIG_WIZNET_W5100_SPI=m
+CONFIG_NET_VENDOR_XILINX=y
+CONFIG_XILINX_EMACLITE=m
+CONFIG_XILINX_AXI_EMAC=m
+CONFIG_XILINX_LL_TEMAC=m
+CONFIG_NET_VENDOR_XIRCOM=y
+CONFIG_PCMCIA_XIRC2PS=m
+CONFIG_FDDI=m
+CONFIG_DEFXX=m
+CONFIG_SKFP=m
+# CONFIG_HIPPI is not set
+CONFIG_PHYLINK=m
+CONFIG_PHYLIB=m
+CONFIG_SWPHY=y
+CONFIG_LED_TRIGGER_PHY=y
+CONFIG_FIXED_PHY=m
+CONFIG_SFP=m
+
+#
+# MII PHY device drivers
+#
+CONFIG_AIR_EN8811H_PHY=m
+CONFIG_AMD_PHY=m
+CONFIG_ADIN_PHY=m
+CONFIG_ADIN1100_PHY=m
+CONFIG_AQUANTIA_PHY=m
+CONFIG_AX88796B_PHY=m
+CONFIG_BROADCOM_PHY=m
+CONFIG_BCM54140_PHY=m
+CONFIG_BCM7XXX_PHY=m
+CONFIG_BCM84881_PHY=m
+CONFIG_BCM87XX_PHY=m
+CONFIG_BCM_NET_PHYLIB=m
+CONFIG_BCM_NET_PHYPTP=m
+CONFIG_CICADA_PHY=m
+CONFIG_CORTINA_PHY=m
+CONFIG_DAVICOM_PHY=m
+CONFIG_ICPLUS_PHY=m
+CONFIG_LXT_PHY=m
+CONFIG_INTEL_XWAY_PHY=m
+CONFIG_LSI_ET1011C_PHY=m
+CONFIG_MARVELL_PHY=m
+CONFIG_MARVELL_10G_PHY=m
+CONFIG_MARVELL_88Q2XXX_PHY=m
+CONFIG_MARVELL_88X2222_PHY=m
+CONFIG_MAXLINEAR_GPHY=m
+CONFIG_MEDIATEK_GE_PHY=m
+# CONFIG_MEDIATEK_GE_SOC_PHY is not set
+CONFIG_MICREL_PHY=m
+CONFIG_MICROCHIP_T1S_PHY=m
+CONFIG_MICROCHIP_PHY=m
+CONFIG_MICROCHIP_T1_PHY=m
+CONFIG_MICROSEMI_PHY=m
+CONFIG_MOTORCOMM_PHY=m
+CONFIG_NATIONAL_PHY=m
+CONFIG_NXP_CBTX_PHY=m
+CONFIG_NXP_C45_TJA11XX_PHY=m
+CONFIG_NXP_TJA11XX_PHY=m
+CONFIG_NCN26000_PHY=m
+CONFIG_QCOM_NET_PHYLIB=m
+CONFIG_AT803X_PHY=m
+CONFIG_QCA83XX_PHY=m
+CONFIG_QCA808X_PHY=m
+CONFIG_QSEMI_PHY=m
+CONFIG_REALTEK_PHY=m
+CONFIG_RENESAS_PHY=m
+# CONFIG_ROCKCHIP_PHY is not set
+CONFIG_SMSC_PHY=m
+CONFIG_STE10XP=m
+CONFIG_TERANETICS_PHY=m
+CONFIG_DP83822_PHY=m
+CONFIG_DP83TC811_PHY=m
+CONFIG_DP83848_PHY=m
+CONFIG_DP83867_PHY=m
+CONFIG_DP83869_PHY=m
+CONFIG_DP83TD510_PHY=m
+CONFIG_DP83TG720_PHY=m
+CONFIG_VITESSE_PHY=m
+CONFIG_XILINX_GMII2RGMII=m
+CONFIG_MICREL_KS8995MA=m
+CONFIG_PSE_CONTROLLER=y
+CONFIG_PSE_REGULATOR=m
+CONFIG_PSE_PD692X0=m
+CONFIG_PSE_TPS23881=m
+CONFIG_CAN_DEV=m
+CONFIG_CAN_VCAN=m
+CONFIG_CAN_VXCAN=m
+CONFIG_CAN_NETLINK=y
+CONFIG_CAN_CALC_BITTIMING=y
+CONFIG_CAN_RX_OFFLOAD=y
+CONFIG_CAN_CAN327=m
+CONFIG_CAN_JANZ_ICAN3=m
+CONFIG_CAN_KVASER_PCIEFD=m
+CONFIG_CAN_SLCAN=m
+CONFIG_CAN_C_CAN=m
+CONFIG_CAN_C_CAN_PLATFORM=m
+CONFIG_CAN_C_CAN_PCI=m
+CONFIG_CAN_CC770=m
+CONFIG_CAN_CC770_PLATFORM=m
+CONFIG_CAN_CTUCANFD=m
+CONFIG_CAN_CTUCANFD_PCI=m
+CONFIG_CAN_ESD_402_PCI=m
+CONFIG_CAN_IFI_CANFD=m
+CONFIG_CAN_M_CAN=m
+CONFIG_CAN_M_CAN_PCI=m
+CONFIG_CAN_M_CAN_PLATFORM=m
+CONFIG_CAN_M_CAN_TCAN4X5X=m
+CONFIG_CAN_PEAK_PCIEFD=m
+CONFIG_CAN_SJA1000=m
+CONFIG_CAN_EMS_PCI=m
+# CONFIG_CAN_EMS_PCMCIA is not set
+CONFIG_CAN_F81601=m
+CONFIG_CAN_KVASER_PCI=m
+CONFIG_CAN_PEAK_PCI=m
+CONFIG_CAN_PEAK_PCIEC=y
+CONFIG_CAN_PEAK_PCMCIA=m
+CONFIG_CAN_PLX_PCI=m
+CONFIG_CAN_SJA1000_PLATFORM=m
+CONFIG_CAN_SOFTING=m
+CONFIG_CAN_SOFTING_CS=m
+
+#
+# CAN SPI interfaces
+#
+CONFIG_CAN_HI311X=m
+CONFIG_CAN_MCP251X=m
+CONFIG_CAN_MCP251XFD=m
+# CONFIG_CAN_MCP251XFD_SANITY is not set
+# end of CAN SPI interfaces
+
+#
+# CAN USB interfaces
+#
+CONFIG_CAN_8DEV_USB=m
+CONFIG_CAN_EMS_USB=m
+CONFIG_CAN_ESD_USB=m
+CONFIG_CAN_ETAS_ES58X=m
+CONFIG_CAN_F81604=m
+CONFIG_CAN_GS_USB=m
+CONFIG_CAN_KVASER_USB=m
+CONFIG_CAN_MCBA_USB=m
+CONFIG_CAN_PEAK_USB=m
+CONFIG_CAN_UCAN=m
+# end of CAN USB interfaces
+
+# CONFIG_CAN_DEBUG_DEVICES is not set
+
+#
+# MCTP Device Drivers
+#
+CONFIG_MCTP_SERIAL=m
+CONFIG_MCTP_TRANSPORT_I2C=m
+# end of MCTP Device Drivers
+
+CONFIG_MDIO_DEVICE=m
+CONFIG_MDIO_BUS=m
+CONFIG_FWNODE_MDIO=m
+CONFIG_ACPI_MDIO=m
+CONFIG_MDIO_DEVRES=m
+CONFIG_MDIO_BITBANG=m
+CONFIG_MDIO_BCM_UNIMAC=m
+CONFIG_MDIO_CAVIUM=m
+CONFIG_MDIO_GPIO=m
+CONFIG_MDIO_I2C=m
+CONFIG_MDIO_MVUSB=m
+CONFIG_MDIO_MSCC_MIIM=m
+CONFIG_MDIO_REGMAP=m
+CONFIG_MDIO_THUNDER=m
+
+#
+# MDIO Multiplexers
+#
+
+#
+# PCS device drivers
+#
+CONFIG_PCS_XPCS=m
+CONFIG_PCS_LYNX=m
+CONFIG_PCS_MTK_LYNXI=m
+# end of PCS device drivers
+
+CONFIG_PLIP=m
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_MPPE=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPPOATM=m
+CONFIG_PPPOE=m
+# CONFIG_PPPOE_HASH_BITS_1 is not set
+# CONFIG_PPPOE_HASH_BITS_2 is not set
+CONFIG_PPPOE_HASH_BITS_4=y
+# CONFIG_PPPOE_HASH_BITS_8 is not set
+CONFIG_PPPOE_HASH_BITS=4
+CONFIG_PPTP=m
+CONFIG_PPPOL2TP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_SLIP=m
+CONFIG_SLHC=m
+CONFIG_SLIP_COMPRESSED=y
+CONFIG_SLIP_SMART=y
+CONFIG_SLIP_MODE_SLIP6=y
+CONFIG_USB_NET_DRIVERS=m
+CONFIG_USB_CATC=m
+CONFIG_USB_KAWETH=m
+CONFIG_USB_PEGASUS=m
+CONFIG_USB_RTL8150=m
+CONFIG_USB_RTL8152=m
+CONFIG_USB_LAN78XX=m
+CONFIG_USB_USBNET=m
+CONFIG_USB_NET_AX8817X=m
+CONFIG_USB_NET_AX88179_178A=m
+CONFIG_USB_NET_CDCETHER=m
+CONFIG_USB_NET_CDC_EEM=m
+CONFIG_USB_NET_CDC_NCM=m
+CONFIG_USB_NET_HUAWEI_CDC_NCM=m
+CONFIG_USB_NET_CDC_MBIM=m
+CONFIG_USB_NET_DM9601=m
+CONFIG_USB_NET_SR9700=m
+CONFIG_USB_NET_SR9800=m
+CONFIG_USB_NET_SMSC75XX=m
+CONFIG_USB_NET_SMSC95XX=m
+CONFIG_USB_NET_GL620A=m
+CONFIG_USB_NET_NET1080=m
+CONFIG_USB_NET_PLUSB=m
+CONFIG_USB_NET_MCS7830=m
+CONFIG_USB_NET_RNDIS_HOST=m
+CONFIG_USB_NET_CDC_SUBSET_ENABLE=m
+CONFIG_USB_NET_CDC_SUBSET=m
+CONFIG_USB_ALI_M5632=y
+CONFIG_USB_AN2720=y
+CONFIG_USB_BELKIN=y
+CONFIG_USB_ARMLINUX=y
+CONFIG_USB_EPSON2888=y
+CONFIG_USB_KC2190=y
+CONFIG_USB_NET_ZAURUS=m
+CONFIG_USB_NET_CX82310_ETH=m
+CONFIG_USB_NET_KALMIA=m
+CONFIG_USB_NET_QMI_WWAN=m
+CONFIG_USB_HSO=m
+CONFIG_USB_NET_INT51X1=m
+CONFIG_USB_CDC_PHONET=m
+CONFIG_USB_IPHETH=m
+CONFIG_USB_SIERRA_NET=m
+CONFIG_USB_VL600=m
+CONFIG_USB_NET_CH9200=m
+CONFIG_USB_NET_AQC111=m
+CONFIG_USB_RTL8153_ECM=m
+CONFIG_WLAN=y
+CONFIG_WLAN_VENDOR_ADMTEK=y
+CONFIG_ADM8211=m
+CONFIG_ATH_COMMON=m
+CONFIG_WLAN_VENDOR_ATH=y
+# CONFIG_ATH_DEBUG is not set
+CONFIG_ATH5K=m
+CONFIG_ATH5K_DEBUG=y
+CONFIG_ATH5K_TRACER=y
+CONFIG_ATH5K_PCI=y
+CONFIG_ATH9K_HW=m
+CONFIG_ATH9K_COMMON=m
+CONFIG_ATH9K_COMMON_DEBUG=y
+CONFIG_ATH9K_BTCOEX_SUPPORT=y
+CONFIG_ATH9K=m
+CONFIG_ATH9K_PCI=y
+CONFIG_ATH9K_AHB=y
+CONFIG_ATH9K_DEBUGFS=y
+CONFIG_ATH9K_STATION_STATISTICS=y
+CONFIG_ATH9K_DYNACK=y
+CONFIG_ATH9K_WOW=y
+CONFIG_ATH9K_RFKILL=y
+CONFIG_ATH9K_CHANNEL_CONTEXT=y
+CONFIG_ATH9K_PCOEM=y
+CONFIG_ATH9K_PCI_NO_EEPROM=m
+CONFIG_ATH9K_HTC=m
+CONFIG_ATH9K_HTC_DEBUGFS=y
+CONFIG_ATH9K_HWRNG=y
+CONFIG_ATH9K_COMMON_SPECTRAL=y
+CONFIG_CARL9170=m
+CONFIG_CARL9170_LEDS=y
+CONFIG_CARL9170_DEBUGFS=y
+CONFIG_CARL9170_WPC=y
+# CONFIG_CARL9170_HWRNG is not set
+CONFIG_ATH6KL=m
+CONFIG_ATH6KL_SDIO=m
+CONFIG_ATH6KL_USB=m
+CONFIG_ATH6KL_DEBUG=y
+CONFIG_ATH6KL_TRACING=y
+CONFIG_AR5523=m
+CONFIG_WIL6210=m
+CONFIG_WIL6210_ISR_COR=y
+CONFIG_WIL6210_TRACING=y
+CONFIG_WIL6210_DEBUGFS=y
+CONFIG_ATH10K=m
+CONFIG_ATH10K_CE=y
+CONFIG_ATH10K_PCI=m
+CONFIG_ATH10K_SDIO=m
+CONFIG_ATH10K_USB=m
+CONFIG_ATH10K_DEBUG=y
+CONFIG_ATH10K_DEBUGFS=y
+CONFIG_ATH10K_LEDS=y
+CONFIG_ATH10K_SPECTRAL=y
+CONFIG_ATH10K_TRACING=y
+CONFIG_WCN36XX=m
+CONFIG_WCN36XX_DEBUGFS=y
+CONFIG_ATH11K=m
+CONFIG_ATH11K_AHB=m
+CONFIG_ATH11K_PCI=m
+CONFIG_ATH11K_DEBUG=y
+CONFIG_ATH11K_DEBUGFS=y
+# CONFIG_ATH11K_TRACING is not set
+CONFIG_ATH11K_SPECTRAL=y
+CONFIG_ATH12K=m
+CONFIG_ATH12K_DEBUG=y
+CONFIG_ATH12K_DEBUGFS=y
+CONFIG_ATH12K_TRACING=y
+CONFIG_WLAN_VENDOR_ATMEL=y
+CONFIG_AT76C50X_USB=m
+CONFIG_WLAN_VENDOR_BROADCOM=y
+CONFIG_B43=m
+CONFIG_B43_BCMA=y
+CONFIG_B43_SSB=y
+CONFIG_B43_BUSES_BCMA_AND_SSB=y
+# CONFIG_B43_BUSES_BCMA is not set
+# CONFIG_B43_BUSES_SSB is not set
+CONFIG_B43_PCI_AUTOSELECT=y
+CONFIG_B43_PCICORE_AUTOSELECT=y
+CONFIG_B43_SDIO=y
+CONFIG_B43_BCMA_PIO=y
+CONFIG_B43_PIO=y
+CONFIG_B43_PHY_G=y
+CONFIG_B43_PHY_N=y
+CONFIG_B43_PHY_LP=y
+CONFIG_B43_PHY_HT=y
+CONFIG_B43_LEDS=y
+CONFIG_B43_HWRNG=y
+# CONFIG_B43_DEBUG is not set
+CONFIG_B43LEGACY=m
+CONFIG_B43LEGACY_PCI_AUTOSELECT=y
+CONFIG_B43LEGACY_PCICORE_AUTOSELECT=y
+CONFIG_B43LEGACY_LEDS=y
+CONFIG_B43LEGACY_HWRNG=y
+CONFIG_B43LEGACY_DEBUG=y
+CONFIG_B43LEGACY_DMA=y
+CONFIG_B43LEGACY_PIO=y
+CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y
+# CONFIG_B43LEGACY_DMA_MODE is not set
+# CONFIG_B43LEGACY_PIO_MODE is not set
+CONFIG_BRCMUTIL=m
+CONFIG_BRCMSMAC=m
+CONFIG_BRCMSMAC_LEDS=y
+CONFIG_BRCMFMAC=m
+CONFIG_BRCMFMAC_PROTO_BCDC=y
+CONFIG_BRCMFMAC_PROTO_MSGBUF=y
+CONFIG_BRCMFMAC_SDIO=y
+CONFIG_BRCMFMAC_USB=y
+CONFIG_BRCMFMAC_PCIE=y
+CONFIG_BRCM_TRACING=y
+CONFIG_BRCMDBG=y
+CONFIG_WLAN_VENDOR_INTEL=y
+CONFIG_IPW2100=m
+CONFIG_IPW2100_MONITOR=y
+# CONFIG_IPW2100_DEBUG is not set
+CONFIG_IPW2200=m
+CONFIG_IPW2200_MONITOR=y
+CONFIG_IPW2200_RADIOTAP=y
+CONFIG_IPW2200_PROMISCUOUS=y
+CONFIG_IPW2200_QOS=y
+# CONFIG_IPW2200_DEBUG is not set
+CONFIG_LIBIPW=m
+# CONFIG_LIBIPW_DEBUG is not set
+CONFIG_IWLEGACY=m
+CONFIG_IWL4965=m
+CONFIG_IWL3945=m
+
+#
+# iwl3945 / iwl4965 Debugging Options
+#
+CONFIG_IWLEGACY_DEBUG=y
+CONFIG_IWLEGACY_DEBUGFS=y
+# end of iwl3945 / iwl4965 Debugging Options
+
+CONFIG_IWLWIFI=m
+CONFIG_IWLWIFI_LEDS=y
+CONFIG_IWLDVM=m
+CONFIG_IWLMVM=m
+CONFIG_IWLWIFI_OPMODE_MODULAR=y
+
+#
+# Debugging Options
+#
+CONFIG_IWLWIFI_DEBUG=y
+CONFIG_IWLWIFI_DEBUGFS=y
+CONFIG_IWLWIFI_DEVICE_TRACING=y
+# end of Debugging Options
+
+CONFIG_WLAN_VENDOR_INTERSIL=y
+CONFIG_P54_COMMON=m
+CONFIG_P54_USB=m
+CONFIG_P54_PCI=m
+CONFIG_P54_SPI=m
+# CONFIG_P54_SPI_DEFAULT_EEPROM is not set
+CONFIG_P54_LEDS=y
+CONFIG_WLAN_VENDOR_MARVELL=y
+CONFIG_LIBERTAS=m
+CONFIG_LIBERTAS_USB=m
+CONFIG_LIBERTAS_SDIO=m
+CONFIG_LIBERTAS_SPI=m
+# CONFIG_LIBERTAS_DEBUG is not set
+CONFIG_LIBERTAS_MESH=y
+CONFIG_LIBERTAS_THINFIRM=m
+# CONFIG_LIBERTAS_THINFIRM_DEBUG is not set
+CONFIG_LIBERTAS_THINFIRM_USB=m
+CONFIG_MWIFIEX=m
+CONFIG_MWIFIEX_SDIO=m
+CONFIG_MWIFIEX_PCIE=m
+CONFIG_MWIFIEX_USB=m
+CONFIG_MWL8K=m
+CONFIG_WLAN_VENDOR_MEDIATEK=y
+CONFIG_MT7601U=m
+CONFIG_MT76_CORE=m
+CONFIG_MT76_LEDS=y
+CONFIG_MT76_USB=m
+CONFIG_MT76_SDIO=m
+CONFIG_MT76x02_LIB=m
+CONFIG_MT76x02_USB=m
+CONFIG_MT76_CONNAC_LIB=m
+CONFIG_MT792x_LIB=m
+CONFIG_MT792x_USB=m
+CONFIG_MT76x0_COMMON=m
+CONFIG_MT76x0U=m
+CONFIG_MT76x0E=m
+CONFIG_MT76x2_COMMON=m
+CONFIG_MT76x2E=m
+CONFIG_MT76x2U=m
+CONFIG_MT7603E=m
+CONFIG_MT7615_COMMON=m
+CONFIG_MT7615E=m
+CONFIG_MT7663_USB_SDIO_COMMON=m
+CONFIG_MT7663U=m
+CONFIG_MT7663S=m
+CONFIG_MT7915E=m
+CONFIG_MT7921_COMMON=m
+CONFIG_MT7921E=m
+CONFIG_MT7921S=m
+CONFIG_MT7921U=m
+CONFIG_MT7996E=m
+CONFIG_MT7925_COMMON=m
+CONFIG_MT7925E=m
+CONFIG_MT7925U=m
+CONFIG_WLAN_VENDOR_MICROCHIP=y
+CONFIG_WILC1000=m
+CONFIG_WILC1000_SDIO=m
+CONFIG_WILC1000_SPI=m
+# CONFIG_WILC1000_HW_OOB_INTR is not set
+CONFIG_WLAN_VENDOR_PURELIFI=y
+CONFIG_PLFXLC=m
+CONFIG_WLAN_VENDOR_RALINK=y
+CONFIG_RT2X00=m
+CONFIG_RT2400PCI=m
+CONFIG_RT2500PCI=m
+CONFIG_RT61PCI=m
+CONFIG_RT2800PCI=m
+CONFIG_RT2800PCI_RT33XX=y
+CONFIG_RT2800PCI_RT35XX=y
+CONFIG_RT2800PCI_RT53XX=y
+CONFIG_RT2800PCI_RT3290=y
+CONFIG_RT2500USB=m
+CONFIG_RT73USB=m
+CONFIG_RT2800USB=m
+CONFIG_RT2800USB_RT33XX=y
+CONFIG_RT2800USB_RT35XX=y
+CONFIG_RT2800USB_RT3573=y
+CONFIG_RT2800USB_RT53XX=y
+CONFIG_RT2800USB_RT55XX=y
+CONFIG_RT2800USB_UNKNOWN=y
+CONFIG_RT2800_LIB=m
+CONFIG_RT2800_LIB_MMIO=m
+CONFIG_RT2X00_LIB_MMIO=m
+CONFIG_RT2X00_LIB_PCI=m
+CONFIG_RT2X00_LIB_USB=m
+CONFIG_RT2X00_LIB=m
+CONFIG_RT2X00_LIB_FIRMWARE=y
+CONFIG_RT2X00_LIB_CRYPTO=y
+CONFIG_RT2X00_LIB_LEDS=y
+CONFIG_RT2X00_LIB_DEBUGFS=y
+# CONFIG_RT2X00_DEBUG is not set
+CONFIG_WLAN_VENDOR_REALTEK=y
+CONFIG_RTL8180=m
+CONFIG_RTL8187=m
+CONFIG_RTL8187_LEDS=y
+CONFIG_RTL_CARDS=m
+CONFIG_RTL8192CE=m
+CONFIG_RTL8192SE=m
+CONFIG_RTL8192DE=m
+CONFIG_RTL8723AE=m
+CONFIG_RTL8723BE=m
+CONFIG_RTL8188EE=m
+CONFIG_RTL8192EE=m
+CONFIG_RTL8821AE=m
+CONFIG_RTL8192CU=m
+CONFIG_RTL8192DU=m
+CONFIG_RTLWIFI=m
+CONFIG_RTLWIFI_PCI=m
+CONFIG_RTLWIFI_USB=m
+CONFIG_RTLWIFI_DEBUG=y
+CONFIG_RTL8192C_COMMON=m
+CONFIG_RTL8192D_COMMON=m
+CONFIG_RTL8723_COMMON=m
+CONFIG_RTLBTCOEXIST=m
+CONFIG_RTL8XXXU=m
+CONFIG_RTL8XXXU_UNTESTED=y
+CONFIG_RTW88=m
+CONFIG_RTW88_CORE=m
+CONFIG_RTW88_PCI=m
+CONFIG_RTW88_SDIO=m
+CONFIG_RTW88_USB=m
+CONFIG_RTW88_8822B=m
+CONFIG_RTW88_8822C=m
+CONFIG_RTW88_8723X=m
+CONFIG_RTW88_8703B=m
+CONFIG_RTW88_8723D=m
+CONFIG_RTW88_8821C=m
+CONFIG_RTW88_8822BE=m
+CONFIG_RTW88_8822BS=m
+CONFIG_RTW88_8822BU=m
+CONFIG_RTW88_8822CE=m
+CONFIG_RTW88_8822CS=m
+CONFIG_RTW88_8822CU=m
+CONFIG_RTW88_8723DE=m
+CONFIG_RTW88_8723DS=m
+CONFIG_RTW88_8723CS=m
+CONFIG_RTW88_8723DU=m
+CONFIG_RTW88_8821CE=m
+CONFIG_RTW88_8821CS=m
+CONFIG_RTW88_8821CU=m
+CONFIG_RTW88_DEBUG=y
+CONFIG_RTW88_DEBUGFS=y
+CONFIG_RTW89=m
+CONFIG_RTW89_CORE=m
+CONFIG_RTW89_PCI=m
+CONFIG_RTW89_8851B=m
+CONFIG_RTW89_8852A=m
+CONFIG_RTW89_8852B_COMMON=m
+CONFIG_RTW89_8852B=m
+CONFIG_RTW89_8852C=m
+CONFIG_RTW89_8922A=m
+CONFIG_RTW89_8851BE=m
+CONFIG_RTW89_8852AE=m
+CONFIG_RTW89_8852BE=m
+CONFIG_RTW89_8852CE=m
+CONFIG_RTW89_8922AE=m
+CONFIG_RTW89_DEBUG=y
+CONFIG_RTW89_DEBUGMSG=y
+CONFIG_RTW89_DEBUGFS=y
+CONFIG_WLAN_VENDOR_RSI=y
+CONFIG_RSI_91X=m
+CONFIG_RSI_DEBUGFS=y
+CONFIG_RSI_SDIO=m
+CONFIG_RSI_USB=m
+CONFIG_RSI_COEX=y
+CONFIG_WLAN_VENDOR_SILABS=y
+CONFIG_WFX=m
+CONFIG_WLAN_VENDOR_ST=y
+CONFIG_CW1200=m
+CONFIG_CW1200_WLAN_SDIO=m
+CONFIG_CW1200_WLAN_SPI=m
+CONFIG_WLAN_VENDOR_TI=y
+CONFIG_WL1251=m
+CONFIG_WL1251_SPI=m
+CONFIG_WL1251_SDIO=m
+CONFIG_WL12XX=m
+CONFIG_WL18XX=m
+CONFIG_WLCORE=m
+CONFIG_WLCORE_SDIO=m
+CONFIG_WLAN_VENDOR_ZYDAS=y
+CONFIG_ZD1211RW=m
+# CONFIG_ZD1211RW_DEBUG is not set
+CONFIG_WLAN_VENDOR_QUANTENNA=y
+CONFIG_QTNFMAC=m
+CONFIG_QTNFMAC_PCIE=m
+CONFIG_MAC80211_HWSIM=m
+CONFIG_VIRT_WIFI=m
+# CONFIG_WAN is not set
+CONFIG_IEEE802154_DRIVERS=m
+CONFIG_IEEE802154_FAKELB=m
+CONFIG_IEEE802154_AT86RF230=m
+CONFIG_IEEE802154_MRF24J40=m
+CONFIG_IEEE802154_CC2520=m
+CONFIG_IEEE802154_ATUSB=m
+CONFIG_IEEE802154_ADF7242=m
+CONFIG_IEEE802154_CA8210=m
+# CONFIG_IEEE802154_CA8210_DEBUGFS is not set
+CONFIG_IEEE802154_MCR20A=m
+CONFIG_IEEE802154_HWSIM=m
+
+#
+# Wireless WAN
+#
+CONFIG_WWAN=m
+CONFIG_WWAN_DEBUGFS=y
+CONFIG_WWAN_HWSIM=m
+CONFIG_MHI_WWAN_CTRL=m
+CONFIG_MHI_WWAN_MBIM=m
+CONFIG_RPMSG_WWAN_CTRL=m
+CONFIG_IOSM=m
+CONFIG_MTK_T7XX=m
+# end of Wireless WAN
+
+CONFIG_XEN_NETDEV_FRONTEND=m
+CONFIG_XEN_NETDEV_BACKEND=m
+CONFIG_VMXNET3=m
+CONFIG_FUJITSU_ES=m
+CONFIG_USB4_NET=m
+CONFIG_HYPERV_NET=m
+CONFIG_NETDEVSIM=m
+CONFIG_NET_FAILOVER=m
+CONFIG_ISDN=y
+CONFIG_ISDN_CAPI=y
+CONFIG_CAPI_TRACE=y
+CONFIG_ISDN_CAPI_MIDDLEWARE=y
+CONFIG_MISDN=m
+CONFIG_MISDN_DSP=m
+CONFIG_MISDN_L1OIP=m
+
+#
+# mISDN hardware drivers
+#
+CONFIG_MISDN_HFCPCI=m
+CONFIG_MISDN_HFCMULTI=m
+CONFIG_MISDN_HFCUSB=m
+CONFIG_MISDN_AVMFRITZ=m
+CONFIG_MISDN_SPEEDFAX=m
+CONFIG_MISDN_INFINEON=m
+CONFIG_MISDN_W6692=m
+CONFIG_MISDN_NETJET=m
+CONFIG_MISDN_HDLC=m
+CONFIG_MISDN_IPAC=m
+CONFIG_MISDN_ISAR=m
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+CONFIG_INPUT_LEDS=y
+CONFIG_INPUT_FF_MEMLESS=m
+CONFIG_INPUT_SPARSEKMAP=m
+CONFIG_INPUT_MATRIXKMAP=m
+CONFIG_INPUT_VIVALDIFMAP=m
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=m
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+CONFIG_INPUT_JOYDEV=m
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ADC=m
+CONFIG_KEYBOARD_ADP5520=m
+CONFIG_KEYBOARD_ADP5588=m
+CONFIG_KEYBOARD_ADP5589=m
+CONFIG_KEYBOARD_APPLESPI=m
+CONFIG_KEYBOARD_ATKBD=m
+CONFIG_KEYBOARD_QT1050=m
+CONFIG_KEYBOARD_QT1070=m
+CONFIG_KEYBOARD_QT2160=m
+CONFIG_KEYBOARD_DLINK_DIR685=m
+CONFIG_KEYBOARD_LKKBD=m
+CONFIG_KEYBOARD_GPIO=m
+CONFIG_KEYBOARD_GPIO_POLLED=m
+CONFIG_KEYBOARD_TCA6416=m
+CONFIG_KEYBOARD_TCA8418=m
+CONFIG_KEYBOARD_MATRIX=m
+CONFIG_KEYBOARD_LM8323=m
+CONFIG_KEYBOARD_LM8333=m
+CONFIG_KEYBOARD_MAX7359=m
+CONFIG_KEYBOARD_MCS=m
+CONFIG_KEYBOARD_MPR121=m
+CONFIG_KEYBOARD_NEWTON=m
+CONFIG_KEYBOARD_OPENCORES=m
+CONFIG_KEYBOARD_PINEPHONE=m
+CONFIG_KEYBOARD_SAMSUNG=m
+CONFIG_KEYBOARD_STOWAWAY=m
+CONFIG_KEYBOARD_SUNKBD=m
+CONFIG_KEYBOARD_IQS62X=m
+CONFIG_KEYBOARD_TM2_TOUCHKEY=m
+CONFIG_KEYBOARD_TWL4030=m
+CONFIG_KEYBOARD_XTKBD=m
+CONFIG_KEYBOARD_CROS_EC=m
+CONFIG_KEYBOARD_MTK_PMIC=m
+CONFIG_KEYBOARD_CYPRESS_SF=m
+CONFIG_INPUT_MOUSE=y
+CONFIG_MOUSE_PS2=m
+CONFIG_MOUSE_PS2_ALPS=y
+CONFIG_MOUSE_PS2_BYD=y
+CONFIG_MOUSE_PS2_LOGIPS2PP=y
+CONFIG_MOUSE_PS2_SYNAPTICS=y
+CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS=y
+CONFIG_MOUSE_PS2_CYPRESS=y
+CONFIG_MOUSE_PS2_LIFEBOOK=y
+CONFIG_MOUSE_PS2_TRACKPOINT=y
+CONFIG_MOUSE_PS2_ELANTECH=y
+CONFIG_MOUSE_PS2_ELANTECH_SMBUS=y
+CONFIG_MOUSE_PS2_SENTELIC=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+CONFIG_MOUSE_PS2_FOCALTECH=y
+CONFIG_MOUSE_PS2_VMMOUSE=y
+CONFIG_MOUSE_PS2_SMBUS=y
+CONFIG_MOUSE_SERIAL=m
+CONFIG_MOUSE_APPLETOUCH=m
+CONFIG_MOUSE_BCM5974=m
+CONFIG_MOUSE_CYAPA=m
+CONFIG_MOUSE_ELAN_I2C=m
+CONFIG_MOUSE_ELAN_I2C_I2C=y
+CONFIG_MOUSE_ELAN_I2C_SMBUS=y
+CONFIG_MOUSE_VSXXXAA=m
+CONFIG_MOUSE_GPIO=m
+CONFIG_MOUSE_SYNAPTICS_I2C=m
+CONFIG_MOUSE_SYNAPTICS_USB=m
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_JOYSTICK_ANALOG=m
+CONFIG_JOYSTICK_A3D=m
+CONFIG_JOYSTICK_ADC=m
+CONFIG_JOYSTICK_ADI=m
+CONFIG_JOYSTICK_COBRA=m
+CONFIG_JOYSTICK_GF2K=m
+CONFIG_JOYSTICK_GRIP=m
+CONFIG_JOYSTICK_GRIP_MP=m
+CONFIG_JOYSTICK_GUILLEMOT=m
+CONFIG_JOYSTICK_INTERACT=m
+CONFIG_JOYSTICK_SIDEWINDER=m
+CONFIG_JOYSTICK_TMDC=m
+CONFIG_JOYSTICK_IFORCE=m
+CONFIG_JOYSTICK_IFORCE_USB=m
+CONFIG_JOYSTICK_IFORCE_232=m
+CONFIG_JOYSTICK_WARRIOR=m
+CONFIG_JOYSTICK_MAGELLAN=m
+CONFIG_JOYSTICK_SPACEORB=m
+CONFIG_JOYSTICK_SPACEBALL=m
+CONFIG_JOYSTICK_STINGER=m
+CONFIG_JOYSTICK_TWIDJOY=m
+CONFIG_JOYSTICK_ZHENHUA=m
+CONFIG_JOYSTICK_DB9=m
+CONFIG_JOYSTICK_GAMECON=m
+CONFIG_JOYSTICK_TURBOGRAFX=m
+CONFIG_JOYSTICK_AS5011=m
+CONFIG_JOYSTICK_JOYDUMP=m
+CONFIG_JOYSTICK_XPAD=m
+CONFIG_JOYSTICK_XPAD_FF=y
+CONFIG_JOYSTICK_XPAD_LEDS=y
+CONFIG_JOYSTICK_WALKERA0701=m
+CONFIG_JOYSTICK_PSXPAD_SPI=m
+CONFIG_JOYSTICK_PSXPAD_SPI_FF=y
+CONFIG_JOYSTICK_PXRC=m
+CONFIG_JOYSTICK_QWIIC=m
+CONFIG_JOYSTICK_FSIA6B=m
+CONFIG_JOYSTICK_SENSEHAT=m
+CONFIG_JOYSTICK_SEESAW=m
+CONFIG_INPUT_TABLET=y
+CONFIG_TABLET_USB_ACECAD=m
+CONFIG_TABLET_USB_AIPTEK=m
+CONFIG_TABLET_USB_HANWANG=m
+CONFIG_TABLET_USB_KBTAB=m
+CONFIG_TABLET_USB_PEGASUS=m
+CONFIG_TABLET_SERIAL_WACOM4=m
+CONFIG_INPUT_TOUCHSCREEN=y
+CONFIG_TOUCHSCREEN_88PM860X=m
+CONFIG_TOUCHSCREEN_ADS7846=m
+CONFIG_TOUCHSCREEN_AD7877=m
+CONFIG_TOUCHSCREEN_AD7879=m
+CONFIG_TOUCHSCREEN_AD7879_I2C=m
+CONFIG_TOUCHSCREEN_AD7879_SPI=m
+CONFIG_TOUCHSCREEN_ADC=m
+CONFIG_TOUCHSCREEN_ATMEL_MXT=m
+CONFIG_TOUCHSCREEN_ATMEL_MXT_T37=y
+CONFIG_TOUCHSCREEN_AUO_PIXCIR=m
+CONFIG_TOUCHSCREEN_BU21013=m
+CONFIG_TOUCHSCREEN_BU21029=m
+CONFIG_TOUCHSCREEN_CHIPONE_ICN8505=m
+CONFIG_TOUCHSCREEN_CY8CTMA140=m
+CONFIG_TOUCHSCREEN_CY8CTMG110=m
+CONFIG_TOUCHSCREEN_CYTTSP_CORE=m
+CONFIG_TOUCHSCREEN_CYTTSP_I2C=m
+CONFIG_TOUCHSCREEN_CYTTSP_SPI=m
+CONFIG_TOUCHSCREEN_CYTTSP4_CORE=m
+CONFIG_TOUCHSCREEN_CYTTSP4_I2C=m
+CONFIG_TOUCHSCREEN_CYTTSP4_SPI=m
+CONFIG_TOUCHSCREEN_CYTTSP5=m
+CONFIG_TOUCHSCREEN_DA9034=m
+CONFIG_TOUCHSCREEN_DA9052=m
+CONFIG_TOUCHSCREEN_DYNAPRO=m
+CONFIG_TOUCHSCREEN_HAMPSHIRE=m
+CONFIG_TOUCHSCREEN_EETI=m
+CONFIG_TOUCHSCREEN_EGALAX_SERIAL=m
+CONFIG_TOUCHSCREEN_EXC3000=m
+CONFIG_TOUCHSCREEN_FUJITSU=m
+CONFIG_TOUCHSCREEN_GOODIX=m
+CONFIG_TOUCHSCREEN_GOODIX_BERLIN_CORE=m
+CONFIG_TOUCHSCREEN_GOODIX_BERLIN_I2C=m
+CONFIG_TOUCHSCREEN_GOODIX_BERLIN_SPI=m
+CONFIG_TOUCHSCREEN_HIDEEP=m
+CONFIG_TOUCHSCREEN_HYCON_HY46XX=m
+CONFIG_TOUCHSCREEN_HYNITRON_CSTXXX=m
+CONFIG_TOUCHSCREEN_ILI210X=m
+CONFIG_TOUCHSCREEN_ILITEK=m
+CONFIG_TOUCHSCREEN_S6SY761=m
+CONFIG_TOUCHSCREEN_GUNZE=m
+CONFIG_TOUCHSCREEN_EKTF2127=m
+CONFIG_TOUCHSCREEN_ELAN=m
+CONFIG_TOUCHSCREEN_ELO=m
+CONFIG_TOUCHSCREEN_WACOM_W8001=m
+CONFIG_TOUCHSCREEN_WACOM_I2C=m
+CONFIG_TOUCHSCREEN_MAX11801=m
+CONFIG_TOUCHSCREEN_MCS5000=m
+CONFIG_TOUCHSCREEN_MMS114=m
+CONFIG_TOUCHSCREEN_MELFAS_MIP4=m
+CONFIG_TOUCHSCREEN_MSG2638=m
+CONFIG_TOUCHSCREEN_MTOUCH=m
+CONFIG_TOUCHSCREEN_NOVATEK_NVT_TS=m
+CONFIG_TOUCHSCREEN_IMAGIS=m
+CONFIG_TOUCHSCREEN_INEXIO=m
+CONFIG_TOUCHSCREEN_PENMOUNT=m
+CONFIG_TOUCHSCREEN_EDT_FT5X06=m
+CONFIG_TOUCHSCREEN_TOUCHRIGHT=m
+CONFIG_TOUCHSCREEN_TOUCHWIN=m
+CONFIG_TOUCHSCREEN_PIXCIR=m
+CONFIG_TOUCHSCREEN_WDT87XX_I2C=m
+CONFIG_TOUCHSCREEN_WM831X=m
+CONFIG_TOUCHSCREEN_WM97XX=m
+CONFIG_TOUCHSCREEN_WM9705=y
+CONFIG_TOUCHSCREEN_WM9712=y
+CONFIG_TOUCHSCREEN_WM9713=y
+CONFIG_TOUCHSCREEN_USB_COMPOSITE=m
+CONFIG_TOUCHSCREEN_MC13783=m
+CONFIG_TOUCHSCREEN_USB_EGALAX=y
+CONFIG_TOUCHSCREEN_USB_PANJIT=y
+CONFIG_TOUCHSCREEN_USB_3M=y
+CONFIG_TOUCHSCREEN_USB_ITM=y
+CONFIG_TOUCHSCREEN_USB_ETURBO=y
+CONFIG_TOUCHSCREEN_USB_GUNZE=y
+CONFIG_TOUCHSCREEN_USB_DMC_TSC10=y
+CONFIG_TOUCHSCREEN_USB_IRTOUCH=y
+CONFIG_TOUCHSCREEN_USB_IDEALTEK=y
+CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH=y
+CONFIG_TOUCHSCREEN_USB_GOTOP=y
+CONFIG_TOUCHSCREEN_USB_JASTEC=y
+CONFIG_TOUCHSCREEN_USB_ELO=y
+CONFIG_TOUCHSCREEN_USB_E2I=y
+CONFIG_TOUCHSCREEN_USB_ZYTRONIC=y
+CONFIG_TOUCHSCREEN_USB_ETT_TC45USB=y
+CONFIG_TOUCHSCREEN_USB_NEXIO=y
+CONFIG_TOUCHSCREEN_USB_EASYTOUCH=y
+CONFIG_TOUCHSCREEN_TOUCHIT213=m
+CONFIG_TOUCHSCREEN_TSC_SERIO=m
+CONFIG_TOUCHSCREEN_TSC200X_CORE=m
+CONFIG_TOUCHSCREEN_TSC2004=m
+CONFIG_TOUCHSCREEN_TSC2005=m
+CONFIG_TOUCHSCREEN_TSC2007=m
+CONFIG_TOUCHSCREEN_TSC2007_IIO=y
+CONFIG_TOUCHSCREEN_PCAP=m
+CONFIG_TOUCHSCREEN_RM_TS=m
+CONFIG_TOUCHSCREEN_SILEAD=m
+CONFIG_TOUCHSCREEN_SIS_I2C=m
+CONFIG_TOUCHSCREEN_ST1232=m
+CONFIG_TOUCHSCREEN_STMFTS=m
+CONFIG_TOUCHSCREEN_SUR40=m
+CONFIG_TOUCHSCREEN_SURFACE3_SPI=m
+CONFIG_TOUCHSCREEN_SX8654=m
+CONFIG_TOUCHSCREEN_TPS6507X=m
+CONFIG_TOUCHSCREEN_ZET6223=m
+CONFIG_TOUCHSCREEN_ZFORCE=m
+CONFIG_TOUCHSCREEN_COLIBRI_VF50=m
+CONFIG_TOUCHSCREEN_ROHM_BU21023=m
+CONFIG_TOUCHSCREEN_IQS5XX=m
+CONFIG_TOUCHSCREEN_IQS7211=m
+CONFIG_TOUCHSCREEN_ZINITIX=m
+CONFIG_TOUCHSCREEN_HIMAX_HX83112B=m
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_88PM860X_ONKEY=m
+CONFIG_INPUT_88PM80X_ONKEY=m
+CONFIG_INPUT_AD714X=m
+CONFIG_INPUT_AD714X_I2C=m
+CONFIG_INPUT_AD714X_SPI=m
+CONFIG_INPUT_ARIZONA_HAPTICS=m
+CONFIG_INPUT_ATC260X_ONKEY=m
+CONFIG_INPUT_BMA150=m
+# CONFIG_INPUT_CS40L50_VIBRA is not set
+CONFIG_INPUT_E3X0_BUTTON=m
+CONFIG_INPUT_PCSPKR=m
+CONFIG_INPUT_MAX77693_HAPTIC=m
+CONFIG_INPUT_MAX8925_ONKEY=m
+CONFIG_INPUT_MAX8997_HAPTIC=m
+CONFIG_INPUT_MC13783_PWRBUTTON=m
+CONFIG_INPUT_MMA8450=m
+CONFIG_INPUT_APANEL=m
+CONFIG_INPUT_GPIO_BEEPER=m
+CONFIG_INPUT_GPIO_DECODER=m
+CONFIG_INPUT_GPIO_VIBRA=m
+CONFIG_INPUT_ATLAS_BTNS=m
+CONFIG_INPUT_ATI_REMOTE2=m
+CONFIG_INPUT_KEYSPAN_REMOTE=m
+CONFIG_INPUT_KXTJ9=m
+CONFIG_INPUT_POWERMATE=m
+CONFIG_INPUT_YEALINK=m
+CONFIG_INPUT_CM109=m
+CONFIG_INPUT_REGULATOR_HAPTIC=m
+CONFIG_INPUT_RETU_PWRBUTTON=m
+CONFIG_INPUT_AXP20X_PEK=m
+CONFIG_INPUT_TWL4030_PWRBUTTON=m
+CONFIG_INPUT_TWL4030_VIBRA=m
+CONFIG_INPUT_TWL6040_VIBRA=m
+CONFIG_INPUT_UINPUT=m
+CONFIG_INPUT_PALMAS_PWRBUTTON=m
+CONFIG_INPUT_PCF50633_PMU=m
+CONFIG_INPUT_PCF8574=m
+CONFIG_INPUT_PWM_BEEPER=m
+CONFIG_INPUT_PWM_VIBRA=m
+CONFIG_INPUT_GPIO_ROTARY_ENCODER=m
+CONFIG_INPUT_DA7280_HAPTICS=m
+CONFIG_INPUT_DA9052_ONKEY=m
+CONFIG_INPUT_DA9055_ONKEY=m
+CONFIG_INPUT_DA9063_ONKEY=m
+CONFIG_INPUT_WM831X_ON=m
+CONFIG_INPUT_PCAP=m
+CONFIG_INPUT_ADXL34X=m
+CONFIG_INPUT_ADXL34X_I2C=m
+CONFIG_INPUT_ADXL34X_SPI=m
+CONFIG_INPUT_IBM_PANEL=m
+CONFIG_INPUT_IMS_PCU=m
+CONFIG_INPUT_IQS269A=m
+CONFIG_INPUT_IQS626A=m
+CONFIG_INPUT_IQS7222=m
+CONFIG_INPUT_CMA3000=m
+CONFIG_INPUT_CMA3000_I2C=m
+CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m
+CONFIG_INPUT_IDEAPAD_SLIDEBAR=m
+CONFIG_INPUT_SOC_BUTTON_ARRAY=m
+CONFIG_INPUT_DRV260X_HAPTICS=m
+CONFIG_INPUT_DRV2665_HAPTICS=m
+CONFIG_INPUT_DRV2667_HAPTICS=m
+CONFIG_INPUT_RAVE_SP_PWRBUTTON=m
+CONFIG_INPUT_RT5120_PWRKEY=m
+CONFIG_RMI4_CORE=m
+CONFIG_RMI4_I2C=m
+CONFIG_RMI4_SPI=m
+CONFIG_RMI4_SMB=m
+CONFIG_RMI4_F03=y
+CONFIG_RMI4_F03_SERIO=m
+CONFIG_RMI4_2D_SENSOR=y
+CONFIG_RMI4_F11=y
+CONFIG_RMI4_F12=y
+CONFIG_RMI4_F30=y
+CONFIG_RMI4_F34=y
+CONFIG_RMI4_F3A=y
+# CONFIG_RMI4_F54 is not set
+CONFIG_RMI4_F55=y
+
+#
+# Hardware I/O ports
+#
+CONFIG_SERIO=m
+CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y
+CONFIG_SERIO_I8042=m
+CONFIG_SERIO_SERPORT=m
+CONFIG_SERIO_CT82C710=m
+CONFIG_SERIO_PARKBD=m
+CONFIG_SERIO_PCIPS2=m
+CONFIG_SERIO_LIBPS2=m
+CONFIG_SERIO_RAW=m
+CONFIG_SERIO_ALTERA_PS2=m
+CONFIG_SERIO_PS2MULT=m
+CONFIG_SERIO_ARC_PS2=m
+CONFIG_HYPERV_KEYBOARD=m
+CONFIG_SERIO_GPIO_PS2=m
+CONFIG_USERIO=m
+CONFIG_GAMEPORT=m
+CONFIG_GAMEPORT_EMU10K1=m
+CONFIG_GAMEPORT_FM801=m
+# end of Hardware I/O ports
+# end of Input device support
+
+#
+# Character devices
+#
+CONFIG_TTY=y
+CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
+CONFIG_VT_CONSOLE=y
+CONFIG_VT_CONSOLE_SLEEP=y
+CONFIG_VT_HW_CONSOLE_BINDING=y
+CONFIG_UNIX98_PTYS=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_LEGACY_TIOCSTI is not set
+CONFIG_LDISC_AUTOLOAD=y
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_EARLYCON=y
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_PNP=y
+# CONFIG_SERIAL_8250_16550A_VARIANTS is not set
+CONFIG_SERIAL_8250_FINTEK=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DMA=y
+CONFIG_SERIAL_8250_PCILIB=y
+CONFIG_SERIAL_8250_PCI=y
+CONFIG_SERIAL_8250_EXAR=m
+CONFIG_SERIAL_8250_CS=m
+CONFIG_SERIAL_8250_MEN_MCB=m
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_RUNTIME_UARTS=32
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_PCI1XXXX=m
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_SERIAL_8250_DETECT_IRQ is not set
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_8250_DWLIB=y
+CONFIG_SERIAL_8250_DFL=m
+CONFIG_SERIAL_8250_DW=m
+CONFIG_SERIAL_8250_RT288X=y
+CONFIG_SERIAL_8250_LPSS=m
+CONFIG_SERIAL_8250_MID=m
+CONFIG_SERIAL_8250_PERICOM=m
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_MAX3100=m
+CONFIG_SERIAL_MAX310X=m
+CONFIG_SERIAL_UARTLITE=m
+CONFIG_SERIAL_UARTLITE_NR_UARTS=1
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_SERIAL_JSM=m
+# CONFIG_SERIAL_LANTIQ is not set
+CONFIG_SERIAL_SCCNXP=m
+CONFIG_SERIAL_SC16IS7XX=m
+CONFIG_SERIAL_SC16IS7XX_I2C=m
+CONFIG_SERIAL_SC16IS7XX_SPI=m
+CONFIG_SERIAL_ALTERA_JTAGUART=m
+CONFIG_SERIAL_ALTERA_UART=m
+CONFIG_SERIAL_ALTERA_UART_MAXPORTS=4
+CONFIG_SERIAL_ALTERA_UART_BAUDRATE=115200
+CONFIG_SERIAL_ARC=m
+CONFIG_SERIAL_ARC_NR_PORTS=1
+CONFIG_SERIAL_RP2=m
+CONFIG_SERIAL_RP2_NR_UARTS=32
+CONFIG_SERIAL_FSL_LPUART=m
+CONFIG_SERIAL_FSL_LINFLEXUART=m
+CONFIG_SERIAL_MEN_Z135=m
+CONFIG_SERIAL_SPRD=m
+# end of Serial drivers
+
+CONFIG_SERIAL_MCTRL_GPIO=y
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_MOXA_INTELLIO=m
+CONFIG_MOXA_SMARTIO=m
+CONFIG_N_HDLC=m
+CONFIG_IPWIRELESS=m
+# CONFIG_N_GSM is not set
+CONFIG_NOZOMI=m
+CONFIG_NULL_TTY=m
+CONFIG_HVC_DRIVER=y
+CONFIG_HVC_IRQ=y
+CONFIG_HVC_XEN=y
+CONFIG_HVC_XEN_FRONTEND=y
+CONFIG_RPMSG_TTY=m
+CONFIG_SERIAL_DEV_BUS=y
+CONFIG_SERIAL_DEV_CTRL_TTYPORT=y
+CONFIG_PRINTER=m
+CONFIG_LP_CONSOLE=y
+CONFIG_PPDEV=m
+CONFIG_VIRTIO_CONSOLE=m
+CONFIG_IPMI_HANDLER=m
+CONFIG_IPMI_DMI_DECODE=y
+CONFIG_IPMI_PLAT_DATA=y
+# CONFIG_IPMI_PANIC_EVENT is not set
+CONFIG_IPMI_DEVICE_INTERFACE=m
+CONFIG_IPMI_SI=m
+CONFIG_IPMI_SSIF=m
+CONFIG_IPMI_IPMB=m
+CONFIG_IPMI_WATCHDOG=m
+CONFIG_IPMI_POWEROFF=m
+CONFIG_SSIF_IPMI_BMC=m
+CONFIG_IPMB_DEVICE_INTERFACE=m
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_TIMERIOMEM=m
+CONFIG_HW_RANDOM_INTEL=m
+CONFIG_HW_RANDOM_AMD=m
+CONFIG_HW_RANDOM_BA431=m
+CONFIG_HW_RANDOM_VIA=m
+CONFIG_HW_RANDOM_VIRTIO=m
+CONFIG_HW_RANDOM_XIPHERA=m
+CONFIG_APPLICOM=m
+CONFIG_MWAVE=m
+CONFIG_DEVMEM=y
+CONFIG_NVRAM=y
+CONFIG_DEVPORT=y
+CONFIG_HPET=y
+# CONFIG_HPET_MMAP is not set
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_TCG_TPM=y
+CONFIG_TCG_TPM2_HMAC=y
+CONFIG_HW_RANDOM_TPM=y
+CONFIG_TCG_TIS_CORE=y
+CONFIG_TCG_TIS=y
+CONFIG_TCG_TIS_SPI=m
+CONFIG_TCG_TIS_SPI_CR50=y
+CONFIG_TCG_TIS_I2C=m
+CONFIG_TCG_TIS_I2C_CR50=m
+CONFIG_TCG_TIS_I2C_ATMEL=m
+CONFIG_TCG_TIS_I2C_INFINEON=m
+CONFIG_TCG_TIS_I2C_NUVOTON=m
+CONFIG_TCG_NSC=m
+CONFIG_TCG_ATMEL=m
+CONFIG_TCG_INFINEON=m
+CONFIG_TCG_XEN=m
+CONFIG_TCG_CRB=y
+CONFIG_TCG_VTPM_PROXY=m
+CONFIG_TCG_TIS_ST33ZP24=m
+CONFIG_TCG_TIS_ST33ZP24_I2C=m
+CONFIG_TCG_TIS_ST33ZP24_SPI=m
+CONFIG_TELCLOCK=m
+CONFIG_XILLYBUS_CLASS=m
+CONFIG_XILLYBUS=m
+CONFIG_XILLYBUS_PCIE=m
+CONFIG_XILLYUSB=m
+# end of Character devices
+
+#
+# I2C support
+#
+CONFIG_I2C=y
+CONFIG_ACPI_I2C_OPREGION=y
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_COMPAT=y
+CONFIG_I2C_CHARDEV=m
+CONFIG_I2C_MUX=m
+
+#
+# Multiplexer I2C Chip support
+#
+CONFIG_I2C_MUX_GPIO=m
+CONFIG_I2C_MUX_LTC4306=m
+CONFIG_I2C_MUX_PCA9541=m
+CONFIG_I2C_MUX_PCA954x=m
+CONFIG_I2C_MUX_REG=m
+CONFIG_I2C_MUX_MLXCPLD=m
+# end of Multiplexer I2C Chip support
+
+CONFIG_I2C_HELPER_AUTO=y
+CONFIG_I2C_SMBUS=m
+CONFIG_I2C_ALGOBIT=m
+CONFIG_I2C_ALGOPCA=m
+
+#
+# I2C Hardware Bus support
+#
+
+#
+# PC SMBus host controller drivers
+#
+CONFIG_I2C_CCGX_UCSI=y
+CONFIG_I2C_ALI1535=m
+CONFIG_I2C_ALI1563=m
+CONFIG_I2C_ALI15X3=m
+CONFIG_I2C_AMD756=m
+CONFIG_I2C_AMD756_S4882=m
+CONFIG_I2C_AMD8111=m
+CONFIG_I2C_AMD_MP2=m
+CONFIG_I2C_I801=m
+CONFIG_I2C_I801_MUX=y
+CONFIG_I2C_ISCH=m
+CONFIG_I2C_ISMT=m
+CONFIG_I2C_PIIX4=m
+CONFIG_I2C_CHT_WC=m
+CONFIG_I2C_NCT6775=m
+CONFIG_I2C_NFORCE2=m
+CONFIG_I2C_NFORCE2_S4985=m
+CONFIG_I2C_NVIDIA_GPU=m
+CONFIG_I2C_SIS5595=m
+CONFIG_I2C_SIS630=m
+CONFIG_I2C_SIS96X=m
+CONFIG_I2C_VIA=m
+CONFIG_I2C_VIAPRO=m
+CONFIG_I2C_ZHAOXIN=m
+
+#
+# ACPI drivers
+#
+CONFIG_I2C_SCMI=m
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+CONFIG_I2C_CBUS_GPIO=m
+CONFIG_I2C_DESIGNWARE_CORE=y
+CONFIG_I2C_DESIGNWARE_SLAVE=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+CONFIG_I2C_DESIGNWARE_BAYTRAIL=y
+CONFIG_I2C_DESIGNWARE_PCI=y
+CONFIG_I2C_EMEV2=m
+CONFIG_I2C_GPIO=m
+# CONFIG_I2C_GPIO_FAULT_INJECTOR is not set
+CONFIG_I2C_KEMPLD=m
+CONFIG_I2C_OCORES=m
+CONFIG_I2C_PCA_PLATFORM=m
+CONFIG_I2C_SIMTEC=m
+CONFIG_I2C_XILINX=m
+
+#
+# External I2C/SMBus adapter drivers
+#
+CONFIG_I2C_DIOLAN_U2C=m
+CONFIG_I2C_DLN2=m
+CONFIG_I2C_LJCA=m
+CONFIG_I2C_CP2615=m
+CONFIG_I2C_PARPORT=m
+CONFIG_I2C_PCI1XXXX=m
+CONFIG_I2C_ROBOTFUZZ_OSIF=m
+CONFIG_I2C_TAOS_EVM=m
+CONFIG_I2C_TINY_USB=m
+CONFIG_I2C_VIPERBOARD=m
+
+#
+# Other I2C/SMBus bus drivers
+#
+CONFIG_I2C_MLXCPLD=m
+CONFIG_I2C_CROS_EC_TUNNEL=m
+CONFIG_I2C_VIRTIO=m
+# end of I2C Hardware Bus support
+
+CONFIG_I2C_STUB=m
+CONFIG_I2C_SLAVE=y
+CONFIG_I2C_SLAVE_EEPROM=m
+CONFIG_I2C_SLAVE_TESTUNIT=m
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# end of I2C support
+
+# CONFIG_I3C is not set
+CONFIG_SPI=y
+# CONFIG_SPI_DEBUG is not set
+CONFIG_SPI_MASTER=y
+CONFIG_SPI_MEM=y
+
+#
+# SPI Master Controller Drivers
+#
+CONFIG_SPI_ALTERA=m
+CONFIG_SPI_ALTERA_CORE=m
+CONFIG_SPI_ALTERA_DFL=m
+CONFIG_SPI_AXI_SPI_ENGINE=m
+CONFIG_SPI_BITBANG=m
+CONFIG_SPI_BUTTERFLY=m
+CONFIG_SPI_CADENCE=m
+CONFIG_SPI_CH341=m
+CONFIG_SPI_CS42L43=m
+CONFIG_SPI_DESIGNWARE=m
+CONFIG_SPI_DW_DMA=y
+CONFIG_SPI_DW_PCI=m
+CONFIG_SPI_DW_MMIO=m
+CONFIG_SPI_DLN2=m
+CONFIG_SPI_GPIO=m
+CONFIG_SPI_INTEL=m
+CONFIG_SPI_INTEL_PCI=m
+CONFIG_SPI_INTEL_PLATFORM=m
+CONFIG_SPI_LM70_LLP=m
+CONFIG_SPI_LJCA=m
+CONFIG_SPI_MICROCHIP_CORE=m
+CONFIG_SPI_MICROCHIP_CORE_QSPI=m
+# CONFIG_SPI_LANTIQ_SSC is not set
+CONFIG_SPI_OC_TINY=m
+CONFIG_SPI_PCI1XXXX=m
+CONFIG_SPI_PXA2XX=m
+CONFIG_SPI_PXA2XX_PCI=m
+CONFIG_SPI_SC18IS602=m
+CONFIG_SPI_SIFIVE=m
+CONFIG_SPI_MXIC=m
+CONFIG_SPI_XCOMM=m
+CONFIG_SPI_XILINX=m
+CONFIG_SPI_ZYNQMP_GQSPI=m
+CONFIG_SPI_AMD=m
+
+#
+# SPI Multiplexer support
+#
+CONFIG_SPI_MUX=m
+
+#
+# SPI Protocol Masters
+#
+CONFIG_SPI_SPIDEV=m
+CONFIG_SPI_LOOPBACK_TEST=m
+CONFIG_SPI_TLE62X0=m
+CONFIG_SPI_SLAVE=y
+CONFIG_SPI_SLAVE_TIME=m
+CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m
+CONFIG_SPI_DYNAMIC=y
+# CONFIG_SPMI is not set
+# CONFIG_HSI is not set
+CONFIG_PPS=m
+# CONFIG_PPS_DEBUG is not set
+
+#
+# PPS clients support
+#
+CONFIG_PPS_CLIENT_KTIMER=m
+CONFIG_PPS_CLIENT_LDISC=m
+CONFIG_PPS_CLIENT_PARPORT=m
+CONFIG_PPS_CLIENT_GPIO=m
+
+#
+# PPS generators support
+#
+
+#
+# PTP clock support
+#
+CONFIG_PTP_1588_CLOCK=m
+CONFIG_PTP_1588_CLOCK_OPTIONAL=m
+CONFIG_DP83640_PHY=m
+CONFIG_PTP_1588_CLOCK_INES=m
+CONFIG_PTP_1588_CLOCK_KVM=m
+CONFIG_PTP_1588_CLOCK_IDT82P33=m
+CONFIG_PTP_1588_CLOCK_IDTCM=m
+CONFIG_PTP_1588_CLOCK_FC3W=m
+CONFIG_PTP_1588_CLOCK_MOCK=m
+CONFIG_PTP_1588_CLOCK_VMW=m
+CONFIG_PTP_1588_CLOCK_OCP=m
+CONFIG_PTP_DFL_TOD=m
+# end of PTP clock support
+
+CONFIG_PINCTRL=y
+CONFIG_PINMUX=y
+CONFIG_PINCONF=y
+CONFIG_GENERIC_PINCONF=y
+# CONFIG_DEBUG_PINCTRL is not set
+CONFIG_PINCTRL_AMD=y
+CONFIG_PINCTRL_CY8C95X0=m
+CONFIG_PINCTRL_DA9062=m
+CONFIG_PINCTRL_MCP23S08_I2C=m
+CONFIG_PINCTRL_MCP23S08_SPI=m
+CONFIG_PINCTRL_MCP23S08=m
+CONFIG_PINCTRL_SX150X=y
+CONFIG_PINCTRL_CS42L43=m
+CONFIG_PINCTRL_MADERA=m
+CONFIG_PINCTRL_CS47L15=y
+CONFIG_PINCTRL_CS47L35=y
+CONFIG_PINCTRL_CS47L85=y
+CONFIG_PINCTRL_CS47L90=y
+CONFIG_PINCTRL_CS47L92=y
+
+#
+# Intel pinctrl drivers
+#
+CONFIG_PINCTRL_BAYTRAIL=y
+CONFIG_PINCTRL_CHERRYVIEW=y
+CONFIG_PINCTRL_LYNXPOINT=m
+CONFIG_PINCTRL_INTEL=y
+CONFIG_PINCTRL_INTEL_PLATFORM=m
+CONFIG_PINCTRL_ALDERLAKE=m
+CONFIG_PINCTRL_BROXTON=m
+CONFIG_PINCTRL_CANNONLAKE=m
+CONFIG_PINCTRL_CEDARFORK=m
+CONFIG_PINCTRL_DENVERTON=m
+CONFIG_PINCTRL_ELKHARTLAKE=m
+CONFIG_PINCTRL_EMMITSBURG=m
+CONFIG_PINCTRL_GEMINILAKE=m
+CONFIG_PINCTRL_ICELAKE=m
+CONFIG_PINCTRL_JASPERLAKE=m
+CONFIG_PINCTRL_LAKEFIELD=m
+CONFIG_PINCTRL_LEWISBURG=m
+CONFIG_PINCTRL_METEORLAKE=m
+CONFIG_PINCTRL_METEORPOINT=m
+CONFIG_PINCTRL_SUNRISEPOINT=m
+CONFIG_PINCTRL_TIGERLAKE=m
+# end of Intel pinctrl drivers
+
+#
+# Renesas pinctrl drivers
+#
+# end of Renesas pinctrl drivers
+
+CONFIG_GPIOLIB=y
+CONFIG_GPIOLIB_FASTPATH_LIMIT=512
+CONFIG_GPIO_ACPI=y
+CONFIG_GPIOLIB_IRQCHIP=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_CDEV=y
+CONFIG_GPIO_CDEV_V1=y
+CONFIG_GPIO_GENERIC=m
+CONFIG_GPIO_REGMAP=m
+CONFIG_GPIO_SWNODE_UNDEFINED=y
+CONFIG_GPIO_MAX730X=m
+CONFIG_GPIO_IDIO_16=m
+
+#
+# Memory mapped GPIO drivers
+#
+CONFIG_GPIO_AMDPT=m
+CONFIG_GPIO_DWAPB=m
+CONFIG_GPIO_EXAR=m
+CONFIG_GPIO_GENERIC_PLATFORM=m
+CONFIG_GPIO_GRANITERAPIDS=m
+CONFIG_GPIO_ICH=m
+CONFIG_GPIO_MB86S7X=m
+CONFIG_GPIO_MENZ127=m
+CONFIG_GPIO_SIOX=m
+CONFIG_GPIO_TANGIER=m
+CONFIG_GPIO_AMD_FCH=m
+# end of Memory mapped GPIO drivers
+
+#
+# Port-mapped I/O GPIO drivers
+#
+CONFIG_GPIO_VX855=m
+CONFIG_GPIO_F7188X=m
+CONFIG_GPIO_IT87=m
+CONFIG_GPIO_SCH=m
+CONFIG_GPIO_SCH311X=m
+CONFIG_GPIO_WINBOND=m
+CONFIG_GPIO_WS16C48=m
+# end of Port-mapped I/O GPIO drivers
+
+#
+# I2C GPIO expanders
+#
+CONFIG_GPIO_FXL6408=m
+CONFIG_GPIO_DS4520=m
+CONFIG_GPIO_MAX7300=m
+CONFIG_GPIO_MAX732X=m
+CONFIG_GPIO_PCA953X=m
+CONFIG_GPIO_PCA953X_IRQ=y
+CONFIG_GPIO_PCA9570=m
+CONFIG_GPIO_PCF857X=m
+CONFIG_GPIO_TPIC2810=m
+# end of I2C GPIO expanders
+
+#
+# MFD GPIO expanders
+#
+CONFIG_GPIO_ADP5520=m
+CONFIG_GPIO_ARIZONA=m
+CONFIG_GPIO_BD9571MWV=m
+CONFIG_GPIO_CROS_EC=m
+CONFIG_GPIO_CRYSTAL_COVE=m
+CONFIG_GPIO_DA9052=m
+CONFIG_GPIO_DA9055=m
+CONFIG_GPIO_DLN2=m
+CONFIG_GPIO_ELKHARTLAKE=m
+CONFIG_GPIO_JANZ_TTL=m
+CONFIG_GPIO_KEMPLD=m
+CONFIG_GPIO_LJCA=m
+CONFIG_GPIO_LP3943=m
+CONFIG_GPIO_LP873X=m
+CONFIG_GPIO_MADERA=m
+CONFIG_GPIO_PALMAS=y
+CONFIG_GPIO_RC5T583=y
+CONFIG_GPIO_TPS65086=m
+CONFIG_GPIO_TPS6586X=y
+CONFIG_GPIO_TPS65910=y
+CONFIG_GPIO_TPS65912=m
+CONFIG_GPIO_TPS68470=m
+CONFIG_GPIO_TQMX86=m
+CONFIG_GPIO_TWL4030=m
+CONFIG_GPIO_TWL6040=m
+CONFIG_GPIO_WHISKEY_COVE=m
+CONFIG_GPIO_WM831X=m
+CONFIG_GPIO_WM8350=m
+CONFIG_GPIO_WM8994=m
+# end of MFD GPIO expanders
+
+#
+# PCI GPIO expanders
+#
+CONFIG_GPIO_AMD8111=m
+CONFIG_GPIO_ML_IOH=m
+CONFIG_GPIO_PCI_IDIO_16=m
+CONFIG_GPIO_PCIE_IDIO_24=m
+CONFIG_GPIO_RDC321X=m
+# end of PCI GPIO expanders
+
+#
+# SPI GPIO expanders
+#
+CONFIG_GPIO_MAX3191X=m
+CONFIG_GPIO_MAX7301=m
+CONFIG_GPIO_MC33880=m
+CONFIG_GPIO_PISOSR=m
+CONFIG_GPIO_XRA1403=m
+# end of SPI GPIO expanders
+
+#
+# USB GPIO expanders
+#
+CONFIG_GPIO_VIPERBOARD=m
+# end of USB GPIO expanders
+
+#
+# Virtual GPIO drivers
+#
+CONFIG_GPIO_AGGREGATOR=m
+CONFIG_GPIO_LATCH=m
+CONFIG_GPIO_MOCKUP=m
+CONFIG_GPIO_VIRTIO=m
+CONFIG_GPIO_SIM=m
+# end of Virtual GPIO drivers
+
+#
+# GPIO Debugging utilities
+#
+# CONFIG_GPIO_VIRTUSER is not set
+# end of GPIO Debugging utilities
+
+CONFIG_W1=m
+CONFIG_W1_CON=y
+
+#
+# 1-wire Bus Masters
+#
+CONFIG_W1_MASTER_AMD_AXI=m
+CONFIG_W1_MASTER_MATROX=m
+CONFIG_W1_MASTER_DS2490=m
+CONFIG_W1_MASTER_DS2482=m
+CONFIG_W1_MASTER_GPIO=m
+CONFIG_W1_MASTER_SGI=m
+CONFIG_W1_MASTER_UART=m
+# end of 1-wire Bus Masters
+
+#
+# 1-wire Slaves
+#
+CONFIG_W1_SLAVE_THERM=m
+CONFIG_W1_SLAVE_SMEM=m
+CONFIG_W1_SLAVE_DS2405=m
+CONFIG_W1_SLAVE_DS2408=m
+# CONFIG_W1_SLAVE_DS2408_READBACK is not set
+CONFIG_W1_SLAVE_DS2413=m
+CONFIG_W1_SLAVE_DS2406=m
+CONFIG_W1_SLAVE_DS2423=m
+CONFIG_W1_SLAVE_DS2805=m
+CONFIG_W1_SLAVE_DS2430=m
+CONFIG_W1_SLAVE_DS2431=m
+CONFIG_W1_SLAVE_DS2433=m
+# CONFIG_W1_SLAVE_DS2433_CRC is not set
+CONFIG_W1_SLAVE_DS2438=m
+CONFIG_W1_SLAVE_DS250X=m
+CONFIG_W1_SLAVE_DS2780=m
+CONFIG_W1_SLAVE_DS2781=m
+CONFIG_W1_SLAVE_DS28E04=m
+CONFIG_W1_SLAVE_DS28E17=m
+# end of 1-wire Slaves
+
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_ATC260X=m
+CONFIG_POWER_RESET_MT6323=y
+CONFIG_POWER_RESET_RESTART=y
+CONFIG_POWER_RESET_TPS65086=y
+CONFIG_POWER_SEQUENCING=m
+CONFIG_POWER_SEQUENCING_QCOM_WCN=m
+CONFIG_POWER_SUPPLY=y
+# CONFIG_POWER_SUPPLY_DEBUG is not set
+CONFIG_POWER_SUPPLY_HWMON=y
+CONFIG_GENERIC_ADC_BATTERY=m
+CONFIG_IP5XXX_POWER=m
+CONFIG_MAX8925_POWER=m
+CONFIG_WM831X_BACKUP=m
+CONFIG_WM831X_POWER=m
+CONFIG_WM8350_POWER=m
+CONFIG_TEST_POWER=m
+CONFIG_BATTERY_88PM860X=m
+CONFIG_CHARGER_ADP5061=m
+CONFIG_BATTERY_CW2015=m
+CONFIG_BATTERY_DS2760=m
+CONFIG_BATTERY_DS2780=m
+CONFIG_BATTERY_DS2781=m
+CONFIG_BATTERY_DS2782=m
+# CONFIG_BATTERY_SAMSUNG_SDI is not set
+CONFIG_BATTERY_SBS=m
+CONFIG_CHARGER_SBS=m
+CONFIG_MANAGER_SBS=m
+CONFIG_BATTERY_BQ27XXX=m
+CONFIG_BATTERY_BQ27XXX_I2C=m
+CONFIG_BATTERY_BQ27XXX_HDQ=m
+# CONFIG_BATTERY_BQ27XXX_DT_UPDATES_NVM is not set
+CONFIG_BATTERY_DA9030=m
+CONFIG_BATTERY_DA9052=m
+CONFIG_CHARGER_DA9150=m
+CONFIG_BATTERY_DA9150=m
+CONFIG_CHARGER_AXP20X=m
+CONFIG_BATTERY_AXP20X=m
+CONFIG_AXP20X_POWER=m
+CONFIG_AXP288_CHARGER=m
+CONFIG_AXP288_FUEL_GAUGE=m
+CONFIG_BATTERY_MAX17040=m
+CONFIG_BATTERY_MAX17042=m
+CONFIG_BATTERY_MAX1720X=m
+CONFIG_BATTERY_MAX1721X=m
+CONFIG_BATTERY_TWL4030_MADC=m
+CONFIG_CHARGER_88PM860X=m
+CONFIG_CHARGER_PCF50633=m
+CONFIG_BATTERY_RX51=m
+CONFIG_CHARGER_ISP1704=m
+CONFIG_CHARGER_MAX8903=m
+CONFIG_CHARGER_TWL4030=m
+CONFIG_CHARGER_LP8727=m
+CONFIG_CHARGER_LP8788=m
+CONFIG_CHARGER_GPIO=m
+CONFIG_CHARGER_MANAGER=m
+CONFIG_CHARGER_LT3651=m
+CONFIG_CHARGER_LTC4162L=m
+CONFIG_CHARGER_MAX14577=m
+CONFIG_CHARGER_MAX77693=m
+CONFIG_CHARGER_MAX77976=m
+CONFIG_CHARGER_MAX8997=m
+CONFIG_CHARGER_MAX8998=m
+CONFIG_CHARGER_MP2629=m
+CONFIG_CHARGER_MT6360=m
+CONFIG_CHARGER_MT6370=m
+CONFIG_CHARGER_BQ2415X=m
+CONFIG_CHARGER_BQ24190=m
+CONFIG_CHARGER_BQ24257=m
+CONFIG_CHARGER_BQ24735=m
+CONFIG_CHARGER_BQ2515X=m
+CONFIG_CHARGER_BQ25890=m
+CONFIG_CHARGER_BQ25980=m
+CONFIG_CHARGER_BQ256XX=m
+CONFIG_CHARGER_SMB347=m
+CONFIG_CHARGER_TPS65090=m
+CONFIG_BATTERY_GAUGE_LTC2941=m
+CONFIG_BATTERY_GOLDFISH=m
+CONFIG_BATTERY_RT5033=m
+CONFIG_CHARGER_RT5033=m
+CONFIG_CHARGER_RT9455=m
+CONFIG_CHARGER_RT9467=m
+CONFIG_CHARGER_RT9471=m
+CONFIG_CHARGER_CROS_USBPD=m
+CONFIG_CHARGER_CROS_PCHG=m
+CONFIG_CHARGER_CROS_CONTROL=m
+CONFIG_CHARGER_BD99954=m
+CONFIG_CHARGER_WILCO=m
+CONFIG_BATTERY_SURFACE=m
+CONFIG_CHARGER_SURFACE=m
+CONFIG_BATTERY_UG3105=m
+CONFIG_FUEL_GAUGE_MM8013=m
+CONFIG_HWMON=y
+CONFIG_HWMON_VID=m
+# CONFIG_HWMON_DEBUG_CHIP is not set
+
+#
+# Native drivers
+#
+CONFIG_SENSORS_ABITUGURU=m
+CONFIG_SENSORS_ABITUGURU3=m
+CONFIG_SENSORS_SMPRO=m
+CONFIG_SENSORS_AD7314=m
+CONFIG_SENSORS_AD7414=m
+CONFIG_SENSORS_AD7418=m
+CONFIG_SENSORS_ADM1025=m
+CONFIG_SENSORS_ADM1026=m
+CONFIG_SENSORS_ADM1029=m
+CONFIG_SENSORS_ADM1031=m
+CONFIG_SENSORS_ADM1177=m
+CONFIG_SENSORS_ADM9240=m
+CONFIG_SENSORS_ADT7X10=m
+CONFIG_SENSORS_ADT7310=m
+CONFIG_SENSORS_ADT7410=m
+CONFIG_SENSORS_ADT7411=m
+CONFIG_SENSORS_ADT7462=m
+CONFIG_SENSORS_ADT7470=m
+CONFIG_SENSORS_ADT7475=m
+CONFIG_SENSORS_AHT10=m
+CONFIG_SENSORS_AQUACOMPUTER_D5NEXT=m
+CONFIG_SENSORS_AS370=m
+CONFIG_SENSORS_ASC7621=m
+CONFIG_SENSORS_ASUS_ROG_RYUJIN=m
+CONFIG_SENSORS_AXI_FAN_CONTROL=m
+CONFIG_SENSORS_K8TEMP=m
+CONFIG_SENSORS_K10TEMP=m
+CONFIG_SENSORS_FAM15H_POWER=m
+CONFIG_SENSORS_APPLESMC=m
+CONFIG_SENSORS_ASB100=m
+CONFIG_SENSORS_ATXP1=m
+CONFIG_SENSORS_CHIPCAP2=m
+CONFIG_SENSORS_CORSAIR_CPRO=m
+CONFIG_SENSORS_CORSAIR_PSU=m
+CONFIG_SENSORS_CROS_EC=m
+CONFIG_SENSORS_DRIVETEMP=m
+CONFIG_SENSORS_DS620=m
+CONFIG_SENSORS_DS1621=m
+CONFIG_SENSORS_DELL_SMM=m
+CONFIG_I8K=y
+CONFIG_SENSORS_DA9052_ADC=m
+CONFIG_SENSORS_DA9055=m
+CONFIG_SENSORS_I5K_AMB=m
+CONFIG_SENSORS_F71805F=m
+CONFIG_SENSORS_F71882FG=m
+CONFIG_SENSORS_F75375S=m
+CONFIG_SENSORS_MC13783_ADC=m
+CONFIG_SENSORS_FSCHMD=m
+CONFIG_SENSORS_FTSTEUTATES=m
+CONFIG_SENSORS_GIGABYTE_WATERFORCE=m
+CONFIG_SENSORS_GL518SM=m
+CONFIG_SENSORS_GL520SM=m
+CONFIG_SENSORS_G760A=m
+CONFIG_SENSORS_G762=m
+CONFIG_SENSORS_HIH6130=m
+CONFIG_SENSORS_HS3001=m
+CONFIG_SENSORS_IBMAEM=m
+CONFIG_SENSORS_IBMPEX=m
+CONFIG_SENSORS_IIO_HWMON=m
+CONFIG_SENSORS_I5500=m
+CONFIG_SENSORS_CORETEMP=m
+CONFIG_SENSORS_IT87=m
+CONFIG_SENSORS_JC42=m
+CONFIG_SENSORS_POWERZ=m
+CONFIG_SENSORS_POWR1220=m
+CONFIG_SENSORS_LENOVO_EC=m
+CONFIG_SENSORS_LINEAGE=m
+CONFIG_SENSORS_LTC2945=m
+CONFIG_SENSORS_LTC2947=m
+CONFIG_SENSORS_LTC2947_I2C=m
+CONFIG_SENSORS_LTC2947_SPI=m
+CONFIG_SENSORS_LTC2990=m
+CONFIG_SENSORS_LTC2991=m
+CONFIG_SENSORS_LTC2992=m
+CONFIG_SENSORS_LTC4151=m
+CONFIG_SENSORS_LTC4215=m
+CONFIG_SENSORS_LTC4222=m
+CONFIG_SENSORS_LTC4245=m
+CONFIG_SENSORS_LTC4260=m
+CONFIG_SENSORS_LTC4261=m
+CONFIG_SENSORS_LTC4282=m
+CONFIG_SENSORS_MAX1111=m
+CONFIG_SENSORS_MAX127=m
+CONFIG_SENSORS_MAX16065=m
+CONFIG_SENSORS_MAX1619=m
+CONFIG_SENSORS_MAX1668=m
+CONFIG_SENSORS_MAX197=m
+CONFIG_SENSORS_MAX31722=m
+CONFIG_SENSORS_MAX31730=m
+CONFIG_SENSORS_MAX31760=m
+CONFIG_MAX31827=m
+CONFIG_SENSORS_MAX6620=m
+CONFIG_SENSORS_MAX6621=m
+CONFIG_SENSORS_MAX6639=m
+CONFIG_SENSORS_MAX6650=m
+CONFIG_SENSORS_MAX6697=m
+CONFIG_SENSORS_MAX31790=m
+CONFIG_SENSORS_MC34VR500=m
+CONFIG_SENSORS_MCP3021=m
+CONFIG_SENSORS_MLXREG_FAN=m
+CONFIG_SENSORS_TC654=m
+CONFIG_SENSORS_TPS23861=m
+CONFIG_SENSORS_MENF21BMC_HWMON=m
+CONFIG_SENSORS_MR75203=m
+CONFIG_SENSORS_ADCXX=m
+CONFIG_SENSORS_LM63=m
+CONFIG_SENSORS_LM70=m
+CONFIG_SENSORS_LM73=m
+CONFIG_SENSORS_LM75=m
+CONFIG_SENSORS_LM77=m
+CONFIG_SENSORS_LM78=m
+CONFIG_SENSORS_LM80=m
+CONFIG_SENSORS_LM83=m
+CONFIG_SENSORS_LM85=m
+CONFIG_SENSORS_LM87=m
+CONFIG_SENSORS_LM90=m
+CONFIG_SENSORS_LM92=m
+CONFIG_SENSORS_LM93=m
+CONFIG_SENSORS_LM95234=m
+CONFIG_SENSORS_LM95241=m
+CONFIG_SENSORS_LM95245=m
+CONFIG_SENSORS_PC87360=m
+CONFIG_SENSORS_PC87427=m
+CONFIG_SENSORS_NTC_THERMISTOR=m
+CONFIG_SENSORS_NCT6683=m
+CONFIG_SENSORS_NCT6775_CORE=m
+CONFIG_SENSORS_NCT6775=m
+CONFIG_SENSORS_NCT6775_I2C=m
+CONFIG_SENSORS_NCT7802=m
+CONFIG_SENSORS_NCT7904=m
+CONFIG_SENSORS_NPCM7XX=m
+CONFIG_SENSORS_NZXT_KRAKEN2=m
+CONFIG_SENSORS_NZXT_KRAKEN3=m
+CONFIG_SENSORS_NZXT_SMART2=m
+CONFIG_SENSORS_OCC_P8_I2C=m
+CONFIG_SENSORS_OCC=m
+CONFIG_SENSORS_OXP=m
+CONFIG_SENSORS_PCF8591=m
+CONFIG_PMBUS=m
+CONFIG_SENSORS_PMBUS=m
+CONFIG_SENSORS_ACBEL_FSG032=m
+CONFIG_SENSORS_ADM1266=m
+CONFIG_SENSORS_ADM1275=m
+CONFIG_SENSORS_ADP1050=m
+CONFIG_SENSORS_BEL_PFE=m
+CONFIG_SENSORS_BPA_RS600=m
+CONFIG_SENSORS_DELTA_AHE50DC_FAN=m
+CONFIG_SENSORS_FSP_3Y=m
+CONFIG_SENSORS_IBM_CFFPS=m
+CONFIG_SENSORS_DPS920AB=m
+CONFIG_SENSORS_INSPUR_IPSPS=m
+CONFIG_SENSORS_IR35221=m
+CONFIG_SENSORS_IR36021=m
+CONFIG_SENSORS_IR38064=m
+CONFIG_SENSORS_IR38064_REGULATOR=y
+CONFIG_SENSORS_IRPS5401=m
+CONFIG_SENSORS_ISL68137=m
+CONFIG_SENSORS_LM25066=m
+CONFIG_SENSORS_LM25066_REGULATOR=y
+CONFIG_SENSORS_LT7182S=m
+CONFIG_SENSORS_LTC2978=m
+# CONFIG_SENSORS_LTC2978_REGULATOR is not set
+CONFIG_SENSORS_LTC3815=m
+CONFIG_SENSORS_LTC4286=y
+CONFIG_SENSORS_MAX15301=m
+CONFIG_SENSORS_MAX16064=m
+CONFIG_SENSORS_MAX16601=m
+CONFIG_SENSORS_MAX20730=m
+CONFIG_SENSORS_MAX20751=m
+CONFIG_SENSORS_MAX31785=m
+CONFIG_SENSORS_MAX34440=m
+CONFIG_SENSORS_MAX8688=m
+CONFIG_SENSORS_MP2856=m
+CONFIG_SENSORS_MP2888=m
+CONFIG_SENSORS_MP2891=m
+CONFIG_SENSORS_MP2975=m
+CONFIG_SENSORS_MP2993=m
+CONFIG_SENSORS_MP2975_REGULATOR=y
+CONFIG_SENSORS_MP5023=m
+CONFIG_SENSORS_MP5920=m
+CONFIG_SENSORS_MP5990=m
+CONFIG_SENSORS_MP9941=m
+CONFIG_SENSORS_MPQ7932_REGULATOR=y
+CONFIG_SENSORS_MPQ7932=m
+CONFIG_SENSORS_MPQ8785=m
+CONFIG_SENSORS_PIM4328=m
+CONFIG_SENSORS_PLI1209BC=m
+CONFIG_SENSORS_PLI1209BC_REGULATOR=y
+CONFIG_SENSORS_PM6764TR=m
+CONFIG_SENSORS_PXE1610=m
+CONFIG_SENSORS_Q54SJ108A2=m
+CONFIG_SENSORS_STPDDC60=m
+CONFIG_SENSORS_TDA38640=m
+CONFIG_SENSORS_TDA38640_REGULATOR=y
+CONFIG_SENSORS_TPS40422=m
+CONFIG_SENSORS_TPS53679=m
+CONFIG_SENSORS_TPS546D24=m
+CONFIG_SENSORS_UCD9000=m
+CONFIG_SENSORS_UCD9200=m
+CONFIG_SENSORS_XDP710=m
+CONFIG_SENSORS_XDPE152=m
+CONFIG_SENSORS_XDPE122=m
+CONFIG_SENSORS_XDPE122_REGULATOR=y
+CONFIG_SENSORS_ZL6100=m
+CONFIG_SENSORS_PT5161L=m
+CONFIG_SENSORS_PWM_FAN=m
+CONFIG_SENSORS_SBTSI=m
+CONFIG_SENSORS_SBRMI=m
+CONFIG_SENSORS_SHT15=m
+CONFIG_SENSORS_SHT21=m
+CONFIG_SENSORS_SHT3x=m
+CONFIG_SENSORS_SHT4x=m
+CONFIG_SENSORS_SHTC1=m
+CONFIG_SENSORS_SIS5595=m
+CONFIG_SENSORS_SY7636A=m
+CONFIG_SENSORS_DME1737=m
+CONFIG_SENSORS_EMC1403=m
+CONFIG_SENSORS_EMC2103=m
+CONFIG_SENSORS_EMC2305=m
+CONFIG_SENSORS_EMC6W201=m
+CONFIG_SENSORS_SMSC47M1=m
+CONFIG_SENSORS_SMSC47M192=m
+CONFIG_SENSORS_SMSC47B397=m
+CONFIG_SENSORS_SCH56XX_COMMON=m
+CONFIG_SENSORS_SCH5627=m
+CONFIG_SENSORS_SCH5636=m
+CONFIG_SENSORS_STTS751=m
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_ADC128D818=m
+CONFIG_SENSORS_ADS7828=m
+CONFIG_SENSORS_ADS7871=m
+CONFIG_SENSORS_AMC6821=m
+CONFIG_SENSORS_INA209=m
+CONFIG_SENSORS_INA2XX=m
+CONFIG_SENSORS_INA238=m
+CONFIG_SENSORS_INA3221=m
+CONFIG_SENSORS_SPD5118=m
+CONFIG_SENSORS_SPD5118_DETECT=y
+CONFIG_SENSORS_TC74=m
+CONFIG_SENSORS_THMC50=m
+CONFIG_SENSORS_TMP102=m
+CONFIG_SENSORS_TMP103=m
+CONFIG_SENSORS_TMP108=m
+CONFIG_SENSORS_TMP401=m
+CONFIG_SENSORS_TMP421=m
+CONFIG_SENSORS_TMP464=m
+CONFIG_SENSORS_TMP513=m
+CONFIG_SENSORS_VIA_CPUTEMP=m
+CONFIG_SENSORS_VIA686A=m
+CONFIG_SENSORS_VT1211=m
+CONFIG_SENSORS_VT8231=m
+CONFIG_SENSORS_W83773G=m
+CONFIG_SENSORS_W83781D=m
+CONFIG_SENSORS_W83791D=m
+CONFIG_SENSORS_W83792D=m
+CONFIG_SENSORS_W83793=m
+CONFIG_SENSORS_W83795=m
+# CONFIG_SENSORS_W83795_FANCTRL is not set
+CONFIG_SENSORS_W83L785TS=m
+CONFIG_SENSORS_W83L786NG=m
+CONFIG_SENSORS_W83627HF=m
+CONFIG_SENSORS_W83627EHF=m
+CONFIG_SENSORS_WM831X=m
+CONFIG_SENSORS_WM8350=m
+CONFIG_SENSORS_XGENE=m
+CONFIG_SENSORS_INTEL_M10_BMC_HWMON=m
+
+#
+# ACPI drivers
+#
+CONFIG_SENSORS_ACPI_POWER=m
+CONFIG_SENSORS_ATK0110=m
+CONFIG_SENSORS_ASUS_WMI=m
+CONFIG_SENSORS_ASUS_EC=m
+CONFIG_SENSORS_HP_WMI=m
+CONFIG_THERMAL=y
+CONFIG_THERMAL_NETLINK=y
+# CONFIG_THERMAL_STATISTICS is not set
+# CONFIG_THERMAL_DEBUGFS is not set
+CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=100
+CONFIG_THERMAL_HWMON=y
+CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y
+# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set
+# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set
+# CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR is not set
+# CONFIG_THERMAL_DEFAULT_GOV_BANG_BANG is not set
+CONFIG_THERMAL_GOV_FAIR_SHARE=y
+CONFIG_THERMAL_GOV_STEP_WISE=y
+CONFIG_THERMAL_GOV_BANG_BANG=y
+CONFIG_THERMAL_GOV_USER_SPACE=y
+CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y
+CONFIG_DEVFREQ_THERMAL=y
+# CONFIG_THERMAL_EMULATION is not set
+
+#
+# Intel thermal drivers
+#
+CONFIG_INTEL_POWERCLAMP=m
+CONFIG_X86_THERMAL_VECTOR=y
+CONFIG_INTEL_TCC=y
+CONFIG_X86_PKG_TEMP_THERMAL=m
+CONFIG_INTEL_SOC_DTS_IOSF_CORE=m
+CONFIG_INTEL_SOC_DTS_THERMAL=m
+
+#
+# ACPI INT340X thermal drivers
+#
+CONFIG_INT340X_THERMAL=m
+CONFIG_ACPI_THERMAL_REL=m
+CONFIG_INT3406_THERMAL=m
+CONFIG_PROC_THERMAL_MMIO_RAPL=m
+# end of ACPI INT340X thermal drivers
+
+CONFIG_INTEL_BXT_PMIC_THERMAL=m
+CONFIG_INTEL_PCH_THERMAL=m
+CONFIG_INTEL_TCC_COOLING=m
+CONFIG_INTEL_HFI_THERMAL=y
+# end of Intel thermal drivers
+
+CONFIG_GENERIC_ADC_THERMAL=m
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_CORE=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
+CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y
+CONFIG_WATCHDOG_OPEN_TIMEOUT=0
+CONFIG_WATCHDOG_SYSFS=y
+# CONFIG_WATCHDOG_HRTIMER_PRETIMEOUT is not set
+
+#
+# Watchdog Pretimeout Governors
+#
+CONFIG_WATCHDOG_PRETIMEOUT_GOV=y
+CONFIG_WATCHDOG_PRETIMEOUT_GOV_SEL=m
+CONFIG_WATCHDOG_PRETIMEOUT_GOV_NOOP=m
+CONFIG_WATCHDOG_PRETIMEOUT_GOV_PANIC=y
+# CONFIG_WATCHDOG_PRETIMEOUT_DEFAULT_GOV_NOOP is not set
+CONFIG_WATCHDOG_PRETIMEOUT_DEFAULT_GOV_PANIC=y
+
+#
+# Watchdog Device Drivers
+#
+CONFIG_SOFT_WATCHDOG=m
+# CONFIG_SOFT_WATCHDOG_PRETIMEOUT is not set
+CONFIG_CROS_EC_WATCHDOG=m
+CONFIG_DA9052_WATCHDOG=m
+CONFIG_DA9055_WATCHDOG=m
+CONFIG_DA9063_WATCHDOG=m
+CONFIG_DA9062_WATCHDOG=m
+CONFIG_LENOVO_SE10_WDT=m
+CONFIG_MENF21BMC_WATCHDOG=m
+CONFIG_MENZ069_WATCHDOG=m
+CONFIG_WDAT_WDT=m
+CONFIG_WM831X_WATCHDOG=m
+CONFIG_WM8350_WATCHDOG=m
+CONFIG_XILINX_WATCHDOG=m
+CONFIG_ZIIRAVE_WATCHDOG=m
+CONFIG_RAVE_SP_WATCHDOG=m
+CONFIG_MLX_WDT=m
+CONFIG_CADENCE_WATCHDOG=m
+CONFIG_DW_WATCHDOG=m
+CONFIG_TWL4030_WATCHDOG=m
+CONFIG_MAX63XX_WATCHDOG=m
+CONFIG_RETU_WATCHDOG=m
+CONFIG_ACQUIRE_WDT=m
+CONFIG_ADVANTECH_WDT=m
+CONFIG_ADVANTECH_EC_WDT=m
+CONFIG_ALIM1535_WDT=m
+CONFIG_ALIM7101_WDT=m
+CONFIG_EBC_C384_WDT=m
+CONFIG_EXAR_WDT=m
+CONFIG_F71808E_WDT=m
+CONFIG_SP5100_TCO=m
+CONFIG_SBC_FITPC2_WATCHDOG=m
+CONFIG_EUROTECH_WDT=m
+CONFIG_IB700_WDT=m
+CONFIG_IBMASR=m
+CONFIG_WAFER_WDT=m
+CONFIG_I6300ESB_WDT=m
+CONFIG_IE6XX_WDT=m
+CONFIG_ITCO_WDT=m
+CONFIG_ITCO_VENDOR_SUPPORT=y
+CONFIG_IT8712F_WDT=m
+CONFIG_IT87_WDT=m
+CONFIG_HP_WATCHDOG=m
+CONFIG_HPWDT_NMI_DECODING=y
+CONFIG_KEMPLD_WDT=m
+CONFIG_SC1200_WDT=m
+CONFIG_PC87413_WDT=m
+CONFIG_NV_TCO=m
+CONFIG_60XX_WDT=m
+CONFIG_CPU5_WDT=m
+CONFIG_SMSC_SCH311X_WDT=m
+CONFIG_SMSC37B787_WDT=m
+CONFIG_TQMX86_WDT=m
+CONFIG_VIA_WDT=m
+CONFIG_W83627HF_WDT=m
+CONFIG_W83877F_WDT=m
+CONFIG_W83977F_WDT=m
+CONFIG_MACHZ_WDT=m
+CONFIG_SBC_EPX_C3_WATCHDOG=m
+CONFIG_INTEL_MEI_WDT=m
+CONFIG_NI903X_WDT=m
+CONFIG_NIC7018_WDT=m
+CONFIG_SIEMENS_SIMATIC_IPC_WDT=m
+CONFIG_MEN_A21_WDT=m
+CONFIG_XEN_WDT=m
+
+#
+# PCI-based Watchdog Cards
+#
+CONFIG_PCIPCWATCHDOG=m
+CONFIG_WDTPCI=m
+
+#
+# USB-based Watchdog Cards
+#
+CONFIG_USBPCWATCHDOG=m
+CONFIG_SSB_POSSIBLE=y
+CONFIG_SSB=m
+CONFIG_SSB_SPROM=y
+CONFIG_SSB_BLOCKIO=y
+CONFIG_SSB_PCIHOST_POSSIBLE=y
+CONFIG_SSB_PCIHOST=y
+CONFIG_SSB_B43_PCI_BRIDGE=y
+CONFIG_SSB_PCMCIAHOST_POSSIBLE=y
+CONFIG_SSB_PCMCIAHOST=y
+CONFIG_SSB_SDIOHOST_POSSIBLE=y
+CONFIG_SSB_SDIOHOST=y
+CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y
+CONFIG_SSB_DRIVER_PCICORE=y
+CONFIG_SSB_DRIVER_GPIO=y
+CONFIG_BCMA_POSSIBLE=y
+CONFIG_BCMA=m
+CONFIG_BCMA_BLOCKIO=y
+CONFIG_BCMA_HOST_PCI_POSSIBLE=y
+CONFIG_BCMA_HOST_PCI=y
+# CONFIG_BCMA_HOST_SOC is not set
+CONFIG_BCMA_DRIVER_PCI=y
+CONFIG_BCMA_DRIVER_GMAC_CMN=y
+CONFIG_BCMA_DRIVER_GPIO=y
+# CONFIG_BCMA_DEBUG is not set
+
+#
+# Multifunction device drivers
+#
+CONFIG_MFD_CORE=y
+CONFIG_MFD_AS3711=y
+CONFIG_MFD_SMPRO=m
+CONFIG_PMIC_ADP5520=y
+CONFIG_MFD_AAT2870_CORE=y
+CONFIG_MFD_BCM590XX=m
+CONFIG_MFD_BD9571MWV=m
+CONFIG_MFD_AXP20X=m
+CONFIG_MFD_AXP20X_I2C=m
+CONFIG_MFD_CROS_EC_DEV=m
+CONFIG_MFD_CS42L43=m
+CONFIG_MFD_CS42L43_I2C=m
+CONFIG_MFD_CS42L43_SDW=m
+CONFIG_MFD_MADERA=m
+CONFIG_MFD_MADERA_I2C=m
+CONFIG_MFD_MADERA_SPI=m
+CONFIG_MFD_CS47L15=y
+CONFIG_MFD_CS47L35=y
+CONFIG_MFD_CS47L85=y
+CONFIG_MFD_CS47L90=y
+CONFIG_MFD_CS47L92=y
+CONFIG_PMIC_DA903X=y
+CONFIG_PMIC_DA9052=y
+CONFIG_MFD_DA9052_SPI=y
+CONFIG_MFD_DA9052_I2C=y
+CONFIG_MFD_DA9055=y
+CONFIG_MFD_DA9062=m
+CONFIG_MFD_DA9063=m
+CONFIG_MFD_DA9150=m
+CONFIG_MFD_DLN2=m
+CONFIG_MFD_MC13XXX=m
+CONFIG_MFD_MC13XXX_SPI=m
+CONFIG_MFD_MC13XXX_I2C=m
+CONFIG_MFD_MP2629=m
+CONFIG_MFD_INTEL_QUARK_I2C_GPIO=m
+CONFIG_LPC_ICH=m
+CONFIG_LPC_SCH=m
+CONFIG_INTEL_SOC_PMIC=y
+CONFIG_INTEL_SOC_PMIC_BXTWC=m
+CONFIG_INTEL_SOC_PMIC_CHTWC=y
+CONFIG_INTEL_SOC_PMIC_CHTDC_TI=m
+CONFIG_INTEL_SOC_PMIC_MRFLD=m
+CONFIG_MFD_INTEL_LPSS=m
+CONFIG_MFD_INTEL_LPSS_ACPI=m
+CONFIG_MFD_INTEL_LPSS_PCI=m
+CONFIG_MFD_INTEL_PMC_BXT=m
+CONFIG_MFD_IQS62X=m
+CONFIG_MFD_JANZ_CMODIO=m
+CONFIG_MFD_KEMPLD=m
+CONFIG_MFD_88PM800=m
+CONFIG_MFD_88PM805=m
+CONFIG_MFD_88PM860X=y
+CONFIG_MFD_MAX14577=m
+CONFIG_MFD_MAX77541=m
+CONFIG_MFD_MAX77693=m
+CONFIG_MFD_MAX77843=y
+CONFIG_MFD_MAX8907=m
+CONFIG_MFD_MAX8925=y
+CONFIG_MFD_MAX8997=y
+CONFIG_MFD_MAX8998=y
+CONFIG_MFD_MT6360=m
+CONFIG_MFD_MT6370=m
+CONFIG_MFD_MT6397=m
+CONFIG_MFD_MENF21BMC=m
+CONFIG_MFD_OCELOT=m
+CONFIG_EZX_PCAP=y
+CONFIG_MFD_VIPERBOARD=m
+CONFIG_MFD_RETU=m
+CONFIG_MFD_PCF50633=m
+CONFIG_PCF50633_ADC=m
+CONFIG_PCF50633_GPIO=m
+CONFIG_MFD_SY7636A=m
+CONFIG_MFD_RDC321X=m
+CONFIG_MFD_RT4831=m
+CONFIG_MFD_RT5033=m
+CONFIG_MFD_RT5120=m
+CONFIG_MFD_RC5T583=y
+CONFIG_MFD_SI476X_CORE=m
+CONFIG_MFD_SIMPLE_MFD_I2C=m
+CONFIG_MFD_SM501=m
+CONFIG_MFD_SM501_GPIO=y
+CONFIG_MFD_SKY81452=m
+CONFIG_MFD_SYSCON=y
+CONFIG_MFD_LP3943=m
+CONFIG_MFD_LP8788=y
+CONFIG_MFD_TI_LMU=m
+CONFIG_MFD_PALMAS=m
+CONFIG_TPS6105X=m
+CONFIG_TPS65010=m
+CONFIG_TPS6507X=m
+CONFIG_MFD_TPS65086=m
+CONFIG_MFD_TPS65090=y
+CONFIG_MFD_TI_LP873X=m
+CONFIG_MFD_TPS6586X=y
+CONFIG_MFD_TPS65910=y
+CONFIG_MFD_TPS65912=m
+CONFIG_MFD_TPS65912_I2C=m
+CONFIG_MFD_TPS65912_SPI=m
+CONFIG_MFD_TPS6594=m
+CONFIG_MFD_TPS6594_I2C=m
+CONFIG_MFD_TPS6594_SPI=m
+CONFIG_TWL4030_CORE=y
+CONFIG_MFD_TWL4030_AUDIO=y
+CONFIG_TWL6040_CORE=y
+CONFIG_MFD_WL1273_CORE=m
+CONFIG_MFD_LM3533=m
+CONFIG_MFD_TQMX86=m
+CONFIG_MFD_VX855=m
+CONFIG_MFD_ARIZONA=m
+CONFIG_MFD_ARIZONA_I2C=m
+CONFIG_MFD_ARIZONA_SPI=m
+CONFIG_MFD_CS47L24=y
+CONFIG_MFD_WM5102=y
+CONFIG_MFD_WM5110=y
+CONFIG_MFD_WM8997=y
+CONFIG_MFD_WM8998=y
+CONFIG_MFD_WM8400=y
+CONFIG_MFD_WM831X=y
+CONFIG_MFD_WM831X_I2C=y
+CONFIG_MFD_WM831X_SPI=y
+CONFIG_MFD_WM8350=y
+CONFIG_MFD_WM8350_I2C=y
+CONFIG_MFD_WM8994=m
+CONFIG_MFD_WCD934X=m
+CONFIG_MFD_ATC260X=m
+CONFIG_MFD_ATC260X_I2C=m
+CONFIG_MFD_CS40L50_CORE=m
+CONFIG_MFD_CS40L50_I2C=m
+CONFIG_MFD_CS40L50_SPI=m
+CONFIG_RAVE_SP_CORE=m
+CONFIG_MFD_INTEL_M10_BMC_CORE=m
+CONFIG_MFD_INTEL_M10_BMC_SPI=m
+CONFIG_MFD_INTEL_M10_BMC_PMCI=m
+# end of Multifunction device drivers
+
+CONFIG_REGULATOR=y
+# CONFIG_REGULATOR_DEBUG is not set
+CONFIG_REGULATOR_FIXED_VOLTAGE=m
+CONFIG_REGULATOR_VIRTUAL_CONSUMER=m
+CONFIG_REGULATOR_USERSPACE_CONSUMER=m
+CONFIG_REGULATOR_NETLINK_EVENTS=y
+CONFIG_REGULATOR_88PG86X=m
+CONFIG_REGULATOR_88PM800=m
+CONFIG_REGULATOR_88PM8607=m
+CONFIG_REGULATOR_ACT8865=m
+CONFIG_REGULATOR_AD5398=m
+CONFIG_REGULATOR_AAT2870=m
+CONFIG_REGULATOR_ARIZONA_LDO1=m
+CONFIG_REGULATOR_ARIZONA_MICSUPP=m
+CONFIG_REGULATOR_AS3711=m
+CONFIG_REGULATOR_ATC260X=m
+CONFIG_REGULATOR_AW37503=m
+CONFIG_REGULATOR_AXP20X=m
+CONFIG_REGULATOR_BCM590XX=m
+CONFIG_REGULATOR_BD9571MWV=m
+CONFIG_REGULATOR_DA903X=m
+CONFIG_REGULATOR_DA9052=m
+CONFIG_REGULATOR_DA9055=m
+CONFIG_REGULATOR_DA9062=m
+CONFIG_REGULATOR_DA9210=m
+CONFIG_REGULATOR_DA9211=m
+CONFIG_REGULATOR_FAN53555=m
+CONFIG_REGULATOR_GPIO=m
+CONFIG_REGULATOR_ISL9305=m
+CONFIG_REGULATOR_ISL6271A=m
+CONFIG_REGULATOR_LM363X=m
+CONFIG_REGULATOR_LP3971=m
+CONFIG_REGULATOR_LP3972=m
+CONFIG_REGULATOR_LP872X=m
+CONFIG_REGULATOR_LP8755=m
+CONFIG_REGULATOR_LP8788=m
+CONFIG_REGULATOR_LTC3589=m
+CONFIG_REGULATOR_LTC3676=m
+CONFIG_REGULATOR_MAX14577=m
+CONFIG_REGULATOR_MAX1586=m
+CONFIG_REGULATOR_MAX77503=m
+CONFIG_REGULATOR_MAX77541=m
+CONFIG_REGULATOR_MAX77857=m
+CONFIG_REGULATOR_MAX8649=m
+CONFIG_REGULATOR_MAX8660=m
+CONFIG_REGULATOR_MAX8893=m
+CONFIG_REGULATOR_MAX8907=m
+CONFIG_REGULATOR_MAX8925=m
+CONFIG_REGULATOR_MAX8952=m
+CONFIG_REGULATOR_MAX8997=m
+CONFIG_REGULATOR_MAX8998=m
+CONFIG_REGULATOR_MAX20086=m
+CONFIG_REGULATOR_MAX20411=m
+CONFIG_REGULATOR_MAX77693=m
+CONFIG_REGULATOR_MAX77826=m
+CONFIG_REGULATOR_MC13XXX_CORE=m
+CONFIG_REGULATOR_MC13783=m
+CONFIG_REGULATOR_MC13892=m
+CONFIG_REGULATOR_MP8859=m
+CONFIG_REGULATOR_MT6311=m
+CONFIG_REGULATOR_MT6323=m
+CONFIG_REGULATOR_MT6331=m
+CONFIG_REGULATOR_MT6332=m
+CONFIG_REGULATOR_MT6357=m
+CONFIG_REGULATOR_MT6358=m
+CONFIG_REGULATOR_MT6359=m
+CONFIG_REGULATOR_MT6360=m
+CONFIG_REGULATOR_MT6370=m
+CONFIG_REGULATOR_MT6397=m
+CONFIG_REGULATOR_PALMAS=m
+CONFIG_REGULATOR_PCA9450=m
+CONFIG_REGULATOR_PCAP=m
+CONFIG_REGULATOR_PCF50633=m
+CONFIG_REGULATOR_PV88060=m
+CONFIG_REGULATOR_PV88080=m
+CONFIG_REGULATOR_PV88090=m
+CONFIG_REGULATOR_PWM=m
+CONFIG_REGULATOR_RAA215300=m
+CONFIG_REGULATOR_RC5T583=m
+CONFIG_REGULATOR_RT4801=m
+CONFIG_REGULATOR_RT4803=m
+CONFIG_REGULATOR_RT4831=m
+CONFIG_REGULATOR_RT5033=m
+CONFIG_REGULATOR_RT5120=m
+CONFIG_REGULATOR_RT5190A=m
+CONFIG_REGULATOR_RT5739=m
+CONFIG_REGULATOR_RT5759=m
+CONFIG_REGULATOR_RT6160=m
+CONFIG_REGULATOR_RT6190=m
+CONFIG_REGULATOR_RT6245=m
+CONFIG_REGULATOR_RTQ2134=m
+CONFIG_REGULATOR_RTMV20=m
+CONFIG_REGULATOR_RTQ6752=m
+CONFIG_REGULATOR_RTQ2208=m
+CONFIG_REGULATOR_SKY81452=m
+CONFIG_REGULATOR_SLG51000=m
+CONFIG_REGULATOR_SY7636A=m
+CONFIG_REGULATOR_TPS51632=m
+CONFIG_REGULATOR_TPS6105X=m
+CONFIG_REGULATOR_TPS62360=m
+CONFIG_REGULATOR_TPS65023=m
+CONFIG_REGULATOR_TPS6507X=m
+CONFIG_REGULATOR_TPS65086=m
+CONFIG_REGULATOR_TPS65090=m
+CONFIG_REGULATOR_TPS65132=m
+CONFIG_REGULATOR_TPS6524X=m
+CONFIG_REGULATOR_TPS6586X=m
+CONFIG_REGULATOR_TPS65910=m
+CONFIG_REGULATOR_TPS65912=m
+CONFIG_REGULATOR_TPS68470=m
+CONFIG_REGULATOR_TWL4030=m
+CONFIG_REGULATOR_WM831X=m
+CONFIG_REGULATOR_WM8350=m
+CONFIG_REGULATOR_WM8400=m
+CONFIG_REGULATOR_WM8994=m
+CONFIG_RC_CORE=y
+CONFIG_BPF_LIRC_MODE2=y
+CONFIG_LIRC=y
+CONFIG_RC_MAP=m
+CONFIG_RC_DECODERS=y
+CONFIG_IR_IMON_DECODER=m
+CONFIG_IR_JVC_DECODER=m
+CONFIG_IR_MCE_KBD_DECODER=m
+CONFIG_IR_NEC_DECODER=m
+CONFIG_IR_RC5_DECODER=m
+CONFIG_IR_RC6_DECODER=m
+CONFIG_IR_RCMM_DECODER=m
+CONFIG_IR_SANYO_DECODER=m
+CONFIG_IR_SHARP_DECODER=m
+CONFIG_IR_SONY_DECODER=m
+CONFIG_IR_XMP_DECODER=m
+CONFIG_RC_DEVICES=y
+CONFIG_IR_ENE=m
+CONFIG_IR_FINTEK=m
+CONFIG_IR_IGORPLUGUSB=m
+CONFIG_IR_IGUANA=m
+CONFIG_IR_IMON=m
+CONFIG_IR_IMON_RAW=m
+CONFIG_IR_ITE_CIR=m
+CONFIG_IR_MCEUSB=m
+CONFIG_IR_NUVOTON=m
+CONFIG_IR_REDRAT3=m
+CONFIG_IR_SERIAL=m
+CONFIG_IR_SERIAL_TRANSMITTER=y
+CONFIG_IR_STREAMZAP=m
+CONFIG_IR_TOY=m
+CONFIG_IR_TTUSBIR=m
+CONFIG_IR_WINBOND_CIR=m
+CONFIG_RC_ATI_REMOTE=m
+CONFIG_RC_LOOPBACK=m
+CONFIG_RC_XBOX_DVD=m
+CONFIG_CEC_CORE=m
+CONFIG_CEC_NOTIFIER=y
+CONFIG_CEC_PIN=y
+
+#
+# CEC support
+#
+CONFIG_MEDIA_CEC_RC=y
+# CONFIG_CEC_PIN_ERROR_INJ is not set
+CONFIG_MEDIA_CEC_SUPPORT=y
+CONFIG_CEC_CH7322=m
+CONFIG_CEC_CROS_EC=m
+CONFIG_CEC_GPIO=m
+CONFIG_CEC_SECO=m
+CONFIG_CEC_SECO_RC=y
+CONFIG_USB_PULSE8_CEC=m
+CONFIG_USB_RAINSHADOW_CEC=m
+# end of CEC support
+
+CONFIG_MEDIA_SUPPORT=m
+CONFIG_MEDIA_SUPPORT_FILTER=y
+CONFIG_MEDIA_SUBDRV_AUTOSELECT=y
+
+#
+# Media device types
+#
+CONFIG_MEDIA_CAMERA_SUPPORT=y
+CONFIG_MEDIA_ANALOG_TV_SUPPORT=y
+CONFIG_MEDIA_DIGITAL_TV_SUPPORT=y
+CONFIG_MEDIA_RADIO_SUPPORT=y
+# CONFIG_MEDIA_SDR_SUPPORT is not set
+CONFIG_MEDIA_PLATFORM_SUPPORT=y
+CONFIG_MEDIA_TEST_SUPPORT=y
+# end of Media device types
+
+CONFIG_VIDEO_DEV=m
+CONFIG_MEDIA_CONTROLLER=y
+CONFIG_DVB_CORE=m
+
+#
+# Video4Linux options
+#
+CONFIG_VIDEO_V4L2_I2C=y
+CONFIG_VIDEO_V4L2_SUBDEV_API=y
+# CONFIG_VIDEO_ADV_DEBUG is not set
+# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set
+CONFIG_VIDEO_TUNER=m
+CONFIG_V4L2_LOOPBACK=m
+CONFIG_V4L2_MEM2MEM_DEV=m
+CONFIG_V4L2_FLASH_LED_CLASS=m
+CONFIG_V4L2_FWNODE=m
+CONFIG_V4L2_ASYNC=m
+CONFIG_V4L2_CCI=m
+CONFIG_V4L2_CCI_I2C=m
+# end of Video4Linux options
+
+#
+# Media controller options
+#
+CONFIG_MEDIA_CONTROLLER_DVB=y
+# end of Media controller options
+
+#
+# Digital TV options
+#
+CONFIG_DVB_MMAP=y
+CONFIG_DVB_NET=y
+CONFIG_DVB_MAX_ADAPTERS=16
+CONFIG_DVB_DYNAMIC_MINORS=y
+# CONFIG_DVB_DEMUX_SECTION_LOSS_LOG is not set
+# CONFIG_DVB_ULE_DEBUG is not set
+# end of Digital TV options
+
+#
+# Media drivers
+#
+
+#
+# Drivers filtered as selected at 'Filter media drivers'
+#
+
+#
+# Media drivers
+#
+CONFIG_MEDIA_USB_SUPPORT=y
+
+#
+# Webcam devices
+#
+CONFIG_USB_GSPCA=m
+CONFIG_USB_GSPCA_BENQ=m
+CONFIG_USB_GSPCA_CONEX=m
+CONFIG_USB_GSPCA_CPIA1=m
+CONFIG_USB_GSPCA_DTCS033=m
+CONFIG_USB_GSPCA_ETOMS=m
+CONFIG_USB_GSPCA_FINEPIX=m
+CONFIG_USB_GSPCA_JEILINJ=m
+CONFIG_USB_GSPCA_JL2005BCD=m
+CONFIG_USB_GSPCA_KINECT=m
+CONFIG_USB_GSPCA_KONICA=m
+CONFIG_USB_GSPCA_MARS=m
+CONFIG_USB_GSPCA_MR97310A=m
+CONFIG_USB_GSPCA_NW80X=m
+CONFIG_USB_GSPCA_OV519=m
+CONFIG_USB_GSPCA_OV534=m
+CONFIG_USB_GSPCA_OV534_9=m
+CONFIG_USB_GSPCA_PAC207=m
+CONFIG_USB_GSPCA_PAC7302=m
+CONFIG_USB_GSPCA_PAC7311=m
+CONFIG_USB_GSPCA_SE401=m
+CONFIG_USB_GSPCA_SN9C2028=m
+CONFIG_USB_GSPCA_SN9C20X=m
+CONFIG_USB_GSPCA_SONIXB=m
+CONFIG_USB_GSPCA_SONIXJ=m
+CONFIG_USB_GSPCA_SPCA1528=m
+CONFIG_USB_GSPCA_SPCA500=m
+CONFIG_USB_GSPCA_SPCA501=m
+CONFIG_USB_GSPCA_SPCA505=m
+CONFIG_USB_GSPCA_SPCA506=m
+CONFIG_USB_GSPCA_SPCA508=m
+CONFIG_USB_GSPCA_SPCA561=m
+CONFIG_USB_GSPCA_SQ905=m
+CONFIG_USB_GSPCA_SQ905C=m
+CONFIG_USB_GSPCA_SQ930X=m
+CONFIG_USB_GSPCA_STK014=m
+CONFIG_USB_GSPCA_STK1135=m
+CONFIG_USB_GSPCA_STV0680=m
+CONFIG_USB_GSPCA_SUNPLUS=m
+CONFIG_USB_GSPCA_T613=m
+CONFIG_USB_GSPCA_TOPRO=m
+CONFIG_USB_GSPCA_TOUPTEK=m
+CONFIG_USB_GSPCA_TV8532=m
+CONFIG_USB_GSPCA_VC032X=m
+CONFIG_USB_GSPCA_VICAM=m
+CONFIG_USB_GSPCA_XIRLINK_CIT=m
+CONFIG_USB_GSPCA_ZC3XX=m
+CONFIG_USB_GL860=m
+CONFIG_USB_M5602=m
+CONFIG_USB_STV06XX=m
+CONFIG_USB_PWC=m
+# CONFIG_USB_PWC_DEBUG is not set
+CONFIG_USB_PWC_INPUT_EVDEV=y
+CONFIG_USB_S2255=m
+CONFIG_VIDEO_USBTV=m
+CONFIG_USB_VIDEO_CLASS=m
+CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y
+
+#
+# Analog TV USB devices
+#
+CONFIG_VIDEO_GO7007=m
+CONFIG_VIDEO_GO7007_USB=m
+CONFIG_VIDEO_GO7007_LOADER=m
+CONFIG_VIDEO_GO7007_USB_S2250_BOARD=m
+CONFIG_VIDEO_HDPVR=m
+CONFIG_VIDEO_PVRUSB2=m
+CONFIG_VIDEO_PVRUSB2_SYSFS=y
+CONFIG_VIDEO_PVRUSB2_DVB=y
+# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set
+CONFIG_VIDEO_STK1160=m
+
+#
+# Analog/digital TV USB devices
+#
+CONFIG_VIDEO_AU0828=m
+CONFIG_VIDEO_AU0828_V4L2=y
+CONFIG_VIDEO_AU0828_RC=y
+CONFIG_VIDEO_CX231XX=m
+CONFIG_VIDEO_CX231XX_RC=y
+CONFIG_VIDEO_CX231XX_ALSA=m
+CONFIG_VIDEO_CX231XX_DVB=m
+
+#
+# Digital TV USB devices
+#
+CONFIG_DVB_AS102=m
+CONFIG_DVB_B2C2_FLEXCOP_USB=m
+# CONFIG_DVB_B2C2_FLEXCOP_USB_DEBUG is not set
+CONFIG_DVB_USB_V2=m
+CONFIG_DVB_USB_AF9015=m
+CONFIG_DVB_USB_AF9035=m
+CONFIG_DVB_USB_ANYSEE=m
+CONFIG_DVB_USB_AU6610=m
+CONFIG_DVB_USB_AZ6007=m
+CONFIG_DVB_USB_CE6230=m
+CONFIG_DVB_USB_DVBSKY=m
+CONFIG_DVB_USB_EC168=m
+CONFIG_DVB_USB_GL861=m
+CONFIG_DVB_USB_LME2510=m
+CONFIG_DVB_USB_MXL111SF=m
+CONFIG_DVB_USB_RTL28XXU=m
+CONFIG_DVB_USB_ZD1301=m
+CONFIG_DVB_USB=m
+# CONFIG_DVB_USB_DEBUG is not set
+CONFIG_DVB_USB_A800=m
+CONFIG_DVB_USB_AF9005=m
+CONFIG_DVB_USB_AF9005_REMOTE=m
+CONFIG_DVB_USB_AZ6027=m
+CONFIG_DVB_USB_CINERGY_T2=m
+CONFIG_DVB_USB_CXUSB=m
+CONFIG_DVB_USB_CXUSB_ANALOG=y
+CONFIG_DVB_USB_DIB0700=m
+CONFIG_DVB_USB_DIB3000MC=m
+CONFIG_DVB_USB_DIBUSB_MB=m
+CONFIG_DVB_USB_DIBUSB_MB_FAULTY=y
+CONFIG_DVB_USB_DIBUSB_MC=m
+CONFIG_DVB_USB_DIGITV=m
+CONFIG_DVB_USB_DTT200U=m
+CONFIG_DVB_USB_DTV5100=m
+CONFIG_DVB_USB_DW2102=m
+CONFIG_DVB_USB_GP8PSK=m
+CONFIG_DVB_USB_M920X=m
+CONFIG_DVB_USB_NOVA_T_USB2=m
+CONFIG_DVB_USB_OPERA1=m
+CONFIG_DVB_USB_PCTV452E=m
+CONFIG_DVB_USB_TECHNISAT_USB2=m
+CONFIG_DVB_USB_TTUSB2=m
+CONFIG_DVB_USB_UMT_010=m
+CONFIG_DVB_USB_VP702X=m
+CONFIG_DVB_USB_VP7045=m
+CONFIG_SMS_USB_DRV=m
+CONFIG_DVB_TTUSB_BUDGET=m
+CONFIG_DVB_TTUSB_DEC=m
+
+#
+# Webcam, TV (analog/digital) USB devices
+#
+CONFIG_VIDEO_EM28XX=m
+CONFIG_VIDEO_EM28XX_V4L2=m
+CONFIG_VIDEO_EM28XX_ALSA=m
+CONFIG_VIDEO_EM28XX_DVB=m
+CONFIG_VIDEO_EM28XX_RC=m
+CONFIG_MEDIA_PCI_SUPPORT=y
+
+#
+# Media capture support
+#
+CONFIG_VIDEO_MGB4=m
+CONFIG_VIDEO_SOLO6X10=m
+CONFIG_VIDEO_TW5864=m
+CONFIG_VIDEO_TW68=m
+CONFIG_VIDEO_TW686X=m
+# CONFIG_VIDEO_ZORAN is not set
+
+#
+# Media capture/analog TV support
+#
+CONFIG_VIDEO_DT3155=m
+CONFIG_VIDEO_IVTV=m
+CONFIG_VIDEO_IVTV_ALSA=m
+# CONFIG_VIDEO_FB_IVTV is not set
+CONFIG_VIDEO_HEXIUM_GEMINI=m
+CONFIG_VIDEO_HEXIUM_ORION=m
+CONFIG_VIDEO_MXB=m
+
+#
+# Media capture/analog/hybrid TV support
+#
+CONFIG_VIDEO_BT848=m
+CONFIG_DVB_BT8XX=m
+CONFIG_VIDEO_CX18=m
+CONFIG_VIDEO_CX18_ALSA=m
+CONFIG_VIDEO_CX23885=m
+CONFIG_MEDIA_ALTERA_CI=m
+CONFIG_VIDEO_CX25821=m
+CONFIG_VIDEO_CX25821_ALSA=m
+CONFIG_VIDEO_CX88=m
+CONFIG_VIDEO_CX88_ALSA=m
+CONFIG_VIDEO_CX88_BLACKBIRD=m
+CONFIG_VIDEO_CX88_DVB=m
+CONFIG_VIDEO_CX88_ENABLE_VP3054=y
+CONFIG_VIDEO_CX88_VP3054=m
+CONFIG_VIDEO_CX88_MPEG=m
+CONFIG_VIDEO_SAA7134=m
+CONFIG_VIDEO_SAA7134_ALSA=m
+CONFIG_VIDEO_SAA7134_RC=y
+CONFIG_VIDEO_SAA7134_DVB=m
+CONFIG_VIDEO_SAA7134_GO7007=m
+CONFIG_VIDEO_SAA7164=m
+
+#
+# Media digital TV PCI Adapters
+#
+CONFIG_DVB_B2C2_FLEXCOP_PCI=m
+# CONFIG_DVB_B2C2_FLEXCOP_PCI_DEBUG is not set
+CONFIG_DVB_DDBRIDGE=m
+# CONFIG_DVB_DDBRIDGE_MSIENABLE is not set
+CONFIG_DVB_DM1105=m
+CONFIG_MANTIS_CORE=m
+CONFIG_DVB_MANTIS=m
+CONFIG_DVB_HOPPER=m
+CONFIG_DVB_NETUP_UNIDVB=m
+CONFIG_DVB_NGENE=m
+CONFIG_DVB_PLUTO2=m
+CONFIG_DVB_PT1=m
+CONFIG_DVB_PT3=m
+CONFIG_DVB_SMIPCIE=m
+CONFIG_DVB_BUDGET_CORE=m
+CONFIG_DVB_BUDGET=m
+CONFIG_DVB_BUDGET_CI=m
+CONFIG_DVB_BUDGET_AV=m
+CONFIG_VIDEO_IPU3_CIO2=m
+CONFIG_VIDEO_INTEL_IPU6=m
+CONFIG_INTEL_VSC=m
+CONFIG_IPU_BRIDGE=m
+CONFIG_RADIO_ADAPTERS=m
+CONFIG_RADIO_MAXIRADIO=m
+CONFIG_RADIO_SAA7706H=m
+CONFIG_RADIO_SHARK=m
+CONFIG_RADIO_SHARK2=m
+CONFIG_RADIO_SI4713=m
+CONFIG_RADIO_SI476X=m
+CONFIG_RADIO_TEA575X=m
+CONFIG_RADIO_TEA5764=m
+CONFIG_RADIO_TEF6862=m
+CONFIG_RADIO_WL1273=m
+CONFIG_USB_DSBR=m
+CONFIG_USB_KEENE=m
+CONFIG_USB_MA901=m
+CONFIG_USB_MR800=m
+CONFIG_USB_RAREMONO=m
+CONFIG_RADIO_SI470X=m
+CONFIG_USB_SI470X=m
+CONFIG_I2C_SI470X=m
+CONFIG_USB_SI4713=m
+CONFIG_PLATFORM_SI4713=m
+CONFIG_I2C_SI4713=m
+CONFIG_RADIO_WL128X=m
+CONFIG_MEDIA_PLATFORM_DRIVERS=y
+CONFIG_V4L_PLATFORM_DRIVERS=y
+CONFIG_DVB_PLATFORM_DRIVERS=y
+CONFIG_V4L_MEM2MEM_DRIVERS=y
+CONFIG_VIDEO_MEM2MEM_DEINTERLACE=m
+
+#
+# Allegro DVT media platform drivers
+#
+
+#
+# Amlogic media platform drivers
+#
+
+#
+# Amphion drivers
+#
+
+#
+# Aspeed media platform drivers
+#
+
+#
+# Atmel media platform drivers
+#
+
+#
+# Cadence media platform drivers
+#
+CONFIG_VIDEO_CADENCE_CSI2RX=m
+CONFIG_VIDEO_CADENCE_CSI2TX=m
+
+#
+# Chips&Media media platform drivers
+#
+
+#
+# Intel media platform drivers
+#
+
+#
+# Marvell media platform drivers
+#
+CONFIG_VIDEO_CAFE_CCIC=m
+
+#
+# Mediatek media platform drivers
+#
+
+#
+# Microchip Technology, Inc. media platform drivers
+#
+
+#
+# Nuvoton media platform drivers
+#
+
+#
+# NVidia media platform drivers
+#
+
+#
+# NXP media platform drivers
+#
+
+#
+# Qualcomm media platform drivers
+#
+
+#
+# Raspberry Pi media platform drivers
+#
+
+#
+# Renesas media platform drivers
+#
+
+#
+# Rockchip media platform drivers
+#
+
+#
+# Samsung media platform drivers
+#
+
+#
+# STMicroelectronics media platform drivers
+#
+
+#
+# Sunxi media platform drivers
+#
+
+#
+# Texas Instruments drivers
+#
+
+#
+# Verisilicon media platform drivers
+#
+
+#
+# VIA media platform drivers
+#
+
+#
+# Xilinx media platform drivers
+#
+
+#
+# MMC/SDIO DVB adapters
+#
+CONFIG_SMS_SDIO_DRV=m
+CONFIG_V4L_TEST_DRIVERS=y
+CONFIG_VIDEO_VIM2M=m
+CONFIG_VIDEO_VICODEC=m
+CONFIG_VIDEO_VIMC=m
+CONFIG_VIDEO_VIVID=m
+CONFIG_VIDEO_VIVID_CEC=y
+CONFIG_VIDEO_VIVID_MAX_DEVS=64
+CONFIG_VIDEO_VISL=m
+# CONFIG_VISL_DEBUGFS is not set
+CONFIG_DVB_TEST_DRIVERS=y
+CONFIG_DVB_VIDTV=m
+
+#
+# FireWire (IEEE 1394) Adapters
+#
+CONFIG_DVB_FIREDTV=m
+CONFIG_DVB_FIREDTV_INPUT=y
+CONFIG_MEDIA_COMMON_OPTIONS=y
+
+#
+# common driver options
+#
+CONFIG_CYPRESS_FIRMWARE=m
+CONFIG_TTPCI_EEPROM=m
+CONFIG_UVC_COMMON=m
+CONFIG_VIDEO_CX2341X=m
+CONFIG_VIDEO_TVEEPROM=m
+CONFIG_DVB_B2C2_FLEXCOP=m
+CONFIG_VIDEO_SAA7146=m
+CONFIG_VIDEO_SAA7146_VV=m
+CONFIG_SMS_SIANO_MDTV=m
+CONFIG_SMS_SIANO_RC=y
+# CONFIG_SMS_SIANO_DEBUGFS is not set
+CONFIG_VIDEO_V4L2_TPG=m
+CONFIG_VIDEOBUF2_CORE=m
+CONFIG_VIDEOBUF2_V4L2=m
+CONFIG_VIDEOBUF2_MEMOPS=m
+CONFIG_VIDEOBUF2_DMA_CONTIG=m
+CONFIG_VIDEOBUF2_VMALLOC=m
+CONFIG_VIDEOBUF2_DMA_SG=m
+CONFIG_VIDEOBUF2_DVB=m
+# end of Media drivers
+
+CONFIG_MEDIA_HIDE_ANCILLARY_SUBDRV=y
+
+#
+# Media ancillary drivers
+#
+CONFIG_MEDIA_ATTACH=y
+
+#
+# IR I2C driver auto-selected by 'Autoselect ancillary drivers'
+#
+CONFIG_VIDEO_IR_I2C=m
+CONFIG_VIDEO_CAMERA_SENSOR=y
+CONFIG_VIDEO_APTINA_PLL=m
+CONFIG_VIDEO_CCS_PLL=m
+CONFIG_VIDEO_ALVIUM_CSI2=m
+CONFIG_VIDEO_AR0521=m
+CONFIG_VIDEO_GC0308=m
+CONFIG_VIDEO_GC05A2=m
+CONFIG_VIDEO_GC08A3=m
+CONFIG_VIDEO_GC2145=m
+CONFIG_VIDEO_HI556=m
+CONFIG_VIDEO_HI846=m
+CONFIG_VIDEO_HI847=m
+CONFIG_VIDEO_IMX208=m
+CONFIG_VIDEO_IMX214=m
+CONFIG_VIDEO_IMX219=m
+CONFIG_VIDEO_IMX258=m
+CONFIG_VIDEO_IMX274=m
+CONFIG_VIDEO_IMX283=m
+CONFIG_VIDEO_IMX290=m
+CONFIG_VIDEO_IMX296=m
+CONFIG_VIDEO_IMX319=m
+CONFIG_VIDEO_IMX355=m
+CONFIG_VIDEO_MAX9271_LIB=m
+CONFIG_VIDEO_MT9M001=m
+CONFIG_VIDEO_MT9M111=m
+CONFIG_VIDEO_MT9M114=m
+CONFIG_VIDEO_MT9P031=m
+CONFIG_VIDEO_MT9T112=m
+CONFIG_VIDEO_MT9V011=m
+CONFIG_VIDEO_MT9V032=m
+CONFIG_VIDEO_MT9V111=m
+CONFIG_VIDEO_OG01A1B=m
+CONFIG_VIDEO_OV01A10=m
+CONFIG_VIDEO_OV02A10=m
+CONFIG_VIDEO_OV08D10=m
+CONFIG_VIDEO_OV08X40=m
+CONFIG_VIDEO_OV13858=m
+CONFIG_VIDEO_OV13B10=m
+CONFIG_VIDEO_OV2640=m
+CONFIG_VIDEO_OV2659=m
+CONFIG_VIDEO_OV2680=m
+CONFIG_VIDEO_OV2685=m
+CONFIG_VIDEO_OV2740=m
+CONFIG_VIDEO_OV4689=m
+CONFIG_VIDEO_OV5647=m
+CONFIG_VIDEO_OV5648=m
+CONFIG_VIDEO_OV5670=m
+CONFIG_VIDEO_OV5675=m
+CONFIG_VIDEO_OV5693=m
+CONFIG_VIDEO_OV5695=m
+CONFIG_VIDEO_OV64A40=m
+CONFIG_VIDEO_OV6650=m
+CONFIG_VIDEO_OV7251=m
+CONFIG_VIDEO_OV7640=m
+CONFIG_VIDEO_OV7670=m
+CONFIG_VIDEO_OV772X=m
+CONFIG_VIDEO_OV7740=m
+CONFIG_VIDEO_OV8856=m
+CONFIG_VIDEO_OV8858=m
+CONFIG_VIDEO_OV8865=m
+CONFIG_VIDEO_OV9640=m
+CONFIG_VIDEO_OV9650=m
+CONFIG_VIDEO_OV9734=m
+CONFIG_VIDEO_RDACM20=m
+CONFIG_VIDEO_RDACM21=m
+CONFIG_VIDEO_RJ54N1=m
+CONFIG_VIDEO_S5C73M3=m
+CONFIG_VIDEO_S5K5BAF=m
+CONFIG_VIDEO_S5K6A3=m
+CONFIG_VIDEO_CCS=m
+CONFIG_VIDEO_ET8EK8=m
+
+#
+# Camera ISPs
+#
+CONFIG_VIDEO_THP7312=m
+# end of Camera ISPs
+
+#
+# Lens drivers
+#
+CONFIG_VIDEO_AD5820=m
+CONFIG_VIDEO_AK7375=m
+CONFIG_VIDEO_DW9714=m
+CONFIG_VIDEO_DW9719=m
+CONFIG_VIDEO_DW9768=m
+CONFIG_VIDEO_DW9807_VCM=m
+# end of Lens drivers
+
+#
+# Flash devices
+#
+CONFIG_VIDEO_ADP1653=m
+CONFIG_VIDEO_LM3560=m
+CONFIG_VIDEO_LM3646=m
+# end of Flash devices
+
+#
+# audio, video and radio I2C drivers auto-selected by 'Autoselect ancillary drivers'
+#
+CONFIG_VIDEO_CS3308=m
+CONFIG_VIDEO_CS5345=m
+CONFIG_VIDEO_CS53L32A=m
+CONFIG_VIDEO_MSP3400=m
+CONFIG_VIDEO_SONY_BTF_MPX=m
+CONFIG_VIDEO_TDA7432=m
+CONFIG_VIDEO_TDA9840=m
+CONFIG_VIDEO_TEA6415C=m
+CONFIG_VIDEO_TEA6420=m
+CONFIG_VIDEO_TVAUDIO=m
+CONFIG_VIDEO_UDA1342=m
+CONFIG_VIDEO_VP27SMPX=m
+CONFIG_VIDEO_WM8739=m
+CONFIG_VIDEO_WM8775=m
+CONFIG_VIDEO_SAA6588=m
+CONFIG_VIDEO_SAA711X=m
+CONFIG_VIDEO_TVP5150=m
+CONFIG_VIDEO_TW2804=m
+CONFIG_VIDEO_TW9903=m
+CONFIG_VIDEO_TW9906=m
+
+#
+# Video and audio decoders
+#
+CONFIG_VIDEO_SAA717X=m
+CONFIG_VIDEO_CX25840=m
+CONFIG_VIDEO_SAA7127=m
+CONFIG_VIDEO_UPD64031A=m
+CONFIG_VIDEO_UPD64083=m
+CONFIG_VIDEO_SAA6752HS=m
+CONFIG_VIDEO_M52790=m
+
+#
+# Video serializers and deserializers
+#
+# end of Video serializers and deserializers
+
+#
+# SPI I2C drivers auto-selected by 'Autoselect ancillary drivers'
+#
+
+#
+# Media SPI Adapters
+#
+CONFIG_CXD2880_SPI_DRV=m
+CONFIG_VIDEO_GS1662=m
+# end of Media SPI Adapters
+
+CONFIG_MEDIA_TUNER=m
+
+#
+# Tuner drivers auto-selected by 'Autoselect ancillary drivers'
+#
+CONFIG_MEDIA_TUNER_E4000=m
+CONFIG_MEDIA_TUNER_FC0011=m
+CONFIG_MEDIA_TUNER_FC0012=m
+CONFIG_MEDIA_TUNER_FC0013=m
+CONFIG_MEDIA_TUNER_FC2580=m
+CONFIG_MEDIA_TUNER_IT913X=m
+CONFIG_MEDIA_TUNER_M88RS6000T=m
+CONFIG_MEDIA_TUNER_MAX2165=m
+CONFIG_MEDIA_TUNER_MC44S803=m
+CONFIG_MEDIA_TUNER_MT2060=m
+CONFIG_MEDIA_TUNER_MT2063=m
+CONFIG_MEDIA_TUNER_MT20XX=m
+CONFIG_MEDIA_TUNER_MT2131=m
+CONFIG_MEDIA_TUNER_MT2266=m
+CONFIG_MEDIA_TUNER_MXL301RF=m
+CONFIG_MEDIA_TUNER_MXL5005S=m
+CONFIG_MEDIA_TUNER_MXL5007T=m
+CONFIG_MEDIA_TUNER_QM1D1B0004=m
+CONFIG_MEDIA_TUNER_QM1D1C0042=m
+CONFIG_MEDIA_TUNER_QT1010=m
+CONFIG_MEDIA_TUNER_R820T=m
+CONFIG_MEDIA_TUNER_SI2157=m
+CONFIG_MEDIA_TUNER_SIMPLE=m
+CONFIG_MEDIA_TUNER_TDA18212=m
+CONFIG_MEDIA_TUNER_TDA18218=m
+CONFIG_MEDIA_TUNER_TDA18250=m
+CONFIG_MEDIA_TUNER_TDA18271=m
+CONFIG_MEDIA_TUNER_TDA827X=m
+CONFIG_MEDIA_TUNER_TDA8290=m
+CONFIG_MEDIA_TUNER_TDA9887=m
+CONFIG_MEDIA_TUNER_TEA5761=m
+CONFIG_MEDIA_TUNER_TEA5767=m
+CONFIG_MEDIA_TUNER_TUA9001=m
+CONFIG_MEDIA_TUNER_XC2028=m
+CONFIG_MEDIA_TUNER_XC4000=m
+CONFIG_MEDIA_TUNER_XC5000=m
+
+#
+# DVB Frontend drivers auto-selected by 'Autoselect ancillary drivers'
+#
+
+#
+# Multistandard (satellite) frontends
+#
+CONFIG_DVB_M88DS3103=m
+CONFIG_DVB_MXL5XX=m
+CONFIG_DVB_STB0899=m
+CONFIG_DVB_STB6100=m
+CONFIG_DVB_STV090x=m
+CONFIG_DVB_STV0910=m
+CONFIG_DVB_STV6110x=m
+CONFIG_DVB_STV6111=m
+
+#
+# Multistandard (cable + terrestrial) frontends
+#
+CONFIG_DVB_DRXK=m
+CONFIG_DVB_MN88472=m
+CONFIG_DVB_MN88473=m
+CONFIG_DVB_SI2165=m
+CONFIG_DVB_TDA18271C2DD=m
+
+#
+# DVB-S (satellite) frontends
+#
+CONFIG_DVB_CX24110=m
+CONFIG_DVB_CX24116=m
+CONFIG_DVB_CX24117=m
+CONFIG_DVB_CX24120=m
+CONFIG_DVB_CX24123=m
+CONFIG_DVB_DS3000=m
+CONFIG_DVB_MB86A16=m
+CONFIG_DVB_MT312=m
+CONFIG_DVB_S5H1420=m
+CONFIG_DVB_SI21XX=m
+CONFIG_DVB_STB6000=m
+CONFIG_DVB_STV0288=m
+CONFIG_DVB_STV0299=m
+CONFIG_DVB_STV0900=m
+CONFIG_DVB_STV6110=m
+CONFIG_DVB_TDA10071=m
+CONFIG_DVB_TDA10086=m
+CONFIG_DVB_TDA8083=m
+CONFIG_DVB_TDA8261=m
+CONFIG_DVB_TDA826X=m
+CONFIG_DVB_TS2020=m
+CONFIG_DVB_TUA6100=m
+CONFIG_DVB_TUNER_CX24113=m
+CONFIG_DVB_TUNER_ITD1000=m
+CONFIG_DVB_VES1X93=m
+CONFIG_DVB_ZL10036=m
+CONFIG_DVB_ZL10039=m
+
+#
+# DVB-T (terrestrial) frontends
+#
+CONFIG_DVB_AF9013=m
+CONFIG_DVB_AS102_FE=m
+CONFIG_DVB_CX22700=m
+CONFIG_DVB_CX22702=m
+CONFIG_DVB_CXD2820R=m
+CONFIG_DVB_CXD2841ER=m
+CONFIG_DVB_DIB3000MB=m
+CONFIG_DVB_DIB3000MC=m
+CONFIG_DVB_DIB7000M=m
+CONFIG_DVB_DIB7000P=m
+CONFIG_DVB_DRXD=m
+CONFIG_DVB_EC100=m
+CONFIG_DVB_GP8PSK_FE=m
+CONFIG_DVB_L64781=m
+CONFIG_DVB_MT352=m
+CONFIG_DVB_NXT6000=m
+CONFIG_DVB_RTL2830=m
+CONFIG_DVB_RTL2832=m
+CONFIG_DVB_SI2168=m
+CONFIG_DVB_SP887X=m
+CONFIG_DVB_STV0367=m
+CONFIG_DVB_TDA10048=m
+CONFIG_DVB_TDA1004X=m
+CONFIG_DVB_ZD1301_DEMOD=m
+CONFIG_DVB_ZL10353=m
+
+#
+# DVB-C (cable) frontends
+#
+CONFIG_DVB_STV0297=m
+CONFIG_DVB_TDA10021=m
+CONFIG_DVB_TDA10023=m
+CONFIG_DVB_VES1820=m
+
+#
+# ATSC (North American/Korean Terrestrial/Cable DTV) frontends
+#
+CONFIG_DVB_AU8522=m
+CONFIG_DVB_AU8522_DTV=m
+CONFIG_DVB_AU8522_V4L=m
+CONFIG_DVB_BCM3510=m
+CONFIG_DVB_LG2160=m
+CONFIG_DVB_LGDT3305=m
+CONFIG_DVB_LGDT3306A=m
+CONFIG_DVB_LGDT330X=m
+CONFIG_DVB_MXL692=m
+CONFIG_DVB_NXT200X=m
+CONFIG_DVB_OR51132=m
+CONFIG_DVB_OR51211=m
+CONFIG_DVB_S5H1409=m
+CONFIG_DVB_S5H1411=m
+
+#
+# ISDB-T (terrestrial) frontends
+#
+CONFIG_DVB_DIB8000=m
+CONFIG_DVB_MB86A20S=m
+CONFIG_DVB_S921=m
+
+#
+# ISDB-S (satellite) & ISDB-T (terrestrial) frontends
+#
+CONFIG_DVB_TC90522=m
+
+#
+# Digital terrestrial only tuners/PLL
+#
+CONFIG_DVB_PLL=m
+CONFIG_DVB_TUNER_DIB0070=m
+CONFIG_DVB_TUNER_DIB0090=m
+
+#
+# SEC control devices for DVB-S
+#
+CONFIG_DVB_A8293=m
+CONFIG_DVB_AF9033=m
+CONFIG_DVB_ASCOT2E=m
+CONFIG_DVB_ATBM8830=m
+CONFIG_DVB_HELENE=m
+CONFIG_DVB_HORUS3A=m
+CONFIG_DVB_ISL6405=m
+CONFIG_DVB_ISL6421=m
+CONFIG_DVB_ISL6423=m
+CONFIG_DVB_IX2505V=m
+CONFIG_DVB_LGS8GXX=m
+CONFIG_DVB_LNBH25=m
+CONFIG_DVB_LNBP21=m
+CONFIG_DVB_LNBP22=m
+CONFIG_DVB_M88RS2000=m
+CONFIG_DVB_TDA665x=m
+CONFIG_DVB_DRX39XYJ=m
+
+#
+# Common Interface (EN50221) controller drivers
+#
+CONFIG_DVB_CXD2099=m
+CONFIG_DVB_SP2=m
+
+#
+# Tools to develop new frontends
+#
+CONFIG_DVB_DUMMY_FE=m
+# end of Media ancillary drivers
+
+#
+# Graphics support
+#
+CONFIG_APERTURE_HELPERS=y
+CONFIG_SCREEN_INFO=y
+CONFIG_VIDEO=y
+# CONFIG_AUXDISPLAY is not set
+# CONFIG_PANEL is not set
+CONFIG_AGP=y
+CONFIG_AGP_AMD64=m
+CONFIG_AGP_INTEL=m
+CONFIG_AGP_SIS=m
+CONFIG_AGP_VIA=m
+CONFIG_INTEL_GTT=m
+CONFIG_VGA_SWITCHEROO=y
+CONFIG_DRM=y
+CONFIG_DRM_MIPI_DBI=m
+CONFIG_DRM_MIPI_DSI=y
+# CONFIG_DRM_DEBUG_MM is not set
+CONFIG_DRM_KMS_HELPER=y
+CONFIG_DRM_FBDEV_EMULATION=y
+CONFIG_DRM_FBDEV_OVERALLOC=100
+CONFIG_DRM_LOAD_EDID_FIRMWARE=y
+CONFIG_DRM_DISPLAY_HELPER=m
+CONFIG_DRM_DISPLAY_DP_AUX_CEC=y
+CONFIG_DRM_DISPLAY_DP_AUX_CHARDEV=y
+CONFIG_DRM_DISPLAY_DP_HELPER=y
+CONFIG_DRM_DISPLAY_DP_TUNNEL=y
+CONFIG_DRM_DISPLAY_HDCP_HELPER=y
+CONFIG_DRM_DISPLAY_HDMI_HELPER=y
+CONFIG_DRM_TTM=m
+CONFIG_DRM_EXEC=m
+CONFIG_DRM_GPUVM=m
+CONFIG_DRM_BUDDY=m
+CONFIG_DRM_VRAM_HELPER=m
+CONFIG_DRM_TTM_HELPER=m
+CONFIG_DRM_GEM_DMA_HELPER=m
+CONFIG_DRM_GEM_SHMEM_HELPER=y
+CONFIG_DRM_SUBALLOC_HELPER=m
+CONFIG_DRM_SCHED=m
+
+#
+# I2C encoder or helper chips
+#
+CONFIG_DRM_I2C_CH7006=m
+CONFIG_DRM_I2C_SIL164=m
+CONFIG_DRM_I2C_NXP_TDA998X=m
+CONFIG_DRM_I2C_NXP_TDA9950=m
+# end of I2C encoder or helper chips
+
+#
+# ARM devices
+#
+# end of ARM devices
+
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_RADEON_USERPTR=y
+CONFIG_DRM_AMDGPU=m
+CONFIG_DRM_AMDGPU_SI=y
+CONFIG_DRM_AMDGPU_CIK=y
+CONFIG_DRM_AMDGPU_USERPTR=y
+CONFIG_DRM_AMD_ISP=y
+
+#
+# ACP (Audio CoProcessor) Configuration
+#
+CONFIG_DRM_AMD_ACP=y
+# end of ACP (Audio CoProcessor) Configuration
+
+#
+# Display Engine Configuration
+#
+CONFIG_DRM_AMD_DC=y
+CONFIG_DRM_AMD_DC_FP=y
+CONFIG_DRM_AMD_DC_SI=y
+CONFIG_DRM_AMD_SECURE_DISPLAY=y
+CONFIG_AMD_PRIVATE_COLOR=y
+# end of Display Engine Configuration
+
+CONFIG_HSA_AMD=y
+CONFIG_HSA_AMD_SVM=y
+CONFIG_DRM_NOUVEAU=m
+CONFIG_NOUVEAU_DEBUG=5
+CONFIG_NOUVEAU_DEBUG_DEFAULT=3
+# CONFIG_NOUVEAU_DEBUG_MMU is not set
+# CONFIG_NOUVEAU_DEBUG_PUSH is not set
+CONFIG_DRM_NOUVEAU_BACKLIGHT=y
+CONFIG_DRM_NOUVEAU_SVM=y
+CONFIG_DRM_NOUVEAU_GSP_DEFAULT=y
+CONFIG_DRM_I915=m
+CONFIG_DRM_I915_FORCE_PROBE="*"
+CONFIG_DRM_I915_CAPTURE_ERROR=y
+CONFIG_DRM_I915_COMPRESS_ERROR=y
+CONFIG_DRM_I915_USERPTR=y
+CONFIG_DRM_I915_GVT_KVMGT=m
+CONFIG_DRM_I915_PXP=y
+CONFIG_DRM_I915_DP_TUNNEL=y
+CONFIG_DRM_I915_REQUEST_TIMEOUT=20000
+CONFIG_DRM_I915_FENCE_TIMEOUT=10000
+CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND=250
+CONFIG_DRM_I915_HEARTBEAT_INTERVAL=2500
+CONFIG_DRM_I915_PREEMPT_TIMEOUT=640
+CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE=7500
+CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT=8000
+CONFIG_DRM_I915_STOP_TIMEOUT=100
+CONFIG_DRM_I915_TIMESLICE_DURATION=1
+CONFIG_DRM_I915_GVT=y
+CONFIG_DRM_XE=m
+CONFIG_DRM_XE_DISPLAY=y
+CONFIG_DRM_XE_FORCE_PROBE=""
+CONFIG_DRM_XE_JOB_TIMEOUT_MAX=10000
+CONFIG_DRM_XE_JOB_TIMEOUT_MIN=1
+CONFIG_DRM_XE_TIMESLICE_MAX=10000000
+CONFIG_DRM_XE_TIMESLICE_MIN=1
+CONFIG_DRM_XE_PREEMPT_TIMEOUT=640000
+CONFIG_DRM_XE_PREEMPT_TIMEOUT_MAX=10000000
+CONFIG_DRM_XE_PREEMPT_TIMEOUT_MIN=1
+CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT=y
+CONFIG_DRM_VGEM=m
+CONFIG_DRM_VKMS=m
+CONFIG_DRM_VMWGFX=m
+CONFIG_DRM_VMWGFX_MKSSTATS=y
+CONFIG_DRM_GMA500=m
+CONFIG_DRM_UDL=m
+CONFIG_DRM_AST=m
+CONFIG_DRM_MGAG200=m
+CONFIG_DRM_QXL=m
+CONFIG_DRM_VIRTIO_GPU=m
+CONFIG_DRM_VIRTIO_GPU_KMS=y
+CONFIG_DRM_PANEL=y
+
+#
+# Display Panels
+#
+CONFIG_DRM_PANEL_AUO_A030JTN01=m
+CONFIG_DRM_PANEL_ILITEK_ILI9341=m
+CONFIG_DRM_PANEL_ORISETECH_OTA5601A=m
+CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN=m
+CONFIG_DRM_PANEL_WIDECHIPS_WS2401=m
+# end of Display Panels
+
+CONFIG_DRM_BRIDGE=y
+CONFIG_DRM_PANEL_BRIDGE=y
+
+#
+# Display Interface Bridges
+#
+CONFIG_DRM_ANALOGIX_ANX78XX=m
+CONFIG_DRM_ANALOGIX_DP=m
+# end of Display Interface Bridges
+
+# CONFIG_DRM_ETNAVIV is not set
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_DRM_BOCHS=m
+CONFIG_DRM_CIRRUS_QEMU=m
+CONFIG_DRM_GM12U320=m
+CONFIG_DRM_PANEL_MIPI_DBI=m
+CONFIG_DRM_SIMPLEDRM=y
+CONFIG_TINYDRM_HX8357D=m
+CONFIG_TINYDRM_ILI9163=m
+CONFIG_TINYDRM_ILI9225=m
+CONFIG_TINYDRM_ILI9341=m
+CONFIG_TINYDRM_ILI9486=m
+CONFIG_TINYDRM_MI0283QT=m
+CONFIG_TINYDRM_REPAPER=m
+CONFIG_TINYDRM_ST7586=m
+CONFIG_TINYDRM_ST7735R=m
+CONFIG_DRM_XEN=y
+CONFIG_DRM_XEN_FRONTEND=m
+CONFIG_DRM_VBOXVIDEO=m
+CONFIG_DRM_GUD=m
+CONFIG_DRM_SSD130X=m
+CONFIG_DRM_SSD130X_I2C=m
+CONFIG_DRM_SSD130X_SPI=m
+CONFIG_DRM_HYPERV=m
+CONFIG_DRM_PRIVACY_SCREEN=y
+CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y
+
+#
+# Frame buffer Devices
+#
+CONFIG_FB=y
+# CONFIG_FB_CIRRUS is not set
+# CONFIG_FB_PM2 is not set
+# CONFIG_FB_CYBER2000 is not set
+# CONFIG_FB_ARC is not set
+# CONFIG_FB_ASILIANT is not set
+# CONFIG_FB_IMSTT is not set
+# CONFIG_FB_VGA16 is not set
+# CONFIG_FB_UVESA is not set
+CONFIG_FB_VESA=y
+CONFIG_FB_EFI=y
+# CONFIG_FB_N411 is not set
+# CONFIG_FB_HGA is not set
+# CONFIG_FB_OPENCORES is not set
+# CONFIG_FB_S1D13XXX is not set
+# CONFIG_FB_NVIDIA is not set
+# CONFIG_FB_RIVA is not set
+# CONFIG_FB_I740 is not set
+# CONFIG_FB_MATROX is not set
+# CONFIG_FB_RADEON is not set
+# CONFIG_FB_ATY128 is not set
+# CONFIG_FB_ATY is not set
+# CONFIG_FB_S3 is not set
+# CONFIG_FB_SAVAGE is not set
+# CONFIG_FB_SIS is not set
+# CONFIG_FB_VIA is not set
+# CONFIG_FB_NEOMAGIC is not set
+# CONFIG_FB_KYRO is not set
+# CONFIG_FB_3DFX is not set
+# CONFIG_FB_VOODOO1 is not set
+# CONFIG_FB_VT8623 is not set
+# CONFIG_FB_TRIDENT is not set
+# CONFIG_FB_ARK is not set
+# CONFIG_FB_PM3 is not set
+# CONFIG_FB_CARMINE is not set
+# CONFIG_FB_SM501 is not set
+# CONFIG_FB_SMSCUFX is not set
+# CONFIG_FB_UDL is not set
+# CONFIG_FB_IBM_GXT4500 is not set
+# CONFIG_FB_VIRTUAL is not set
+CONFIG_XEN_FBDEV_FRONTEND=m
+# CONFIG_FB_METRONOME is not set
+# CONFIG_FB_MB862XX is not set
+# CONFIG_FB_HYPERV is not set
+# CONFIG_FB_SSD1307 is not set
+# CONFIG_FB_SM712 is not set
+CONFIG_FB_CORE=y
+CONFIG_FB_NOTIFY=y
+# CONFIG_FIRMWARE_EDID is not set
+CONFIG_FB_DEVICE=y
+CONFIG_FB_CFB_FILLRECT=y
+CONFIG_FB_CFB_COPYAREA=y
+CONFIG_FB_CFB_IMAGEBLIT=y
+CONFIG_FB_SYS_FILLRECT=y
+CONFIG_FB_SYS_COPYAREA=y
+CONFIG_FB_SYS_IMAGEBLIT=y
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+CONFIG_FB_SYSMEM_FOPS=y
+CONFIG_FB_DEFERRED_IO=y
+CONFIG_FB_DMAMEM_HELPERS=y
+CONFIG_FB_IOMEM_FOPS=y
+CONFIG_FB_IOMEM_HELPERS=y
+CONFIG_FB_SYSMEM_HELPERS=y
+CONFIG_FB_SYSMEM_HELPERS_DEFERRED=y
+# CONFIG_FB_MODE_HELPERS is not set
+# CONFIG_FB_TILEBLITTING is not set
+# end of Frame buffer Devices
+
+#
+# Backlight & LCD device support
+#
+CONFIG_LCD_CLASS_DEVICE=m
+CONFIG_LCD_L4F00242T03=m
+CONFIG_LCD_LMS283GF05=m
+CONFIG_LCD_LTV350QV=m
+CONFIG_LCD_ILI922X=m
+CONFIG_LCD_ILI9320=m
+CONFIG_LCD_TDO24M=m
+CONFIG_LCD_VGG2432A4=m
+CONFIG_LCD_PLATFORM=m
+CONFIG_LCD_AMS369FG06=m
+CONFIG_LCD_LMS501KF03=m
+CONFIG_LCD_HX8357=m
+CONFIG_LCD_OTM3225A=m
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+CONFIG_BACKLIGHT_KTD253=m
+CONFIG_BACKLIGHT_KTD2801=m
+CONFIG_BACKLIGHT_KTZ8866=m
+CONFIG_BACKLIGHT_LM3533=m
+CONFIG_BACKLIGHT_PWM=m
+CONFIG_BACKLIGHT_DA903X=m
+CONFIG_BACKLIGHT_DA9052=m
+CONFIG_BACKLIGHT_MAX8925=m
+CONFIG_BACKLIGHT_MT6370=m
+CONFIG_BACKLIGHT_APPLE=m
+CONFIG_BACKLIGHT_QCOM_WLED=m
+CONFIG_BACKLIGHT_RT4831=m
+CONFIG_BACKLIGHT_SAHARA=m
+CONFIG_BACKLIGHT_WM831X=m
+CONFIG_BACKLIGHT_ADP5520=m
+CONFIG_BACKLIGHT_ADP8860=m
+CONFIG_BACKLIGHT_ADP8870=m
+CONFIG_BACKLIGHT_88PM860X=m
+CONFIG_BACKLIGHT_PCF50633=m
+CONFIG_BACKLIGHT_AAT2870=m
+CONFIG_BACKLIGHT_LM3509=m
+CONFIG_BACKLIGHT_LM3630A=m
+CONFIG_BACKLIGHT_LM3639=m
+CONFIG_BACKLIGHT_LP855X=m
+CONFIG_BACKLIGHT_LP8788=m
+CONFIG_BACKLIGHT_MP3309C=m
+CONFIG_BACKLIGHT_PANDORA=m
+CONFIG_BACKLIGHT_SKY81452=m
+CONFIG_BACKLIGHT_AS3711=m
+CONFIG_BACKLIGHT_GPIO=m
+CONFIG_BACKLIGHT_LV5207LP=m
+CONFIG_BACKLIGHT_BD6107=m
+CONFIG_BACKLIGHT_ARCXCNN=m
+CONFIG_BACKLIGHT_RAVE_SP=m
+# end of Backlight & LCD device support
+
+CONFIG_VIDEOMODE_HELPERS=y
+CONFIG_HDMI=y
+
+#
+# Console display driver support
+#
+CONFIG_VGA_CONSOLE=y
+CONFIG_DUMMY_CONSOLE=y
+CONFIG_DUMMY_CONSOLE_COLUMNS=80
+CONFIG_DUMMY_CONSOLE_ROWS=25
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION is not set
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
+CONFIG_FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER=y
+# end of Console display driver support
+
+# CONFIG_LOGO is not set
+# end of Graphics support
+
+CONFIG_DRM_ACCEL=y
+CONFIG_DRM_ACCEL_HABANALABS=m
+CONFIG_DRM_ACCEL_IVPU=m
+CONFIG_DRM_ACCEL_QAIC=m
+CONFIG_SOUND=m
+CONFIG_SOUND_OSS_CORE=y
+# CONFIG_SOUND_OSS_CORE_PRECLAIM is not set
+CONFIG_SND=m
+CONFIG_SND_TIMER=m
+CONFIG_SND_PCM=m
+CONFIG_SND_PCM_ELD=y
+CONFIG_SND_PCM_IEC958=y
+CONFIG_SND_DMAENGINE_PCM=m
+CONFIG_SND_HWDEP=m
+CONFIG_SND_SEQ_DEVICE=m
+CONFIG_SND_RAWMIDI=m
+CONFIG_SND_UMP=m
+CONFIG_SND_UMP_LEGACY_RAWMIDI=y
+CONFIG_SND_COMPRESS_OFFLOAD=m
+CONFIG_SND_JACK=y
+CONFIG_SND_JACK_INPUT_DEV=y
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_PCM_OSS_PLUGINS=y
+CONFIG_SND_PCM_TIMER=y
+CONFIG_SND_HRTIMER=m
+CONFIG_SND_DYNAMIC_MINORS=y
+CONFIG_SND_MAX_CARDS=32
+# CONFIG_SND_SUPPORT_OLD_API is not set
+CONFIG_SND_PROC_FS=y
+CONFIG_SND_VERBOSE_PROCFS=y
+CONFIG_SND_VERBOSE_PRINTK=y
+CONFIG_SND_CTL_FAST_LOOKUP=y
+CONFIG_SND_DEBUG=y
+# CONFIG_SND_DEBUG_VERBOSE is not set
+# CONFIG_SND_PCM_XRUN_DEBUG is not set
+CONFIG_SND_CTL_INPUT_VALIDATION=y
+# CONFIG_SND_CTL_DEBUG is not set
+# CONFIG_SND_JACK_INJECTION_DEBUG is not set
+CONFIG_SND_VMASTER=y
+CONFIG_SND_DMA_SGBUF=y
+CONFIG_SND_CTL_LED=m
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
+CONFIG_SND_SEQ_HRTIMER_DEFAULT=y
+CONFIG_SND_SEQ_MIDI_EVENT=m
+CONFIG_SND_SEQ_MIDI=m
+CONFIG_SND_SEQ_MIDI_EMUL=m
+CONFIG_SND_SEQ_VIRMIDI=m
+CONFIG_SND_SEQ_UMP=y
+CONFIG_SND_SEQ_UMP_CLIENT=m
+CONFIG_SND_MPU401_UART=m
+CONFIG_SND_OPL3_LIB=m
+CONFIG_SND_OPL3_LIB_SEQ=m
+CONFIG_SND_VX_LIB=m
+CONFIG_SND_AC97_CODEC=m
+CONFIG_SND_DRIVERS=y
+# CONFIG_SND_PCSP is not set
+CONFIG_SND_DUMMY=m
+CONFIG_SND_ALOOP=m
+CONFIG_SND_PCMTEST=m
+CONFIG_SND_VIRMIDI=m
+CONFIG_SND_MTPAV=m
+CONFIG_SND_MTS64=m
+CONFIG_SND_SERIAL_U16550=m
+CONFIG_SND_MPU401=m
+CONFIG_SND_PORTMAN2X4=m
+CONFIG_SND_AC97_POWER_SAVE=y
+CONFIG_SND_AC97_POWER_SAVE_DEFAULT=10
+CONFIG_SND_SB_COMMON=m
+CONFIG_SND_PCI=y
+CONFIG_SND_AD1889=m
+CONFIG_SND_ALS300=m
+CONFIG_SND_ALS4000=m
+CONFIG_SND_ALI5451=m
+CONFIG_SND_ASIHPI=m
+CONFIG_SND_ATIIXP=m
+CONFIG_SND_ATIIXP_MODEM=m
+CONFIG_SND_AU8810=m
+CONFIG_SND_AU8820=m
+CONFIG_SND_AU8830=m
+CONFIG_SND_AW2=m
+CONFIG_SND_AZT3328=m
+CONFIG_SND_BT87X=m
+# CONFIG_SND_BT87X_OVERCLOCK is not set
+CONFIG_SND_CA0106=m
+CONFIG_SND_CMIPCI=m
+CONFIG_SND_OXYGEN_LIB=m
+CONFIG_SND_OXYGEN=m
+CONFIG_SND_CS4281=m
+CONFIG_SND_CS46XX=m
+CONFIG_SND_CS46XX_NEW_DSP=y
+CONFIG_SND_CTXFI=m
+CONFIG_SND_DARLA20=m
+CONFIG_SND_GINA20=m
+CONFIG_SND_LAYLA20=m
+CONFIG_SND_DARLA24=m
+CONFIG_SND_GINA24=m
+CONFIG_SND_LAYLA24=m
+CONFIG_SND_MONA=m
+CONFIG_SND_MIA=m
+CONFIG_SND_ECHO3G=m
+CONFIG_SND_INDIGO=m
+CONFIG_SND_INDIGOIO=m
+CONFIG_SND_INDIGODJ=m
+CONFIG_SND_INDIGOIOX=m
+CONFIG_SND_INDIGODJX=m
+CONFIG_SND_EMU10K1=m
+CONFIG_SND_EMU10K1_SEQ=m
+CONFIG_SND_EMU10K1X=m
+CONFIG_SND_ENS1370=m
+CONFIG_SND_ENS1371=m
+CONFIG_SND_ES1938=m
+CONFIG_SND_ES1968=m
+CONFIG_SND_ES1968_INPUT=y
+CONFIG_SND_ES1968_RADIO=y
+CONFIG_SND_FM801=m
+CONFIG_SND_FM801_TEA575X_BOOL=y
+CONFIG_SND_HDSP=m
+CONFIG_SND_HDSPM=m
+CONFIG_SND_ICE1712=m
+CONFIG_SND_ICE1724=m
+CONFIG_SND_INTEL8X0=m
+CONFIG_SND_INTEL8X0M=m
+CONFIG_SND_KORG1212=m
+CONFIG_SND_LOLA=m
+CONFIG_SND_LX6464ES=m
+CONFIG_SND_MAESTRO3=m
+CONFIG_SND_MAESTRO3_INPUT=y
+CONFIG_SND_MIXART=m
+CONFIG_SND_NM256=m
+CONFIG_SND_PCXHR=m
+CONFIG_SND_RIPTIDE=m
+CONFIG_SND_RME32=m
+CONFIG_SND_RME96=m
+CONFIG_SND_RME9652=m
+CONFIG_SND_SONICVIBES=m
+CONFIG_SND_TRIDENT=m
+CONFIG_SND_VIA82XX=m
+CONFIG_SND_VIA82XX_MODEM=m
+CONFIG_SND_VIRTUOSO=m
+CONFIG_SND_VX222=m
+CONFIG_SND_YMFPCI=m
+
+#
+# HD-Audio
+#
+CONFIG_SND_HDA=m
+CONFIG_SND_HDA_GENERIC_LEDS=y
+CONFIG_SND_HDA_INTEL=m
+CONFIG_SND_HDA_HWDEP=y
+CONFIG_SND_HDA_RECONFIG=y
+CONFIG_SND_HDA_INPUT_BEEP=y
+CONFIG_SND_HDA_INPUT_BEEP_MODE=0
+CONFIG_SND_HDA_PATCH_LOADER=y
+CONFIG_SND_HDA_CIRRUS_SCODEC=m
+CONFIG_SND_HDA_SCODEC_CS35L41=m
+CONFIG_SND_HDA_CS_DSP_CONTROLS=m
+CONFIG_SND_HDA_SCODEC_COMPONENT=m
+CONFIG_SND_HDA_SCODEC_CS35L41_I2C=m
+CONFIG_SND_HDA_SCODEC_CS35L41_SPI=m
+CONFIG_SND_HDA_SCODEC_CS35L56=m
+CONFIG_SND_HDA_SCODEC_CS35L56_I2C=m
+CONFIG_SND_HDA_SCODEC_CS35L56_SPI=m
+CONFIG_SND_HDA_SCODEC_TAS2781_I2C=m
+CONFIG_SND_HDA_CODEC_REALTEK=m
+CONFIG_SND_HDA_CODEC_ANALOG=m
+CONFIG_SND_HDA_CODEC_SIGMATEL=m
+CONFIG_SND_HDA_CODEC_VIA=m
+CONFIG_SND_HDA_CODEC_HDMI=m
+CONFIG_SND_HDA_CODEC_CIRRUS=m
+CONFIG_SND_HDA_CODEC_CS8409=m
+CONFIG_SND_HDA_CODEC_CONEXANT=m
+CONFIG_SND_HDA_CODEC_SENARYTECH=m
+CONFIG_SND_HDA_CODEC_CA0110=m
+CONFIG_SND_HDA_CODEC_CA0132=m
+CONFIG_SND_HDA_CODEC_CA0132_DSP=y
+CONFIG_SND_HDA_CODEC_CMEDIA=m
+CONFIG_SND_HDA_CODEC_SI3054=m
+CONFIG_SND_HDA_GENERIC=m
+CONFIG_SND_HDA_POWER_SAVE_DEFAULT=10
+CONFIG_SND_HDA_INTEL_HDMI_SILENT_STREAM=y
+# CONFIG_SND_HDA_CTL_DEV_ID is not set
+# end of HD-Audio
+
+CONFIG_SND_HDA_CORE=m
+CONFIG_SND_HDA_DSP_LOADER=y
+CONFIG_SND_HDA_COMPONENT=y
+CONFIG_SND_HDA_I915=y
+CONFIG_SND_HDA_EXT_CORE=m
+CONFIG_SND_HDA_PREALLOC_SIZE=0
+CONFIG_SND_INTEL_NHLT=y
+CONFIG_SND_INTEL_DSP_CONFIG=m
+CONFIG_SND_INTEL_SOUNDWIRE_ACPI=m
+# CONFIG_SND_INTEL_BYT_PREFER_SOF is not set
+CONFIG_SND_SPI=y
+CONFIG_SND_USB=y
+CONFIG_SND_USB_AUDIO=m
+CONFIG_SND_USB_AUDIO_MIDI_V2=y
+CONFIG_SND_USB_AUDIO_USE_MEDIA_CONTROLLER=y
+CONFIG_SND_USB_UA101=m
+CONFIG_SND_USB_USX2Y=m
+CONFIG_SND_USB_CAIAQ=m
+CONFIG_SND_USB_CAIAQ_INPUT=y
+CONFIG_SND_USB_US122L=m
+CONFIG_SND_USB_6FIRE=m
+CONFIG_SND_USB_HIFACE=m
+CONFIG_SND_BCD2000=m
+CONFIG_SND_USB_LINE6=m
+CONFIG_SND_USB_POD=m
+CONFIG_SND_USB_PODHD=m
+CONFIG_SND_USB_TONEPORT=m
+CONFIG_SND_USB_VARIAX=m
+CONFIG_SND_FIREWIRE=y
+CONFIG_SND_FIREWIRE_LIB=m
+CONFIG_SND_DICE=m
+CONFIG_SND_OXFW=m
+CONFIG_SND_ISIGHT=m
+CONFIG_SND_FIREWORKS=m
+CONFIG_SND_BEBOB=m
+CONFIG_SND_FIREWIRE_DIGI00X=m
+CONFIG_SND_FIREWIRE_TASCAM=m
+CONFIG_SND_FIREWIRE_MOTU=m
+CONFIG_SND_FIREFACE=m
+CONFIG_SND_PCMCIA=y
+CONFIG_SND_VXPOCKET=m
+CONFIG_SND_PDAUDIOCF=m
+CONFIG_SND_SOC=m
+CONFIG_SND_SOC_AC97_BUS=y
+CONFIG_SND_SOC_GENERIC_DMAENGINE_PCM=y
+CONFIG_SND_SOC_COMPRESS=y
+CONFIG_SND_SOC_TOPOLOGY=y
+CONFIG_SND_SOC_ACPI=m
+CONFIG_SND_SOC_ADI=m
+CONFIG_SND_SOC_ADI_AXI_I2S=m
+CONFIG_SND_SOC_ADI_AXI_SPDIF=m
+CONFIG_SND_SOC_AMD_ACP=m
+CONFIG_SND_SOC_AMD_CZ_DA7219MX98357_MACH=m
+CONFIG_SND_SOC_AMD_CZ_RT5645_MACH=m
+CONFIG_SND_SOC_AMD_ST_ES8336_MACH=m
+CONFIG_SND_SOC_AMD_ACP3x=m
+CONFIG_SND_SOC_AMD_RV_RT5682_MACH=m
+CONFIG_SND_SOC_AMD_RENOIR=m
+CONFIG_SND_SOC_AMD_RENOIR_MACH=m
+CONFIG_SND_SOC_AMD_ACP5x=m
+CONFIG_SND_SOC_AMD_VANGOGH_MACH=m
+CONFIG_SND_SOC_AMD_ACP6x=m
+CONFIG_SND_SOC_AMD_YC_MACH=m
+CONFIG_SND_AMD_ACP_CONFIG=m
+CONFIG_SND_SOC_AMD_ACP_COMMON=m
+CONFIG_SND_SOC_AMD_ACP_PDM=m
+CONFIG_SND_SOC_AMD_ACP_LEGACY_COMMON=m
+CONFIG_SND_SOC_AMD_ACP_I2S=m
+CONFIG_SND_SOC_AMD_ACP_PCM=m
+CONFIG_SND_SOC_AMD_ACP_PCI=m
+CONFIG_SND_AMD_ASOC_RENOIR=m
+CONFIG_SND_AMD_ASOC_REMBRANDT=m
+CONFIG_SND_AMD_ASOC_ACP63=m
+CONFIG_SND_AMD_ASOC_ACP70=m
+CONFIG_SND_SOC_AMD_MACH_COMMON=m
+CONFIG_SND_SOC_AMD_LEGACY_MACH=m
+CONFIG_SND_SOC_AMD_SOF_MACH=m
+CONFIG_SND_AMD_SOUNDWIRE_ACPI=m
+CONFIG_SND_SOC_AMD_RPL_ACP6x=m
+CONFIG_SND_SOC_AMD_ACP63_TOPLEVEL=m
+CONFIG_SND_SOC_AMD_SOUNDWIRE_LINK_BASELINE=m
+CONFIG_SND_SOC_AMD_SOUNDWIRE=m
+CONFIG_SND_SOC_AMD_PS=m
+CONFIG_SND_SOC_AMD_PS_MACH=m
+CONFIG_SND_ATMEL_SOC=m
+# CONFIG_SND_BCM63XX_I2S_WHISTLER is not set
+CONFIG_SND_DESIGNWARE_I2S=m
+CONFIG_SND_DESIGNWARE_PCM=y
+
+#
+# SoC Audio for Freescale CPUs
+#
+
+#
+# Common SoC Audio options for Freescale CPUs:
+#
+# CONFIG_SND_SOC_FSL_ASRC is not set
+# CONFIG_SND_SOC_FSL_SAI is not set
+# CONFIG_SND_SOC_FSL_AUDMIX is not set
+# CONFIG_SND_SOC_FSL_SSI is not set
+# CONFIG_SND_SOC_FSL_SPDIF is not set
+# CONFIG_SND_SOC_FSL_ESAI is not set
+# CONFIG_SND_SOC_FSL_MICFIL is not set
+CONFIG_SND_SOC_FSL_XCVR=m
+CONFIG_SND_SOC_FSL_UTILS=m
+# CONFIG_SND_SOC_IMX_AUDMUX is not set
+# end of SoC Audio for Freescale CPUs
+
+CONFIG_SND_SOC_CHV3_I2S=m
+CONFIG_SND_I2S_HI6210_I2S=m
+CONFIG_SND_SOC_IMG=y
+CONFIG_SND_SOC_IMG_I2S_IN=m
+CONFIG_SND_SOC_IMG_I2S_OUT=m
+CONFIG_SND_SOC_IMG_PARALLEL_OUT=m
+CONFIG_SND_SOC_IMG_SPDIF_IN=m
+CONFIG_SND_SOC_IMG_SPDIF_OUT=m
+CONFIG_SND_SOC_IMG_PISTACHIO_INTERNAL_DAC=m
+CONFIG_SND_SOC_INTEL_SST_TOPLEVEL=y
+CONFIG_SND_SOC_INTEL_SST=m
+CONFIG_SND_SOC_INTEL_CATPT=m
+CONFIG_SND_SST_ATOM_HIFI2_PLATFORM=m
+CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_PCI=m
+CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_ACPI=m
+CONFIG_SND_SOC_INTEL_SKYLAKE=m
+CONFIG_SND_SOC_INTEL_SKL=m
+CONFIG_SND_SOC_INTEL_APL=m
+CONFIG_SND_SOC_INTEL_KBL=m
+CONFIG_SND_SOC_INTEL_GLK=m
+CONFIG_SND_SOC_INTEL_CNL=m
+CONFIG_SND_SOC_INTEL_CFL=m
+CONFIG_SND_SOC_INTEL_CML_H=m
+CONFIG_SND_SOC_INTEL_CML_LP=m
+CONFIG_SND_SOC_INTEL_SKYLAKE_FAMILY=m
+CONFIG_SND_SOC_INTEL_SKYLAKE_SSP_CLK=m
+CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC=y
+CONFIG_SND_SOC_INTEL_SKYLAKE_COMMON=m
+CONFIG_SND_SOC_ACPI_INTEL_MATCH=m
+CONFIG_SND_SOC_INTEL_AVS=m
+
+#
+# Intel AVS Machine drivers
+#
+
+#
+# Available DSP configurations
+#
+CONFIG_SND_SOC_INTEL_AVS_MACH_DA7219=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_DMIC=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_ES8336=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_HDAUDIO=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_I2S_TEST=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98927=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98357A=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98373=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_NAU8825=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_PROBE=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_RT274=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_RT286=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_RT298=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567=m
+# end of Intel AVS Machine drivers
+
+CONFIG_SND_SOC_INTEL_MACH=y
+CONFIG_SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES=y
+CONFIG_SND_SOC_INTEL_HDA_DSP_COMMON=m
+CONFIG_SND_SOC_INTEL_SOF_MAXIM_COMMON=m
+CONFIG_SND_SOC_INTEL_SOF_REALTEK_COMMON=m
+CONFIG_SND_SOC_INTEL_SOF_CIRRUS_COMMON=m
+CONFIG_SND_SOC_INTEL_SOF_NUVOTON_COMMON=m
+CONFIG_SND_SOC_INTEL_SOF_BOARD_HELPERS=m
+CONFIG_SND_SOC_INTEL_HASWELL_MACH=m
+CONFIG_SND_SOC_INTEL_BDW_RT5650_MACH=m
+CONFIG_SND_SOC_INTEL_BDW_RT5677_MACH=m
+CONFIG_SND_SOC_INTEL_BROADWELL_MACH=m
+CONFIG_SND_SOC_INTEL_BYTCR_RT5640_MACH=m
+CONFIG_SND_SOC_INTEL_BYTCR_RT5651_MACH=m
+CONFIG_SND_SOC_INTEL_BYTCR_WM5102_MACH=m
+CONFIG_SND_SOC_INTEL_CHT_BSW_RT5672_MACH=m
+CONFIG_SND_SOC_INTEL_CHT_BSW_RT5645_MACH=m
+CONFIG_SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH=m
+CONFIG_SND_SOC_INTEL_CHT_BSW_NAU8824_MACH=m
+CONFIG_SND_SOC_INTEL_BYT_CHT_CX2072X_MACH=m
+CONFIG_SND_SOC_INTEL_BYT_CHT_DA7213_MACH=m
+CONFIG_SND_SOC_INTEL_BYT_CHT_ES8316_MACH=m
+# CONFIG_SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH is not set
+CONFIG_SND_SOC_INTEL_SKL_RT286_MACH=m
+CONFIG_SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH=m
+CONFIG_SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH=m
+CONFIG_SND_SOC_INTEL_DA7219_MAX98357A_GENERIC=m
+CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH=m
+CONFIG_SND_SOC_INTEL_BXT_RT298_MACH=m
+CONFIG_SND_SOC_INTEL_SOF_WM8804_MACH=m
+CONFIG_SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH=m
+CONFIG_SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH=m
+CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98357A_MACH=m
+CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98927_MACH=m
+CONFIG_SND_SOC_INTEL_KBL_RT5660_MACH=m
+CONFIG_SND_SOC_INTEL_GLK_DA7219_MAX98357A_MACH=m
+CONFIG_SND_SOC_INTEL_GLK_RT5682_MAX98357A_MACH=m
+CONFIG_SND_SOC_INTEL_SKL_HDA_DSP_GENERIC_MACH=m
+CONFIG_SND_SOC_INTEL_SOF_RT5682_MACH=m
+CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH=m
+CONFIG_SND_SOC_INTEL_SOF_PCM512x_MACH=m
+CONFIG_SND_SOC_INTEL_SOF_ES8336_MACH=m
+CONFIG_SND_SOC_INTEL_SOF_NAU8825_MACH=m
+CONFIG_SND_SOC_INTEL_CML_LP_DA7219_MAX98357A_MACH=m
+CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m
+CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH=m
+CONFIG_SND_SOC_INTEL_SOF_SSP_AMP_MACH=m
+CONFIG_SND_SOC_INTEL_EHL_RT5660_MACH=m
+CONFIG_SND_SOC_INTEL_SOUNDWIRE_SOF_MACH=m
+CONFIG_SND_SOC_MTK_BTCVSD=m
+CONFIG_SND_SOC_SOF_TOPLEVEL=y
+CONFIG_SND_SOC_SOF_PCI_DEV=m
+CONFIG_SND_SOC_SOF_PCI=m
+CONFIG_SND_SOC_SOF_ACPI=m
+CONFIG_SND_SOC_SOF_ACPI_DEV=m
+CONFIG_SND_SOC_SOF_DEBUG_PROBES=m
+CONFIG_SND_SOC_SOF_CLIENT=m
+CONFIG_SND_SOC_SOF=m
+CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE=y
+CONFIG_SND_SOC_SOF_IPC3=y
+CONFIG_SND_SOC_SOF_IPC4=y
+CONFIG_SND_SOC_SOF_AMD_TOPLEVEL=m
+CONFIG_SND_SOC_SOF_AMD_COMMON=m
+CONFIG_SND_SOC_SOF_AMD_RENOIR=m
+CONFIG_SND_SOC_SOF_AMD_VANGOGH=m
+CONFIG_SND_SOC_SOF_AMD_REMBRANDT=m
+CONFIG_SND_SOC_SOF_ACP_PROBES=m
+CONFIG_SND_SOC_SOF_AMD_SOUNDWIRE_LINK_BASELINE=m
+CONFIG_SND_SOC_SOF_AMD_SOUNDWIRE=m
+CONFIG_SND_SOC_SOF_AMD_ACP63=m
+CONFIG_SND_SOC_SOF_INTEL_TOPLEVEL=y
+CONFIG_SND_SOC_SOF_INTEL_HIFI_EP_IPC=m
+CONFIG_SND_SOC_SOF_INTEL_ATOM_HIFI_EP=m
+CONFIG_SND_SOC_SOF_INTEL_COMMON=m
+CONFIG_SND_SOC_SOF_BAYTRAIL=m
+# CONFIG_SND_SOC_SOF_BROADWELL is not set
+CONFIG_SND_SOC_SOF_MERRIFIELD=m
+# CONFIG_SND_SOC_SOF_SKYLAKE is not set
+# CONFIG_SND_SOC_SOF_KABYLAKE is not set
+CONFIG_SND_SOC_SOF_INTEL_APL=m
+CONFIG_SND_SOC_SOF_APOLLOLAKE=m
+CONFIG_SND_SOC_SOF_GEMINILAKE=m
+CONFIG_SND_SOC_SOF_INTEL_CNL=m
+CONFIG_SND_SOC_SOF_CANNONLAKE=m
+CONFIG_SND_SOC_SOF_COFFEELAKE=m
+CONFIG_SND_SOC_SOF_COMETLAKE=m
+CONFIG_SND_SOC_SOF_INTEL_ICL=m
+CONFIG_SND_SOC_SOF_ICELAKE=m
+CONFIG_SND_SOC_SOF_JASPERLAKE=m
+CONFIG_SND_SOC_SOF_INTEL_TGL=m
+CONFIG_SND_SOC_SOF_TIGERLAKE=m
+CONFIG_SND_SOC_SOF_ELKHARTLAKE=m
+CONFIG_SND_SOC_SOF_ALDERLAKE=m
+CONFIG_SND_SOC_SOF_INTEL_MTL=m
+CONFIG_SND_SOC_SOF_METEORLAKE=m
+CONFIG_SND_SOC_SOF_INTEL_LNL=m
+CONFIG_SND_SOC_SOF_LUNARLAKE=m
+CONFIG_SND_SOC_SOF_HDA_COMMON=m
+CONFIG_SND_SOC_SOF_HDA_GENERIC=m
+CONFIG_SND_SOC_SOF_HDA_MLINK=m
+CONFIG_SND_SOC_SOF_HDA_LINK=y
+CONFIG_SND_SOC_SOF_HDA_AUDIO_CODEC=y
+CONFIG_SND_SOC_SOF_HDA_LINK_BASELINE=m
+CONFIG_SND_SOC_SOF_HDA=m
+CONFIG_SND_SOC_SOF_HDA_PROBES=m
+CONFIG_SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE=m
+CONFIG_SND_SOC_SOF_INTEL_SOUNDWIRE=m
+CONFIG_SND_SOC_SOF_XTENSA=m
+
+#
+# STMicroelectronics STM32 SOC audio support
+#
+# end of STMicroelectronics STM32 SOC audio support
+
+CONFIG_SND_SOC_XILINX_I2S=m
+CONFIG_SND_SOC_XILINX_AUDIO_FORMATTER=m
+CONFIG_SND_SOC_XILINX_SPDIF=m
+CONFIG_SND_SOC_XTFPGA_I2S=m
+CONFIG_SND_SOC_I2C_AND_SPI=m
+
+#
+# CODEC drivers
+#
+CONFIG_SND_SOC_ARIZONA=m
+CONFIG_SND_SOC_WM_ADSP=m
+CONFIG_SND_SOC_AC97_CODEC=m
+CONFIG_SND_SOC_ADAU_UTILS=m
+CONFIG_SND_SOC_ADAU1372=m
+CONFIG_SND_SOC_ADAU1372_I2C=m
+CONFIG_SND_SOC_ADAU1372_SPI=m
+CONFIG_SND_SOC_ADAU1701=m
+CONFIG_SND_SOC_ADAU17X1=m
+CONFIG_SND_SOC_ADAU1761=m
+CONFIG_SND_SOC_ADAU1761_I2C=m
+CONFIG_SND_SOC_ADAU1761_SPI=m
+CONFIG_SND_SOC_ADAU7002=m
+CONFIG_SND_SOC_ADAU7118=m
+CONFIG_SND_SOC_ADAU7118_HW=m
+CONFIG_SND_SOC_ADAU7118_I2C=m
+CONFIG_SND_SOC_AK4104=m
+CONFIG_SND_SOC_AK4118=m
+CONFIG_SND_SOC_AK4375=m
+CONFIG_SND_SOC_AK4458=m
+CONFIG_SND_SOC_AK4554=m
+CONFIG_SND_SOC_AK4613=m
+CONFIG_SND_SOC_AK4619=m
+CONFIG_SND_SOC_AK4642=m
+CONFIG_SND_SOC_AK5386=m
+CONFIG_SND_SOC_AK5558=m
+CONFIG_SND_SOC_ALC5623=m
+CONFIG_SND_SOC_AUDIO_IIO_AUX=m
+CONFIG_SND_SOC_AW8738=m
+CONFIG_SND_SOC_AW88395_LIB=m
+CONFIG_SND_SOC_AW88395=m
+CONFIG_SND_SOC_AW88261=m
+CONFIG_SND_SOC_AW87390=m
+CONFIG_SND_SOC_AW88399=m
+CONFIG_SND_SOC_BD28623=m
+# CONFIG_SND_SOC_BT_SCO is not set
+CONFIG_SND_SOC_CHV3_CODEC=m
+CONFIG_SND_SOC_CROS_EC_CODEC=m
+CONFIG_SND_SOC_CS_AMP_LIB=m
+CONFIG_SND_SOC_CS35L32=m
+CONFIG_SND_SOC_CS35L33=m
+CONFIG_SND_SOC_CS35L34=m
+CONFIG_SND_SOC_CS35L35=m
+CONFIG_SND_SOC_CS35L36=m
+CONFIG_SND_SOC_CS35L41_LIB=m
+CONFIG_SND_SOC_CS35L41=m
+CONFIG_SND_SOC_CS35L41_SPI=m
+CONFIG_SND_SOC_CS35L41_I2C=m
+CONFIG_SND_SOC_CS35L45=m
+CONFIG_SND_SOC_CS35L45_SPI=m
+CONFIG_SND_SOC_CS35L45_I2C=m
+CONFIG_SND_SOC_CS35L56=m
+CONFIG_SND_SOC_CS35L56_SHARED=m
+CONFIG_SND_SOC_CS35L56_I2C=m
+CONFIG_SND_SOC_CS35L56_SPI=m
+CONFIG_SND_SOC_CS35L56_SDW=m
+CONFIG_SND_SOC_CS40L50=m
+CONFIG_SND_SOC_CS42L42_CORE=m
+CONFIG_SND_SOC_CS42L42=m
+CONFIG_SND_SOC_CS42L42_SDW=m
+CONFIG_SND_SOC_CS42L43=m
+CONFIG_SND_SOC_CS42L43_SDW=m
+CONFIG_SND_SOC_CS42L51=m
+CONFIG_SND_SOC_CS42L51_I2C=m
+CONFIG_SND_SOC_CS42L52=m
+CONFIG_SND_SOC_CS42L56=m
+CONFIG_SND_SOC_CS42L73=m
+CONFIG_SND_SOC_CS42L83=m
+CONFIG_SND_SOC_CS4234=m
+CONFIG_SND_SOC_CS4265=m
+CONFIG_SND_SOC_CS4270=m
+CONFIG_SND_SOC_CS4271=m
+CONFIG_SND_SOC_CS4271_I2C=m
+CONFIG_SND_SOC_CS4271_SPI=m
+CONFIG_SND_SOC_CS42XX8=m
+CONFIG_SND_SOC_CS42XX8_I2C=m
+CONFIG_SND_SOC_CS43130=m
+CONFIG_SND_SOC_CS4341=m
+CONFIG_SND_SOC_CS4349=m
+CONFIG_SND_SOC_CS53L30=m
+CONFIG_SND_SOC_CS530X=m
+CONFIG_SND_SOC_CS530X_I2C=m
+CONFIG_SND_SOC_CX2072X=m
+CONFIG_SND_SOC_DA7213=m
+CONFIG_SND_SOC_DA7219=m
+CONFIG_SND_SOC_DMIC=m
+CONFIG_SND_SOC_HDMI_CODEC=m
+CONFIG_SND_SOC_ES7134=m
+CONFIG_SND_SOC_ES7241=m
+CONFIG_SND_SOC_ES83XX_DSM_COMMON=m
+CONFIG_SND_SOC_ES8311=m
+CONFIG_SND_SOC_ES8316=m
+CONFIG_SND_SOC_ES8326=m
+CONFIG_SND_SOC_ES8328=m
+CONFIG_SND_SOC_ES8328_I2C=m
+CONFIG_SND_SOC_ES8328_SPI=m
+CONFIG_SND_SOC_GTM601=m
+CONFIG_SND_SOC_HDAC_HDMI=m
+CONFIG_SND_SOC_HDAC_HDA=m
+CONFIG_SND_SOC_HDA=m
+CONFIG_SND_SOC_ICS43432=m
+CONFIG_SND_SOC_IDT821034=m
+CONFIG_SND_SOC_MAX98088=m
+CONFIG_SND_SOC_MAX98090=m
+CONFIG_SND_SOC_MAX98357A=m
+CONFIG_SND_SOC_MAX98504=m
+CONFIG_SND_SOC_MAX9867=m
+CONFIG_SND_SOC_MAX98927=m
+CONFIG_SND_SOC_MAX98520=m
+CONFIG_SND_SOC_MAX98363=m
+CONFIG_SND_SOC_MAX98373=m
+CONFIG_SND_SOC_MAX98373_I2C=m
+CONFIG_SND_SOC_MAX98373_SDW=m
+CONFIG_SND_SOC_MAX98388=m
+CONFIG_SND_SOC_MAX98390=m
+CONFIG_SND_SOC_MAX98396=m
+CONFIG_SND_SOC_MAX9860=m
+CONFIG_SND_SOC_MSM8916_WCD_DIGITAL=m
+CONFIG_SND_SOC_PCM1681=m
+CONFIG_SND_SOC_PCM1789=m
+CONFIG_SND_SOC_PCM1789_I2C=m
+CONFIG_SND_SOC_PCM179X=m
+CONFIG_SND_SOC_PCM179X_I2C=m
+CONFIG_SND_SOC_PCM179X_SPI=m
+CONFIG_SND_SOC_PCM186X=m
+CONFIG_SND_SOC_PCM186X_I2C=m
+CONFIG_SND_SOC_PCM186X_SPI=m
+CONFIG_SND_SOC_PCM3060=m
+CONFIG_SND_SOC_PCM3060_I2C=m
+CONFIG_SND_SOC_PCM3060_SPI=m
+CONFIG_SND_SOC_PCM3168A=m
+CONFIG_SND_SOC_PCM3168A_I2C=m
+CONFIG_SND_SOC_PCM3168A_SPI=m
+CONFIG_SND_SOC_PCM5102A=m
+CONFIG_SND_SOC_PCM512x=m
+CONFIG_SND_SOC_PCM512x_I2C=m
+CONFIG_SND_SOC_PCM512x_SPI=m
+CONFIG_SND_SOC_PCM6240=m
+CONFIG_SND_SOC_PEB2466=m
+CONFIG_SND_SOC_RL6231=m
+CONFIG_SND_SOC_RL6347A=m
+CONFIG_SND_SOC_RT274=m
+CONFIG_SND_SOC_RT286=m
+CONFIG_SND_SOC_RT298=m
+CONFIG_SND_SOC_RT1011=m
+CONFIG_SND_SOC_RT1015=m
+CONFIG_SND_SOC_RT1015P=m
+CONFIG_SND_SOC_RT1017_SDCA_SDW=m
+CONFIG_SND_SOC_RT1019=m
+CONFIG_SND_SOC_RT1308=m
+CONFIG_SND_SOC_RT1308_SDW=m
+CONFIG_SND_SOC_RT1316_SDW=m
+CONFIG_SND_SOC_RT1318_SDW=m
+CONFIG_SND_SOC_RT1320_SDW=m
+CONFIG_SND_SOC_RT5514=m
+CONFIG_SND_SOC_RT5514_SPI=m
+CONFIG_SND_SOC_RT5616=m
+CONFIG_SND_SOC_RT5631=m
+CONFIG_SND_SOC_RT5640=m
+CONFIG_SND_SOC_RT5645=m
+CONFIG_SND_SOC_RT5651=m
+CONFIG_SND_SOC_RT5659=m
+CONFIG_SND_SOC_RT5660=m
+CONFIG_SND_SOC_RT5663=m
+CONFIG_SND_SOC_RT5670=m
+CONFIG_SND_SOC_RT5677=m
+CONFIG_SND_SOC_RT5677_SPI=m
+CONFIG_SND_SOC_RT5682=m
+CONFIG_SND_SOC_RT5682_I2C=m
+CONFIG_SND_SOC_RT5682_SDW=m
+CONFIG_SND_SOC_RT5682S=m
+CONFIG_SND_SOC_RT700=m
+CONFIG_SND_SOC_RT700_SDW=m
+CONFIG_SND_SOC_RT711=m
+CONFIG_SND_SOC_RT711_SDW=m
+CONFIG_SND_SOC_RT711_SDCA_SDW=m
+CONFIG_SND_SOC_RT712_SDCA_SDW=m
+CONFIG_SND_SOC_RT712_SDCA_DMIC_SDW=m
+CONFIG_SND_SOC_RT722_SDCA_SDW=m
+CONFIG_SND_SOC_RT715=m
+CONFIG_SND_SOC_RT715_SDW=m
+CONFIG_SND_SOC_RT715_SDCA_SDW=m
+CONFIG_SND_SOC_RT9120=m
+CONFIG_SND_SOC_RTQ9128=m
+# CONFIG_SND_SOC_SDW_MOCKUP is not set
+CONFIG_SND_SOC_SGTL5000=m
+CONFIG_SND_SOC_SI476X=m
+CONFIG_SND_SOC_SIGMADSP=m
+CONFIG_SND_SOC_SIGMADSP_I2C=m
+CONFIG_SND_SOC_SIGMADSP_REGMAP=m
+CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m
+CONFIG_SND_SOC_SIMPLE_MUX=m
+CONFIG_SND_SOC_SMA1303=m
+CONFIG_SND_SOC_SPDIF=m
+CONFIG_SND_SOC_SRC4XXX_I2C=m
+CONFIG_SND_SOC_SRC4XXX=m
+CONFIG_SND_SOC_SSM2305=m
+CONFIG_SND_SOC_SSM2518=m
+CONFIG_SND_SOC_SSM2602=m
+CONFIG_SND_SOC_SSM2602_SPI=m
+CONFIG_SND_SOC_SSM2602_I2C=m
+CONFIG_SND_SOC_SSM4567=m
+CONFIG_SND_SOC_STA32X=m
+CONFIG_SND_SOC_STA350=m
+CONFIG_SND_SOC_STI_SAS=m
+CONFIG_SND_SOC_TAS2552=m
+CONFIG_SND_SOC_TAS2562=m
+CONFIG_SND_SOC_TAS2764=m
+CONFIG_SND_SOC_TAS2770=m
+CONFIG_SND_SOC_TAS2780=m
+CONFIG_SND_SOC_TAS2781_COMLIB=m
+CONFIG_SND_SOC_TAS2781_FMWLIB=m
+CONFIG_SND_SOC_TAS2781_I2C=m
+CONFIG_SND_SOC_TAS5086=m
+CONFIG_SND_SOC_TAS571X=m
+CONFIG_SND_SOC_TAS5720=m
+CONFIG_SND_SOC_TAS5805M=m
+CONFIG_SND_SOC_TAS6424=m
+CONFIG_SND_SOC_TDA7419=m
+CONFIG_SND_SOC_TFA9879=m
+CONFIG_SND_SOC_TFA989X=m
+CONFIG_SND_SOC_TLV320ADC3XXX=m
+CONFIG_SND_SOC_TLV320AIC23=m
+CONFIG_SND_SOC_TLV320AIC23_I2C=m
+CONFIG_SND_SOC_TLV320AIC23_SPI=m
+CONFIG_SND_SOC_TLV320AIC31XX=m
+CONFIG_SND_SOC_TLV320AIC32X4=m
+CONFIG_SND_SOC_TLV320AIC32X4_I2C=m
+CONFIG_SND_SOC_TLV320AIC32X4_SPI=m
+CONFIG_SND_SOC_TLV320AIC3X=m
+CONFIG_SND_SOC_TLV320AIC3X_I2C=m
+CONFIG_SND_SOC_TLV320AIC3X_SPI=m
+CONFIG_SND_SOC_TLV320ADCX140=m
+CONFIG_SND_SOC_TS3A227E=m
+CONFIG_SND_SOC_TSCS42XX=m
+CONFIG_SND_SOC_TSCS454=m
+CONFIG_SND_SOC_UDA1334=m
+CONFIG_SND_SOC_WCD_CLASSH=m
+CONFIG_SND_SOC_WCD9335=m
+CONFIG_SND_SOC_WCD_MBHC=m
+CONFIG_SND_SOC_WCD934X=m
+CONFIG_SND_SOC_WCD937X=m
+CONFIG_SND_SOC_WCD937X_SDW=m
+CONFIG_SND_SOC_WCD938X=m
+CONFIG_SND_SOC_WCD938X_SDW=m
+CONFIG_SND_SOC_WCD939X=m
+CONFIG_SND_SOC_WCD939X_SDW=m
+CONFIG_SND_SOC_WM5102=m
+CONFIG_SND_SOC_WM8510=m
+CONFIG_SND_SOC_WM8523=m
+CONFIG_SND_SOC_WM8524=m
+CONFIG_SND_SOC_WM8580=m
+CONFIG_SND_SOC_WM8711=m
+CONFIG_SND_SOC_WM8728=m
+CONFIG_SND_SOC_WM8731=m
+CONFIG_SND_SOC_WM8731_I2C=m
+CONFIG_SND_SOC_WM8731_SPI=m
+CONFIG_SND_SOC_WM8737=m
+CONFIG_SND_SOC_WM8741=m
+CONFIG_SND_SOC_WM8750=m
+CONFIG_SND_SOC_WM8753=m
+CONFIG_SND_SOC_WM8770=m
+CONFIG_SND_SOC_WM8776=m
+CONFIG_SND_SOC_WM8782=m
+CONFIG_SND_SOC_WM8804=m
+CONFIG_SND_SOC_WM8804_I2C=m
+CONFIG_SND_SOC_WM8804_SPI=m
+CONFIG_SND_SOC_WM8903=m
+CONFIG_SND_SOC_WM8904=m
+CONFIG_SND_SOC_WM8940=m
+CONFIG_SND_SOC_WM8960=m
+CONFIG_SND_SOC_WM8961=m
+CONFIG_SND_SOC_WM8962=m
+CONFIG_SND_SOC_WM8974=m
+CONFIG_SND_SOC_WM8978=m
+CONFIG_SND_SOC_WM8985=m
+CONFIG_SND_SOC_WSA881X=m
+CONFIG_SND_SOC_WSA883X=m
+CONFIG_SND_SOC_WSA884X=m
+CONFIG_SND_SOC_ZL38060=m
+CONFIG_SND_SOC_MAX9759=m
+CONFIG_SND_SOC_MT6351=m
+CONFIG_SND_SOC_MT6358=m
+CONFIG_SND_SOC_MT6660=m
+CONFIG_SND_SOC_NAU8315=m
+CONFIG_SND_SOC_NAU8540=m
+CONFIG_SND_SOC_NAU8810=m
+CONFIG_SND_SOC_NAU8821=m
+CONFIG_SND_SOC_NAU8822=m
+CONFIG_SND_SOC_NAU8824=m
+CONFIG_SND_SOC_NAU8825=m
+CONFIG_SND_SOC_TPA6130A2=m
+CONFIG_SND_SOC_LPASS_MACRO_COMMON=m
+CONFIG_SND_SOC_LPASS_WSA_MACRO=m
+CONFIG_SND_SOC_LPASS_VA_MACRO=m
+CONFIG_SND_SOC_LPASS_RX_MACRO=m
+CONFIG_SND_SOC_LPASS_TX_MACRO=m
+# end of CODEC drivers
+
+CONFIG_SND_SIMPLE_CARD_UTILS=m
+CONFIG_SND_SIMPLE_CARD=m
+CONFIG_SND_X86=y
+CONFIG_HDMI_LPE_AUDIO=m
+CONFIG_SND_SYNTH_EMUX=m
+CONFIG_SND_XEN_FRONTEND=m
+CONFIG_SND_VIRTIO=m
+CONFIG_AC97_BUS=m
+CONFIG_HID_SUPPORT=y
+CONFIG_HID=y
+CONFIG_HID_BATTERY_STRENGTH=y
+CONFIG_HIDRAW=y
+CONFIG_UHID=m
+CONFIG_HID_GENERIC=m
+
+#
+# Special HID drivers
+#
+CONFIG_HID_A4TECH=m
+CONFIG_HID_ACCUTOUCH=m
+CONFIG_HID_ACRUX=m
+CONFIG_HID_ACRUX_FF=y
+CONFIG_HID_APPLE=m
+CONFIG_HID_APPLEIR=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_ASUS=m
+CONFIG_HID_AUREAL=m
+CONFIG_HID_BELKIN=m
+CONFIG_HID_BETOP_FF=m
+CONFIG_HID_BIGBEN_FF=m
+CONFIG_HID_CHERRY=m
+CONFIG_HID_CHICONY=m
+CONFIG_HID_CORSAIR=m
+CONFIG_HID_COUGAR=m
+CONFIG_HID_MACALLY=m
+CONFIG_HID_PRODIKEYS=m
+CONFIG_HID_CMEDIA=m
+CONFIG_HID_CP2112=m
+CONFIG_HID_CREATIVE_SB0540=m
+CONFIG_HID_CYPRESS=m
+CONFIG_HID_DRAGONRISE=m
+CONFIG_DRAGONRISE_FF=y
+CONFIG_HID_EMS_FF=m
+CONFIG_HID_ELAN=m
+CONFIG_HID_ELECOM=m
+CONFIG_HID_ELO=m
+CONFIG_HID_EVISION=m
+CONFIG_HID_EZKEY=m
+CONFIG_HID_FT260=m
+CONFIG_HID_GEMBIRD=m
+CONFIG_HID_GFRM=m
+CONFIG_HID_GLORIOUS=m
+CONFIG_HID_HOLTEK=m
+CONFIG_HOLTEK_FF=y
+CONFIG_HID_VIVALDI_COMMON=m
+CONFIG_HID_GOOGLE_HAMMER=m
+CONFIG_HID_GOOGLE_STADIA_FF=m
+CONFIG_HID_VIVALDI=m
+CONFIG_HID_GT683R=m
+CONFIG_HID_KEYTOUCH=m
+CONFIG_HID_KYE=m
+CONFIG_HID_UCLOGIC=m
+CONFIG_HID_WALTOP=m
+CONFIG_HID_VIEWSONIC=m
+CONFIG_HID_VRC2=m
+CONFIG_HID_XIAOMI=m
+CONFIG_HID_GYRATION=m
+CONFIG_HID_ICADE=m
+CONFIG_HID_ITE=m
+CONFIG_HID_JABRA=m
+CONFIG_HID_TWINHAN=m
+CONFIG_HID_KENSINGTON=m
+CONFIG_HID_LCPOWER=m
+CONFIG_HID_LED=m
+CONFIG_HID_LENOVO=m
+CONFIG_HID_LETSKETCH=m
+CONFIG_HID_LOGITECH=m
+CONFIG_HID_LOGITECH_DJ=m
+CONFIG_HID_LOGITECH_HIDPP=m
+CONFIG_LOGITECH_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_LOGIG940_FF=y
+CONFIG_LOGIWHEELS_FF=y
+CONFIG_HID_MAGICMOUSE=m
+CONFIG_HID_MALTRON=m
+CONFIG_HID_MAYFLASH=m
+CONFIG_HID_MEGAWORLD_FF=m
+CONFIG_HID_REDRAGON=m
+CONFIG_HID_MICROSOFT=m
+CONFIG_HID_MONTEREY=m
+CONFIG_HID_MULTITOUCH=m
+CONFIG_HID_NINTENDO=m
+CONFIG_NINTENDO_FF=y
+CONFIG_HID_NTI=m
+CONFIG_HID_NTRIG=m
+CONFIG_HID_NVIDIA_SHIELD=m
+CONFIG_NVIDIA_SHIELD_FF=y
+CONFIG_HID_ORTEK=m
+CONFIG_HID_PANTHERLORD=m
+CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PENMOUNT=m
+CONFIG_HID_PETALYNX=m
+CONFIG_HID_PICOLCD=m
+CONFIG_HID_PICOLCD_FB=y
+CONFIG_HID_PICOLCD_BACKLIGHT=y
+CONFIG_HID_PICOLCD_LCD=y
+CONFIG_HID_PICOLCD_LEDS=y
+CONFIG_HID_PICOLCD_CIR=y
+CONFIG_HID_PLANTRONICS=m
+CONFIG_HID_PLAYSTATION=m
+CONFIG_PLAYSTATION_FF=y
+CONFIG_HID_PXRC=m
+CONFIG_HID_RAZER=m
+CONFIG_HID_PRIMAX=m
+CONFIG_HID_RETRODE=m
+CONFIG_HID_ROCCAT=m
+CONFIG_HID_SAITEK=m
+CONFIG_HID_SAMSUNG=m
+CONFIG_HID_SEMITEK=m
+CONFIG_HID_SIGMAMICRO=m
+CONFIG_HID_SONY=m
+CONFIG_SONY_FF=y
+CONFIG_HID_SPEEDLINK=m
+CONFIG_HID_STEAM=m
+CONFIG_STEAM_FF=y
+CONFIG_HID_STEELSERIES=m
+CONFIG_HID_SUNPLUS=m
+CONFIG_HID_RMI=m
+CONFIG_HID_GREENASIA=m
+CONFIG_GREENASIA_FF=y
+CONFIG_HID_HYPERV_MOUSE=m
+CONFIG_HID_SMARTJOYPLUS=m
+CONFIG_SMARTJOYPLUS_FF=y
+CONFIG_HID_TIVO=m
+CONFIG_HID_TOPSEED=m
+CONFIG_HID_TOPRE=m
+CONFIG_HID_THINGM=m
+CONFIG_HID_THRUSTMASTER=m
+CONFIG_THRUSTMASTER_FF=y
+CONFIG_HID_UDRAW_PS3=m
+CONFIG_HID_U2FZERO=m
+CONFIG_HID_WACOM=m
+CONFIG_HID_WIIMOTE=m
+CONFIG_HID_WINWING=m
+CONFIG_HID_XINMO=m
+CONFIG_HID_ZEROPLUS=m
+CONFIG_ZEROPLUS_FF=y
+CONFIG_HID_ZYDACRON=m
+CONFIG_HID_SENSOR_HUB=m
+CONFIG_HID_SENSOR_CUSTOM_SENSOR=m
+CONFIG_HID_ALPS=m
+CONFIG_HID_MCP2200=m
+CONFIG_HID_MCP2221=m
+# end of Special HID drivers
+
+#
+# HID-BPF support
+#
+CONFIG_HID_BPF=y
+# end of HID-BPF support
+
+#
+# USB HID support
+#
+CONFIG_USB_HID=m
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
+# end of USB HID support
+
+CONFIG_I2C_HID=m
+CONFIG_I2C_HID_ACPI=m
+CONFIG_I2C_HID_OF=m
+CONFIG_I2C_HID_CORE=m
+
+#
+# Intel ISH HID support
+#
+CONFIG_INTEL_ISH_HID=m
+CONFIG_INTEL_ISH_FIRMWARE_DOWNLOADER=m
+# end of Intel ISH HID support
+
+#
+# AMD SFH HID Support
+#
+CONFIG_AMD_SFH_HID=m
+# end of AMD SFH HID Support
+
+#
+# Surface System Aggregator Module HID support
+#
+CONFIG_SURFACE_HID=m
+CONFIG_SURFACE_KBD=m
+# end of Surface System Aggregator Module HID support
+
+CONFIG_SURFACE_HID_CORE=m
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
+CONFIG_USB_SUPPORT=y
+CONFIG_USB_COMMON=y
+CONFIG_USB_LED_TRIG=y
+CONFIG_USB_ULPI_BUS=m
+CONFIG_USB_CONN_GPIO=m
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB=y
+CONFIG_USB_PCI=y
+CONFIG_USB_PCI_AMD=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEFAULT_PERSIST=y
+# CONFIG_USB_FEW_INIT_RETRIES is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_OTG is not set
+# CONFIG_USB_OTG_PRODUCTLIST is not set
+CONFIG_USB_LEDS_TRIGGER_USBPORT=m
+CONFIG_USB_AUTOSUSPEND_DELAY=2
+CONFIG_USB_DEFAULT_AUTHORIZATION_MODE=1
+CONFIG_USB_MON=m
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_C67X00_HCD=m
+CONFIG_USB_XHCI_HCD=y
+CONFIG_USB_XHCI_DBGCAP=y
+CONFIG_USB_XHCI_PCI=m
+CONFIG_USB_XHCI_PCI_RENESAS=m
+CONFIG_USB_XHCI_PLATFORM=m
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_ROOT_HUB_TT=y
+CONFIG_USB_EHCI_TT_NEWSCHED=y
+CONFIG_USB_EHCI_PCI=y
+CONFIG_USB_EHCI_FSL=m
+CONFIG_USB_EHCI_HCD_PLATFORM=m
+CONFIG_USB_OXU210HP_HCD=m
+CONFIG_USB_ISP116X_HCD=m
+CONFIG_USB_MAX3421_HCD=m
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PCI=y
+CONFIG_USB_OHCI_HCD_PLATFORM=m
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_SL811_HCD=m
+# CONFIG_USB_SL811_HCD_ISO is not set
+CONFIG_USB_SL811_CS=m
+CONFIG_USB_R8A66597_HCD=m
+CONFIG_USB_HCD_BCMA=m
+CONFIG_USB_HCD_SSB=m
+# CONFIG_USB_HCD_TEST_MODE is not set
+CONFIG_USB_XEN_HCD=m
+
+#
+# USB Device Class drivers
+#
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=m
+CONFIG_USB_WDM=m
+CONFIG_USB_TMC=m
+
+#
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
+#
+
+#
+# also be needed; see USB_STORAGE Help for more info
+#
+CONFIG_USB_STORAGE=m
+# CONFIG_USB_STORAGE_DEBUG is not set
+CONFIG_USB_STORAGE_REALTEK=m
+CONFIG_REALTEK_AUTOPM=y
+CONFIG_USB_STORAGE_DATAFAB=m
+CONFIG_USB_STORAGE_FREECOM=m
+CONFIG_USB_STORAGE_ISD200=m
+CONFIG_USB_STORAGE_USBAT=m
+CONFIG_USB_STORAGE_SDDR09=m
+CONFIG_USB_STORAGE_SDDR55=m
+CONFIG_USB_STORAGE_JUMPSHOT=m
+CONFIG_USB_STORAGE_ALAUDA=m
+CONFIG_USB_STORAGE_ONETOUCH=m
+CONFIG_USB_STORAGE_KARMA=m
+CONFIG_USB_STORAGE_CYPRESS_ATACB=m
+CONFIG_USB_STORAGE_ENE_UB6250=m
+CONFIG_USB_UAS=m
+
+#
+# USB Imaging devices
+#
+CONFIG_USB_MDC800=m
+CONFIG_USB_MICROTEK=m
+CONFIG_USBIP_CORE=m
+CONFIG_USBIP_VHCI_HCD=m
+CONFIG_USBIP_VHCI_HC_PORTS=8
+CONFIG_USBIP_VHCI_NR_HCS=1
+CONFIG_USBIP_HOST=m
+CONFIG_USBIP_VUDC=m
+# CONFIG_USBIP_DEBUG is not set
+
+#
+# USB dual-mode controller drivers
+#
+CONFIG_USB_CDNS_SUPPORT=m
+CONFIG_USB_CDNS_HOST=y
+CONFIG_USB_CDNS3=m
+CONFIG_USB_CDNS3_GADGET=y
+CONFIG_USB_CDNS3_HOST=y
+CONFIG_USB_CDNS3_PCI_WRAP=m
+CONFIG_USB_CDNSP_PCI=m
+CONFIG_USB_CDNSP_GADGET=y
+CONFIG_USB_CDNSP_HOST=y
+CONFIG_USB_MUSB_HDRC=m
+# CONFIG_USB_MUSB_HOST is not set
+# CONFIG_USB_MUSB_GADGET is not set
+CONFIG_USB_MUSB_DUAL_ROLE=y
+
+#
+# Platform Glue Layer
+#
+
+#
+# MUSB DMA mode
+#
+# CONFIG_MUSB_PIO_ONLY is not set
+CONFIG_USB_DWC3=m
+CONFIG_USB_DWC3_ULPI=y
+# CONFIG_USB_DWC3_HOST is not set
+# CONFIG_USB_DWC3_GADGET is not set
+CONFIG_USB_DWC3_DUAL_ROLE=y
+
+#
+# Platform Glue Driver Support
+#
+CONFIG_USB_DWC3_PCI=m
+CONFIG_USB_DWC3_HAPS=m
+CONFIG_USB_DWC2=m
+# CONFIG_USB_DWC2_HOST is not set
+
+#
+# Gadget/Dual-role mode requires USB Gadget support to be enabled
+#
+# CONFIG_USB_DWC2_PERIPHERAL is not set
+CONFIG_USB_DWC2_DUAL_ROLE=y
+CONFIG_USB_DWC2_PCI=m
+# CONFIG_USB_DWC2_DEBUG is not set
+# CONFIG_USB_DWC2_TRACK_MISSED_SOFS is not set
+CONFIG_USB_CHIPIDEA=m
+CONFIG_USB_CHIPIDEA_UDC=y
+CONFIG_USB_CHIPIDEA_HOST=y
+CONFIG_USB_CHIPIDEA_PCI=m
+CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
+CONFIG_USB_CHIPIDEA_GENERIC=m
+CONFIG_USB_ISP1760=m
+CONFIG_USB_ISP1760_HCD=y
+CONFIG_USB_ISP1761_UDC=y
+# CONFIG_USB_ISP1760_HOST_ROLE is not set
+# CONFIG_USB_ISP1760_GADGET_ROLE is not set
+CONFIG_USB_ISP1760_DUAL_ROLE=y
+
+#
+# USB port drivers
+#
+CONFIG_USB_SERIAL=y
+CONFIG_USB_SERIAL_CONSOLE=y
+CONFIG_USB_SERIAL_GENERIC=y
+CONFIG_USB_SERIAL_SIMPLE=m
+CONFIG_USB_SERIAL_AIRCABLE=m
+CONFIG_USB_SERIAL_ARK3116=m
+CONFIG_USB_SERIAL_BELKIN=m
+CONFIG_USB_SERIAL_CH341=m
+CONFIG_USB_SERIAL_WHITEHEAT=m
+CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
+CONFIG_USB_SERIAL_CP210X=m
+CONFIG_USB_SERIAL_CYPRESS_M8=m
+CONFIG_USB_SERIAL_EMPEG=m
+CONFIG_USB_SERIAL_FTDI_SIO=m
+CONFIG_USB_SERIAL_VISOR=m
+CONFIG_USB_SERIAL_IPAQ=m
+CONFIG_USB_SERIAL_IR=m
+CONFIG_USB_SERIAL_EDGEPORT=m
+CONFIG_USB_SERIAL_EDGEPORT_TI=m
+CONFIG_USB_SERIAL_F81232=m
+CONFIG_USB_SERIAL_F8153X=m
+CONFIG_USB_SERIAL_GARMIN=m
+CONFIG_USB_SERIAL_IPW=m
+CONFIG_USB_SERIAL_IUU=m
+CONFIG_USB_SERIAL_KEYSPAN_PDA=m
+CONFIG_USB_SERIAL_KEYSPAN=m
+CONFIG_USB_SERIAL_KLSI=m
+CONFIG_USB_SERIAL_KOBIL_SCT=m
+CONFIG_USB_SERIAL_MCT_U232=m
+CONFIG_USB_SERIAL_METRO=m
+CONFIG_USB_SERIAL_MOS7720=m
+CONFIG_USB_SERIAL_MOS7715_PARPORT=y
+CONFIG_USB_SERIAL_MOS7840=m
+CONFIG_USB_SERIAL_MXUPORT=m
+CONFIG_USB_SERIAL_NAVMAN=m
+CONFIG_USB_SERIAL_PL2303=m
+CONFIG_USB_SERIAL_OTI6858=m
+CONFIG_USB_SERIAL_QCAUX=m
+CONFIG_USB_SERIAL_QUALCOMM=m
+CONFIG_USB_SERIAL_SPCP8X5=m
+CONFIG_USB_SERIAL_SAFE=m
+# CONFIG_USB_SERIAL_SAFE_PADDED is not set
+CONFIG_USB_SERIAL_SIERRAWIRELESS=m
+CONFIG_USB_SERIAL_SYMBOL=m
+CONFIG_USB_SERIAL_TI=m
+CONFIG_USB_SERIAL_CYBERJACK=m
+CONFIG_USB_SERIAL_WWAN=m
+CONFIG_USB_SERIAL_OPTION=m
+CONFIG_USB_SERIAL_OMNINET=m
+CONFIG_USB_SERIAL_OPTICON=m
+CONFIG_USB_SERIAL_XSENS_MT=m
+CONFIG_USB_SERIAL_WISHBONE=m
+CONFIG_USB_SERIAL_SSU100=m
+CONFIG_USB_SERIAL_QT2=m
+CONFIG_USB_SERIAL_UPD78F0730=m
+CONFIG_USB_SERIAL_XR=m
+CONFIG_USB_SERIAL_DEBUG=m
+
+#
+# USB Miscellaneous drivers
+#
+CONFIG_USB_USS720=m
+CONFIG_USB_EMI62=m
+CONFIG_USB_EMI26=m
+CONFIG_USB_ADUTUX=m
+CONFIG_USB_SEVSEG=m
+CONFIG_USB_LEGOTOWER=m
+CONFIG_USB_LCD=m
+CONFIG_USB_CYPRESS_CY7C63=m
+CONFIG_USB_CYTHERM=m
+CONFIG_USB_IDMOUSE=m
+CONFIG_USB_APPLEDISPLAY=m
+CONFIG_APPLE_MFI_FASTCHARGE=m
+CONFIG_USB_LJCA=m
+CONFIG_USB_SISUSBVGA=m
+CONFIG_USB_LD=m
+CONFIG_USB_TRANCEVIBRATOR=m
+CONFIG_USB_IOWARRIOR=m
+CONFIG_USB_TEST=m
+CONFIG_USB_EHSET_TEST_FIXTURE=m
+CONFIG_USB_ISIGHTFW=m
+CONFIG_USB_YUREX=m
+CONFIG_USB_EZUSB_FX2=m
+CONFIG_USB_HUB_USB251XB=m
+CONFIG_USB_HSIC_USB3503=m
+CONFIG_USB_HSIC_USB4604=m
+CONFIG_USB_LINK_LAYER_TEST=m
+CONFIG_USB_CHAOSKEY=m
+CONFIG_USB_ATM=m
+CONFIG_USB_SPEEDTOUCH=m
+CONFIG_USB_CXACRU=m
+CONFIG_USB_UEAGLEATM=m
+CONFIG_USB_XUSBATM=m
+
+#
+# USB Physical Layer drivers
+#
+CONFIG_USB_PHY=y
+CONFIG_NOP_USB_XCEIV=m
+CONFIG_USB_GPIO_VBUS=m
+CONFIG_TAHVO_USB=m
+# CONFIG_TAHVO_USB_HOST_BY_DEFAULT is not set
+CONFIG_USB_ISP1301=m
+# end of USB Physical Layer drivers
+
+CONFIG_USB_GADGET=m
+# CONFIG_USB_GADGET_DEBUG is not set
+# CONFIG_USB_GADGET_DEBUG_FILES is not set
+# CONFIG_USB_GADGET_DEBUG_FS is not set
+CONFIG_USB_GADGET_VBUS_DRAW=2
+CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2
+CONFIG_U_SERIAL_CONSOLE=y
+
+#
+# USB Peripheral Controller
+#
+CONFIG_USB_GR_UDC=m
+CONFIG_USB_R8A66597=m
+CONFIG_USB_PXA27X=m
+CONFIG_USB_MV_UDC=m
+CONFIG_USB_MV_U3D=m
+CONFIG_USB_SNP_CORE=m
+CONFIG_USB_M66592=m
+CONFIG_USB_BDC_UDC=m
+CONFIG_USB_AMD5536UDC=m
+CONFIG_USB_NET2272=m
+# CONFIG_USB_NET2272_DMA is not set
+CONFIG_USB_NET2280=m
+CONFIG_USB_GOKU=m
+CONFIG_USB_EG20T=m
+CONFIG_USB_MAX3420_UDC=m
+CONFIG_USB_CDNS2_UDC=m
+CONFIG_USB_DUMMY_HCD=m
+# end of USB Peripheral Controller
+
+CONFIG_USB_LIBCOMPOSITE=m
+CONFIG_USB_F_ACM=m
+CONFIG_USB_F_SS_LB=m
+CONFIG_USB_U_SERIAL=m
+CONFIG_USB_U_ETHER=m
+CONFIG_USB_U_AUDIO=m
+CONFIG_USB_F_SERIAL=m
+CONFIG_USB_F_OBEX=m
+CONFIG_USB_F_NCM=m
+CONFIG_USB_F_ECM=m
+CONFIG_USB_F_PHONET=m
+CONFIG_USB_F_EEM=m
+CONFIG_USB_F_SUBSET=m
+CONFIG_USB_F_RNDIS=m
+CONFIG_USB_F_MASS_STORAGE=m
+CONFIG_USB_F_FS=m
+CONFIG_USB_F_UAC1=m
+CONFIG_USB_F_UAC1_LEGACY=m
+CONFIG_USB_F_UAC2=m
+CONFIG_USB_F_UVC=m
+CONFIG_USB_F_MIDI=m
+CONFIG_USB_F_MIDI2=m
+CONFIG_USB_F_HID=m
+CONFIG_USB_F_PRINTER=m
+CONFIG_USB_F_TCM=m
+CONFIG_USB_CONFIGFS=m
+CONFIG_USB_CONFIGFS_SERIAL=y
+CONFIG_USB_CONFIGFS_ACM=y
+CONFIG_USB_CONFIGFS_OBEX=y
+CONFIG_USB_CONFIGFS_NCM=y
+CONFIG_USB_CONFIGFS_ECM=y
+CONFIG_USB_CONFIGFS_ECM_SUBSET=y
+CONFIG_USB_CONFIGFS_RNDIS=y
+CONFIG_USB_CONFIGFS_EEM=y
+CONFIG_USB_CONFIGFS_PHONET=y
+CONFIG_USB_CONFIGFS_MASS_STORAGE=y
+CONFIG_USB_CONFIGFS_F_LB_SS=y
+CONFIG_USB_CONFIGFS_F_FS=y
+CONFIG_USB_CONFIGFS_F_UAC1=y
+CONFIG_USB_CONFIGFS_F_UAC1_LEGACY=y
+CONFIG_USB_CONFIGFS_F_UAC2=y
+CONFIG_USB_CONFIGFS_F_MIDI=y
+CONFIG_USB_CONFIGFS_F_MIDI2=y
+CONFIG_USB_CONFIGFS_F_HID=y
+CONFIG_USB_CONFIGFS_F_UVC=y
+CONFIG_USB_CONFIGFS_F_PRINTER=y
+CONFIG_USB_CONFIGFS_F_TCM=y
+
+#
+# USB Gadget precomposed configurations
+#
+CONFIG_USB_ZERO=m
+CONFIG_USB_AUDIO=m
+# CONFIG_GADGET_UAC1 is not set
+CONFIG_USB_ETH=m
+CONFIG_USB_ETH_RNDIS=y
+CONFIG_USB_ETH_EEM=y
+CONFIG_USB_G_NCM=m
+CONFIG_USB_GADGETFS=m
+CONFIG_USB_FUNCTIONFS=m
+CONFIG_USB_FUNCTIONFS_ETH=y
+CONFIG_USB_FUNCTIONFS_RNDIS=y
+CONFIG_USB_FUNCTIONFS_GENERIC=y
+CONFIG_USB_MASS_STORAGE=m
+CONFIG_USB_GADGET_TARGET=m
+CONFIG_USB_G_SERIAL=m
+CONFIG_USB_MIDI_GADGET=m
+CONFIG_USB_G_PRINTER=m
+CONFIG_USB_CDC_COMPOSITE=m
+CONFIG_USB_G_NOKIA=m
+CONFIG_USB_G_ACM_MS=m
+CONFIG_USB_G_MULTI=m
+CONFIG_USB_G_MULTI_RNDIS=y
+CONFIG_USB_G_MULTI_CDC=y
+CONFIG_USB_G_HID=m
+CONFIG_USB_G_DBGP=m
+# CONFIG_USB_G_DBGP_PRINTK is not set
+CONFIG_USB_G_DBGP_SERIAL=y
+CONFIG_USB_G_WEBCAM=m
+CONFIG_USB_RAW_GADGET=m
+# end of USB Gadget precomposed configurations
+
+CONFIG_TYPEC=m
+CONFIG_TYPEC_TCPM=m
+CONFIG_TYPEC_TCPCI=m
+CONFIG_TYPEC_RT1711H=m
+CONFIG_TYPEC_MT6360=m
+CONFIG_TYPEC_TCPCI_MT6370=m
+CONFIG_TYPEC_TCPCI_MAXIM=m
+CONFIG_TYPEC_FUSB302=m
+CONFIG_TYPEC_WCOVE=m
+CONFIG_TYPEC_UCSI=m
+CONFIG_UCSI_CCG=m
+CONFIG_UCSI_ACPI=m
+CONFIG_UCSI_STM32G0=m
+CONFIG_TYPEC_TPS6598X=m
+CONFIG_TYPEC_ANX7411=m
+CONFIG_TYPEC_RT1719=m
+CONFIG_TYPEC_HD3SS3220=m
+CONFIG_TYPEC_STUSB160X=m
+CONFIG_TYPEC_WUSB3801=m
+
+#
+# USB Type-C Multiplexer/DeMultiplexer Switch support
+#
+CONFIG_TYPEC_MUX_FSA4480=m
+CONFIG_TYPEC_MUX_GPIO_SBU=m
+CONFIG_TYPEC_MUX_PI3USB30532=m
+CONFIG_TYPEC_MUX_INTEL_PMC=m
+CONFIG_TYPEC_MUX_IT5205=m
+CONFIG_TYPEC_MUX_NB7VPQ904M=m
+CONFIG_TYPEC_MUX_PTN36502=m
+CONFIG_TYPEC_MUX_WCD939X_USBSS=m
+# end of USB Type-C Multiplexer/DeMultiplexer Switch support
+
+#
+# USB Type-C Alternate Mode drivers
+#
+CONFIG_TYPEC_DP_ALTMODE=m
+CONFIG_TYPEC_NVIDIA_ALTMODE=m
+# end of USB Type-C Alternate Mode drivers
+
+CONFIG_USB_ROLE_SWITCH=m
+CONFIG_USB_ROLES_INTEL_XHCI=m
+CONFIG_MMC=m
+CONFIG_MMC_BLOCK=m
+CONFIG_MMC_BLOCK_MINORS=8
+CONFIG_SDIO_UART=m
+CONFIG_MMC_TEST=m
+CONFIG_MMC_CRYPTO=y
+
+#
+# MMC/SD/SDIO Host Controller Drivers
+#
+# CONFIG_MMC_DEBUG is not set
+CONFIG_MMC_SDHCI=m
+CONFIG_MMC_SDHCI_IO_ACCESSORS=y
+CONFIG_MMC_SDHCI_PCI=m
+CONFIG_MMC_RICOH_MMC=y
+CONFIG_MMC_SDHCI_ACPI=m
+CONFIG_MMC_SDHCI_PLTFM=m
+CONFIG_MMC_SDHCI_F_SDH30=m
+CONFIG_MMC_WBSD=m
+CONFIG_MMC_ALCOR=m
+CONFIG_MMC_TIFM_SD=m
+CONFIG_MMC_SPI=m
+CONFIG_MMC_SDRICOH_CS=m
+CONFIG_MMC_CB710=m
+CONFIG_MMC_VIA_SDMMC=m
+CONFIG_MMC_VUB300=m
+CONFIG_MMC_USHC=m
+CONFIG_MMC_USDHI6ROL0=m
+CONFIG_MMC_REALTEK_PCI=m
+CONFIG_MMC_REALTEK_USB=m
+CONFIG_MMC_CQHCI=m
+CONFIG_MMC_HSQ=m
+CONFIG_MMC_TOSHIBA_PCI=m
+CONFIG_MMC_MTK=m
+CONFIG_MMC_SDHCI_XENON=m
+CONFIG_SCSI_UFSHCD=m
+CONFIG_SCSI_UFS_BSG=y
+CONFIG_SCSI_UFS_CRYPTO=y
+CONFIG_SCSI_UFS_HWMON=y
+CONFIG_SCSI_UFSHCD_PCI=m
+# CONFIG_SCSI_UFS_DWC_TC_PCI is not set
+CONFIG_SCSI_UFSHCD_PLATFORM=m
+CONFIG_SCSI_UFS_CDNS_PLATFORM=m
+CONFIG_MEMSTICK=m
+# CONFIG_MEMSTICK_DEBUG is not set
+
+#
+# MemoryStick drivers
+#
+# CONFIG_MEMSTICK_UNSAFE_RESUME is not set
+CONFIG_MSPRO_BLOCK=m
+CONFIG_MS_BLOCK=m
+
+#
+# MemoryStick Host Controller Drivers
+#
+CONFIG_MEMSTICK_TIFM_MS=m
+CONFIG_MEMSTICK_JMICRON_38X=m
+CONFIG_MEMSTICK_R592=m
+CONFIG_MEMSTICK_REALTEK_USB=m
+CONFIG_LEDS_EXPRESSWIRE=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_CLASS_FLASH=m
+CONFIG_LEDS_CLASS_MULTICOLOR=m
+CONFIG_LEDS_BRIGHTNESS_HW_CHANGED=y
+
+#
+# LED drivers
+#
+CONFIG_LEDS_88PM860X=m
+CONFIG_LEDS_APU=m
+CONFIG_LEDS_AW200XX=m
+CONFIG_LEDS_CHT_WCOVE=m
+CONFIG_LEDS_CROS_EC=m
+CONFIG_LEDS_LM3530=m
+CONFIG_LEDS_LM3532=m
+CONFIG_LEDS_LM3533=m
+CONFIG_LEDS_LM3642=m
+CONFIG_LEDS_MT6323=m
+CONFIG_LEDS_PCA9532=m
+CONFIG_LEDS_PCA9532_GPIO=y
+CONFIG_LEDS_GPIO=m
+CONFIG_LEDS_LP3944=m
+CONFIG_LEDS_LP3952=m
+CONFIG_LEDS_LP50XX=m
+CONFIG_LEDS_LP8788=m
+CONFIG_LEDS_PCA955X=m
+CONFIG_LEDS_PCA955X_GPIO=y
+CONFIG_LEDS_PCA963X=m
+CONFIG_LEDS_PCA995X=m
+CONFIG_LEDS_WM831X_STATUS=m
+CONFIG_LEDS_WM8350=m
+CONFIG_LEDS_DA903X=m
+CONFIG_LEDS_DA9052=m
+CONFIG_LEDS_DAC124S085=m
+CONFIG_LEDS_PWM=m
+CONFIG_LEDS_REGULATOR=m
+CONFIG_LEDS_BD2606MVV=m
+CONFIG_LEDS_BD2802=m
+CONFIG_LEDS_INTEL_SS4200=m
+CONFIG_LEDS_LT3593=m
+CONFIG_LEDS_ADP5520=m
+CONFIG_LEDS_MC13783=m
+CONFIG_LEDS_TCA6507=m
+CONFIG_LEDS_TLC591XX=m
+CONFIG_LEDS_MAX8997=m
+CONFIG_LEDS_LM355x=m
+CONFIG_LEDS_MENF21BMC=m
+CONFIG_LEDS_IS31FL319X=m
+
+#
+# LED driver for blink(1) USB RGB LED is under Special HID drivers (HID_THINGM)
+#
+CONFIG_LEDS_BLINKM=m
+CONFIG_LEDS_MLXCPLD=m
+CONFIG_LEDS_MLXREG=m
+CONFIG_LEDS_USER=m
+CONFIG_LEDS_NIC78BX=m
+CONFIG_LEDS_SPI_BYTE=m
+CONFIG_LEDS_TI_LMU_COMMON=m
+CONFIG_LEDS_LM36274=m
+CONFIG_LEDS_TPS6105X=m
+
+#
+# Flash and Torch LED drivers
+#
+CONFIG_LEDS_AS3645A=m
+CONFIG_LEDS_LM3601X=m
+CONFIG_LEDS_MT6370_FLASH=m
+CONFIG_LEDS_RT8515=m
+CONFIG_LEDS_SGM3140=m
+
+#
+# RGB LED drivers
+#
+CONFIG_LEDS_KTD202X=m
+CONFIG_LEDS_PWM_MULTICOLOR=m
+CONFIG_LEDS_MT6370_RGB=m
+
+#
+# LED Triggers
+#
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=m
+CONFIG_LEDS_TRIGGER_ONESHOT=m
+CONFIG_LEDS_TRIGGER_DISK=y
+CONFIG_LEDS_TRIGGER_MTD=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=m
+CONFIG_LEDS_TRIGGER_BACKLIGHT=m
+CONFIG_LEDS_TRIGGER_CPU=y
+CONFIG_LEDS_TRIGGER_ACTIVITY=m
+CONFIG_LEDS_TRIGGER_GPIO=m
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=m
+
+#
+# iptables trigger is under Netfilter config (LED target)
+#
+CONFIG_LEDS_TRIGGER_TRANSIENT=m
+CONFIG_LEDS_TRIGGER_CAMERA=m
+CONFIG_LEDS_TRIGGER_PANIC=y
+CONFIG_LEDS_TRIGGER_NETDEV=m
+CONFIG_LEDS_TRIGGER_PATTERN=m
+CONFIG_LEDS_TRIGGER_TTY=m
+CONFIG_LEDS_TRIGGER_INPUT_EVENTS=m
+
+#
+# Simple LED drivers
+#
+CONFIG_LEDS_SIEMENS_SIMATIC_IPC=m
+CONFIG_LEDS_SIEMENS_SIMATIC_IPC_APOLLOLAKE=m
+CONFIG_LEDS_SIEMENS_SIMATIC_IPC_F7188X=m
+CONFIG_LEDS_SIEMENS_SIMATIC_IPC_ELKHARTLAKE=m
+CONFIG_ACCESSIBILITY=y
+CONFIG_A11Y_BRAILLE_CONSOLE=y
+
+#
+# Speakup console speech
+#
+CONFIG_SPEAKUP=m
+CONFIG_SPEAKUP_SYNTH_ACNTSA=m
+CONFIG_SPEAKUP_SYNTH_APOLLO=m
+CONFIG_SPEAKUP_SYNTH_AUDPTR=m
+CONFIG_SPEAKUP_SYNTH_BNS=m
+CONFIG_SPEAKUP_SYNTH_DECTLK=m
+CONFIG_SPEAKUP_SYNTH_DECEXT=m
+CONFIG_SPEAKUP_SYNTH_LTLK=m
+CONFIG_SPEAKUP_SYNTH_SOFT=m
+CONFIG_SPEAKUP_SYNTH_SPKOUT=m
+CONFIG_SPEAKUP_SYNTH_TXPRT=m
+CONFIG_SPEAKUP_SYNTH_DUMMY=m
+# end of Speakup console speech
+
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_USER_MEM=y
+CONFIG_INFINIBAND_ON_DEMAND_PAGING=y
+CONFIG_INFINIBAND_ADDR_TRANS=y
+CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS=y
+CONFIG_INFINIBAND_VIRT_DMA=y
+CONFIG_INFINIBAND_BNXT_RE=m
+CONFIG_INFINIBAND_CXGB4=m
+CONFIG_INFINIBAND_EFA=m
+CONFIG_INFINIBAND_ERDMA=m
+CONFIG_INFINIBAND_HFI1=m
+# CONFIG_HFI1_DEBUG_SDMA_ORDER is not set
+# CONFIG_SDMA_VERBOSITY is not set
+CONFIG_INFINIBAND_IRDMA=m
+CONFIG_MANA_INFINIBAND=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
+CONFIG_INFINIBAND_MTHCA=m
+CONFIG_INFINIBAND_MTHCA_DEBUG=y
+CONFIG_INFINIBAND_OCRDMA=m
+CONFIG_INFINIBAND_QEDR=m
+CONFIG_INFINIBAND_QIB=m
+CONFIG_INFINIBAND_QIB_DCA=y
+CONFIG_INFINIBAND_USNIC=m
+CONFIG_INFINIBAND_VMWARE_PVRDMA=m
+CONFIG_INFINIBAND_RDMAVT=m
+CONFIG_RDMA_RXE=m
+CONFIG_RDMA_SIW=m
+CONFIG_INFINIBAND_IPOIB=m
+CONFIG_INFINIBAND_IPOIB_CM=y
+CONFIG_INFINIBAND_IPOIB_DEBUG=y
+# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set
+CONFIG_INFINIBAND_SRP=m
+CONFIG_INFINIBAND_SRPT=m
+CONFIG_INFINIBAND_ISER=m
+CONFIG_INFINIBAND_ISERT=m
+CONFIG_INFINIBAND_RTRS=m
+CONFIG_INFINIBAND_RTRS_CLIENT=m
+CONFIG_INFINIBAND_RTRS_SERVER=m
+CONFIG_INFINIBAND_OPA_VNIC=m
+CONFIG_EDAC_ATOMIC_SCRUB=y
+CONFIG_EDAC_SUPPORT=y
+CONFIG_EDAC=y
+CONFIG_EDAC_LEGACY_SYSFS=y
+# CONFIG_EDAC_DEBUG is not set
+CONFIG_EDAC_DECODE_MCE=y
+CONFIG_EDAC_GHES=y
+CONFIG_EDAC_AMD64=m
+CONFIG_EDAC_E752X=m
+CONFIG_EDAC_I82975X=m
+CONFIG_EDAC_I3000=m
+CONFIG_EDAC_I3200=m
+CONFIG_EDAC_IE31200=m
+CONFIG_EDAC_X38=m
+CONFIG_EDAC_I5400=m
+CONFIG_EDAC_I7CORE=m
+CONFIG_EDAC_I5100=m
+CONFIG_EDAC_I7300=m
+CONFIG_EDAC_SBRIDGE=m
+CONFIG_EDAC_SKX=m
+CONFIG_EDAC_I10NM=m
+CONFIG_EDAC_PND2=m
+CONFIG_EDAC_IGEN6=m
+CONFIG_RTC_LIB=y
+CONFIG_RTC_MC146818_LIB=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
+CONFIG_RTC_SYSTOHC=y
+CONFIG_RTC_SYSTOHC_DEVICE="rtc0"
+# CONFIG_RTC_DEBUG is not set
+CONFIG_RTC_NVMEM=y
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_SYSFS=y
+CONFIG_RTC_INTF_PROC=y
+CONFIG_RTC_INTF_DEV=y
+CONFIG_RTC_INTF_DEV_UIE_EMUL=y
+# CONFIG_RTC_DRV_TEST is not set
+
+#
+# I2C RTC drivers
+#
+CONFIG_RTC_DRV_88PM860X=m
+CONFIG_RTC_DRV_88PM80X=m
+CONFIG_RTC_DRV_ABB5ZES3=m
+CONFIG_RTC_DRV_ABEOZ9=m
+CONFIG_RTC_DRV_ABX80X=m
+CONFIG_RTC_DRV_DS1307=m
+CONFIG_RTC_DRV_DS1307_CENTURY=y
+CONFIG_RTC_DRV_DS1374=m
+CONFIG_RTC_DRV_DS1374_WDT=y
+CONFIG_RTC_DRV_DS1672=m
+CONFIG_RTC_DRV_LP8788=m
+CONFIG_RTC_DRV_MAX6900=m
+CONFIG_RTC_DRV_MAX8907=m
+CONFIG_RTC_DRV_MAX8925=m
+CONFIG_RTC_DRV_MAX8998=m
+CONFIG_RTC_DRV_MAX8997=m
+CONFIG_RTC_DRV_MAX31335=m
+CONFIG_RTC_DRV_RS5C372=m
+CONFIG_RTC_DRV_ISL1208=m
+CONFIG_RTC_DRV_ISL12022=m
+CONFIG_RTC_DRV_X1205=m
+CONFIG_RTC_DRV_PCF8523=m
+CONFIG_RTC_DRV_PCF85063=m
+CONFIG_RTC_DRV_PCF85363=m
+CONFIG_RTC_DRV_PCF8563=m
+CONFIG_RTC_DRV_PCF8583=m
+CONFIG_RTC_DRV_M41T80=m
+CONFIG_RTC_DRV_M41T80_WDT=y
+CONFIG_RTC_DRV_BQ32K=m
+CONFIG_RTC_DRV_PALMAS=m
+CONFIG_RTC_DRV_TPS6586X=m
+CONFIG_RTC_DRV_TPS6594=m
+CONFIG_RTC_DRV_TPS65910=m
+CONFIG_RTC_DRV_RC5T583=m
+CONFIG_RTC_DRV_S35390A=m
+CONFIG_RTC_DRV_FM3130=m
+CONFIG_RTC_DRV_RX8010=m
+CONFIG_RTC_DRV_RX8111=m
+CONFIG_RTC_DRV_RX8581=m
+CONFIG_RTC_DRV_RX8025=m
+CONFIG_RTC_DRV_EM3027=m
+CONFIG_RTC_DRV_RV3028=m
+CONFIG_RTC_DRV_RV3032=m
+CONFIG_RTC_DRV_RV8803=m
+CONFIG_RTC_DRV_SD3078=m
+
+#
+# SPI RTC drivers
+#
+CONFIG_RTC_DRV_M41T93=m
+CONFIG_RTC_DRV_M41T94=m
+CONFIG_RTC_DRV_DS1302=m
+CONFIG_RTC_DRV_DS1305=m
+CONFIG_RTC_DRV_DS1343=m
+CONFIG_RTC_DRV_DS1347=m
+CONFIG_RTC_DRV_DS1390=m
+CONFIG_RTC_DRV_MAX6916=m
+CONFIG_RTC_DRV_R9701=m
+CONFIG_RTC_DRV_RX4581=m
+CONFIG_RTC_DRV_RS5C348=m
+CONFIG_RTC_DRV_MAX6902=m
+CONFIG_RTC_DRV_PCF2123=m
+CONFIG_RTC_DRV_MCP795=m
+CONFIG_RTC_I2C_AND_SPI=y
+
+#
+# SPI and I2C RTC drivers
+#
+CONFIG_RTC_DRV_DS3232=m
+CONFIG_RTC_DRV_DS3232_HWMON=y
+CONFIG_RTC_DRV_PCF2127=m
+CONFIG_RTC_DRV_RV3029C2=m
+CONFIG_RTC_DRV_RV3029_HWMON=y
+CONFIG_RTC_DRV_RX6110=m
+
+#
+# Platform RTC drivers
+#
+CONFIG_RTC_DRV_CMOS=y
+CONFIG_RTC_DRV_DS1286=m
+CONFIG_RTC_DRV_DS1511=m
+CONFIG_RTC_DRV_DS1553=m
+CONFIG_RTC_DRV_DS1685_FAMILY=m
+CONFIG_RTC_DRV_DS1685=y
+# CONFIG_RTC_DRV_DS1689 is not set
+# CONFIG_RTC_DRV_DS17285 is not set
+# CONFIG_RTC_DRV_DS17485 is not set
+# CONFIG_RTC_DRV_DS17885 is not set
+CONFIG_RTC_DRV_DS1742=m
+CONFIG_RTC_DRV_DS2404=m
+CONFIG_RTC_DRV_DA9052=m
+CONFIG_RTC_DRV_DA9055=m
+CONFIG_RTC_DRV_DA9063=m
+CONFIG_RTC_DRV_STK17TA8=m
+CONFIG_RTC_DRV_M48T86=m
+CONFIG_RTC_DRV_M48T35=m
+CONFIG_RTC_DRV_M48T59=m
+CONFIG_RTC_DRV_MSM6242=m
+CONFIG_RTC_DRV_RP5C01=m
+CONFIG_RTC_DRV_WM831X=m
+CONFIG_RTC_DRV_WM8350=m
+CONFIG_RTC_DRV_PCF50633=m
+CONFIG_RTC_DRV_CROS_EC=m
+
+#
+# on-CPU RTC drivers
+#
+CONFIG_RTC_DRV_FTRTC010=m
+CONFIG_RTC_DRV_PCAP=m
+CONFIG_RTC_DRV_MC13XXX=m
+CONFIG_RTC_DRV_MT6397=m
+
+#
+# HID Sensor RTC drivers
+#
+CONFIG_RTC_DRV_HID_SENSOR_TIME=m
+CONFIG_RTC_DRV_GOLDFISH=m
+CONFIG_RTC_DRV_WILCO_EC=m
+CONFIG_DMADEVICES=y
+# CONFIG_DMADEVICES_DEBUG is not set
+
+#
+# DMA Devices
+#
+CONFIG_DMA_ENGINE=y
+CONFIG_DMA_VIRTUAL_CHANNELS=y
+CONFIG_DMA_ACPI=y
+CONFIG_ALTERA_MSGDMA=m
+CONFIG_INTEL_IDMA64=m
+CONFIG_INTEL_IDXD_BUS=m
+CONFIG_INTEL_IDXD=m
+# CONFIG_INTEL_IDXD_COMPAT is not set
+CONFIG_INTEL_IDXD_SVM=y
+CONFIG_INTEL_IDXD_PERFMON=y
+CONFIG_INTEL_IOATDMA=m
+CONFIG_PLX_DMA=m
+CONFIG_XILINX_DMA=m
+CONFIG_XILINX_XDMA=m
+CONFIG_AMD_PTDMA=m
+CONFIG_QCOM_HIDMA_MGMT=m
+CONFIG_QCOM_HIDMA=m
+CONFIG_DW_DMAC_CORE=y
+CONFIG_DW_DMAC=m
+CONFIG_DW_DMAC_PCI=y
+CONFIG_DW_EDMA=m
+CONFIG_DW_EDMA_PCIE=m
+CONFIG_HSU_DMA=m
+CONFIG_SF_PDMA=m
+CONFIG_INTEL_LDMA=y
+
+#
+# DMA Clients
+#
+CONFIG_ASYNC_TX_DMA=y
+# CONFIG_DMATEST is not set
+CONFIG_DMA_ENGINE_RAID=y
+
+#
+# DMABUF options
+#
+CONFIG_SYNC_FILE=y
+# CONFIG_SW_SYNC is not set
+CONFIG_UDMABUF=y
+# CONFIG_DMABUF_MOVE_NOTIFY is not set
+# CONFIG_DMABUF_DEBUG is not set
+# CONFIG_DMABUF_SELFTESTS is not set
+CONFIG_DMABUF_HEAPS=y
+CONFIG_DMABUF_SYSFS_STATS=y
+CONFIG_DMABUF_HEAPS_SYSTEM=y
+CONFIG_DMABUF_HEAPS_CMA=y
+# end of DMABUF options
+
+CONFIG_DCA=m
+CONFIG_UIO=m
+CONFIG_UIO_CIF=m
+CONFIG_UIO_PDRV_GENIRQ=m
+CONFIG_UIO_DMEM_GENIRQ=m
+CONFIG_UIO_AEC=m
+CONFIG_UIO_SERCOS3=m
+CONFIG_UIO_PCI_GENERIC=m
+CONFIG_UIO_NETX=m
+CONFIG_UIO_MF624=m
+CONFIG_UIO_HV_GENERIC=m
+CONFIG_UIO_DFL=m
+CONFIG_VFIO=m
+CONFIG_VFIO_DEVICE_CDEV=y
+CONFIG_VFIO_GROUP=y
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_IOMMU_TYPE1=m
+# CONFIG_VFIO_NOIOMMU is not set
+CONFIG_VFIO_VIRQFD=y
+CONFIG_VFIO_DEBUGFS=y
+
+#
+# VFIO support for PCI devices
+#
+CONFIG_VFIO_PCI_CORE=m
+CONFIG_VFIO_PCI_MMAP=y
+CONFIG_VFIO_PCI_INTX=y
+CONFIG_VFIO_PCI=m
+CONFIG_VFIO_PCI_VGA=y
+CONFIG_VFIO_PCI_IGD=y
+CONFIG_MLX5_VFIO_PCI=m
+CONFIG_PDS_VFIO_PCI=m
+CONFIG_VIRTIO_VFIO_PCI=m
+CONFIG_QAT_VFIO_PCI=m
+# end of VFIO support for PCI devices
+
+CONFIG_VFIO_MDEV=m
+CONFIG_IRQ_BYPASS_MANAGER=y
+CONFIG_VIRT_DRIVERS=y
+CONFIG_VMGENID=y
+CONFIG_VBOXGUEST=m
+CONFIG_NITRO_ENCLAVES=m
+CONFIG_ACRN_HSM=m
+CONFIG_TSM_REPORTS=m
+CONFIG_EFI_SECRET=m
+CONFIG_SEV_GUEST=m
+CONFIG_TDX_GUEST_DRIVER=m
+CONFIG_VIRTIO_ANCHOR=y
+CONFIG_VIRTIO=y
+CONFIG_VIRTIO_PCI_LIB=y
+CONFIG_VIRTIO_PCI_LIB_LEGACY=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_PCI_ADMIN_LEGACY=y
+CONFIG_VIRTIO_PCI_LEGACY=y
+CONFIG_VIRTIO_VDPA=m
+CONFIG_VIRTIO_PMEM=m
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_VIRTIO_MEM=m
+CONFIG_VIRTIO_INPUT=m
+CONFIG_VIRTIO_MMIO=m
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
+CONFIG_VIRTIO_DMA_SHARED_BUFFER=m
+CONFIG_VIRTIO_DEBUG=y
+CONFIG_VDPA=m
+CONFIG_VDPA_SIM=m
+CONFIG_VDPA_SIM_NET=m
+CONFIG_VDPA_SIM_BLOCK=m
+CONFIG_VDPA_USER=m
+CONFIG_IFCVF=m
+CONFIG_MLX5_VDPA=y
+CONFIG_MLX5_VDPA_NET=m
+# CONFIG_MLX5_VDPA_STEERING_DEBUG is not set
+CONFIG_VP_VDPA=m
+CONFIG_ALIBABA_ENI_VDPA=m
+CONFIG_SNET_VDPA=m
+CONFIG_PDS_VDPA=m
+CONFIG_OCTEONEP_VDPA=m
+CONFIG_VHOST_IOTLB=m
+CONFIG_VHOST_RING=m
+CONFIG_VHOST_TASK=y
+CONFIG_VHOST=m
+CONFIG_VHOST_MENU=y
+CONFIG_VHOST_NET=m
+CONFIG_VHOST_SCSI=m
+CONFIG_VHOST_VSOCK=m
+CONFIG_VHOST_VDPA=m
+# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set
+
+#
+# Microsoft Hyper-V guest support
+#
+CONFIG_HYPERV=m
+# CONFIG_HYPERV_VTL_MODE is not set
+CONFIG_HYPERV_TIMER=y
+CONFIG_HYPERV_UTILS=m
+CONFIG_HYPERV_BALLOON=m
+# end of Microsoft Hyper-V guest support
+
+#
+# Xen driver support
+#
+CONFIG_XEN_BALLOON=y
+CONFIG_XEN_BALLOON_MEMORY_HOTPLUG=y
+CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512
+CONFIG_XEN_SCRUB_PAGES_DEFAULT=y
+CONFIG_XEN_DEV_EVTCHN=m
+CONFIG_XEN_BACKEND=y
+CONFIG_XENFS=m
+CONFIG_XEN_COMPAT_XENFS=y
+CONFIG_XEN_SYS_HYPERVISOR=y
+CONFIG_XEN_XENBUS_FRONTEND=y
+CONFIG_XEN_GNTDEV=m
+CONFIG_XEN_GNTDEV_DMABUF=y
+CONFIG_XEN_GRANT_DEV_ALLOC=m
+CONFIG_XEN_GRANT_DMA_ALLOC=y
+CONFIG_SWIOTLB_XEN=y
+CONFIG_XEN_PCI_STUB=y
+CONFIG_XEN_PCIDEV_BACKEND=m
+CONFIG_XEN_PVCALLS_FRONTEND=m
+CONFIG_XEN_PVCALLS_BACKEND=m
+CONFIG_XEN_SCSI_BACKEND=m
+CONFIG_XEN_PRIVCMD=m
+CONFIG_XEN_PRIVCMD_EVENTFD=y
+CONFIG_XEN_ACPI_PROCESSOR=m
+CONFIG_XEN_MCE_LOG=y
+CONFIG_XEN_HAVE_PVMMU=y
+CONFIG_XEN_EFI=y
+CONFIG_XEN_AUTO_XLATE=y
+CONFIG_XEN_ACPI=y
+CONFIG_XEN_SYMS=y
+CONFIG_XEN_HAVE_VPMU=y
+CONFIG_XEN_FRONT_PGDIR_SHBUF=m
+CONFIG_XEN_UNPOPULATED_ALLOC=y
+CONFIG_XEN_GRANT_DMA_OPS=y
+CONFIG_XEN_VIRTIO=y
+# CONFIG_XEN_VIRTIO_FORCE_GRANT is not set
+# end of Xen driver support
+
+# CONFIG_GREYBUS is not set
+# CONFIG_COMEDI is not set
+CONFIG_STAGING=y
+CONFIG_RTLLIB=m
+CONFIG_RTLLIB_CRYPTO_CCMP=m
+CONFIG_RTLLIB_CRYPTO_TKIP=m
+CONFIG_RTLLIB_CRYPTO_WEP=m
+CONFIG_RTL8192E=m
+CONFIG_RTL8723BS=m
+CONFIG_R8712U=m
+CONFIG_RTS5208=m
+CONFIG_VT6655=m
+CONFIG_VT6656=m
+
+#
+# IIO staging drivers
+#
+
+#
+# Accelerometers
+#
+CONFIG_ADIS16203=m
+CONFIG_ADIS16240=m
+# end of Accelerometers
+
+#
+# Analog to digital converters
+#
+CONFIG_AD7816=m
+# end of Analog to digital converters
+
+#
+# Analog digital bi-direction converters
+#
+CONFIG_ADT7316=m
+CONFIG_ADT7316_SPI=m
+CONFIG_ADT7316_I2C=m
+# end of Analog digital bi-direction converters
+
+#
+# Direct Digital Synthesis
+#
+CONFIG_AD9832=m
+CONFIG_AD9834=m
+# end of Direct Digital Synthesis
+
+#
+# Network Analyzer, Impedance Converters
+#
+CONFIG_AD5933=m
+# end of Network Analyzer, Impedance Converters
+# end of IIO staging drivers
+
+# CONFIG_FB_SM750 is not set
+CONFIG_STAGING_MEDIA=y
+# CONFIG_INTEL_ATOMISP is not set
+CONFIG_DVB_AV7110_IR=y
+CONFIG_DVB_AV7110=m
+CONFIG_DVB_AV7110_OSD=y
+CONFIG_DVB_SP8870=m
+CONFIG_VIDEO_IPU3_IMGU=m
+
+#
+# StarFive media platform drivers
+#
+# CONFIG_STAGING_MEDIA_DEPRECATED is not set
+CONFIG_LTE_GDM724X=m
+# CONFIG_FB_TFT is not set
+CONFIG_MOST_COMPONENTS=m
+CONFIG_MOST_NET=m
+CONFIG_MOST_VIDEO=m
+CONFIG_MOST_I2C=m
+CONFIG_KS7010=m
+CONFIG_FIELDBUS_DEV=m
+# CONFIG_VME_BUS is not set
+CONFIG_APPLE_BCE=m
+# CONFIG_GOLDFISH is not set
+CONFIG_CHROME_PLATFORMS=y
+CONFIG_CHROMEOS_ACPI=m
+CONFIG_CHROMEOS_LAPTOP=m
+CONFIG_CHROMEOS_PSTORE=m
+CONFIG_CHROMEOS_TBMC=m
+CONFIG_CROS_EC=m
+CONFIG_CROS_EC_I2C=m
+CONFIG_CROS_EC_ISHTP=m
+CONFIG_CROS_EC_SPI=m
+CONFIG_CROS_EC_UART=m
+CONFIG_CROS_EC_LPC=m
+CONFIG_CROS_EC_PROTO=y
+CONFIG_CROS_KBD_LED_BACKLIGHT=m
+CONFIG_CROS_EC_CHARDEV=m
+CONFIG_CROS_EC_LIGHTBAR=m
+CONFIG_CROS_EC_DEBUGFS=m
+CONFIG_CROS_EC_SENSORHUB=m
+CONFIG_CROS_EC_SYSFS=m
+CONFIG_CROS_EC_TYPEC=m
+CONFIG_CROS_HPS_I2C=m
+CONFIG_CROS_USBPD_LOGGER=m
+CONFIG_CROS_USBPD_NOTIFY=m
+CONFIG_CHROMEOS_PRIVACY_SCREEN=m
+CONFIG_CROS_TYPEC_SWITCH=m
+CONFIG_WILCO_EC=m
+CONFIG_WILCO_EC_DEBUGFS=m
+CONFIG_WILCO_EC_EVENTS=m
+CONFIG_WILCO_EC_TELEMETRY=m
+CONFIG_CZNIC_PLATFORMS=y
+CONFIG_MELLANOX_PLATFORM=y
+CONFIG_MLXREG_HOTPLUG=m
+CONFIG_MLXREG_IO=m
+CONFIG_MLXREG_LC=m
+CONFIG_NVSW_SN2201=m
+CONFIG_SURFACE_PLATFORMS=y
+CONFIG_SURFACE3_WMI=m
+CONFIG_SURFACE_3_POWER_OPREGION=m
+CONFIG_SURFACE_ACPI_NOTIFY=m
+CONFIG_SURFACE_AGGREGATOR_CDEV=m
+CONFIG_SURFACE_AGGREGATOR_HUB=m
+CONFIG_SURFACE_AGGREGATOR_REGISTRY=m
+CONFIG_SURFACE_AGGREGATOR_TABLET_SWITCH=m
+CONFIG_SURFACE_DTX=m
+CONFIG_SURFACE_GPE=m
+CONFIG_SURFACE_HOTPLUG=m
+CONFIG_SURFACE_PLATFORM_PROFILE=m
+CONFIG_SURFACE_PRO3_BUTTON=m
+CONFIG_SURFACE_AGGREGATOR=m
+CONFIG_SURFACE_AGGREGATOR_BUS=y
+# CONFIG_SURFACE_AGGREGATOR_ERROR_INJECTION is not set
+CONFIG_X86_PLATFORM_DEVICES=y
+CONFIG_ACPI_WMI=m
+CONFIG_WMI_BMOF=m
+CONFIG_HUAWEI_WMI=m
+CONFIG_MXM_WMI=m
+CONFIG_NVIDIA_WMI_EC_BACKLIGHT=m
+CONFIG_XIAOMI_WMI=m
+CONFIG_GIGABYTE_WMI=m
+CONFIG_YOGABOOK=m
+CONFIG_YT2_1380=m
+CONFIG_ACERHDF=m
+CONFIG_ACER_WIRELESS=m
+CONFIG_ACER_WMI=m
+CONFIG_AMD_PMF=m
+# CONFIG_AMD_PMF_DEBUG is not set
+CONFIG_AMD_PMC=m
+CONFIG_AMD_MP2_STB=y
+CONFIG_AMD_HFI=y
+CONFIG_AMD_3D_VCACHE=y
+CONFIG_AMD_HSMP=m
+CONFIG_AMD_WBRF=y
+CONFIG_ADV_SWBUTTON=m
+CONFIG_APPLE_GMUX=m
+CONFIG_ASUS_LAPTOP=m
+CONFIG_ASUS_WIRELESS=m
+CONFIG_ASUS_WMI=m
+CONFIG_ASUS_NB_WMI=m
+CONFIG_ASUS_TF103C_DOCK=m
+CONFIG_MERAKI_MX100=m
+CONFIG_EEEPC_LAPTOP=m
+CONFIG_EEEPC_WMI=m
+CONFIG_X86_PLATFORM_DRIVERS_DELL=y
+CONFIG_ALIENWARE_WMI=m
+CONFIG_DCDBAS=m
+CONFIG_DELL_LAPTOP=m
+# CONFIG_DELL_RBU is not set
+CONFIG_DELL_RBTN=m
+CONFIG_DELL_PC=m
+CONFIG_DELL_SMBIOS=m
+CONFIG_DELL_SMBIOS_WMI=y
+CONFIG_DELL_SMBIOS_SMM=y
+CONFIG_DELL_SMO8800=m
+CONFIG_DELL_UART_BACKLIGHT=m
+CONFIG_DELL_WMI=m
+CONFIG_DELL_WMI_PRIVACY=y
+CONFIG_DELL_WMI_AIO=m
+CONFIG_DELL_WMI_DESCRIPTOR=m
+CONFIG_DELL_WMI_DDV=m
+CONFIG_DELL_WMI_LED=m
+CONFIG_DELL_WMI_SYSMAN=m
+CONFIG_AMILO_RFKILL=m
+CONFIG_FUJITSU_LAPTOP=m
+CONFIG_FUJITSU_TABLET=m
+CONFIG_GPD_POCKET_FAN=m
+CONFIG_X86_PLATFORM_DRIVERS_HP=y
+CONFIG_HP_ACCEL=m
+CONFIG_HP_WMI=m
+CONFIG_HP_BIOSCFG=m
+CONFIG_WIRELESS_HOTKEY=m
+CONFIG_IBM_RTL=m
+CONFIG_IDEAPAD_LAPTOP=m
+CONFIG_LENOVO_YMC=m
+CONFIG_SENSORS_HDAPS=m
+CONFIG_THINKPAD_ACPI=m
+CONFIG_THINKPAD_ACPI_ALSA_SUPPORT=y
+# CONFIG_THINKPAD_ACPI_DEBUGFACILITIES is not set
+# CONFIG_THINKPAD_ACPI_DEBUG is not set
+# CONFIG_THINKPAD_ACPI_UNSAFE_LEDS is not set
+CONFIG_THINKPAD_ACPI_VIDEO=y
+CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y
+CONFIG_THINKPAD_LMI=m
+CONFIG_INTEL_ATOMISP2_PDX86=y
+CONFIG_INTEL_ATOMISP2_LED=m
+CONFIG_INTEL_ATOMISP2_PM=m
+CONFIG_INTEL_IFS=m
+CONFIG_INTEL_SAR_INT1092=m
+CONFIG_INTEL_SKL_INT3472=m
+CONFIG_INTEL_PMC_CORE=m
+CONFIG_INTEL_PMT_CLASS=m
+CONFIG_INTEL_PMT_TELEMETRY=m
+CONFIG_INTEL_PMT_CRASHLOG=m
+
+#
+# Intel Speed Select Technology interface support
+#
+CONFIG_INTEL_SPEED_SELECT_TPMI=m
+CONFIG_INTEL_SPEED_SELECT_INTERFACE=m
+# end of Intel Speed Select Technology interface support
+
+CONFIG_INTEL_TELEMETRY=m
+CONFIG_INTEL_WMI=y
+CONFIG_INTEL_WMI_SBL_FW_UPDATE=m
+CONFIG_INTEL_WMI_THUNDERBOLT=m
+
+#
+# Intel Uncore Frequency Control
+#
+CONFIG_INTEL_UNCORE_FREQ_CONTROL_TPMI=m
+CONFIG_INTEL_UNCORE_FREQ_CONTROL=m
+# end of Intel Uncore Frequency Control
+
+CONFIG_INTEL_HID_EVENT=m
+CONFIG_INTEL_VBTN=m
+CONFIG_INTEL_INT0002_VGPIO=m
+CONFIG_INTEL_OAKTRAIL=m
+CONFIG_INTEL_BXTWC_PMIC_TMU=m
+CONFIG_INTEL_BYTCRC_PWRSRC=m
+CONFIG_INTEL_CHTDC_TI_PWRBTN=m
+CONFIG_INTEL_CHTWC_INT33FE=m
+CONFIG_INTEL_ISHTP_ECLITE=m
+CONFIG_INTEL_MRFLD_PWRBTN=m
+CONFIG_INTEL_PUNIT_IPC=m
+CONFIG_INTEL_RST=m
+CONFIG_INTEL_SDSI=m
+CONFIG_INTEL_SMARTCONNECT=m
+CONFIG_INTEL_TPMI_POWER_DOMAINS=m
+CONFIG_INTEL_TPMI=m
+CONFIG_INTEL_PLR_TPMI=m
+CONFIG_INTEL_TURBO_MAX_3=y
+CONFIG_INTEL_VSEC=m
+CONFIG_ACPI_QUICKSTART=m
+CONFIG_MEEGOPAD_ANX7428=m
+CONFIG_MSI_EC=m
+CONFIG_MSI_LAPTOP=m
+CONFIG_MSI_WMI=m
+CONFIG_MSI_WMI_PLATFORM=m
+CONFIG_PCENGINES_APU2=m
+CONFIG_BARCO_P50_GPIO=m
+CONFIG_SAMSUNG_LAPTOP=m
+CONFIG_SAMSUNG_Q10=m
+CONFIG_ACPI_TOSHIBA=m
+CONFIG_TOSHIBA_BT_RFKILL=m
+CONFIG_TOSHIBA_HAPS=m
+CONFIG_TOSHIBA_WMI=m
+CONFIG_ACPI_CMPC=m
+CONFIG_COMPAL_LAPTOP=m
+CONFIG_LG_LAPTOP=m
+CONFIG_PANASONIC_LAPTOP=m
+CONFIG_SONY_LAPTOP=m
+CONFIG_SONYPI_COMPAT=y
+CONFIG_SYSTEM76_ACPI=m
+CONFIG_TOPSTAR_LAPTOP=m
+CONFIG_SERIAL_MULTI_INSTANTIATE=m
+CONFIG_MLX_PLATFORM=m
+CONFIG_TOUCHSCREEN_DMI=y
+CONFIG_INSPUR_PLATFORM_PROFILE=m
+CONFIG_LENOVO_WMI_CAMERA=m
+CONFIG_X86_ANDROID_TABLETS=m
+CONFIG_FW_ATTR_CLASS=m
+CONFIG_INTEL_IPS=m
+CONFIG_INTEL_SCU_IPC=y
+CONFIG_INTEL_SCU=y
+CONFIG_INTEL_SCU_PCI=y
+CONFIG_INTEL_SCU_PLATFORM=m
+CONFIG_INTEL_SCU_IPC_UTIL=m
+CONFIG_SIEMENS_SIMATIC_IPC=m
+CONFIG_SIEMENS_SIMATIC_IPC_BATT=m
+CONFIG_SIEMENS_SIMATIC_IPC_BATT_APOLLOLAKE=m
+CONFIG_SIEMENS_SIMATIC_IPC_BATT_ELKHARTLAKE=m
+CONFIG_SIEMENS_SIMATIC_IPC_BATT_F7188X=m
+CONFIG_SILICOM_PLATFORM=m
+CONFIG_WINMATE_FM07_KEYS=m
+CONFIG_SEL3350_PLATFORM=m
+CONFIG_P2SB=y
+CONFIG_HAVE_CLK=y
+CONFIG_HAVE_CLK_PREPARE=y
+CONFIG_COMMON_CLK=y
+CONFIG_COMMON_CLK_WM831X=m
+CONFIG_LMK04832=m
+CONFIG_COMMON_CLK_MAX9485=m
+CONFIG_COMMON_CLK_SI5341=m
+CONFIG_COMMON_CLK_SI5351=m
+CONFIG_COMMON_CLK_SI544=m
+CONFIG_COMMON_CLK_CDCE706=m
+CONFIG_COMMON_CLK_TPS68470=m
+CONFIG_COMMON_CLK_CS2000_CP=m
+CONFIG_CLK_TWL=m
+CONFIG_CLK_TWL6040=m
+CONFIG_COMMON_CLK_PALMAS=m
+CONFIG_COMMON_CLK_PWM=m
+CONFIG_XILINX_VCU=m
+CONFIG_HWSPINLOCK=y
+
+#
+# Clock Source drivers
+#
+CONFIG_CLKEVT_I8253=y
+CONFIG_I8253_LOCK=y
+CONFIG_CLKBLD_I8253=y
+# end of Clock Source drivers
+
+CONFIG_MAILBOX=y
+CONFIG_PCC=y
+CONFIG_ALTERA_MBOX=m
+CONFIG_IOMMU_IOVA=y
+CONFIG_IOMMU_API=y
+CONFIG_IOMMUFD_DRIVER=y
+CONFIG_IOMMU_SUPPORT=y
+
+#
+# Generic IOMMU Pagetable Support
+#
+CONFIG_IOMMU_IO_PGTABLE=y
+# end of Generic IOMMU Pagetable Support
+
+# CONFIG_IOMMU_DEBUGFS is not set
+# CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set
+CONFIG_IOMMU_DEFAULT_DMA_LAZY=y
+# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
+CONFIG_IOMMU_DMA=y
+CONFIG_IOMMU_SVA=y
+CONFIG_IOMMU_IOPF=y
+CONFIG_AMD_IOMMU=y
+CONFIG_DMAR_TABLE=y
+CONFIG_INTEL_IOMMU=y
+CONFIG_INTEL_IOMMU_SVM=y
+# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
+CONFIG_INTEL_IOMMU_FLOPPY_WA=y
+CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON=y
+CONFIG_INTEL_IOMMU_PERF_EVENTS=y
+CONFIG_IOMMUFD=m
+CONFIG_IRQ_REMAP=y
+CONFIG_HYPERV_IOMMU=y
+CONFIG_VIRTIO_IOMMU=m
+
+#
+# Remoteproc drivers
+#
+CONFIG_REMOTEPROC=y
+CONFIG_REMOTEPROC_CDEV=y
+# end of Remoteproc drivers
+
+#
+# Rpmsg drivers
+#
+CONFIG_RPMSG=m
+CONFIG_RPMSG_CHAR=m
+CONFIG_RPMSG_CTRL=m
+CONFIG_RPMSG_NS=m
+CONFIG_RPMSG_QCOM_GLINK=m
+CONFIG_RPMSG_QCOM_GLINK_RPM=m
+CONFIG_RPMSG_VIRTIO=m
+# end of Rpmsg drivers
+
+CONFIG_SOUNDWIRE=m
+
+#
+# SoundWire Devices
+#
+CONFIG_SOUNDWIRE_AMD=m
+CONFIG_SOUNDWIRE_CADENCE=m
+CONFIG_SOUNDWIRE_INTEL=m
+CONFIG_SOUNDWIRE_QCOM=m
+CONFIG_SOUNDWIRE_GENERIC_ALLOCATION=m
+
+#
+# SOC (System On Chip) specific Drivers
+#
+
+#
+# Amlogic SoC drivers
+#
+# end of Amlogic SoC drivers
+
+#
+# Broadcom SoC drivers
+#
+# end of Broadcom SoC drivers
+
+#
+# NXP/Freescale QorIQ SoC drivers
+#
+# end of NXP/Freescale QorIQ SoC drivers
+
+#
+# fujitsu SoC drivers
+#
+# end of fujitsu SoC drivers
+
+#
+# i.MX SoC drivers
+#
+# end of i.MX SoC drivers
+
+#
+# Enable LiteX SoC Builder specific drivers
+#
+# end of Enable LiteX SoC Builder specific drivers
+
+CONFIG_WPCM450_SOC=m
+
+#
+# Qualcomm SoC drivers
+#
+CONFIG_QCOM_PMIC_PDCHARGER_ULOG=m
+CONFIG_QCOM_QMI_HELPERS=m
+# end of Qualcomm SoC drivers
+
+CONFIG_SOC_TI=y
+
+#
+# Xilinx SoC drivers
+#
+# end of Xilinx SoC drivers
+# end of SOC (System On Chip) specific Drivers
+
+#
+# PM Domains
+#
+
+#
+# Amlogic PM Domains
+#
+# end of Amlogic PM Domains
+
+#
+# Broadcom PM Domains
+#
+# end of Broadcom PM Domains
+
+#
+# i.MX PM Domains
+#
+# end of i.MX PM Domains
+
+#
+# Qualcomm PM Domains
+#
+# end of Qualcomm PM Domains
+# end of PM Domains
+
+CONFIG_PM_DEVFREQ=y
+
+#
+# DEVFREQ Governors
+#
+CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=m
+CONFIG_DEVFREQ_GOV_PERFORMANCE=m
+CONFIG_DEVFREQ_GOV_POWERSAVE=m
+CONFIG_DEVFREQ_GOV_USERSPACE=m
+CONFIG_DEVFREQ_GOV_PASSIVE=m
+
+#
+# DEVFREQ Drivers
+#
+CONFIG_PM_DEVFREQ_EVENT=y
+CONFIG_EXTCON=y
+
+#
+# Extcon Device Drivers
+#
+CONFIG_EXTCON_ADC_JACK=m
+CONFIG_EXTCON_AXP288=m
+CONFIG_EXTCON_FSA9480=m
+CONFIG_EXTCON_GPIO=m
+CONFIG_EXTCON_INTEL_INT3496=m
+CONFIG_EXTCON_INTEL_CHT_WC=m
+CONFIG_EXTCON_INTEL_MRFLD=m
+CONFIG_EXTCON_MAX14577=m
+CONFIG_EXTCON_MAX3355=m
+CONFIG_EXTCON_MAX77693=m
+CONFIG_EXTCON_MAX77843=m
+CONFIG_EXTCON_MAX8997=m
+CONFIG_EXTCON_PALMAS=m
+CONFIG_EXTCON_PTN5150=m
+CONFIG_EXTCON_RT8973A=m
+CONFIG_EXTCON_SM5502=m
+CONFIG_EXTCON_USB_GPIO=m
+CONFIG_EXTCON_USBC_CROS_EC=m
+CONFIG_EXTCON_USBC_TUSB320=m
+CONFIG_MEMORY=y
+CONFIG_FPGA_DFL_EMIF=m
+CONFIG_IIO=m
+CONFIG_IIO_BUFFER=y
+CONFIG_IIO_BUFFER_CB=m
+CONFIG_IIO_BUFFER_DMA=m
+CONFIG_IIO_BUFFER_DMAENGINE=m
+CONFIG_IIO_BUFFER_HW_CONSUMER=m
+CONFIG_IIO_KFIFO_BUF=m
+CONFIG_IIO_TRIGGERED_BUFFER=m
+CONFIG_IIO_CONFIGFS=m
+CONFIG_IIO_GTS_HELPER=m
+CONFIG_IIO_TRIGGER=y
+CONFIG_IIO_CONSUMERS_PER_TRIGGER=2
+CONFIG_IIO_SW_DEVICE=m
+CONFIG_IIO_SW_TRIGGER=m
+CONFIG_IIO_TRIGGERED_EVENT=m
+CONFIG_IIO_BACKEND=m
+
+#
+# Accelerometers
+#
+CONFIG_ADIS16201=m
+CONFIG_ADIS16209=m
+CONFIG_ADXL313=m
+CONFIG_ADXL313_I2C=m
+CONFIG_ADXL313_SPI=m
+CONFIG_ADXL355=m
+CONFIG_ADXL355_I2C=m
+CONFIG_ADXL355_SPI=m
+CONFIG_ADXL367=m
+CONFIG_ADXL367_SPI=m
+CONFIG_ADXL367_I2C=m
+CONFIG_ADXL372=m
+CONFIG_ADXL372_SPI=m
+CONFIG_ADXL372_I2C=m
+CONFIG_BMA220=m
+CONFIG_BMA400=m
+CONFIG_BMA400_I2C=m
+CONFIG_BMA400_SPI=m
+CONFIG_BMC150_ACCEL=m
+CONFIG_BMC150_ACCEL_I2C=m
+CONFIG_BMC150_ACCEL_SPI=m
+CONFIG_BMI088_ACCEL=m
+CONFIG_BMI088_ACCEL_I2C=m
+CONFIG_BMI088_ACCEL_SPI=m
+CONFIG_DA280=m
+CONFIG_DA311=m
+CONFIG_DMARD06=m
+CONFIG_DMARD09=m
+CONFIG_DMARD10=m
+CONFIG_FXLS8962AF=m
+CONFIG_FXLS8962AF_I2C=m
+CONFIG_FXLS8962AF_SPI=m
+CONFIG_HID_SENSOR_ACCEL_3D=m
+CONFIG_IIO_CROS_EC_ACCEL_LEGACY=m
+CONFIG_IIO_ST_ACCEL_3AXIS=m
+CONFIG_IIO_ST_ACCEL_I2C_3AXIS=m
+CONFIG_IIO_ST_ACCEL_SPI_3AXIS=m
+CONFIG_IIO_KX022A=m
+CONFIG_IIO_KX022A_SPI=m
+CONFIG_IIO_KX022A_I2C=m
+CONFIG_KXSD9=m
+CONFIG_KXSD9_SPI=m
+CONFIG_KXSD9_I2C=m
+CONFIG_KXCJK1013=m
+CONFIG_MC3230=m
+CONFIG_MMA7455=m
+CONFIG_MMA7455_I2C=m
+CONFIG_MMA7455_SPI=m
+CONFIG_MMA7660=m
+CONFIG_MMA8452=m
+CONFIG_MMA9551_CORE=m
+CONFIG_MMA9551=m
+CONFIG_MMA9553=m
+CONFIG_MSA311=m
+CONFIG_MXC4005=m
+CONFIG_MXC6255=m
+CONFIG_SCA3000=m
+CONFIG_SCA3300=m
+CONFIG_STK8312=m
+CONFIG_STK8BA50=m
+# end of Accelerometers
+
+#
+# Analog to digital converters
+#
+CONFIG_AD_SIGMA_DELTA=m
+CONFIG_AD4130=m
+CONFIG_AD7091R=m
+CONFIG_AD7091R5=m
+CONFIG_AD7091R8=m
+CONFIG_AD7124=m
+CONFIG_AD7173=m
+CONFIG_AD7192=m
+CONFIG_AD7266=m
+CONFIG_AD7280=m
+CONFIG_AD7291=m
+CONFIG_AD7292=m
+CONFIG_AD7298=m
+CONFIG_AD7380=m
+CONFIG_AD7476=m
+CONFIG_AD7606=m
+CONFIG_AD7606_IFACE_PARALLEL=m
+CONFIG_AD7606_IFACE_SPI=m
+CONFIG_AD7766=m
+CONFIG_AD7768_1=m
+CONFIG_AD7780=m
+CONFIG_AD7791=m
+CONFIG_AD7793=m
+CONFIG_AD7887=m
+CONFIG_AD7923=m
+CONFIG_AD7944=m
+CONFIG_AD7949=m
+CONFIG_AD799X=m
+CONFIG_AD9467=m
+CONFIG_AXP20X_ADC=m
+CONFIG_AXP288_ADC=m
+CONFIG_CC10001_ADC=m
+CONFIG_DA9150_GPADC=m
+CONFIG_DLN2_ADC=m
+CONFIG_ENVELOPE_DETECTOR=m
+CONFIG_HI8435=m
+CONFIG_HX711=m
+CONFIG_INA2XX_ADC=m
+CONFIG_INTEL_MRFLD_ADC=m
+CONFIG_LP8788_ADC=m
+CONFIG_LTC2309=m
+CONFIG_LTC2471=m
+CONFIG_LTC2485=m
+CONFIG_LTC2496=m
+CONFIG_LTC2497=m
+CONFIG_MAX1027=m
+CONFIG_MAX11100=m
+CONFIG_MAX1118=m
+CONFIG_MAX11205=m
+CONFIG_MAX11410=m
+CONFIG_MAX1241=m
+CONFIG_MAX1363=m
+CONFIG_MAX34408=m
+CONFIG_MAX77541_ADC=m
+CONFIG_MAX9611=m
+CONFIG_MCP320X=m
+CONFIG_MCP3422=m
+CONFIG_MCP3564=m
+CONFIG_MCP3911=m
+CONFIG_MEDIATEK_MT6359_AUXADC=m
+CONFIG_MEDIATEK_MT6360_ADC=m
+CONFIG_MEDIATEK_MT6370_ADC=m
+CONFIG_MEN_Z188_ADC=m
+CONFIG_MP2629_ADC=m
+CONFIG_NAU7802=m
+CONFIG_PAC1934=m
+CONFIG_PALMAS_GPADC=m
+CONFIG_RICHTEK_RTQ6056=m
+CONFIG_SD_ADC_MODULATOR=m
+CONFIG_TI_ADC081C=m
+CONFIG_TI_ADC0832=m
+CONFIG_TI_ADC084S021=m
+CONFIG_TI_ADC12138=m
+CONFIG_TI_ADC108S102=m
+CONFIG_TI_ADC128S052=m
+CONFIG_TI_ADC161S626=m
+CONFIG_TI_ADS1015=m
+CONFIG_TI_ADS1119=m
+CONFIG_TI_ADS7924=m
+CONFIG_TI_ADS1100=m
+CONFIG_TI_ADS1298=m
+CONFIG_TI_ADS7950=m
+CONFIG_TI_ADS8344=m
+CONFIG_TI_ADS8688=m
+CONFIG_TI_ADS124S08=m
+CONFIG_TI_ADS131E08=m
+CONFIG_TI_LMP92064=m
+CONFIG_TI_TLC4541=m
+CONFIG_TI_TSC2046=m
+CONFIG_TWL4030_MADC=m
+CONFIG_TWL6030_GPADC=m
+CONFIG_VF610_ADC=m
+CONFIG_VIPERBOARD_ADC=m
+CONFIG_XILINX_XADC=m
+# end of Analog to digital converters
+
+#
+# Analog to digital and digital to analog converters
+#
+CONFIG_AD74115=m
+CONFIG_AD74413R=m
+# end of Analog to digital and digital to analog converters
+
+#
+# Analog Front Ends
+#
+CONFIG_IIO_RESCALE=m
+# end of Analog Front Ends
+
+#
+# Amplifiers
+#
+CONFIG_AD8366=m
+CONFIG_ADA4250=m
+CONFIG_HMC425=m
+# end of Amplifiers
+
+#
+# Capacitance to digital converters
+#
+CONFIG_AD7150=m
+CONFIG_AD7746=m
+# end of Capacitance to digital converters
+
+#
+# Chemical Sensors
+#
+CONFIG_AOSONG_AGS02MA=m
+CONFIG_ATLAS_PH_SENSOR=m
+CONFIG_ATLAS_EZO_SENSOR=m
+CONFIG_BME680=m
+CONFIG_BME680_I2C=m
+CONFIG_BME680_SPI=m
+CONFIG_CCS811=m
+CONFIG_ENS160=m
+CONFIG_ENS160_I2C=m
+CONFIG_ENS160_SPI=m
+CONFIG_IAQCORE=m
+CONFIG_PMS7003=m
+CONFIG_SCD30_CORE=m
+CONFIG_SCD30_I2C=m
+CONFIG_SCD30_SERIAL=m
+CONFIG_SCD4X=m
+CONFIG_SENSIRION_SGP30=m
+CONFIG_SENSIRION_SGP40=m
+CONFIG_SPS30=m
+CONFIG_SPS30_I2C=m
+CONFIG_SPS30_SERIAL=m
+CONFIG_SENSEAIR_SUNRISE_CO2=m
+CONFIG_VZ89X=m
+# end of Chemical Sensors
+
+CONFIG_IIO_CROS_EC_SENSORS_CORE=m
+CONFIG_IIO_CROS_EC_SENSORS=m
+CONFIG_IIO_CROS_EC_SENSORS_LID_ANGLE=m
+
+#
+# Hid Sensor IIO Common
+#
+CONFIG_HID_SENSOR_IIO_COMMON=m
+CONFIG_HID_SENSOR_IIO_TRIGGER=m
+# end of Hid Sensor IIO Common
+
+CONFIG_IIO_INV_SENSORS_TIMESTAMP=m
+CONFIG_IIO_MS_SENSORS_I2C=m
+
+#
+# IIO SCMI Sensors
+#
+# end of IIO SCMI Sensors
+
+#
+# SSP Sensor Common
+#
+CONFIG_IIO_SSP_SENSORS_COMMONS=m
+CONFIG_IIO_SSP_SENSORHUB=m
+# end of SSP Sensor Common
+
+CONFIG_IIO_ST_SENSORS_I2C=m
+CONFIG_IIO_ST_SENSORS_SPI=m
+CONFIG_IIO_ST_SENSORS_CORE=m
+
+#
+# Digital to analog converters
+#
+CONFIG_AD3552R=m
+CONFIG_AD5064=m
+CONFIG_AD5360=m
+CONFIG_AD5380=m
+CONFIG_AD5421=m
+CONFIG_AD5446=m
+CONFIG_AD5449=m
+CONFIG_AD5592R_BASE=m
+CONFIG_AD5592R=m
+CONFIG_AD5593R=m
+CONFIG_AD5504=m
+CONFIG_AD5624R_SPI=m
+CONFIG_AD9739A=m
+CONFIG_LTC2688=m
+CONFIG_AD5686=m
+CONFIG_AD5686_SPI=m
+CONFIG_AD5696_I2C=m
+CONFIG_AD5755=m
+CONFIG_AD5758=m
+CONFIG_AD5761=m
+CONFIG_AD5764=m
+CONFIG_AD5766=m
+CONFIG_AD5770R=m
+CONFIG_AD5791=m
+CONFIG_AD7293=m
+CONFIG_AD7303=m
+CONFIG_AD8801=m
+CONFIG_DPOT_DAC=m
+CONFIG_DS4424=m
+CONFIG_LTC1660=m
+CONFIG_LTC2632=m
+CONFIG_M62332=m
+CONFIG_MAX517=m
+CONFIG_MAX5522=m
+CONFIG_MAX5821=m
+CONFIG_MCP4725=m
+CONFIG_MCP4728=m
+CONFIG_MCP4821=m
+CONFIG_MCP4922=m
+CONFIG_TI_DAC082S085=m
+CONFIG_TI_DAC5571=m
+CONFIG_TI_DAC7311=m
+CONFIG_TI_DAC7612=m
+CONFIG_VF610_DAC=m
+# end of Digital to analog converters
+
+#
+# IIO dummy driver
+#
+# CONFIG_IIO_SIMPLE_DUMMY is not set
+# end of IIO dummy driver
+
+#
+# Filters
+#
+CONFIG_ADMV8818=m
+# end of Filters
+
+#
+# Frequency Synthesizers DDS/PLL
+#
+
+#
+# Clock Generator/Distribution
+#
+CONFIG_AD9523=m
+# end of Clock Generator/Distribution
+
+#
+# Phase-Locked Loop (PLL) frequency synthesizers
+#
+CONFIG_ADF4350=m
+CONFIG_ADF4371=m
+CONFIG_ADF4377=m
+CONFIG_ADMFM2000=m
+CONFIG_ADMV1013=m
+CONFIG_ADMV1014=m
+CONFIG_ADMV4420=m
+CONFIG_ADRF6780=m
+# end of Phase-Locked Loop (PLL) frequency synthesizers
+# end of Frequency Synthesizers DDS/PLL
+
+#
+# Digital gyroscope sensors
+#
+CONFIG_ADIS16080=m
+CONFIG_ADIS16130=m
+CONFIG_ADIS16136=m
+CONFIG_ADIS16260=m
+CONFIG_ADXRS290=m
+CONFIG_ADXRS450=m
+CONFIG_BMG160=m
+CONFIG_BMG160_I2C=m
+CONFIG_BMG160_SPI=m
+CONFIG_FXAS21002C=m
+CONFIG_FXAS21002C_I2C=m
+CONFIG_FXAS21002C_SPI=m
+CONFIG_HID_SENSOR_GYRO_3D=m
+CONFIG_MPU3050=m
+CONFIG_MPU3050_I2C=m
+CONFIG_IIO_ST_GYRO_3AXIS=m
+CONFIG_IIO_ST_GYRO_I2C_3AXIS=m
+CONFIG_IIO_ST_GYRO_SPI_3AXIS=m
+CONFIG_ITG3200=m
+# end of Digital gyroscope sensors
+
+#
+# Health Sensors
+#
+
+#
+# Heart Rate Monitors
+#
+CONFIG_AFE4403=m
+CONFIG_AFE4404=m
+CONFIG_MAX30100=m
+CONFIG_MAX30102=m
+# end of Heart Rate Monitors
+# end of Health Sensors
+
+#
+# Humidity sensors
+#
+CONFIG_AM2315=m
+CONFIG_DHT11=m
+CONFIG_HDC100X=m
+CONFIG_HDC2010=m
+CONFIG_HDC3020=m
+CONFIG_HID_SENSOR_HUMIDITY=m
+CONFIG_HTS221=m
+CONFIG_HTS221_I2C=m
+CONFIG_HTS221_SPI=m
+CONFIG_HTU21=m
+CONFIG_SI7005=m
+CONFIG_SI7020=m
+# end of Humidity sensors
+
+#
+# Inertial measurement units
+#
+CONFIG_ADIS16400=m
+CONFIG_ADIS16460=m
+CONFIG_ADIS16475=m
+CONFIG_ADIS16480=m
+CONFIG_BMI160=m
+CONFIG_BMI160_I2C=m
+CONFIG_BMI160_SPI=m
+CONFIG_BMI323=m
+CONFIG_BMI323_I2C=m
+CONFIG_BMI323_SPI=m
+CONFIG_BOSCH_BNO055=m
+CONFIG_BOSCH_BNO055_SERIAL=m
+CONFIG_BOSCH_BNO055_I2C=m
+CONFIG_FXOS8700=m
+CONFIG_FXOS8700_I2C=m
+CONFIG_FXOS8700_SPI=m
+CONFIG_KMX61=m
+CONFIG_INV_ICM42600=m
+CONFIG_INV_ICM42600_I2C=m
+CONFIG_INV_ICM42600_SPI=m
+CONFIG_INV_MPU6050_IIO=m
+CONFIG_INV_MPU6050_I2C=m
+CONFIG_INV_MPU6050_SPI=m
+CONFIG_IIO_ST_LSM6DSX=m
+CONFIG_IIO_ST_LSM6DSX_I2C=m
+CONFIG_IIO_ST_LSM6DSX_SPI=m
+CONFIG_IIO_ST_LSM9DS0=m
+CONFIG_IIO_ST_LSM9DS0_I2C=m
+CONFIG_IIO_ST_LSM9DS0_SPI=m
+# end of Inertial measurement units
+
+CONFIG_IIO_ADIS_LIB=m
+CONFIG_IIO_ADIS_LIB_BUFFER=y
+
+#
+# Light sensors
+#
+CONFIG_ACPI_ALS=m
+CONFIG_ADJD_S311=m
+CONFIG_ADUX1020=m
+CONFIG_AL3010=m
+CONFIG_AL3320A=m
+CONFIG_APDS9300=m
+CONFIG_APDS9306=m
+CONFIG_APDS9960=m
+CONFIG_AS73211=m
+CONFIG_BH1750=m
+CONFIG_BH1780=m
+CONFIG_CM32181=m
+CONFIG_CM3232=m
+CONFIG_CM3323=m
+CONFIG_CM3605=m
+CONFIG_CM36651=m
+CONFIG_IIO_CROS_EC_LIGHT_PROX=m
+CONFIG_GP2AP002=m
+CONFIG_GP2AP020A00F=m
+CONFIG_IQS621_ALS=m
+CONFIG_SENSORS_ISL29018=m
+CONFIG_SENSORS_ISL29028=m
+CONFIG_ISL29125=m
+CONFIG_ISL76682=m
+CONFIG_HID_SENSOR_ALS=m
+CONFIG_HID_SENSOR_PROX=m
+CONFIG_JSA1212=m
+CONFIG_ROHM_BU27008=m
+CONFIG_ROHM_BU27034=m
+CONFIG_RPR0521=m
+CONFIG_SENSORS_LM3533=m
+CONFIG_LTR390=m
+CONFIG_LTR501=m
+CONFIG_LTRF216A=m
+CONFIG_LV0104CS=m
+CONFIG_MAX44000=m
+CONFIG_MAX44009=m
+CONFIG_NOA1305=m
+CONFIG_OPT3001=m
+CONFIG_OPT4001=m
+CONFIG_PA12203001=m
+CONFIG_SI1133=m
+CONFIG_SI1145=m
+CONFIG_STK3310=m
+CONFIG_ST_UVIS25=m
+CONFIG_ST_UVIS25_I2C=m
+CONFIG_ST_UVIS25_SPI=m
+CONFIG_TCS3414=m
+CONFIG_TCS3472=m
+CONFIG_SENSORS_TSL2563=m
+CONFIG_TSL2583=m
+CONFIG_TSL2591=m
+CONFIG_TSL2772=m
+CONFIG_TSL4531=m
+CONFIG_US5182D=m
+CONFIG_VCNL4000=m
+CONFIG_VCNL4035=m
+CONFIG_VEML6030=m
+CONFIG_VEML6040=m
+CONFIG_VEML6070=m
+CONFIG_VEML6075=m
+CONFIG_VL6180=m
+CONFIG_ZOPT2201=m
+# end of Light sensors
+
+#
+# Magnetometer sensors
+#
+CONFIG_AK8974=m
+CONFIG_AK8975=m
+CONFIG_AK09911=m
+CONFIG_BMC150_MAGN=m
+CONFIG_BMC150_MAGN_I2C=m
+CONFIG_BMC150_MAGN_SPI=m
+CONFIG_MAG3110=m
+CONFIG_HID_SENSOR_MAGNETOMETER_3D=m
+CONFIG_MMC35240=m
+CONFIG_IIO_ST_MAGN_3AXIS=m
+CONFIG_IIO_ST_MAGN_I2C_3AXIS=m
+CONFIG_IIO_ST_MAGN_SPI_3AXIS=m
+CONFIG_SENSORS_HMC5843=m
+CONFIG_SENSORS_HMC5843_I2C=m
+CONFIG_SENSORS_HMC5843_SPI=m
+CONFIG_SENSORS_RM3100=m
+CONFIG_SENSORS_RM3100_I2C=m
+CONFIG_SENSORS_RM3100_SPI=m
+CONFIG_TI_TMAG5273=m
+CONFIG_YAMAHA_YAS530=m
+# end of Magnetometer sensors
+
+#
+# Multiplexers
+#
+CONFIG_IIO_MUX=m
+# end of Multiplexers
+
+#
+# Inclinometer sensors
+#
+CONFIG_HID_SENSOR_INCLINOMETER_3D=m
+CONFIG_HID_SENSOR_DEVICE_ROTATION=m
+# end of Inclinometer sensors
+
+#
+# Triggers - standalone
+#
+CONFIG_IIO_HRTIMER_TRIGGER=m
+CONFIG_IIO_INTERRUPT_TRIGGER=m
+CONFIG_IIO_TIGHTLOOP_TRIGGER=m
+CONFIG_IIO_SYSFS_TRIGGER=m
+# end of Triggers - standalone
+
+#
+# Linear and angular position sensors
+#
+CONFIG_IQS624_POS=m
+CONFIG_HID_SENSOR_CUSTOM_INTEL_HINGE=m
+# end of Linear and angular position sensors
+
+#
+# Digital potentiometers
+#
+CONFIG_AD5110=m
+CONFIG_AD5272=m
+CONFIG_DS1803=m
+CONFIG_MAX5432=m
+CONFIG_MAX5481=m
+CONFIG_MAX5487=m
+CONFIG_MCP4018=m
+CONFIG_MCP4131=m
+CONFIG_MCP4531=m
+CONFIG_MCP41010=m
+CONFIG_TPL0102=m
+CONFIG_X9250=m
+# end of Digital potentiometers
+
+#
+# Digital potentiostats
+#
+CONFIG_LMP91000=m
+# end of Digital potentiostats
+
+#
+# Pressure sensors
+#
+CONFIG_ABP060MG=m
+CONFIG_ROHM_BM1390=m
+CONFIG_BMP280=m
+CONFIG_BMP280_I2C=m
+CONFIG_BMP280_SPI=m
+CONFIG_IIO_CROS_EC_BARO=m
+CONFIG_DLHL60D=m
+CONFIG_DPS310=m
+CONFIG_HID_SENSOR_PRESS=m
+CONFIG_HP03=m
+CONFIG_HSC030PA=m
+CONFIG_HSC030PA_I2C=m
+CONFIG_HSC030PA_SPI=m
+CONFIG_ICP10100=m
+CONFIG_MPL115=m
+CONFIG_MPL115_I2C=m
+CONFIG_MPL115_SPI=m
+CONFIG_MPL3115=m
+CONFIG_MPRLS0025PA=m
+CONFIG_MPRLS0025PA_I2C=m
+CONFIG_MPRLS0025PA_SPI=m
+CONFIG_MS5611=m
+CONFIG_MS5611_I2C=m
+CONFIG_MS5611_SPI=m
+CONFIG_MS5637=m
+CONFIG_IIO_ST_PRESS=m
+CONFIG_IIO_ST_PRESS_I2C=m
+CONFIG_IIO_ST_PRESS_SPI=m
+CONFIG_T5403=m
+CONFIG_HP206C=m
+CONFIG_ZPA2326=m
+CONFIG_ZPA2326_I2C=m
+CONFIG_ZPA2326_SPI=m
+# end of Pressure sensors
+
+#
+# Lightning sensors
+#
+CONFIG_AS3935=m
+# end of Lightning sensors
+
+#
+# Proximity and distance sensors
+#
+CONFIG_CROS_EC_MKBP_PROXIMITY=m
+CONFIG_IRSD200=m
+CONFIG_ISL29501=m
+CONFIG_LIDAR_LITE_V2=m
+CONFIG_MB1232=m
+CONFIG_PING=m
+CONFIG_RFD77402=m
+CONFIG_SRF04=m
+CONFIG_SX_COMMON=m
+CONFIG_SX9310=m
+CONFIG_SX9324=m
+CONFIG_SX9360=m
+CONFIG_SX9500=m
+CONFIG_SRF08=m
+CONFIG_VCNL3020=m
+CONFIG_VL53L0X_I2C=m
+# end of Proximity and distance sensors
+
+#
+# Resolver to digital converters
+#
+CONFIG_AD2S90=m
+CONFIG_AD2S1200=m
+CONFIG_AD2S1210=m
+# end of Resolver to digital converters
+
+#
+# Temperature sensors
+#
+CONFIG_IQS620AT_TEMP=m
+CONFIG_LTC2983=m
+CONFIG_MAXIM_THERMOCOUPLE=m
+CONFIG_HID_SENSOR_TEMP=m
+CONFIG_MLX90614=m
+CONFIG_MLX90632=m
+CONFIG_MLX90635=m
+CONFIG_TMP006=m
+CONFIG_TMP007=m
+CONFIG_TMP117=m
+CONFIG_TSYS01=m
+CONFIG_TSYS02D=m
+CONFIG_MAX30208=m
+CONFIG_MAX31856=m
+CONFIG_MAX31865=m
+CONFIG_MCP9600=m
+# end of Temperature sensors
+
+CONFIG_NTB=m
+CONFIG_NTB_MSI=y
+CONFIG_NTB_AMD=m
+CONFIG_NTB_IDT=m
+CONFIG_NTB_INTEL=m
+CONFIG_NTB_EPF=m
+CONFIG_NTB_SWITCHTEC=m
+# CONFIG_NTB_PINGPONG is not set
+# CONFIG_NTB_TOOL is not set
+# CONFIG_NTB_PERF is not set
+# CONFIG_NTB_MSI_TEST is not set
+CONFIG_NTB_TRANSPORT=m
+CONFIG_PWM=y
+# CONFIG_PWM_DEBUG is not set
+CONFIG_PWM_CLK=m
+CONFIG_PWM_CRC=m
+CONFIG_PWM_CROS_EC=m
+CONFIG_PWM_DWC_CORE=m
+CONFIG_PWM_DWC=m
+CONFIG_PWM_GPIO=m
+CONFIG_PWM_IQS620A=m
+CONFIG_PWM_LP3943=m
+CONFIG_PWM_LPSS=m
+CONFIG_PWM_LPSS_PCI=m
+CONFIG_PWM_LPSS_PLATFORM=m
+CONFIG_PWM_PCA9685=m
+CONFIG_PWM_TWL=m
+CONFIG_PWM_TWL_LED=m
+
+#
+# IRQ chip support
+#
+CONFIG_LAN966X_OIC=m
+CONFIG_MADERA_IRQ=m
+# end of IRQ chip support
+
+CONFIG_IPACK_BUS=m
+CONFIG_BOARD_TPCI200=m
+CONFIG_SERIAL_IPOCTAL=m
+CONFIG_RESET_CONTROLLER=y
+CONFIG_RESET_GPIO=m
+CONFIG_RESET_TI_SYSCON=m
+CONFIG_RESET_TI_TPS380X=m
+
+#
+# PHY Subsystem
+#
+CONFIG_GENERIC_PHY=y
+CONFIG_GENERIC_PHY_MIPI_DPHY=y
+CONFIG_USB_LGM_PHY=m
+CONFIG_PHY_CAN_TRANSCEIVER=m
+
+#
+# PHY drivers for Broadcom platforms
+#
+CONFIG_BCM_KONA_USB2_PHY=m
+# end of PHY drivers for Broadcom platforms
+
+CONFIG_PHY_PXA_28NM_HSIC=m
+CONFIG_PHY_PXA_28NM_USB2=m
+CONFIG_PHY_CPCAP_USB=m
+CONFIG_PHY_QCOM_USB_HS=m
+CONFIG_PHY_QCOM_USB_HSIC=m
+CONFIG_PHY_SAMSUNG_USB2=m
+CONFIG_PHY_TUSB1210=m
+CONFIG_PHY_INTEL_LGM_EMMC=m
+# end of PHY Subsystem
+
+CONFIG_POWERCAP=y
+CONFIG_INTEL_RAPL_CORE=m
+CONFIG_INTEL_RAPL=m
+CONFIG_INTEL_RAPL_TPMI=m
+CONFIG_IDLE_INJECT=y
+CONFIG_MCB=m
+CONFIG_MCB_PCI=m
+CONFIG_MCB_LPC=m
+
+#
+# Performance monitor support
+#
+CONFIG_DWC_PCIE_PMU=m
+CONFIG_CXL_PMU=m
+# end of Performance monitor support
+
+CONFIG_RAS=y
+CONFIG_RAS_CEC=y
+# CONFIG_RAS_CEC_DEBUG is not set
+CONFIG_AMD_ATL=m
+CONFIG_RAS_FMPM=m
+CONFIG_USB4=m
+# CONFIG_USB4_DEBUGFS_WRITE is not set
+# CONFIG_USB4_DMA_TEST is not set
+
+#
+# Android
+#
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_ANDROID_BINDERFS=y
+CONFIG_ANDROID_BINDER_DEVICES="binder,hwbinder,vndbinder"
+# CONFIG_ANDROID_BINDER_IPC_SELFTEST is not set
+# end of Android
+
+CONFIG_LIBNVDIMM=m
+CONFIG_BLK_DEV_PMEM=m
+CONFIG_ND_CLAIM=y
+CONFIG_ND_BTT=m
+CONFIG_BTT=y
+CONFIG_ND_PFN=m
+CONFIG_NVDIMM_PFN=y
+CONFIG_NVDIMM_DAX=y
+CONFIG_NVDIMM_KEYS=y
+# CONFIG_NVDIMM_SECURITY_TEST is not set
+CONFIG_DAX=y
+CONFIG_DEV_DAX=m
+CONFIG_DEV_DAX_PMEM=m
+CONFIG_DEV_DAX_HMEM=m
+CONFIG_DEV_DAX_CXL=m
+CONFIG_DEV_DAX_HMEM_DEVICES=y
+CONFIG_DEV_DAX_KMEM=m
+CONFIG_NVMEM=y
+CONFIG_NVMEM_SYSFS=y
+# CONFIG_NVMEM_LAYOUTS is not set
+CONFIG_NVMEM_RAVE_SP_EEPROM=m
+CONFIG_NVMEM_RMEM=m
+
+#
+# HW tracing support
+#
+CONFIG_STM=m
+CONFIG_STM_PROTO_BASIC=m
+CONFIG_STM_PROTO_SYS_T=m
+# CONFIG_STM_DUMMY is not set
+CONFIG_STM_SOURCE_CONSOLE=m
+CONFIG_STM_SOURCE_HEARTBEAT=m
+CONFIG_STM_SOURCE_FTRACE=m
+CONFIG_INTEL_TH=m
+CONFIG_INTEL_TH_PCI=m
+CONFIG_INTEL_TH_ACPI=m
+CONFIG_INTEL_TH_GTH=m
+CONFIG_INTEL_TH_STH=m
+CONFIG_INTEL_TH_MSU=m
+CONFIG_INTEL_TH_PTI=m
+# CONFIG_INTEL_TH_DEBUG is not set
+# end of HW tracing support
+
+CONFIG_FPGA=m
+CONFIG_ALTERA_PR_IP_CORE=m
+CONFIG_FPGA_MGR_ALTERA_PS_SPI=m
+CONFIG_FPGA_MGR_ALTERA_CVP=m
+CONFIG_FPGA_MGR_XILINX_CORE=m
+CONFIG_FPGA_MGR_XILINX_SELECTMAP=m
+CONFIG_FPGA_MGR_XILINX_SPI=m
+CONFIG_FPGA_MGR_MACHXO2_SPI=m
+CONFIG_FPGA_BRIDGE=m
+CONFIG_ALTERA_FREEZE_BRIDGE=m
+CONFIG_XILINX_PR_DECOUPLER=m
+CONFIG_FPGA_REGION=m
+CONFIG_FPGA_DFL=m
+CONFIG_FPGA_DFL_FME=m
+CONFIG_FPGA_DFL_FME_MGR=m
+CONFIG_FPGA_DFL_FME_BRIDGE=m
+CONFIG_FPGA_DFL_FME_REGION=m
+CONFIG_FPGA_DFL_AFU=m
+CONFIG_FPGA_DFL_NIOS_INTEL_PAC_N3000=m
+CONFIG_FPGA_DFL_PCI=m
+CONFIG_FPGA_M10_BMC_SEC_UPDATE=m
+CONFIG_FPGA_MGR_MICROCHIP_SPI=m
+CONFIG_FPGA_MGR_LATTICE_SYSCONFIG=m
+CONFIG_FPGA_MGR_LATTICE_SYSCONFIG_SPI=m
+CONFIG_TEE=m
+CONFIG_AMDTEE=m
+CONFIG_MULTIPLEXER=m
+
+#
+# Multiplexer drivers
+#
+CONFIG_MUX_ADG792A=m
+CONFIG_MUX_ADGS1408=m
+CONFIG_MUX_GPIO=m
+# end of Multiplexer drivers
+
+CONFIG_PM_OPP=y
+CONFIG_SIOX=m
+CONFIG_SIOX_BUS_GPIO=m
+CONFIG_SLIMBUS=m
+CONFIG_SLIM_QCOM_CTRL=m
+CONFIG_INTERCONNECT=y
+CONFIG_COUNTER=m
+CONFIG_INTEL_QEP=m
+CONFIG_INTERRUPT_CNT=m
+CONFIG_MOST=m
+CONFIG_MOST_USB_HDM=m
+CONFIG_MOST_CDEV=m
+CONFIG_MOST_SND=m
+# CONFIG_PECI is not set
+CONFIG_HTE=y
+CONFIG_DPLL=y
+# end of Device Drivers
+
+#
+# File systems
+#
+CONFIG_DCACHE_WORD_ACCESS=y
+CONFIG_VALIDATE_FS_PARSER=y
+CONFIG_FS_IOMAP=y
+CONFIG_FS_STACK=y
+CONFIG_BUFFER_HEAD=y
+CONFIG_LEGACY_DIRECT_IO=y
+# CONFIG_EXT2_FS is not set
+# CONFIG_EXT3_FS is not set
+CONFIG_EXT4_FS=m
+CONFIG_EXT4_USE_FOR_EXT2=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+# CONFIG_EXT4_DEBUG is not set
+CONFIG_JBD2=m
+# CONFIG_JBD2_DEBUG is not set
+CONFIG_FS_MBCACHE=m
+CONFIG_REISERFS_FS=m
+# CONFIG_REISERFS_CHECK is not set
+CONFIG_REISERFS_PROC_INFO=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+# CONFIG_JFS_DEBUG is not set
+CONFIG_JFS_STATISTICS=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_SUPPORT_V4=y
+CONFIG_XFS_SUPPORT_ASCII_CI=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_XFS_DRAIN_INTENTS=y
+CONFIG_XFS_LIVE_HOOKS=y
+CONFIG_XFS_MEMORY_BUFS=y
+CONFIG_XFS_BTREE_IN_MEM=y
+CONFIG_XFS_ONLINE_SCRUB=y
+# CONFIG_XFS_ONLINE_SCRUB_STATS is not set
+CONFIG_XFS_ONLINE_REPAIR=y
+# CONFIG_XFS_WARN is not set
+# CONFIG_XFS_DEBUG is not set
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
+CONFIG_OCFS2_FS_O2CB=m
+CONFIG_OCFS2_FS_USERSPACE_CLUSTER=m
+CONFIG_OCFS2_FS_STATS=y
+CONFIG_OCFS2_DEBUG_MASKLOG=y
+# CONFIG_OCFS2_DEBUG_FS is not set
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+# CONFIG_BTRFS_FS_RUN_SANITY_TESTS is not set
+# CONFIG_BTRFS_DEBUG is not set
+# CONFIG_BTRFS_ASSERT is not set
+# CONFIG_BTRFS_FS_REF_VERIFY is not set
+CONFIG_NILFS2_FS=m
+CONFIG_F2FS_FS=m
+CONFIG_F2FS_STAT_FS=y
+CONFIG_F2FS_FS_XATTR=y
+CONFIG_F2FS_FS_POSIX_ACL=y
+CONFIG_F2FS_FS_SECURITY=y
+CONFIG_F2FS_CHECK_FS=y
+# CONFIG_F2FS_FAULT_INJECTION is not set
+CONFIG_F2FS_FS_COMPRESSION=y
+CONFIG_F2FS_FS_LZO=y
+CONFIG_F2FS_FS_LZORLE=y
+CONFIG_F2FS_FS_LZ4=y
+CONFIG_F2FS_FS_LZ4HC=y
+CONFIG_F2FS_FS_ZSTD=y
+CONFIG_F2FS_IOSTAT=y
+CONFIG_F2FS_UNFAIR_RWSEM=y
+CONFIG_BCACHEFS_FS=m
+CONFIG_BCACHEFS_QUOTA=y
+# CONFIG_BCACHEFS_ERASURE_CODING is not set
+CONFIG_BCACHEFS_POSIX_ACL=y
+# CONFIG_BCACHEFS_DEBUG is not set
+# CONFIG_BCACHEFS_TESTS is not set
+CONFIG_BCACHEFS_LOCK_TIME_STATS=y
+# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
+CONFIG_BCACHEFS_SIX_OPTIMISTIC_SPIN=y
+CONFIG_ZONEFS_FS=m
+CONFIG_FS_DAX=y
+CONFIG_FS_DAX_PMD=y
+CONFIG_FS_POSIX_ACL=y
+CONFIG_EXPORTFS=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
+CONFIG_FILE_LOCKING=y
+CONFIG_FS_ENCRYPTION=y
+CONFIG_FS_ENCRYPTION_ALGS=m
+CONFIG_FS_ENCRYPTION_INLINE_CRYPT=y
+CONFIG_FS_VERITY=y
+CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y
+CONFIG_FSNOTIFY=y
+CONFIG_DNOTIFY=y
+CONFIG_INOTIFY_USER=y
+CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
+CONFIG_QUOTA=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+# CONFIG_QUOTA_DEBUG is not set
+CONFIG_QUOTA_TREE=m
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_QUOTACTL=y
+CONFIG_AUTOFS_FS=y
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=m
+CONFIG_VIRTIO_FS=y
+CONFIG_FUSE_DAX=y
+CONFIG_FUSE_PASSTHROUGH=y
+CONFIG_OVERLAY_FS=m
+CONFIG_OVERLAY_FS_REDIRECT_DIR=y
+# CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW is not set
+CONFIG_OVERLAY_FS_INDEX=y
+CONFIG_OVERLAY_FS_XINO_AUTO=y
+CONFIG_OVERLAY_FS_METACOPY=y
+# CONFIG_OVERLAY_FS_DEBUG is not set
+
+#
+# Caches
+#
+CONFIG_NETFS_SUPPORT=m
+CONFIG_NETFS_STATS=y
+CONFIG_NETFS_DEBUG=y
+CONFIG_FSCACHE=y
+CONFIG_FSCACHE_STATS=y
+CONFIG_CACHEFILES=m
+# CONFIG_CACHEFILES_DEBUG is not set
+# CONFIG_CACHEFILES_ERROR_INJECTION is not set
+CONFIG_CACHEFILES_ONDEMAND=y
+# end of Caches
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+# end of CD-ROM/DVD Filesystems
+
+#
+# DOS/FAT/EXFAT/NT Filesystems
+#
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
+CONFIG_FAT_DEFAULT_UTF8=y
+CONFIG_EXFAT_FS=m
+CONFIG_EXFAT_DEFAULT_IOCHARSET="utf8"
+CONFIG_NTFS3_FS=m
+# CONFIG_NTFS3_64BIT_CLUSTER is not set
+CONFIG_NTFS3_LZX_XPRESS=y
+CONFIG_NTFS3_FS_POSIX_ACL=y
+# CONFIG_NTFS_FS is not set
+# end of DOS/FAT/EXFAT/NT Filesystems
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROC_VMCORE=y
+CONFIG_PROC_VMCORE_DEVICE_DUMP=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_PROC_CHILDREN=y
+CONFIG_PROC_PID_ARCH_STATUS=y
+CONFIG_PROC_CPU_RESCTRL=y
+CONFIG_KERNFS=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS_XATTR=y
+CONFIG_TMPFS_INODE64=y
+CONFIG_TMPFS_QUOTA=y
+CONFIG_HUGETLBFS=y
+# CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON is not set
+CONFIG_HUGETLB_PAGE=y
+CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y
+CONFIG_ARCH_HAS_GIGANTIC_PAGE=y
+CONFIG_CONFIGFS_FS=y
+CONFIG_EFIVAR_FS=y
+# end of Pseudo filesystems
+
+CONFIG_MISC_FILESYSTEMS=y
+CONFIG_ORANGEFS_FS=m
+# CONFIG_ADFS_FS is not set
+CONFIG_AFFS_FS=m
+CONFIG_ECRYPT_FS=m
+# CONFIG_ECRYPT_FS_MESSAGING is not set
+CONFIG_HFS_FS=m
+CONFIG_HFSPLUS_FS=m
+CONFIG_BEFS_FS=m
+# CONFIG_BEFS_DEBUG is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+CONFIG_JFFS2_FS=m
+CONFIG_JFFS2_FS_DEBUG=0
+CONFIG_JFFS2_FS_WRITEBUFFER=y
+# CONFIG_JFFS2_FS_WBUF_VERIFY is not set
+CONFIG_JFFS2_SUMMARY=y
+CONFIG_JFFS2_FS_XATTR=y
+CONFIG_JFFS2_FS_POSIX_ACL=y
+CONFIG_JFFS2_FS_SECURITY=y
+# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
+CONFIG_JFFS2_ZLIB=y
+CONFIG_JFFS2_RTIME=y
+CONFIG_UBIFS_FS=m
+# CONFIG_UBIFS_FS_ADVANCED_COMPR is not set
+CONFIG_UBIFS_FS_LZO=y
+CONFIG_UBIFS_FS_ZLIB=y
+CONFIG_UBIFS_FS_ZSTD=y
+CONFIG_UBIFS_ATIME_SUPPORT=y
+CONFIG_UBIFS_FS_XATTR=y
+CONFIG_UBIFS_FS_SECURITY=y
+CONFIG_UBIFS_FS_AUTHENTICATION=y
+CONFIG_CRAMFS=m
+CONFIG_CRAMFS_BLOCKDEV=y
+CONFIG_CRAMFS_MTD=y
+CONFIG_SQUASHFS=m
+# CONFIG_SQUASHFS_FILE_CACHE is not set
+CONFIG_SQUASHFS_FILE_DIRECT=y
+CONFIG_SQUASHFS_DECOMP_SINGLE=y
+CONFIG_SQUASHFS_DECOMP_MULTI=y
+CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU=y
+CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y
+CONFIG_SQUASHFS_MOUNT_DECOMP_THREADS=y
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_ZLIB=y
+CONFIG_SQUASHFS_LZ4=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_SQUASHFS_ZSTD=y
+# CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set
+# CONFIG_SQUASHFS_EMBEDDED is not set
+CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3
+# CONFIG_VXFS_FS is not set
+CONFIG_MINIX_FS=m
+CONFIG_OMFS_FS=m
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_QNX6FS_FS is not set
+CONFIG_ROMFS_FS=m
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_DEFAULT_KMSG_BYTES=10240
+CONFIG_PSTORE_COMPRESS=y
+# CONFIG_PSTORE_CONSOLE is not set
+# CONFIG_PSTORE_PMSG is not set
+# CONFIG_PSTORE_FTRACE is not set
+CONFIG_PSTORE_RAM=m
+CONFIG_PSTORE_ZONE=m
+CONFIG_PSTORE_BLK=m
+CONFIG_PSTORE_BLK_BLKDEV=""
+CONFIG_PSTORE_BLK_KMSG_SIZE=64
+CONFIG_PSTORE_BLK_MAX_REASON=2
+# CONFIG_SYSV_FS is not set
+CONFIG_UFS_FS=m
+# CONFIG_UFS_FS_WRITE is not set
+# CONFIG_UFS_DEBUG is not set
+CONFIG_EROFS_FS=m
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS_XATTR=y
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_ZIP=y
+CONFIG_EROFS_FS_ZIP_LZMA=y
+CONFIG_EROFS_FS_ZIP_DEFLATE=y
+CONFIG_EROFS_FS_ZIP_ZSTD=y
+CONFIG_EROFS_FS_ONDEMAND=y
+CONFIG_EROFS_FS_PCPU_KTHREAD=y
+CONFIG_EROFS_FS_PCPU_KTHREAD_HIPRI=y
+CONFIG_VBOXSF_FS=m
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=m
+CONFIG_NFS_V2=m
+CONFIG_NFS_V3=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFS_SWAP=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_V4_2=y
+CONFIG_PNFS_FILE_LAYOUT=m
+CONFIG_PNFS_BLOCK=m
+CONFIG_PNFS_FLEXFILE_LAYOUT=m
+CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org"
+# CONFIG_NFS_V4_1_MIGRATION is not set
+CONFIG_NFS_V4_SECURITY_LABEL=y
+CONFIG_NFS_FSCACHE=y
+# CONFIG_NFS_USE_LEGACY_DNS is not set
+CONFIG_NFS_USE_KERNEL_DNS=y
+CONFIG_NFS_DEBUG=y
+# CONFIG_NFS_DISABLE_UDP_SUPPORT is not set
+# CONFIG_NFS_V4_2_READ_PLUS is not set
+CONFIG_NFSD=m
+# CONFIG_NFSD_V2 is not set
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_PNFS=y
+CONFIG_NFSD_BLOCKLAYOUT=y
+CONFIG_NFSD_SCSILAYOUT=y
+# CONFIG_NFSD_FLEXFILELAYOUT is not set
+CONFIG_NFSD_V4_2_INTER_SSC=y
+CONFIG_NFSD_V4_SECURITY_LABEL=y
+# CONFIG_NFSD_LEGACY_CLIENT_TRACKING is not set
+CONFIG_GRACE_PERIOD=m
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_ACL_SUPPORT=m
+CONFIG_NFS_COMMON=y
+CONFIG_NFS_V4_2_SSC_HELPER=y
+CONFIG_SUNRPC=m
+CONFIG_SUNRPC_GSS=m
+CONFIG_SUNRPC_BACKCHANNEL=y
+CONFIG_SUNRPC_SWAP=y
+CONFIG_RPCSEC_GSS_KRB5=m
+CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1=y
+CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA=y
+CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2=y
+CONFIG_SUNRPC_DEBUG=y
+CONFIG_SUNRPC_XPRT_RDMA=m
+CONFIG_CEPH_FS=m
+CONFIG_CEPH_FSCACHE=y
+CONFIG_CEPH_FS_POSIX_ACL=y
+CONFIG_CEPH_FS_SECURITY_LABEL=y
+CONFIG_CIFS=m
+# CONFIG_CIFS_STATS2 is not set
+CONFIG_CIFS_ALLOW_INSECURE_LEGACY=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_CIFS_DEBUG=y
+# CONFIG_CIFS_DEBUG2 is not set
+# CONFIG_CIFS_DEBUG_DUMP_KEYS is not set
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_CIFS_SWN_UPCALL=y
+CONFIG_CIFS_SMB_DIRECT=y
+CONFIG_CIFS_FSCACHE=y
+CONFIG_SMB_SERVER=m
+CONFIG_SMB_SERVER_SMBDIRECT=y
+CONFIG_SMB_SERVER_CHECK_CAP_NET_ADMIN=y
+CONFIG_SMB_SERVER_KERBEROS5=y
+CONFIG_SMBFS=m
+CONFIG_CODA_FS=m
+CONFIG_AFS_FS=m
+# CONFIG_AFS_DEBUG is not set
+CONFIG_AFS_FSCACHE=y
+# CONFIG_AFS_DEBUG_CURSOR is not set
+CONFIG_9P_FS=m
+CONFIG_9P_FSCACHE=y
+CONFIG_9P_FS_POSIX_ACL=y
+CONFIG_9P_FS_SECURITY=y
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_MAC_ROMAN=m
+CONFIG_NLS_MAC_CELTIC=m
+CONFIG_NLS_MAC_CENTEURO=m
+CONFIG_NLS_MAC_CROATIAN=m
+CONFIG_NLS_MAC_CYRILLIC=m
+CONFIG_NLS_MAC_GAELIC=m
+CONFIG_NLS_MAC_GREEK=m
+CONFIG_NLS_MAC_ICELAND=m
+CONFIG_NLS_MAC_INUIT=m
+CONFIG_NLS_MAC_ROMANIAN=m
+CONFIG_NLS_MAC_TURKISH=m
+CONFIG_NLS_UTF8=m
+CONFIG_NLS_UCS2_UTILS=m
+CONFIG_DLM=m
+CONFIG_DLM_DEBUG=y
+CONFIG_UNICODE=y
+# CONFIG_UNICODE_NORMALIZATION_SELFTEST is not set
+CONFIG_IO_WQ=y
+# end of File systems
+
+#
+# Security options
+#
+CONFIG_KEYS=y
+CONFIG_KEYS_REQUEST_CACHE=y
+CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_TRUSTED_KEYS=m
+CONFIG_HAVE_TRUSTED_KEYS=y
+CONFIG_TRUSTED_KEYS_TPM=y
+CONFIG_TRUSTED_KEYS_TEE=y
+CONFIG_ENCRYPTED_KEYS=m
+# CONFIG_USER_DECRYPTED_DATA is not set
+CONFIG_KEY_DH_OPERATIONS=y
+CONFIG_KEY_NOTIFICATIONS=y
+CONFIG_SECURITY_DMESG_RESTRICT=y
+CONFIG_PROC_MEM_ALWAYS_FORCE=y
+# CONFIG_PROC_MEM_FORCE_PTRACE is not set
+# CONFIG_PROC_MEM_NO_FORCE is not set
+CONFIG_SECURITY=y
+CONFIG_SECURITYFS=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_INFINIBAND=y
+CONFIG_SECURITY_NETWORK_XFRM=y
+CONFIG_SECURITY_PATH=y
+# CONFIG_INTEL_TXT is not set
+CONFIG_LSM_MMAP_MIN_ADDR=65536
+CONFIG_HARDENED_USERCOPY=y
+CONFIG_FORTIFY_SOURCE=y
+# CONFIG_STATIC_USERMODEHELPER is not set
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_DEVELOP=y
+CONFIG_SECURITY_SELINUX_AVC_STATS=y
+CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9
+CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256
+# CONFIG_SECURITY_SELINUX_DEBUG is not set
+CONFIG_SECURITY_SMACK=y
+CONFIG_SECURITY_SMACK_BRINGUP=y
+CONFIG_SECURITY_SMACK_NETFILTER=y
+CONFIG_SECURITY_SMACK_APPEND_SIGNALS=y
+CONFIG_SECURITY_TOMOYO=y
+CONFIG_SECURITY_TOMOYO_MAX_ACCEPT_ENTRY=2048
+CONFIG_SECURITY_TOMOYO_MAX_AUDIT_LOG=1024
+# CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER is not set
+CONFIG_SECURITY_TOMOYO_POLICY_LOADER="/usr/bin/tomoyo-init"
+CONFIG_SECURITY_TOMOYO_ACTIVATION_TRIGGER="/usr/lib/systemd/systemd"
+# CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING is not set
+CONFIG_SECURITY_APPARMOR=y
+# CONFIG_SECURITY_APPARMOR_DEBUG is not set
+CONFIG_SECURITY_APPARMOR_INTROSPECT_POLICY=y
+CONFIG_SECURITY_APPARMOR_HASH=y
+CONFIG_SECURITY_APPARMOR_HASH_DEFAULT=y
+CONFIG_SECURITY_APPARMOR_EXPORT_BINARY=y
+CONFIG_SECURITY_APPARMOR_PARANOID_LOAD=y
+CONFIG_SECURITY_LOADPIN=y
+CONFIG_SECURITY_LOADPIN_ENFORCE=y
+CONFIG_SECURITY_YAMA=y
+CONFIG_SECURITY_SAFESETID=y
+CONFIG_SECURITY_LOCKDOWN_LSM=y
+# CONFIG_SECURITY_LOCKDOWN_LSM_EARLY is not set
+CONFIG_LOCK_DOWN_KERNEL_FORCE_NONE=y
+# CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY is not set
+# CONFIG_LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY is not set
+CONFIG_SECURITY_LANDLOCK=y
+CONFIG_INTEGRITY=y
+CONFIG_INTEGRITY_SIGNATURE=y
+CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
+CONFIG_INTEGRITY_TRUSTED_KEYRING=y
+CONFIG_INTEGRITY_PLATFORM_KEYRING=y
+CONFIG_INTEGRITY_MACHINE_KEYRING=y
+# CONFIG_INTEGRITY_CA_MACHINE_KEYRING is not set
+CONFIG_LOAD_UEFI_KEYS=y
+CONFIG_INTEGRITY_AUDIT=y
+# CONFIG_IMA is not set
+# CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT is not set
+# CONFIG_EVM is not set
+# CONFIG_DEFAULT_SECURITY_SELINUX is not set
+# CONFIG_DEFAULT_SECURITY_SMACK is not set
+# CONFIG_DEFAULT_SECURITY_TOMOYO is not set
+# CONFIG_DEFAULT_SECURITY_APPARMOR is not set
+CONFIG_DEFAULT_SECURITY_DAC=y
+CONFIG_LSM="landlock,lockdown,yama,integrity,bpf"
+
+#
+# Kernel hardening options
+#
+
+#
+# Memory initialization
+#
+CONFIG_CC_HAS_AUTO_VAR_INIT_PATTERN=y
+CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO_BARE=y
+CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO=y
+# CONFIG_INIT_STACK_NONE is not set
+# CONFIG_INIT_STACK_ALL_PATTERN is not set
+CONFIG_INIT_STACK_ALL_ZERO=y
+# CONFIG_GCC_PLUGIN_STACKLEAK is not set
+CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
+# CONFIG_INIT_ON_FREE_DEFAULT_ON is not set
+# CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON is not set
+CONFIG_CC_HAS_ZERO_CALL_USED_REGS=y
+# CONFIG_ZERO_CALL_USED_REGS is not set
+# end of Memory initialization
+
+#
+# Hardening of kernel data structures
+#
+CONFIG_LIST_HARDENED=y
+# CONFIG_BUG_ON_DATA_CORRUPTION is not set
+# end of Hardening of kernel data structures
+
+CONFIG_RANDSTRUCT_NONE=y
+# CONFIG_RANDSTRUCT_FULL is not set
+# CONFIG_RANDSTRUCT_PERFORMANCE is not set
+# end of Kernel hardening options
+# end of Security options
+
+CONFIG_XOR_BLOCKS=m
+CONFIG_ASYNC_CORE=m
+CONFIG_ASYNC_MEMCPY=m
+CONFIG_ASYNC_XOR=m
+CONFIG_ASYNC_PQ=m
+CONFIG_ASYNC_RAID6_RECOV=m
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
+CONFIG_CRYPTO_AEAD=m
+CONFIG_CRYPTO_AEAD2=y
+CONFIG_CRYPTO_SIG=y
+CONFIG_CRYPTO_SIG2=y
+CONFIG_CRYPTO_SKCIPHER=y
+CONFIG_CRYPTO_SKCIPHER2=y
+CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG=y
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_RNG_DEFAULT=y
+CONFIG_CRYPTO_AKCIPHER2=y
+CONFIG_CRYPTO_AKCIPHER=y
+CONFIG_CRYPTO_KPP2=y
+CONFIG_CRYPTO_KPP=y
+CONFIG_CRYPTO_ACOMP2=y
+CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
+CONFIG_CRYPTO_USER=m
+CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_NULL2=m
+CONFIG_CRYPTO_PCRYPT=m
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_AUTHENC=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_SIMD=m
+CONFIG_CRYPTO_ENGINE=m
+# end of Crypto core or helper
+
+#
+# Public-key cryptography
+#
+CONFIG_CRYPTO_RSA=y
+CONFIG_CRYPTO_DH=y
+CONFIG_CRYPTO_DH_RFC7919_GROUPS=y
+CONFIG_CRYPTO_ECC=y
+CONFIG_CRYPTO_ECDH=y
+CONFIG_CRYPTO_ECDSA=y
+CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_CURVE25519=m
+# end of Public-key cryptography
+
+#
+# Block ciphers
+#
+CONFIG_CRYPTO_AES=y
+CONFIG_CRYPTO_AES_TI=m
+CONFIG_CRYPTO_ARIA=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_BLOWFISH_COMMON=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST_COMMON=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_TWOFISH_COMMON=m
+# end of Block ciphers
+
+#
+# Length-preserving ciphers and modes
+#
+CONFIG_CRYPTO_ADIANTUM=m
+CONFIG_CRYPTO_CHACHA20=m
+CONFIG_CRYPTO_CBC=m
+CONFIG_CRYPTO_CTR=y
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_HCTR2=m
+CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XCTR=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_NHPOLY1305=m
+# end of Length-preserving ciphers and modes
+
+#
+# AEAD (authenticated encryption with associated data) ciphers
+#
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_GENIV=m
+CONFIG_CRYPTO_SEQIV=m
+CONFIG_CRYPTO_ECHAINIV=m
+CONFIG_CRYPTO_ESSIV=m
+# end of AEAD (authenticated encryption with associated data) ciphers
+
+#
+# Hashes, digests, and MACs
+#
+CONFIG_CRYPTO_BLAKE2B=m
+CONFIG_CRYPTO_CMAC=m
+CONFIG_CRYPTO_GHASH=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_POLYVAL=m
+CONFIG_CRYPTO_POLY1305=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_SHA512=y
+CONFIG_CRYPTO_SHA3=y
+CONFIG_CRYPTO_SM3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
+CONFIG_CRYPTO_STREEBOG=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_XXHASH=m
+# end of Hashes, digests, and MACs
+
+#
+# CRCs (cyclic redundancy checks)
+#
+CONFIG_CRYPTO_CRC32C=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_CRCT10DIF=y
+CONFIG_CRYPTO_CRC64_ROCKSOFT=y
+# end of CRCs (cyclic redundancy checks)
+
+#
+# Compression
+#
+CONFIG_CRYPTO_DEFLATE=m
+CONFIG_CRYPTO_LZO=y
+CONFIG_CRYPTO_842=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ZSTD=y
+# end of Compression
+
+#
+# Random number generation
+#
+CONFIG_CRYPTO_ANSI_CPRNG=m
+CONFIG_CRYPTO_DRBG_MENU=y
+CONFIG_CRYPTO_DRBG_HMAC=y
+CONFIG_CRYPTO_DRBG_HASH=y
+CONFIG_CRYPTO_DRBG_CTR=y
+CONFIG_CRYPTO_DRBG=y
+CONFIG_CRYPTO_JITTERENTROPY=y
+CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKS=64
+CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKSIZE=32
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
+CONFIG_CRYPTO_KDF800108_CTR=y
+# end of Random number generation
+
+#
+# Userspace interface
+#
+CONFIG_CRYPTO_USER_API=m
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
+# CONFIG_CRYPTO_USER_API_RNG_CAVP is not set
+CONFIG_CRYPTO_USER_API_AEAD=m
+# CONFIG_CRYPTO_USER_API_ENABLE_OBSOLETE is not set
+# end of Userspace interface
+
+CONFIG_CRYPTO_HASH_INFO=y
+
+#
+# Accelerated Cryptographic Algorithms for CPU (x86)
+#
+CONFIG_CRYPTO_CURVE25519_X86=m
+CONFIG_CRYPTO_AES_NI_INTEL=m
+CONFIG_CRYPTO_BLOWFISH_X86_64=m
+CONFIG_CRYPTO_CAMELLIA_X86_64=m
+CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64=m
+CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64=m
+CONFIG_CRYPTO_CAST5_AVX_X86_64=m
+CONFIG_CRYPTO_CAST6_AVX_X86_64=m
+CONFIG_CRYPTO_DES3_EDE_X86_64=m
+CONFIG_CRYPTO_SERPENT_SSE2_X86_64=m
+CONFIG_CRYPTO_SERPENT_AVX_X86_64=m
+CONFIG_CRYPTO_SERPENT_AVX2_X86_64=m
+CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64=m
+CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64=m
+CONFIG_CRYPTO_TWOFISH_X86_64=m
+CONFIG_CRYPTO_TWOFISH_X86_64_3WAY=m
+CONFIG_CRYPTO_TWOFISH_AVX_X86_64=m
+CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64=m
+CONFIG_CRYPTO_ARIA_AESNI_AVX2_X86_64=m
+CONFIG_CRYPTO_ARIA_GFNI_AVX512_X86_64=m
+CONFIG_CRYPTO_CHACHA20_X86_64=m
+CONFIG_CRYPTO_AEGIS128_AESNI_SSE2=m
+CONFIG_CRYPTO_NHPOLY1305_SSE2=m
+CONFIG_CRYPTO_NHPOLY1305_AVX2=m
+CONFIG_CRYPTO_BLAKE2S_X86=y
+CONFIG_CRYPTO_POLYVAL_CLMUL_NI=m
+CONFIG_CRYPTO_POLY1305_X86_64=m
+CONFIG_CRYPTO_SHA1_SSSE3=m
+CONFIG_CRYPTO_SHA256_SSSE3=m
+CONFIG_CRYPTO_SHA512_SSSE3=m
+CONFIG_CRYPTO_SM3_AVX_X86_64=m
+CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL=m
+CONFIG_CRYPTO_CRC32C_INTEL=m
+CONFIG_CRYPTO_CRC32_PCLMUL=m
+CONFIG_CRYPTO_CRCT10DIF_PCLMUL=m
+# end of Accelerated Cryptographic Algorithms for CPU (x86)
+
+CONFIG_CRYPTO_HW=y
+CONFIG_CRYPTO_DEV_PADLOCK=m
+CONFIG_CRYPTO_DEV_PADLOCK_AES=m
+CONFIG_CRYPTO_DEV_PADLOCK_SHA=m
+CONFIG_CRYPTO_DEV_ATMEL_I2C=m
+CONFIG_CRYPTO_DEV_ATMEL_ECC=m
+CONFIG_CRYPTO_DEV_ATMEL_SHA204A=m
+CONFIG_CRYPTO_DEV_CCP=y
+CONFIG_CRYPTO_DEV_CCP_DD=m
+CONFIG_CRYPTO_DEV_SP_CCP=y
+CONFIG_CRYPTO_DEV_CCP_CRYPTO=m
+CONFIG_CRYPTO_DEV_SP_PSP=y
+CONFIG_CRYPTO_DEV_CCP_DEBUGFS=y
+CONFIG_CRYPTO_DEV_NITROX=m
+CONFIG_CRYPTO_DEV_NITROX_CNN55XX=m
+CONFIG_CRYPTO_DEV_QAT=m
+CONFIG_CRYPTO_DEV_QAT_DH895xCC=m
+CONFIG_CRYPTO_DEV_QAT_C3XXX=m
+CONFIG_CRYPTO_DEV_QAT_C62X=m
+CONFIG_CRYPTO_DEV_QAT_4XXX=m
+CONFIG_CRYPTO_DEV_QAT_420XX=m
+CONFIG_CRYPTO_DEV_QAT_DH895xCCVF=m
+CONFIG_CRYPTO_DEV_QAT_C3XXXVF=m
+CONFIG_CRYPTO_DEV_QAT_C62XVF=m
+# CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION is not set
+CONFIG_CRYPTO_DEV_IAA_CRYPTO=m
+# CONFIG_CRYPTO_DEV_IAA_CRYPTO_STATS is not set
+CONFIG_CRYPTO_DEV_CHELSIO=m
+CONFIG_CRYPTO_DEV_VIRTIO=m
+CONFIG_CRYPTO_DEV_SAFEXCEL=m
+CONFIG_CRYPTO_DEV_AMLOGIC_GXL=m
+CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG=y
+CONFIG_ASYMMETRIC_KEY_TYPE=y
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y
+CONFIG_X509_CERTIFICATE_PARSER=y
+CONFIG_PKCS8_PRIVATE_KEY_PARSER=m
+CONFIG_PKCS7_MESSAGE_PARSER=y
+# CONFIG_PKCS7_TEST_KEY is not set
+CONFIG_SIGNED_PE_FILE_VERIFICATION=y
+# CONFIG_FIPS_SIGNATURE_SELFTEST is not set
+
+#
+# Certificates for signature checking
+#
+CONFIG_MODULE_SIG_KEY="certs/signing_key.pem"
+# CONFIG_MODULE_SIG_KEY_TYPE_RSA is not set
+CONFIG_MODULE_SIG_KEY_TYPE_ECDSA=y
+CONFIG_SYSTEM_TRUSTED_KEYRING=y
+CONFIG_SYSTEM_TRUSTED_KEYS=""
+# CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set
+CONFIG_SECONDARY_TRUSTED_KEYRING=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
+CONFIG_SYSTEM_BLACKLIST_KEYRING=y
+CONFIG_SYSTEM_BLACKLIST_HASH_LIST=""
+CONFIG_SYSTEM_REVOCATION_LIST=y
+CONFIG_SYSTEM_REVOCATION_KEYS=""
+CONFIG_SYSTEM_BLACKLIST_AUTH_UPDATE=y
+# end of Certificates for signature checking
+
+CONFIG_BINARY_PRINTF=y
+
+#
+# Library routines
+#
+CONFIG_RAID6_PQ=m
+# CONFIG_RAID6_PQ_BENCHMARK is not set
+CONFIG_LINEAR_RANGES=y
+CONFIG_PACKING=y
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_STRNCPY_FROM_USER=y
+CONFIG_GENERIC_STRNLEN_USER=y
+CONFIG_GENERIC_NET_UTILS=y
+CONFIG_CORDIC=m
+# CONFIG_PRIME_NUMBERS is not set
+CONFIG_RATIONAL=y
+CONFIG_GENERIC_IOMAP=y
+CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y
+CONFIG_ARCH_HAS_FAST_MULTIPLIER=y
+CONFIG_ARCH_USE_SYM_ANNOTATIONS=y
+
+#
+# Crypto library routines
+#
+CONFIG_CRYPTO_LIB_UTILS=y
+CONFIG_CRYPTO_LIB_AES=y
+CONFIG_CRYPTO_LIB_AESCFB=y
+CONFIG_CRYPTO_LIB_ARC4=m
+CONFIG_CRYPTO_LIB_GF128MUL=m
+CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S=y
+CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y
+CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA=m
+CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m
+CONFIG_CRYPTO_LIB_CHACHA=m
+CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_DES=m
+CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11
+CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305=m
+CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m
+CONFIG_CRYPTO_LIB_POLY1305=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
+CONFIG_CRYPTO_LIB_SHA1=y
+CONFIG_CRYPTO_LIB_SHA256=y
+# end of Crypto library routines
+
+CONFIG_CRC_CCITT=y
+CONFIG_CRC16=m
+CONFIG_CRC_T10DIF=y
+CONFIG_CRC64_ROCKSOFT=y
+CONFIG_CRC_ITU_T=m
+CONFIG_CRC32=y
+# CONFIG_CRC32_SELFTEST is not set
+CONFIG_CRC32_SLICEBY8=y
+# CONFIG_CRC32_SLICEBY4 is not set
+# CONFIG_CRC32_SARWATE is not set
+# CONFIG_CRC32_BIT is not set
+CONFIG_CRC64=y
+CONFIG_CRC4=m
+CONFIG_CRC7=m
+CONFIG_LIBCRC32C=m
+CONFIG_CRC8=m
+CONFIG_XXHASH=y
+# CONFIG_RANDOM32_SELFTEST is not set
+CONFIG_842_COMPRESS=m
+CONFIG_842_DECOMPRESS=m
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=y
+CONFIG_LZO_COMPRESS=y
+CONFIG_LZO_DECOMPRESS=y
+CONFIG_LZ4_COMPRESS=m
+CONFIG_LZ4HC_COMPRESS=m
+CONFIG_LZ4_DECOMPRESS=y
+CONFIG_ZSTD_COMMON=y
+CONFIG_ZSTD_COMPRESS=y
+CONFIG_ZSTD_DECOMPRESS=y
+CONFIG_XZ_DEC=y
+CONFIG_XZ_DEC_X86=y
+CONFIG_XZ_DEC_POWERPC=y
+CONFIG_XZ_DEC_ARM=y
+CONFIG_XZ_DEC_ARMTHUMB=y
+CONFIG_XZ_DEC_SPARC=y
+CONFIG_XZ_DEC_MICROLZMA=y
+CONFIG_XZ_DEC_BCJ=y
+# CONFIG_XZ_DEC_TEST is not set
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_DECOMPRESS_BZIP2=y
+CONFIG_DECOMPRESS_LZMA=y
+CONFIG_DECOMPRESS_XZ=y
+CONFIG_DECOMPRESS_LZO=y
+CONFIG_DECOMPRESS_LZ4=y
+CONFIG_DECOMPRESS_ZSTD=y
+CONFIG_GENERIC_ALLOCATOR=y
+CONFIG_REED_SOLOMON=m
+CONFIG_REED_SOLOMON_ENC8=y
+CONFIG_REED_SOLOMON_DEC8=y
+CONFIG_BCH=m
+CONFIG_TEXTSEARCH=y
+CONFIG_TEXTSEARCH_KMP=m
+CONFIG_TEXTSEARCH_BM=m
+CONFIG_TEXTSEARCH_FSM=m
+CONFIG_BTREE=y
+CONFIG_INTERVAL_TREE=y
+CONFIG_INTERVAL_TREE_SPAN_ITER=y
+CONFIG_XARRAY_MULTI=y
+CONFIG_ASSOCIATIVE_ARRAY=y
+CONFIG_CLOSURES=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_IOPORT_MAP=y
+CONFIG_HAS_DMA=y
+CONFIG_DMA_OPS=y
+CONFIG_NEED_SG_DMA_FLAGS=y
+CONFIG_NEED_SG_DMA_LENGTH=y
+CONFIG_NEED_DMA_MAP_STATE=y
+CONFIG_ARCH_DMA_ADDR_T_64BIT=y
+CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED=y
+CONFIG_SWIOTLB=y
+# CONFIG_SWIOTLB_DYNAMIC is not set
+CONFIG_DMA_NEED_SYNC=y
+CONFIG_DMA_COHERENT_POOL=y
+CONFIG_DMA_CMA=y
+# CONFIG_DMA_NUMA_CMA is not set
+
+#
+# Default contiguous memory area size:
+#
+CONFIG_CMA_SIZE_MBYTES=0
+CONFIG_CMA_SIZE_SEL_MBYTES=y
+# CONFIG_CMA_SIZE_SEL_PERCENTAGE is not set
+# CONFIG_CMA_SIZE_SEL_MIN is not set
+# CONFIG_CMA_SIZE_SEL_MAX is not set
+CONFIG_CMA_ALIGNMENT=8
+# CONFIG_DMA_API_DEBUG is not set
+# CONFIG_DMA_MAP_BENCHMARK is not set
+CONFIG_SGL_ALLOC=y
+CONFIG_CHECK_SIGNATURE=y
+CONFIG_CPU_RMAP=y
+CONFIG_DQL=y
+CONFIG_GLOB=y
+# CONFIG_GLOB_SELFTEST is not set
+CONFIG_NLATTR=y
+CONFIG_LRU_CACHE=m
+CONFIG_CLZ_TAB=y
+CONFIG_IRQ_POLL=y
+CONFIG_MPILIB=y
+CONFIG_SIGNATURE=y
+CONFIG_DIMLIB=y
+CONFIG_OID_REGISTRY=y
+CONFIG_UCS2_STRING=y
+CONFIG_HAVE_GENERIC_VDSO=y
+CONFIG_GENERIC_GETTIMEOFDAY=y
+CONFIG_GENERIC_VDSO_TIME_NS=y
+CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT=y
+CONFIG_VDSO_GETRANDOM=y
+CONFIG_FONT_SUPPORT=y
+CONFIG_FONTS=y
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+# CONFIG_FONT_6x11 is not set
+# CONFIG_FONT_7x14 is not set
+# CONFIG_FONT_PEARL_8x8 is not set
+# CONFIG_FONT_ACORN_8x8 is not set
+# CONFIG_FONT_MINI_4x6 is not set
+# CONFIG_FONT_6x10 is not set
+# CONFIG_FONT_10x18 is not set
+# CONFIG_FONT_SUN8x16 is not set
+# CONFIG_FONT_SUN12x22 is not set
+CONFIG_FONT_TER16x32=y
+# CONFIG_FONT_6x8 is not set
+CONFIG_SG_POOL=y
+CONFIG_ARCH_HAS_PMEM_API=y
+CONFIG_MEMREGION=y
+CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION=y
+CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE=y
+CONFIG_ARCH_HAS_COPY_MC=y
+CONFIG_ARCH_STACKWALK=y
+CONFIG_STACKDEPOT=y
+CONFIG_STACKDEPOT_MAX_FRAMES=64
+CONFIG_SBITMAP=y
+CONFIG_PARMAN=m
+CONFIG_OBJAGG=m
+# CONFIG_LWQ_TEST is not set
+# end of Library routines
+
+CONFIG_PLDMFW=y
+CONFIG_ASN1_ENCODER=m
+CONFIG_POLYNOMIAL=m
+CONFIG_FIRMWARE_TABLE=y
+
+#
+# Kernel hacking
+#
+
+#
+# printk and dmesg options
+#
+CONFIG_PRINTK_TIME=y
+# CONFIG_PRINTK_CALLER is not set
+CONFIG_STACKTRACE_BUILD_ID=y
+CONFIG_CONSOLE_LOGLEVEL_DEFAULT=4
+CONFIG_CONSOLE_LOGLEVEL_QUIET=1
+CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
+CONFIG_BOOT_PRINTK_DELAY=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DYNAMIC_DEBUG_CORE=y
+CONFIG_SYMBOLIC_ERRNAME=y
+CONFIG_DEBUG_BUGVERBOSE=y
+# end of printk and dmesg options
+
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_MISC is not set
+
+#
+# Compile-time checks and compiler options
+#
+CONFIG_DEBUG_INFO=y
+CONFIG_AS_HAS_NON_CONST_ULEB128=y
+# CONFIG_DEBUG_INFO_NONE is not set
+# CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT is not set
+# CONFIG_DEBUG_INFO_DWARF4 is not set
+CONFIG_DEBUG_INFO_DWARF5=y
+# CONFIG_DEBUG_INFO_REDUCED is not set
+CONFIG_DEBUG_INFO_COMPRESSED_NONE=y
+# CONFIG_DEBUG_INFO_COMPRESSED_ZLIB is not set
+# CONFIG_DEBUG_INFO_COMPRESSED_ZSTD is not set
+# CONFIG_DEBUG_INFO_SPLIT is not set
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_PAHOLE_HAS_SPLIT_BTF=y
+CONFIG_PAHOLE_HAS_BTF_TAG=y
+CONFIG_PAHOLE_HAS_LANG_EXCLUDE=y
+CONFIG_DEBUG_INFO_BTF_MODULES=y
+# CONFIG_MODULE_ALLOW_BTF_MISMATCH is not set
+CONFIG_GDB_SCRIPTS=y
+CONFIG_FRAME_WARN=2048
+CONFIG_STRIP_ASM_SYMS=y
+# CONFIG_READABLE_ASM is not set
+# CONFIG_HEADERS_INSTALL is not set
+# CONFIG_DEBUG_SECTION_MISMATCH is not set
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+CONFIG_OBJTOOL=y
+# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
+# end of Compile-time checks and compiler options
+
+#
+# Generic Kernel Debugging Instruments
+#
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x0
+CONFIG_MAGIC_SYSRQ_SERIAL=y
+CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE=""
+CONFIG_DEBUG_FS=y
+CONFIG_DEBUG_FS_ALLOW_ALL=y
+# CONFIG_DEBUG_FS_DISALLOW_MOUNT is not set
+# CONFIG_DEBUG_FS_ALLOW_NONE is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
+CONFIG_ARCH_HAS_UBSAN=y
+# CONFIG_UBSAN is not set
+CONFIG_HAVE_ARCH_KCSAN=y
+CONFIG_HAVE_KCSAN_COMPILER=y
+# CONFIG_KCSAN is not set
+# end of Generic Kernel Debugging Instruments
+
+#
+# Networking Debugging
+#
+# CONFIG_NET_DEV_REFCNT_TRACKER is not set
+# CONFIG_NET_NS_REFCNT_TRACKER is not set
+# CONFIG_DEBUG_NET is not set
+# end of Networking Debugging
+
+#
+# Memory Debugging
+#
+# CONFIG_PAGE_EXTENSION is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+CONFIG_SLUB_DEBUG=y
+# CONFIG_SLUB_DEBUG_ON is not set
+# CONFIG_PAGE_OWNER is not set
+# CONFIG_PAGE_TABLE_CHECK is not set
+CONFIG_PAGE_POISONING=y
+# CONFIG_DEBUG_PAGE_REF is not set
+CONFIG_DEBUG_RODATA_TEST=y
+CONFIG_ARCH_HAS_DEBUG_WX=y
+CONFIG_DEBUG_WX=y
+CONFIG_GENERIC_PTDUMP=y
+CONFIG_PTDUMP_CORE=y
+# CONFIG_PTDUMP_DEBUGFS is not set
+CONFIG_HAVE_DEBUG_KMEMLEAK=y
+# CONFIG_DEBUG_KMEMLEAK is not set
+# CONFIG_PER_VMA_LOCK_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+CONFIG_SHRINKER_DEBUG=y
+# CONFIG_DEBUG_STACK_USAGE is not set
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_ARCH_HAS_DEBUG_VM_PGTABLE=y
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_VM_PGTABLE is not set
+CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y
+# CONFIG_DEBUG_VIRTUAL is not set
+CONFIG_DEBUG_MEMORY_INIT=y
+# CONFIG_DEBUG_PER_CPU_MAPS is not set
+CONFIG_ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP=y
+# CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP is not set
+# CONFIG_MEM_ALLOC_PROFILING is not set
+CONFIG_HAVE_ARCH_KASAN=y
+CONFIG_HAVE_ARCH_KASAN_VMALLOC=y
+CONFIG_CC_HAS_KASAN_GENERIC=y
+CONFIG_CC_HAS_KASAN_SW_TAGS=y
+CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y
+# CONFIG_KASAN is not set
+CONFIG_HAVE_ARCH_KFENCE=y
+CONFIG_KFENCE=y
+CONFIG_KFENCE_SAMPLE_INTERVAL=100
+CONFIG_KFENCE_NUM_OBJECTS=255
+CONFIG_KFENCE_DEFERRABLE=y
+CONFIG_KFENCE_STRESS_TEST_FAULTS=0
+CONFIG_HAVE_ARCH_KMSAN=y
+CONFIG_HAVE_KMSAN_COMPILER=y
+# CONFIG_KMSAN is not set
+# end of Memory Debugging
+
+CONFIG_DEBUG_SHIRQ=y
+
+#
+# Debug Oops, Lockups and Hangs
+#
+# CONFIG_PANIC_ON_OOPS is not set
+CONFIG_PANIC_ON_OOPS_VALUE=0
+CONFIG_PANIC_TIMEOUT=0
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR_INTR_STORM=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_HAVE_HARDLOCKUP_DETECTOR_BUDDY=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+# CONFIG_HARDLOCKUP_DETECTOR_PREFER_BUDDY is not set
+CONFIG_HARDLOCKUP_DETECTOR_PERF=y
+# CONFIG_HARDLOCKUP_DETECTOR_BUDDY is not set
+# CONFIG_HARDLOCKUP_DETECTOR_ARCH is not set
+CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER=y
+CONFIG_HARDLOCKUP_CHECK_TIMESTAMP=y
+# CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+# CONFIG_WQ_WATCHDOG is not set
+# CONFIG_WQ_CPU_INTENSIVE_REPORT is not set
+# CONFIG_TEST_LOCKUP is not set
+# end of Debug Oops, Lockups and Hangs
+
+#
+# Scheduler Debugging
+#
+CONFIG_SCHED_DEBUG=y
+CONFIG_SCHED_INFO=y
+CONFIG_SCHEDSTATS=y
+# end of Scheduler Debugging
+
+# CONFIG_DEBUG_TIMEKEEPING is not set
+# CONFIG_DEBUG_PREEMPT is not set
+
+#
+# Lock Debugging (spinlocks, mutexes, etc...)
+#
+CONFIG_LOCK_DEBUGGING_SUPPORT=y
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set
+# CONFIG_DEBUG_RWSEMS is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_DEBUG_ATOMIC_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_LOCK_TORTURE_TEST is not set
+# CONFIG_WW_MUTEX_SELFTEST is not set
+# CONFIG_SCF_TORTURE_TEST is not set
+# CONFIG_CSD_LOCK_WAIT_DEBUG is not set
+# end of Lock Debugging (spinlocks, mutexes, etc...)
+
+# CONFIG_NMI_CHECK_CPU is not set
+# CONFIG_DEBUG_IRQFLAGS is not set
+CONFIG_STACKTRACE=y
+# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set
+# CONFIG_DEBUG_KOBJECT is not set
+
+#
+# Debug kernel data structures
+#
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_PLIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_DEBUG_CLOSURES is not set
+# CONFIG_DEBUG_MAPLE_TREE is not set
+# end of Debug kernel data structures
+
+#
+# RCU Debugging
+#
+# CONFIG_RCU_SCALE_TEST is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_REF_SCALE_TEST is not set
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_RCU_EXP_CPU_STALL_TIMEOUT=0
+# CONFIG_RCU_CPU_STALL_CPUTIME is not set
+# CONFIG_RCU_CPU_STALL_NOTIFIER is not set
+# CONFIG_RCU_TRACE is not set
+# CONFIG_RCU_EQS_DEBUG is not set
+# end of RCU Debugging
+
+# CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set
+# CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set
+# CONFIG_LATENCYTOP is not set
+# CONFIG_DEBUG_CGROUP_REF is not set
+CONFIG_USER_STACKTRACE_SUPPORT=y
+CONFIG_NOP_TRACER=y
+CONFIG_HAVE_RETHOOK=y
+CONFIG_RETHOOK=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
+CONFIG_HAVE_FUNCTION_GRAPH_RETVAL=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y
+CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y
+CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS=y
+CONFIG_HAVE_DYNAMIC_FTRACE_NO_PATCHABLE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_HAVE_SYSCALL_TRACEPOINTS=y
+CONFIG_HAVE_FENTRY=y
+CONFIG_HAVE_OBJTOOL_MCOUNT=y
+CONFIG_HAVE_OBJTOOL_NOP_MCOUNT=y
+CONFIG_HAVE_C_RECORDMCOUNT=y
+CONFIG_HAVE_BUILDTIME_MCOUNT_SORT=y
+CONFIG_BUILDTIME_MCOUNT_SORT=y
+CONFIG_TRACER_MAX_TRACE=y
+CONFIG_TRACE_CLOCK=y
+CONFIG_RING_BUFFER=y
+CONFIG_EVENT_TRACING=y
+CONFIG_CONTEXT_SWITCH_TRACER=y
+CONFIG_TRACING=y
+CONFIG_GENERIC_TRACER=y
+CONFIG_TRACING_SUPPORT=y
+CONFIG_FTRACE=y
+CONFIG_BOOTTIME_TRACING=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_FUNCTION_GRAPH_TRACER=y
+CONFIG_FUNCTION_GRAPH_RETVAL=y
+CONFIG_DYNAMIC_FTRACE=y
+CONFIG_DYNAMIC_FTRACE_WITH_REGS=y
+CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y
+CONFIG_DYNAMIC_FTRACE_WITH_ARGS=y
+CONFIG_FPROBE=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_STACK_TRACER=y
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+CONFIG_SCHED_TRACER=y
+CONFIG_HWLAT_TRACER=y
+CONFIG_OSNOISE_TRACER=y
+CONFIG_TIMERLAT_TRACER=y
+CONFIG_MMIOTRACE=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_TRACER_SNAPSHOT=y
+# CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP is not set
+CONFIG_BRANCH_PROFILE_NONE=y
+# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_FPROBE_EVENTS=y
+CONFIG_PROBE_EVENTS_BTF_ARGS=y
+CONFIG_KPROBE_EVENTS=y
+# CONFIG_KPROBE_EVENTS_ON_NOTRACE is not set
+CONFIG_UPROBE_EVENTS=y
+CONFIG_BPF_EVENTS=y
+CONFIG_DYNAMIC_EVENTS=y
+CONFIG_PROBE_EVENTS=y
+CONFIG_BPF_KPROBE_OVERRIDE=y
+CONFIG_FTRACE_MCOUNT_RECORD=y
+CONFIG_FTRACE_MCOUNT_USE_CC=y
+CONFIG_TRACING_MAP=y
+CONFIG_SYNTH_EVENTS=y
+CONFIG_USER_EVENTS=y
+CONFIG_HIST_TRIGGERS=y
+# CONFIG_TRACE_EVENT_INJECT is not set
+# CONFIG_TRACEPOINT_BENCHMARK is not set
+# CONFIG_RING_BUFFER_BENCHMARK is not set
+# CONFIG_TRACE_EVAL_MAP_FILE is not set
+# CONFIG_FTRACE_RECORD_RECURSION is not set
+# CONFIG_FTRACE_VALIDATE_RCU_IS_WATCHING is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_FTRACE_SORT_STARTUP_TEST is not set
+# CONFIG_RING_BUFFER_STARTUP_TEST is not set
+# CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS is not set
+# CONFIG_MMIOTRACE_TEST is not set
+# CONFIG_PREEMPTIRQ_DELAY_TEST is not set
+# CONFIG_SYNTH_EVENT_GEN_TEST is not set
+# CONFIG_KPROBE_EVENT_GEN_TEST is not set
+# CONFIG_HIST_TRIGGERS_DEBUG is not set
+# CONFIG_RV is not set
+# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_SAMPLE_FTRACE_DIRECT=y
+CONFIG_HAVE_SAMPLE_FTRACE_DIRECT_MULTI=y
+CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y
+CONFIG_STRICT_DEVMEM=y
+CONFIG_IO_STRICT_DEVMEM=y
+
+#
+# x86 Debugging
+#
+CONFIG_EARLY_PRINTK_USB=y
+# CONFIG_X86_VERBOSE_BOOTUP is not set
+CONFIG_EARLY_PRINTK=y
+CONFIG_EARLY_PRINTK_DBGP=y
+CONFIG_EARLY_PRINTK_USB_XDBC=y
+# CONFIG_EFI_PGT_DUMP is not set
+# CONFIG_DEBUG_TLBFLUSH is not set
+CONFIG_HAVE_MMIOTRACE_SUPPORT=y
+CONFIG_X86_DECODER_SELFTEST=y
+CONFIG_IO_DELAY_0X80=y
+# CONFIG_IO_DELAY_0XED is not set
+# CONFIG_IO_DELAY_UDELAY is not set
+# CONFIG_IO_DELAY_NONE is not set
+CONFIG_DEBUG_BOOT_PARAMS=y
+# CONFIG_CPA_DEBUG is not set
+# CONFIG_DEBUG_ENTRY is not set
+# CONFIG_DEBUG_NMI_SELFTEST is not set
+# CONFIG_X86_DEBUG_FPU is not set
+# CONFIG_PUNIT_ATOM_DEBUG is not set
+CONFIG_UNWINDER_ORC=y
+# CONFIG_UNWINDER_FRAME_POINTER is not set
+# end of x86 Debugging
+
+#
+# Kernel Testing and Coverage
+#
+# CONFIG_KUNIT is not set
+# CONFIG_NOTIFIER_ERROR_INJECTION is not set
+CONFIG_FUNCTION_ERROR_INJECTION=y
+# CONFIG_FAULT_INJECTION is not set
+CONFIG_ARCH_HAS_KCOV=y
+CONFIG_CC_HAS_SANCOV_TRACE_PC=y
+# CONFIG_KCOV is not set
+CONFIG_RUNTIME_TESTING_MENU=y
+# CONFIG_TEST_DHRY is not set
+# CONFIG_LKDTM is not set
+# CONFIG_TEST_MIN_HEAP is not set
+# CONFIG_TEST_DIV64 is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_TEST_REF_TRACKER is not set
+# CONFIG_RBTREE_TEST is not set
+# CONFIG_REED_SOLOMON_TEST is not set
+# CONFIG_INTERVAL_TREE_TEST is not set
+# CONFIG_PERCPU_TEST is not set
+# CONFIG_ATOMIC64_SELFTEST is not set
+CONFIG_ASYNC_RAID6_TEST=m
+# CONFIG_TEST_HEXDUMP is not set
+# CONFIG_TEST_KSTRTOX is not set
+# CONFIG_TEST_PRINTF is not set
+# CONFIG_TEST_SCANF is not set
+# CONFIG_TEST_BITMAP is not set
+# CONFIG_TEST_UUID is not set
+# CONFIG_TEST_XARRAY is not set
+# CONFIG_TEST_MAPLE_TREE is not set
+# CONFIG_TEST_RHASHTABLE is not set
+# CONFIG_TEST_IDA is not set
+# CONFIG_TEST_PARMAN is not set
+# CONFIG_TEST_LKM is not set
+# CONFIG_TEST_BITOPS is not set
+# CONFIG_TEST_VMALLOC is not set
+# CONFIG_TEST_BPF is not set
+# CONFIG_TEST_BLACKHOLE_DEV is not set
+# CONFIG_FIND_BIT_BENCHMARK is not set
+# CONFIG_TEST_FIRMWARE is not set
+# CONFIG_TEST_SYSCTL is not set
+# CONFIG_TEST_UDELAY is not set
+# CONFIG_TEST_STATIC_KEYS is not set
+# CONFIG_TEST_DYNAMIC_DEBUG is not set
+# CONFIG_TEST_KMOD is not set
+# CONFIG_TEST_MEMCAT_P is not set
+# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_MEMINIT is not set
+# CONFIG_TEST_HMM is not set
+# CONFIG_TEST_FREE_PAGES is not set
+# CONFIG_TEST_FPU is not set
+# CONFIG_TEST_CLOCKSOURCE_WATCHDOG is not set
+# CONFIG_TEST_OBJPOOL is not set
+CONFIG_ARCH_USE_MEMTEST=y
+CONFIG_MEMTEST=y
+# CONFIG_HYPERV_TESTING is not set
+# end of Kernel Testing and Coverage
+
+#
+# Rust hacking
+#
+# end of Rust hacking
+# end of Kernel hacking
\ No newline at end of file
diff --git a/main.sh b/main.sh
new file mode 100755
index 0000000..28ab3b5
--- /dev/null
+++ b/main.sh
@@ -0,0 +1,10 @@
+#! /bin/bash
+
+# Create the output directory that will receive the built debs
+mkdir -p ./output
+
+. ./scripts/source.sh
+. ../scripts/patch.sh
+. ../scripts/config.sh
+. ../scripts/build.sh
+. ../scripts/output.sh
diff --git a/patches/0001-cachyos-base-all.patch b/patches/0001-cachyos-base-all.patch
new file mode 100644
index 0000000..cb5cb43
--- /dev/null
+++ b/patches/0001-cachyos-base-all.patch
@@ -0,0 +1,48099 @@
+From 5984a6b2cf95450f2f92610cfb69378b844da2a6 Mon Sep 17 00:00:00 2001
+From: Eric Naim <dnaim@cachyos.org>
+Date: Tue, 22 Oct 2024 22:51:09 +0800
+Subject: [PATCH 01/13] address-masking
+
+Signed-off-by: Eric Naim <dnaim@cachyos.org>
+---
+ arch/x86/include/asm/uaccess_64.h | 11 +++++++++++
+ fs/select.c                       |  4 +++-
+ include/linux/uaccess.h           |  7 +++++++
+ lib/strncpy_from_user.c           |  9 +++++++++
+ lib/strnlen_user.c                |  9 +++++++++
+ 5 files changed, 39 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
+index 04789f45ab2b..afce8ee5d7b7 100644
+--- a/arch/x86/include/asm/uaccess_64.h
++++ b/arch/x86/include/asm/uaccess_64.h
+@@ -53,6 +53,17 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
+  */
+ #define valid_user_address(x) ((__force long)(x) >= 0)
+ 
++/*
++ * Masking the user address is an alternative to a conditional
++ * user_access_begin that can avoid the fencing. This only works
++ * for dense accesses starting at the address.
++ */
++#define mask_user_address(x) ((typeof(x))((long)(x)|((long)(x)>>63)))
++#define masked_user_access_begin(x) ({				\
++	__auto_type __masked_ptr = (x);				\
++	__masked_ptr = mask_user_address(__masked_ptr);		\
++	__uaccess_begin(); __masked_ptr; })
++
+ /*
+  * User pointers can have tag bits on x86-64.  This scheme tolerates
+  * arbitrary values in those bits rather then masking them off.
+diff --git a/fs/select.c b/fs/select.c
+index 9515c3fa1a03..bc185d111436 100644
+--- a/fs/select.c
++++ b/fs/select.c
+@@ -780,7 +780,9 @@ static inline int get_sigset_argpack(struct sigset_argpack *to,
+ {
+ 	// the path is hot enough for overhead of copy_from_user() to matter
+ 	if (from) {
+-		if (!user_read_access_begin(from, sizeof(*from)))
++		if (can_do_masked_user_access())
++			from = masked_user_access_begin(from);
++		else if (!user_read_access_begin(from, sizeof(*from)))
+ 			return -EFAULT;
+ 		unsafe_get_user(to->p, &from->p, Efault);
+ 		unsafe_get_user(to->size, &from->size, Efault);
+diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
+index d8e4105a2f21..39c7cf82b0c2 100644
+--- a/include/linux/uaccess.h
++++ b/include/linux/uaccess.h
+@@ -33,6 +33,13 @@
+ })
+ #endif
+ 
++#ifdef masked_user_access_begin
++ #define can_do_masked_user_access() 1
++#else
++ #define can_do_masked_user_access() 0
++ #define masked_user_access_begin(src) NULL
++#endif
++
+ /*
+  * Architectures should provide two primitives (raw_copy_{to,from}_user())
+  * and get rid of their private instances of copy_{to,from}_user() and
+diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
+index 6432b8c3e431..989a12a67872 100644
+--- a/lib/strncpy_from_user.c
++++ b/lib/strncpy_from_user.c
+@@ -120,6 +120,15 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
+ 	if (unlikely(count <= 0))
+ 		return 0;
+ 
++	if (can_do_masked_user_access()) {
++		long retval;
++
++		src = masked_user_access_begin(src);
++		retval = do_strncpy_from_user(dst, src, count, count);
++		user_read_access_end();
++		return retval;
++	}
++
+ 	max_addr = TASK_SIZE_MAX;
+ 	src_addr = (unsigned long)untagged_addr(src);
+ 	if (likely(src_addr < max_addr)) {
+diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
+index feeb935a2299..6e489f9e90f1 100644
+--- a/lib/strnlen_user.c
++++ b/lib/strnlen_user.c
+@@ -96,6 +96,15 @@ long strnlen_user(const char __user *str, long count)
+ 	if (unlikely(count <= 0))
+ 		return 0;
+ 
++	if (can_do_masked_user_access()) {
++		long retval;
++
++		str = masked_user_access_begin(str);
++		retval = do_strnlen_user(str, count, count);
++		user_read_access_end();
++		return retval;
++	}
++
+ 	max_addr = TASK_SIZE_MAX;
+ 	src_addr = (unsigned long)untagged_addr(str);
+ 	if (likely(src_addr < max_addr)) {
+-- 
+2.47.0.rc0
+
+From bd40ee69b53e1cb291f96d3ad1120698aea8e96b Mon Sep 17 00:00:00 2001
+From: Eric Naim <dnaim@cachyos.org>
+Date: Tue, 22 Oct 2024 22:51:22 +0800
+Subject: [PATCH 02/13] amd-cache-optimizer
+
+Signed-off-by: Eric Naim <dnaim@cachyos.org>
+---
+ .../sysfs-bus-platform-drivers-amd_x3d_vcache |  14 ++
+ MAINTAINERS                                   |   8 +
+ drivers/platform/x86/amd/Kconfig              |  12 ++
+ drivers/platform/x86/amd/Makefile             |   2 +
+ drivers/platform/x86/amd/x3d_vcache.c         | 193 ++++++++++++++++++
+ 5 files changed, 229 insertions(+)
+ create mode 100644 Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache
+ create mode 100644 drivers/platform/x86/amd/x3d_vcache.c
+
+diff --git a/Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache b/Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache
+new file mode 100644
+index 000000000000..1aa6ed0c10d9
+--- /dev/null
++++ b/Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache
+@@ -0,0 +1,14 @@
++What:		/sys/bus/platform/drivers/amd_x3d_vcache/AMDI0101\:00/amd_x3d_mode
++Date:           October 2024
++KernelVersion:	6.13
++Contact:	Basavaraj Natikar <Basavaraj.Natikar@amd.com>
++Description:	(RW) AMD 3D V-Cache optimizer allows users to switch CPU core
++		rankings dynamically.
++
++		This file switches between these two modes:
++		- "frequency" cores within the faster CCD are prioritized before
++		those in the slower CCD.
++		- "cache" cores within the larger L3 CCD are prioritized before
++		those in the smaller L3 CCD.
++
++		Format: %s.
+diff --git a/MAINTAINERS b/MAINTAINERS
+index cc40a9d9b8cd..2ba00c0cd701 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -948,6 +948,14 @@ Q:	https://patchwork.kernel.org/project/linux-rdma/list/
+ F:	drivers/infiniband/hw/efa/
+ F:	include/uapi/rdma/efa-abi.h
+ 
++AMD 3D V-CACHE PERFORMANCE OPTIMIZER DRIVER
++M:	Basavaraj Natikar <Basavaraj.Natikar@amd.com>
++R:	Mario Limonciello <mario.limonciello@amd.com>
++L:	platform-driver-x86@vger.kernel.org
++S:	Supported
++F:	Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache
++F:	drivers/platform/x86/amd/x3d_vcache.c
++
+ AMD ADDRESS TRANSLATION LIBRARY (ATL)
+ M:	Yazen Ghannam <Yazen.Ghannam@amd.com>
+ L:	linux-edac@vger.kernel.org
+diff --git a/drivers/platform/x86/amd/Kconfig b/drivers/platform/x86/amd/Kconfig
+index f88682d36447..d73f691020d0 100644
+--- a/drivers/platform/x86/amd/Kconfig
++++ b/drivers/platform/x86/amd/Kconfig
+@@ -6,6 +6,18 @@
+ source "drivers/platform/x86/amd/pmf/Kconfig"
+ source "drivers/platform/x86/amd/pmc/Kconfig"
+ 
++config AMD_3D_VCACHE
++	tristate "AMD 3D V-Cache Performance Optimizer Driver"
++	depends on X86_64 && ACPI
++	help
++	  The driver provides a sysfs interface, enabling the setting of a bias
++	  that alters CPU core reordering. This bias prefers cores with higher
++	  frequencies or larger L3 caches on processors supporting AMD 3D V-Cache
++	  technology.
++
++	  If you choose to compile this driver as a module the module will be
++	  called amd_3d_vcache.
++
+ config AMD_HSMP
+ 	tristate "AMD HSMP Driver"
+ 	depends on AMD_NB && X86_64 && ACPI
+diff --git a/drivers/platform/x86/amd/Makefile b/drivers/platform/x86/amd/Makefile
+index dcec0a46f8af..16e4cce02242 100644
+--- a/drivers/platform/x86/amd/Makefile
++++ b/drivers/platform/x86/amd/Makefile
+@@ -4,6 +4,8 @@
+ # AMD x86 Platform-Specific Drivers
+ #
+ 
++obj-$(CONFIG_AMD_3D_VCACHE)     += amd_3d_vcache.o
++amd_3d_vcache-objs              := x3d_vcache.o
+ obj-$(CONFIG_AMD_PMC)		+= pmc/
+ amd_hsmp-y			:= hsmp.o
+ obj-$(CONFIG_AMD_HSMP)		+= amd_hsmp.o
+diff --git a/drivers/platform/x86/amd/x3d_vcache.c b/drivers/platform/x86/amd/x3d_vcache.c
+new file mode 100644
+index 000000000000..679613d02b9a
+--- /dev/null
++++ b/drivers/platform/x86/amd/x3d_vcache.c
+@@ -0,0 +1,193 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * AMD 3D V-Cache Performance Optimizer Driver
++ *
++ * Copyright (c) 2024, Advanced Micro Devices, Inc.
++ * All Rights Reserved.
++ *
++ * Authors: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
++ *          Perry Yuan <perry.yuan@amd.com>
++ *          Mario Limonciello <mario.limonciello@amd.com>
++ *
++ */
++
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <linux/acpi.h>
++#include <linux/device.h>
++#include <linux/errno.h>
++#include <linux/module.h>
++#include <linux/mutex.h>
++#include <linux/platform_device.h>
++
++static char *x3d_mode = "frequency";
++module_param(x3d_mode, charp, 0444);
++MODULE_PARM_DESC(x3d_mode, "Initial 3D-VCache mode; 'frequency' (default) or 'cache'");
++
++#define DSM_REVISION_ID			0
++#define DSM_GET_FUNCS_SUPPORTED		0
++#define DSM_SET_X3D_MODE		1
++
++static guid_t x3d_guid = GUID_INIT(0xdff8e55f, 0xbcfd, 0x46fb, 0xba, 0x0a,
++				   0xef, 0xd0, 0x45, 0x0f, 0x34, 0xee);
++
++enum amd_x3d_mode_type {
++	MODE_INDEX_FREQ,
++	MODE_INDEX_CACHE,
++};
++
++static const char * const amd_x3d_mode_strings[] = {
++	[MODE_INDEX_FREQ] = "frequency",
++	[MODE_INDEX_CACHE] = "cache",
++};
++
++struct amd_x3d_dev {
++	struct device *dev;
++	acpi_handle ahandle;
++	/* To protect x3d mode setting */
++	struct mutex lock;
++	enum amd_x3d_mode_type curr_mode;
++};
++
++static int amd_x3d_mode_switch(struct amd_x3d_dev *data, int new_state)
++{
++	union acpi_object *out, argv;
++
++	guard(mutex)(&data->lock);
++	argv.type = ACPI_TYPE_INTEGER;
++	argv.integer.value = new_state;
++
++	out = acpi_evaluate_dsm(data->ahandle, &x3d_guid, DSM_REVISION_ID, DSM_SET_X3D_MODE,
++				&argv);
++	if (!out) {
++		dev_err(data->dev, "failed to evaluate _DSM\n");
++		return -EINVAL;
++	}
++
++	data->curr_mode = new_state;
++
++	ACPI_FREE(out);
++
++	return 0;
++}
++
++static ssize_t amd_x3d_mode_store(struct device *dev, struct device_attribute *attr,
++				  const char *buf, size_t count)
++{
++	struct amd_x3d_dev *data = dev_get_drvdata(dev);
++	int ret;
++
++	ret = sysfs_match_string(amd_x3d_mode_strings, buf);
++	if (ret < 0) {
++		dev_err(dev, "no matching mode to set %s\n", buf);
++		return ret;
++	}
++
++	ret = amd_x3d_mode_switch(data, ret);
++
++	return ret ? ret : count;
++}
++
++static ssize_t amd_x3d_mode_show(struct device *dev, struct device_attribute *attr, char *buf)
++{
++	struct amd_x3d_dev *data = dev_get_drvdata(dev);
++
++	if (data->curr_mode > MODE_INDEX_CACHE || data->curr_mode < MODE_INDEX_FREQ)
++		return -EINVAL;
++
++	return sysfs_emit(buf, "%s\n", amd_x3d_mode_strings[data->curr_mode]);
++}
++static DEVICE_ATTR_RW(amd_x3d_mode);
++
++static struct attribute *amd_x3d_attrs[] = {
++	&dev_attr_amd_x3d_mode.attr,
++	NULL
++};
++ATTRIBUTE_GROUPS(amd_x3d);
++
++static int amd_x3d_supported(struct amd_x3d_dev *data)
++{
++	union acpi_object *out;
++
++	out = acpi_evaluate_dsm(data->ahandle, &x3d_guid, DSM_REVISION_ID,
++				DSM_GET_FUNCS_SUPPORTED, NULL);
++	if (!out) {
++		dev_err(data->dev, "failed to evaluate _DSM\n");
++		return -ENODEV;
++	}
++
++	if (out->type != ACPI_TYPE_BUFFER) {
++		dev_err(data->dev, "invalid type %d\n", out->type);
++		ACPI_FREE(out);
++		return -EINVAL;
++	}
++
++	ACPI_FREE(out);
++	return 0;
++}
++
++static const struct acpi_device_id amd_x3d_acpi_ids[] = {
++	{"AMDI0101"},
++	{ },
++};
++MODULE_DEVICE_TABLE(acpi, amd_x3d_acpi_ids);
++
++static void amd_x3d_remove(void *context)
++{
++	struct amd_x3d_dev *data = context;
++
++	mutex_destroy(&data->lock);
++}
++
++static int amd_x3d_probe(struct platform_device *pdev)
++{
++	const struct acpi_device_id *id;
++	struct amd_x3d_dev *data;
++	acpi_handle handle;
++	int ret;
++
++	handle = ACPI_HANDLE(&pdev->dev);
++	if (!handle)
++		return -ENODEV;
++
++	id = acpi_match_device(amd_x3d_acpi_ids, &pdev->dev);
++	if (!id)
++		return dev_err_probe(&pdev->dev, -ENODEV, "unable to match ACPI ID and data\n");
++
++	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
++	if (!data)
++		return -ENOMEM;
++
++	data->dev = &pdev->dev;
++	data->ahandle = handle;
++	platform_set_drvdata(pdev, data);
++
++	ret = amd_x3d_supported(data);
++	if (ret)
++		return dev_err_probe(&pdev->dev, ret, "not supported on this platform\n");
++
++	ret = match_string(amd_x3d_mode_strings, ARRAY_SIZE(amd_x3d_mode_strings), x3d_mode);
++	if (ret < 0)
++		return dev_err_probe(&pdev->dev, -EINVAL, "invalid mode %s\n", x3d_mode);
++
++	mutex_init(&data->lock);
++
++	ret = amd_x3d_mode_switch(data, ret);
++	if (ret < 0)
++		return ret;
++
++	return devm_add_action_or_reset(&pdev->dev, amd_x3d_remove, data);
++}
++
++static struct platform_driver amd_3d_vcache_driver = {
++	.driver = {
++		.name = "amd_x3d_vcache",
++		.dev_groups = amd_x3d_groups,
++		.acpi_match_table = amd_x3d_acpi_ids,
++	},
++	.probe = amd_x3d_probe,
++};
++module_platform_driver(amd_3d_vcache_driver);
++
++MODULE_DESCRIPTION("AMD 3D V-Cache Performance Optimizer Driver");
++MODULE_LICENSE("GPL");
+-- 
+2.47.0.rc0
+
+From f6f25468febfb80b968b50dabfb3f5656c4524e8 Mon Sep 17 00:00:00 2001
+From: Peter Jung <admin@ptr1337.dev>
+Date: Fri, 25 Oct 2024 18:38:55 +0200
+Subject: [PATCH 03/13] amd-pstate
+
+Signed-off-by: Peter Jung <admin@ptr1337.dev>
+---
+ Documentation/admin-guide/pm/amd-pstate.rst |  15 +-
+ arch/x86/include/asm/cpufeatures.h          |   3 +-
+ arch/x86/include/asm/intel-family.h         |   6 +
+ arch/x86/include/asm/processor.h            |  21 +-
+ arch/x86/include/asm/topology.h             |   9 +
+ arch/x86/kernel/acpi/cppc.c                 | 195 +++++++++++++-
+ arch/x86/kernel/cpu/amd.c                   |  16 --
+ arch/x86/kernel/cpu/debugfs.c               |   1 +
+ arch/x86/kernel/cpu/scattered.c             |   3 +-
+ arch/x86/kernel/cpu/topology_amd.c          |   3 +
+ arch/x86/kernel/cpu/topology_common.c       |  34 +++
+ arch/x86/kernel/smpboot.c                   |   5 +-
+ drivers/cpufreq/acpi-cpufreq.c              |  12 +-
+ drivers/cpufreq/amd-pstate.c                | 265 +++++++-------------
+ include/acpi/cppc_acpi.h                    |  41 ++-
+ tools/arch/x86/include/asm/cpufeatures.h    |   2 +-
+ 16 files changed, 401 insertions(+), 230 deletions(-)
+
+diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
+index d0324d44f548..210a808b74ec 100644
+--- a/Documentation/admin-guide/pm/amd-pstate.rst
++++ b/Documentation/admin-guide/pm/amd-pstate.rst
+@@ -251,7 +251,9 @@ performance supported in `AMD CPPC Performance Capability <perf_cap_>`_).
+ In some ASICs, the highest CPPC performance is not the one in the ``_CPC``
+ table, so we need to expose it to sysfs. If boost is not active, but
+ still supported, this maximum frequency will be larger than the one in
+-``cpuinfo``.
++``cpuinfo``. On systems that support preferred core, the driver will have
++different values for some cores than others and this will reflect the values
++advertised by the platform at bootup.
+ This attribute is read-only.
+ 
+ ``amd_pstate_lowest_nonlinear_freq``
+@@ -262,6 +264,17 @@ lowest non-linear performance in `AMD CPPC Performance Capability
+ <perf_cap_>`_.)
+ This attribute is read-only.
+ 
++``amd_pstate_hw_prefcore``
++
++Whether the platform supports the preferred core feature and it has been
++enabled. This attribute is read-only.
++
++``amd_pstate_prefcore_ranking``
++
++The performance ranking of the core. This number doesn't have any unit, but
++larger numbers are preferred at the time of reading. This can change at
++runtime based on platform conditions. This attribute is read-only.
++
+ ``energy_performance_available_preferences``
+ 
+ A list of all the supported EPP preferences that could be used for
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 913fd3a7bac6..51b38bc66796 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -473,7 +473,8 @@
+ #define X86_FEATURE_BHI_CTRL		(21*32+ 2) /* BHI_DIS_S HW control available */
+ #define X86_FEATURE_CLEAR_BHB_HW	(21*32+ 3) /* BHI_DIS_S HW control enabled */
+ #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
+-#define X86_FEATURE_FAST_CPPC		(21*32 + 5) /* AMD Fast CPPC */
++#define X86_FEATURE_AMD_FAST_CPPC	(21*32 + 5) /* Fast CPPC */
++#define X86_FEATURE_AMD_HETEROGENEOUS_CORES	(21*32 + 6) /* Heterogeneous Core Topology */
+ 
+ /*
+  * BUG word(s)
+diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
+index f81a851c46dc..a44a3e026c4f 100644
+--- a/arch/x86/include/asm/intel-family.h
++++ b/arch/x86/include/asm/intel-family.h
+@@ -257,4 +257,10 @@
+ #define INTEL_FAM5_QUARK_X1000		0x09 /* Quark X1000 SoC */
+ #define INTEL_QUARK_X1000		IFM(5, 0x09) /* Quark X1000 SoC */
+ 
++/* CPU core types */
++enum intel_cpu_type {
++	INTEL_CPU_TYPE_ATOM = 0x20,
++	INTEL_CPU_TYPE_CORE = 0x40,
++};
++
+ #endif /* _ASM_X86_INTEL_FAMILY_H */
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index a75a07f4931f..e17f4d733e44 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -105,6 +105,24 @@ struct cpuinfo_topology {
+ 	// Cache level topology IDs
+ 	u32			llc_id;
+ 	u32			l2c_id;
++
++	// Hardware defined CPU-type
++	union {
++		u32		cpu_type;
++		struct {
++			// CPUID.1A.EAX[23-0]
++			u32	intel_native_model_id	:24;
++			// CPUID.1A.EAX[31-24]
++			u32	intel_type		:8;
++		};
++		struct {
++			// CPUID 0x80000026.EBX
++			u32	amd_num_processors	:16,
++				amd_power_eff_ranking	:8,
++				amd_native_model_id	:4,
++				amd_type		:4;
++		};
++	};
+ };
+ 
+ struct cpuinfo_x86 {
+@@ -691,8 +709,6 @@ static inline u32 per_cpu_l2c_id(unsigned int cpu)
+ }
+ 
+ #ifdef CONFIG_CPU_SUP_AMD
+-extern u32 amd_get_highest_perf(void);
+-
+ /*
+  * Issue a DIV 0/1 insn to clear any division data from previous DIV
+  * operations.
+@@ -705,7 +721,6 @@ static __always_inline void amd_clear_divider(void)
+ 
+ extern void amd_check_microcode(void);
+ #else
+-static inline u32 amd_get_highest_perf(void)		{ return 0; }
+ static inline void amd_clear_divider(void)		{ }
+ static inline void amd_check_microcode(void)		{ }
+ #endif
+diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
+index abe3a8f22cbd..94d9832a5bc8 100644
+--- a/arch/x86/include/asm/topology.h
++++ b/arch/x86/include/asm/topology.h
+@@ -114,6 +114,12 @@ enum x86_topology_domains {
+ 	TOPO_MAX_DOMAIN,
+ };
+ 
++enum x86_topology_cpu_type {
++	TOPO_CPU_TYPE_PERFORMANCE,
++	TOPO_CPU_TYPE_EFFICIENCY,
++	TOPO_CPU_TYPE_UNKNOWN,
++};
++
+ struct x86_topology_system {
+ 	unsigned int	dom_shifts[TOPO_MAX_DOMAIN];
+ 	unsigned int	dom_size[TOPO_MAX_DOMAIN];
+@@ -149,6 +155,9 @@ extern unsigned int __max_threads_per_core;
+ extern unsigned int __num_threads_per_package;
+ extern unsigned int __num_cores_per_package;
+ 
++const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c);
++enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c);
++
+ static inline unsigned int topology_max_packages(void)
+ {
+ 	return __max_logical_packages;
+diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c
+index ff8f25faca3d..59edf64ad9ed 100644
+--- a/arch/x86/kernel/acpi/cppc.c
++++ b/arch/x86/kernel/acpi/cppc.c
+@@ -9,6 +9,17 @@
+ #include <asm/processor.h>
+ #include <asm/topology.h>
+ 
++#define CPPC_HIGHEST_PERF_PERFORMANCE	196
++#define CPPC_HIGHEST_PERF_PREFCORE	166
++
++enum amd_pref_core {
++	AMD_PREF_CORE_UNKNOWN = 0,
++	AMD_PREF_CORE_SUPPORTED,
++	AMD_PREF_CORE_UNSUPPORTED,
++};
++static enum amd_pref_core amd_pref_core_detected;
++static u64 boost_numerator;
++
+ /* Refer to drivers/acpi/cppc_acpi.c for the description of functions */
+ 
+ bool cpc_supported_by_cpu(void)
+@@ -69,31 +80,30 @@ int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
+ static void amd_set_max_freq_ratio(void)
+ {
+ 	struct cppc_perf_caps perf_caps;
+-	u64 highest_perf, nominal_perf;
++	u64 numerator, nominal_perf;
+ 	u64 perf_ratio;
+ 	int rc;
+ 
+ 	rc = cppc_get_perf_caps(0, &perf_caps);
+ 	if (rc) {
+-		pr_debug("Could not retrieve perf counters (%d)\n", rc);
++		pr_warn("Could not retrieve perf counters (%d)\n", rc);
+ 		return;
+ 	}
+ 
+-	highest_perf = amd_get_highest_perf();
++	rc = amd_get_boost_ratio_numerator(0, &numerator);
++	if (rc) {
++		pr_warn("Could not retrieve highest performance (%d)\n", rc);
++		return;
++	}
+ 	nominal_perf = perf_caps.nominal_perf;
+ 
+-	if (!highest_perf || !nominal_perf) {
+-		pr_debug("Could not retrieve highest or nominal performance\n");
++	if (!nominal_perf) {
++		pr_warn("Could not retrieve nominal performance\n");
+ 		return;
+ 	}
+ 
+-	perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
+ 	/* midpoint between max_boost and max_P */
+-	perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
+-	if (!perf_ratio) {
+-		pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
+-		return;
+-	}
++	perf_ratio = (div_u64(numerator * SCHED_CAPACITY_SCALE, nominal_perf) + SCHED_CAPACITY_SCALE) >> 1;
+ 
+ 	freq_invariance_set_perf_ratio(perf_ratio, false);
+ }
+@@ -116,3 +126,166 @@ void init_freq_invariance_cppc(void)
+ 	init_done = true;
+ 	mutex_unlock(&freq_invariance_lock);
+ }
++
++/*
++ * Get the highest performance register value.
++ * @cpu: CPU from which to get highest performance.
++ * @highest_perf: Return address for highest performance value.
++ *
++ * Return: 0 for success, negative error code otherwise.
++ */
++int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf)
++{
++	u64 val;
++	int ret;
++
++	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
++		ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &val);
++		if (ret)
++			goto out;
++
++		val = AMD_CPPC_HIGHEST_PERF(val);
++	} else {
++		ret = cppc_get_highest_perf(cpu, &val);
++		if (ret)
++			goto out;
++	}
++
++	WRITE_ONCE(*highest_perf, (u32)val);
++out:
++	return ret;
++}
++EXPORT_SYMBOL_GPL(amd_get_highest_perf);
++
++/**
++ * amd_detect_prefcore: Detect if CPUs in the system support preferred cores
++ * @detected: Output variable for the result of the detection.
++ *
++ * Determine whether CPUs in the system support preferred cores. On systems
++ * that support preferred cores, different highest perf values will be found
++ * on different cores. On other systems, the highest perf value will be the
++ * same on all cores.
++ *
++ * The result of the detection will be stored in the 'detected' parameter.
++ *
++ * Return: 0 for success, negative error code otherwise
++ */
++int amd_detect_prefcore(bool *detected)
++{
++	int cpu, count = 0;
++	u64 highest_perf[2] = {0};
++
++	if (WARN_ON(!detected))
++		return -EINVAL;
++
++	switch (amd_pref_core_detected) {
++	case AMD_PREF_CORE_SUPPORTED:
++		*detected = true;
++		return 0;
++	case AMD_PREF_CORE_UNSUPPORTED:
++		*detected = false;
++		return 0;
++	default:
++		break;
++	}
++
++	for_each_present_cpu(cpu) {
++		u32 tmp;
++		int ret;
++
++		ret = amd_get_highest_perf(cpu, &tmp);
++		if (ret)
++			return ret;
++
++		if (!count || (count == 1 && tmp != highest_perf[0]))
++			highest_perf[count++] = tmp;
++
++		if (count == 2)
++			break;
++	}
++
++	*detected = (count == 2);
++	boost_numerator = highest_perf[0];
++
++	amd_pref_core_detected = *detected ? AMD_PREF_CORE_SUPPORTED :
++					     AMD_PREF_CORE_UNSUPPORTED;
++
++	pr_debug("AMD CPPC preferred core is %ssupported (highest perf: 0x%llx)\n",
++		 *detected ? "" : "un", highest_perf[0]);
++
++	return 0;
++}
++EXPORT_SYMBOL_GPL(amd_detect_prefcore);
++
++/**
++ * amd_get_boost_ratio_numerator: Get the numerator to use for boost ratio calculation
++ * @cpu: CPU to get numerator for.
++ * @numerator: Output variable for numerator.
++ *
++ * Determine the numerator to use for calculating the boost ratio on
++ * a CPU. On systems that support preferred cores, this will be a hardcoded
++ * value. On other systems this will be the highest performance register value.
++ *
++ * If booting the system with amd-pstate enabled but preferred cores disabled then
++ * the correct boost numerator will be returned to match hardware capabilities
++ * even if the preferred cores scheduling hints are not enabled.
++ *
++ * Return: 0 for success, negative error code otherwise.
++ */
++int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator)
++{
++	enum x86_topology_cpu_type core_type = get_topology_cpu_type(&cpu_data(cpu));
++	bool prefcore;
++	int ret;
++	u32 tmp;
++
++	ret = amd_detect_prefcore(&prefcore);
++	if (ret)
++		return ret;
++
++	/* without preferred cores, return the highest perf register value */
++	if (!prefcore) {
++		*numerator = boost_numerator;
++		return 0;
++	}
++
++	/*
++	 * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f,
++	 * the highest performance level is set to 196.
++	 * https://bugzilla.kernel.org/show_bug.cgi?id=218759
++	 */
++	if (cpu_feature_enabled(X86_FEATURE_ZEN4)) {
++		switch (boot_cpu_data.x86_model) {
++		case 0x70 ... 0x7f:
++			*numerator = CPPC_HIGHEST_PERF_PERFORMANCE;
++			return 0;
++		default:
++			break;
++		}
++	}
++
++	/* detect if running on heterogeneous design */
++	if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) {
++		switch (core_type) {
++		case TOPO_CPU_TYPE_UNKNOWN:
++			pr_warn("Undefined core type found for cpu %d\n", cpu);
++			break;
++		case TOPO_CPU_TYPE_PERFORMANCE:
++			/* use the max scale for performance cores */
++			*numerator = CPPC_HIGHEST_PERF_PERFORMANCE;
++			return 0;
++		case TOPO_CPU_TYPE_EFFICIENCY:
++			/* use the highest perf value for efficiency cores */
++			ret = amd_get_highest_perf(cpu, &tmp);
++			if (ret)
++				return ret;
++			*numerator = tmp;
++			return 0;
++		}
++	}
++
++	*numerator = CPPC_HIGHEST_PERF_PREFCORE;
++
++	return 0;
++}
++EXPORT_SYMBOL_GPL(amd_get_boost_ratio_numerator);
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index f01b72052f79..fab5caec0b72 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -1190,22 +1190,6 @@ unsigned long amd_get_dr_addr_mask(unsigned int dr)
+ }
+ EXPORT_SYMBOL_GPL(amd_get_dr_addr_mask);
+ 
+-u32 amd_get_highest_perf(void)
+-{
+-	struct cpuinfo_x86 *c = &boot_cpu_data;
+-
+-	if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) ||
+-			       (c->x86_model >= 0x70 && c->x86_model < 0x80)))
+-		return 166;
+-
+-	if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) ||
+-			       (c->x86_model >= 0x40 && c->x86_model < 0x70)))
+-		return 166;
+-
+-	return 255;
+-}
+-EXPORT_SYMBOL_GPL(amd_get_highest_perf);
+-
+ static void zenbleed_check_cpu(void *unused)
+ {
+ 	struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c
+index 3baf3e435834..10719aba6276 100644
+--- a/arch/x86/kernel/cpu/debugfs.c
++++ b/arch/x86/kernel/cpu/debugfs.c
+@@ -22,6 +22,7 @@ static int cpu_debug_show(struct seq_file *m, void *p)
+ 	seq_printf(m, "die_id:              %u\n", c->topo.die_id);
+ 	seq_printf(m, "cu_id:               %u\n", c->topo.cu_id);
+ 	seq_printf(m, "core_id:             %u\n", c->topo.core_id);
++	seq_printf(m, "cpu_type:            %s\n", get_topology_cpu_type_name(c));
+ 	seq_printf(m, "logical_pkg_id:      %u\n", c->topo.logical_pkg_id);
+ 	seq_printf(m, "logical_die_id:      %u\n", c->topo.logical_die_id);
+ 	seq_printf(m, "llc_id:              %u\n", c->topo.llc_id);
+diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
+index c84c30188fdf..307a91741534 100644
+--- a/arch/x86/kernel/cpu/scattered.c
++++ b/arch/x86/kernel/cpu/scattered.c
+@@ -45,13 +45,14 @@ static const struct cpuid_bit cpuid_bits[] = {
+ 	{ X86_FEATURE_HW_PSTATE,	CPUID_EDX,  7, 0x80000007, 0 },
+ 	{ X86_FEATURE_CPB,		CPUID_EDX,  9, 0x80000007, 0 },
+ 	{ X86_FEATURE_PROC_FEEDBACK,    CPUID_EDX, 11, 0x80000007, 0 },
+-	{ X86_FEATURE_FAST_CPPC, 	CPUID_EDX, 15, 0x80000007, 0 },
++	{ X86_FEATURE_AMD_FAST_CPPC,	CPUID_EDX, 15, 0x80000007, 0 },
+ 	{ X86_FEATURE_MBA,		CPUID_EBX,  6, 0x80000008, 0 },
+ 	{ X86_FEATURE_SMBA,		CPUID_EBX,  2, 0x80000020, 0 },
+ 	{ X86_FEATURE_BMEC,		CPUID_EBX,  3, 0x80000020, 0 },
+ 	{ X86_FEATURE_PERFMON_V2,	CPUID_EAX,  0, 0x80000022, 0 },
+ 	{ X86_FEATURE_AMD_LBR_V2,	CPUID_EAX,  1, 0x80000022, 0 },
+ 	{ X86_FEATURE_AMD_LBR_PMC_FREEZE,	CPUID_EAX,  2, 0x80000022, 0 },
++	{ X86_FEATURE_AMD_HETEROGENEOUS_CORES,	CPUID_EAX,  30, 0x80000026, 0 },
+ 	{ 0, 0, 0, 0, 0 }
+ };
+ 
+diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c
+index 7d476fa697ca..03b3c9c3a45e 100644
+--- a/arch/x86/kernel/cpu/topology_amd.c
++++ b/arch/x86/kernel/cpu/topology_amd.c
+@@ -182,6 +182,9 @@ static void parse_topology_amd(struct topo_scan *tscan)
+ 	if (cpu_feature_enabled(X86_FEATURE_TOPOEXT))
+ 		has_topoext = cpu_parse_topology_ext(tscan);
+ 
++	if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES))
++		tscan->c->topo.cpu_type = cpuid_ebx(0x80000026);
++
+ 	if (!has_topoext && !parse_8000_0008(tscan))
+ 		return;
+ 
+diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c
+index 9a6069e7133c..8277c64f88db 100644
+--- a/arch/x86/kernel/cpu/topology_common.c
++++ b/arch/x86/kernel/cpu/topology_common.c
+@@ -3,6 +3,7 @@
+ 
+ #include <xen/xen.h>
+ 
++#include <asm/intel-family.h>
+ #include <asm/apic.h>
+ #include <asm/processor.h>
+ #include <asm/smp.h>
+@@ -27,6 +28,36 @@ void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
+ 	}
+ }
+ 
++enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c)
++{
++	if (c->x86_vendor == X86_VENDOR_INTEL) {
++		switch (c->topo.intel_type) {
++		case INTEL_CPU_TYPE_ATOM: return TOPO_CPU_TYPE_EFFICIENCY;
++		case INTEL_CPU_TYPE_CORE: return TOPO_CPU_TYPE_PERFORMANCE;
++		}
++	}
++	if (c->x86_vendor == X86_VENDOR_AMD) {
++		switch (c->topo.amd_type) {
++		case 0:	return TOPO_CPU_TYPE_PERFORMANCE;
++		case 1:	return TOPO_CPU_TYPE_EFFICIENCY;
++		}
++	}
++
++	return TOPO_CPU_TYPE_UNKNOWN;
++}
++
++const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c)
++{
++	switch (get_topology_cpu_type(c)) {
++	case TOPO_CPU_TYPE_PERFORMANCE:
++		return "performance";
++	case TOPO_CPU_TYPE_EFFICIENCY:
++		return "efficiency";
++	default:
++		return "unknown";
++	}
++}
++
+ static unsigned int __maybe_unused parse_num_cores_legacy(struct cpuinfo_x86 *c)
+ {
+ 	struct {
+@@ -87,6 +118,7 @@ static void parse_topology(struct topo_scan *tscan, bool early)
+ 		.cu_id			= 0xff,
+ 		.llc_id			= BAD_APICID,
+ 		.l2c_id			= BAD_APICID,
++		.cpu_type		= TOPO_CPU_TYPE_UNKNOWN,
+ 	};
+ 	struct cpuinfo_x86 *c = tscan->c;
+ 	struct {
+@@ -132,6 +164,8 @@ static void parse_topology(struct topo_scan *tscan, bool early)
+ 	case X86_VENDOR_INTEL:
+ 		if (!IS_ENABLED(CONFIG_CPU_SUP_INTEL) || !cpu_parse_topology_ext(tscan))
+ 			parse_legacy(tscan);
++		if (c->cpuid_level >= 0x1a)
++			c->topo.cpu_type = cpuid_eax(0x1a);
+ 		break;
+ 	case X86_VENDOR_HYGON:
+ 		if (IS_ENABLED(CONFIG_CPU_SUP_HYGON))
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index 390e4fe7433e..9ee84f58f3b4 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -497,8 +497,9 @@ static int x86_cluster_flags(void)
+ 
+ static int x86_die_flags(void)
+ {
+-	if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
+-	       return x86_sched_itmt_flags();
++	if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU) ||
++	    cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES))
++		return x86_sched_itmt_flags();
+ 
+ 	return 0;
+ }
+diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
+index a8ca625a98b8..0f04feb6cafa 100644
+--- a/drivers/cpufreq/acpi-cpufreq.c
++++ b/drivers/cpufreq/acpi-cpufreq.c
+@@ -642,10 +642,16 @@ static u64 get_max_boost_ratio(unsigned int cpu)
+ 		return 0;
+ 	}
+ 
+-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+-		highest_perf = amd_get_highest_perf();
+-	else
++	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
++		ret = amd_get_boost_ratio_numerator(cpu, &highest_perf);
++		if (ret) {
++			pr_debug("CPU%d: Unable to get boost ratio numerator (%d)\n",
++				 cpu, ret);
++			return 0;
++		}
++	} else {
+ 		highest_perf = perf_caps.highest_perf;
++	}
+ 
+ 	nominal_perf = perf_caps.nominal_perf;
+ 
+diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
+index 589fde37ccd7..fb0a72ccff79 100644
+--- a/drivers/cpufreq/amd-pstate.c
++++ b/drivers/cpufreq/amd-pstate.c
+@@ -52,8 +52,6 @@
+ #define AMD_PSTATE_TRANSITION_LATENCY	20000
+ #define AMD_PSTATE_TRANSITION_DELAY	1000
+ #define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY 600
+-#define CPPC_HIGHEST_PERF_PERFORMANCE	196
+-#define CPPC_HIGHEST_PERF_DEFAULT	166
+ 
+ #define AMD_CPPC_EPP_PERFORMANCE		0x00
+ #define AMD_CPPC_EPP_BALANCE_PERFORMANCE	0x80
+@@ -239,7 +237,7 @@ static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
+ 	return index;
+ }
+ 
+-static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
++static void msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
+ 			       u32 des_perf, u32 max_perf, bool fast_switch)
+ {
+ 	if (fast_switch)
+@@ -249,7 +247,7 @@ static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
+ 			      READ_ONCE(cpudata->cppc_req_cached));
+ }
+ 
+-DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf);
++DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf);
+ 
+ static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
+ 					  u32 min_perf, u32 des_perf,
+@@ -312,7 +310,7 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
+ 	return ret;
+ }
+ 
+-static inline int pstate_enable(bool enable)
++static inline int msr_enable(bool enable)
+ {
+ 	int ret, cpu;
+ 	unsigned long logical_proc_id_mask = 0;
+@@ -338,7 +336,7 @@ static inline int pstate_enable(bool enable)
+ 	return 0;
+ }
+ 
+-static int cppc_enable(bool enable)
++static int shmem_enable(bool enable)
+ {
+ 	int cpu, ret = 0;
+ 	struct cppc_perf_ctrls perf_ctrls;
+@@ -365,50 +363,24 @@ static int cppc_enable(bool enable)
+ 	return ret;
+ }
+ 
+-DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable);
++DEFINE_STATIC_CALL(amd_pstate_enable, msr_enable);
+ 
+ static inline int amd_pstate_enable(bool enable)
+ {
+ 	return static_call(amd_pstate_enable)(enable);
+ }
+ 
+-static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
+-{
+-	struct cpuinfo_x86 *c = &cpu_data(0);
+-
+-	/*
+-	 * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f,
+-	 * the highest performance level is set to 196.
+-	 * https://bugzilla.kernel.org/show_bug.cgi?id=218759
+-	 */
+-	if (c->x86 == 0x19 && (c->x86_model >= 0x70 && c->x86_model <= 0x7f))
+-		return CPPC_HIGHEST_PERF_PERFORMANCE;
+-
+-	return CPPC_HIGHEST_PERF_DEFAULT;
+-}
+-
+-static int pstate_init_perf(struct amd_cpudata *cpudata)
++static int msr_init_perf(struct amd_cpudata *cpudata)
+ {
+ 	u64 cap1;
+-	u32 highest_perf;
+ 
+ 	int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
+ 				     &cap1);
+ 	if (ret)
+ 		return ret;
+ 
+-	/* For platforms that do not support the preferred core feature, the
+-	 * highest_pef may be configured with 166 or 255, to avoid max frequency
+-	 * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as
+-	 * the default max perf.
+-	 */
+-	if (cpudata->hw_prefcore)
+-		highest_perf = amd_pstate_highest_perf_set(cpudata);
+-	else
+-		highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
+-
+-	WRITE_ONCE(cpudata->highest_perf, highest_perf);
+-	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
++	WRITE_ONCE(cpudata->highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
++	WRITE_ONCE(cpudata->max_limit_perf, AMD_CPPC_HIGHEST_PERF(cap1));
+ 	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
+ 	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
+ 	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
+@@ -417,22 +389,16 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
+ 	return 0;
+ }
+ 
+-static int cppc_init_perf(struct amd_cpudata *cpudata)
++static int shmem_init_perf(struct amd_cpudata *cpudata)
+ {
+ 	struct cppc_perf_caps cppc_perf;
+-	u32 highest_perf;
+ 
+ 	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
+ 	if (ret)
+ 		return ret;
+ 
+-	if (cpudata->hw_prefcore)
+-		highest_perf = amd_pstate_highest_perf_set(cpudata);
+-	else
+-		highest_perf = cppc_perf.highest_perf;
+-
+-	WRITE_ONCE(cpudata->highest_perf, highest_perf);
+-	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
++	WRITE_ONCE(cpudata->highest_perf, cppc_perf.highest_perf);
++	WRITE_ONCE(cpudata->max_limit_perf, cppc_perf.highest_perf);
+ 	WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
+ 	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
+ 		   cppc_perf.lowest_nonlinear_perf);
+@@ -458,14 +424,14 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
+ 	return ret;
+ }
+ 
+-DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf);
++DEFINE_STATIC_CALL(amd_pstate_init_perf, msr_init_perf);
+ 
+ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
+ {
+ 	return static_call(amd_pstate_init_perf)(cpudata);
+ }
+ 
+-static void cppc_update_perf(struct amd_cpudata *cpudata,
++static void shmem_update_perf(struct amd_cpudata *cpudata,
+ 			     u32 min_perf, u32 des_perf,
+ 			     u32 max_perf, bool fast_switch)
+ {
+@@ -565,20 +531,44 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
+ 	cpufreq_cpu_put(policy);
+ }
+ 
+-static int amd_pstate_verify(struct cpufreq_policy_data *policy)
++static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
+ {
+-	cpufreq_verify_within_cpu_limits(policy);
++	/*
++	 * Initialize lower frequency limit (i.e. policy->min) with
++	 * lowest_nonlinear_frequency which is the most energy efficient
++	 * frequency. Override the initial value set by cpufreq core and
++	 * amd-pstate qos_requests.
++	 */
++	if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
++		struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu);
++		struct amd_cpudata *cpudata;
++
++		if (!policy)
++			return -EINVAL;
++
++		cpudata = policy->driver_data;
++		policy_data->min = cpudata->lowest_nonlinear_freq;
++		cpufreq_cpu_put(policy);
++	}
++
++	cpufreq_verify_within_cpu_limits(policy_data);
++	pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min);
+ 
+ 	return 0;
+ }
+ 
+ static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
+ {
+-	u32 max_limit_perf, min_limit_perf, lowest_perf;
++	u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf;
+ 	struct amd_cpudata *cpudata = policy->driver_data;
+ 
+-	max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
+-	min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
++	if (cpudata->boost_supported && !policy->boost_enabled)
++		max_perf = READ_ONCE(cpudata->nominal_perf);
++	else
++		max_perf = READ_ONCE(cpudata->highest_perf);
++
++	max_limit_perf = div_u64(policy->max * max_perf, policy->cpuinfo.max_freq);
++	min_limit_perf = div_u64(policy->min * max_perf, policy->cpuinfo.max_freq);
+ 
+ 	lowest_perf = READ_ONCE(cpudata->lowest_perf);
+ 	if (min_limit_perf < lowest_perf)
+@@ -659,12 +649,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
+ 	unsigned long max_perf, min_perf, des_perf,
+ 		      cap_perf, lowest_nonlinear_perf;
+ 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+-	struct amd_cpudata *cpudata;
+-
+-	if (!policy)
+-		return;
+-
+-	cpudata = policy->driver_data;
++	struct amd_cpudata *cpudata = policy->driver_data;
+ 
+ 	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
+ 		amd_pstate_update_min_max_limit(policy);
+@@ -698,34 +683,12 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
+ static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
+ {
+ 	struct amd_cpudata *cpudata = policy->driver_data;
+-	struct cppc_perf_ctrls perf_ctrls;
+-	u32 highest_perf, nominal_perf, nominal_freq, max_freq;
++	u32 nominal_freq, max_freq;
+ 	int ret = 0;
+ 
+-	highest_perf = READ_ONCE(cpudata->highest_perf);
+-	nominal_perf = READ_ONCE(cpudata->nominal_perf);
+ 	nominal_freq = READ_ONCE(cpudata->nominal_freq);
+ 	max_freq = READ_ONCE(cpudata->max_freq);
+ 
+-	if (boot_cpu_has(X86_FEATURE_CPPC)) {
+-		u64 value = READ_ONCE(cpudata->cppc_req_cached);
+-
+-		value &= ~GENMASK_ULL(7, 0);
+-		value |= on ? highest_perf : nominal_perf;
+-		WRITE_ONCE(cpudata->cppc_req_cached, value);
+-
+-		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+-	} else {
+-		perf_ctrls.max_perf = on ? highest_perf : nominal_perf;
+-		ret = cppc_set_perf(cpudata->cpu, &perf_ctrls);
+-		if (ret) {
+-			cpufreq_cpu_release(policy);
+-			pr_debug("Failed to set max perf on CPU:%d. ret:%d\n",
+-				cpudata->cpu, ret);
+-			return ret;
+-		}
+-	}
+-
+ 	if (on)
+ 		policy->cpuinfo.max_freq = max_freq;
+ 	else if (policy->cpuinfo.max_freq > nominal_freq * 1000)
+@@ -811,66 +774,22 @@ static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
+ }
+ static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
+ 
+-/*
+- * Get the highest performance register value.
+- * @cpu: CPU from which to get highest performance.
+- * @highest_perf: Return address.
+- *
+- * Return: 0 for success, -EIO otherwise.
+- */
+-static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf)
+-{
+-	int ret;
+-
+-	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
+-		u64 cap1;
+-
+-		ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
+-		if (ret)
+-			return ret;
+-		WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
+-	} else {
+-		u64 cppc_highest_perf;
+-
+-		ret = cppc_get_highest_perf(cpu, &cppc_highest_perf);
+-		if (ret)
+-			return ret;
+-		WRITE_ONCE(*highest_perf, cppc_highest_perf);
+-	}
+-
+-	return (ret);
+-}
+-
+ #define CPPC_MAX_PERF	U8_MAX
+ 
+ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
+ {
+-	int ret, prio;
+-	u32 highest_perf;
+-
+-	ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf);
+-	if (ret)
++	/* user disabled or not detected */
++	if (!amd_pstate_prefcore)
+ 		return;
+ 
+ 	cpudata->hw_prefcore = true;
+-	/* check if CPPC preferred core feature is enabled*/
+-	if (highest_perf < CPPC_MAX_PERF)
+-		prio = (int)highest_perf;
+-	else {
+-		pr_debug("AMD CPPC preferred core is unsupported!\n");
+-		cpudata->hw_prefcore = false;
+-		return;
+-	}
+-
+-	if (!amd_pstate_prefcore)
+-		return;
+ 
+ 	/*
+ 	 * The priorities can be set regardless of whether or not
+ 	 * sched_set_itmt_support(true) has been called and it is valid to
+ 	 * update them at any time after it has been called.
+ 	 */
+-	sched_set_itmt_core_prio(prio, cpudata->cpu);
++	sched_set_itmt_core_prio((int)READ_ONCE(cpudata->highest_perf), cpudata->cpu);
+ 
+ 	schedule_work(&sched_prefcore_work);
+ }
+@@ -878,27 +797,22 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
+ static void amd_pstate_update_limits(unsigned int cpu)
+ {
+ 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+-	struct amd_cpudata *cpudata;
++	struct amd_cpudata *cpudata = policy->driver_data;
+ 	u32 prev_high = 0, cur_high = 0;
+ 	int ret;
+ 	bool highest_perf_changed = false;
+ 
+-	if (!policy)
++	if (!amd_pstate_prefcore)
+ 		return;
+ 
+-	cpudata = policy->driver_data;
+-
+ 	mutex_lock(&amd_pstate_driver_lock);
+-	if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
+-		goto free_cpufreq_put;
+-
+-	ret = amd_pstate_get_highest_perf(cpu, &cur_high);
++	ret = amd_get_highest_perf(cpu, &cur_high);
+ 	if (ret)
+ 		goto free_cpufreq_put;
+ 
+ 	prev_high = READ_ONCE(cpudata->prefcore_ranking);
+-	if (prev_high != cur_high) {
+-		highest_perf_changed = true;
++	highest_perf_changed = (prev_high != cur_high);
++	if (highest_perf_changed) {
+ 		WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
+ 
+ 		if (cur_high < CPPC_MAX_PERF)
+@@ -924,7 +838,7 @@ static u32 amd_pstate_get_transition_delay_us(unsigned int cpu)
+ 
+ 	transition_delay_ns = cppc_get_transition_latency(cpu);
+ 	if (transition_delay_ns == CPUFREQ_ETERNAL) {
+-		if (cpu_feature_enabled(X86_FEATURE_FAST_CPPC))
++		if (cpu_feature_enabled(X86_FEATURE_AMD_FAST_CPPC))
+ 			return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY;
+ 		else
+ 			return AMD_PSTATE_TRANSITION_DELAY;
+@@ -962,8 +876,8 @@ static u32 amd_pstate_get_transition_latency(unsigned int cpu)
+ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
+ {
+ 	int ret;
+-	u32 min_freq;
+-	u32 highest_perf, max_freq;
++	u32 min_freq, max_freq;
++	u64 numerator;
+ 	u32 nominal_perf, nominal_freq;
+ 	u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
+ 	u32 boost_ratio, lowest_nonlinear_ratio;
+@@ -985,8 +899,10 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
+ 
+ 	nominal_perf = READ_ONCE(cpudata->nominal_perf);
+ 
+-	highest_perf = READ_ONCE(cpudata->highest_perf);
+-	boost_ratio = div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
++	ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator);
++	if (ret)
++		return ret;
++	boost_ratio = div_u64(numerator << SCHED_CAPACITY_SHIFT, nominal_perf);
+ 	max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
+ 
+ 	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
+@@ -1041,12 +957,12 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
+ 
+ 	cpudata->cpu = policy->cpu;
+ 
+-	amd_pstate_init_prefcore(cpudata);
+-
+ 	ret = amd_pstate_init_perf(cpudata);
+ 	if (ret)
+ 		goto free_cpudata1;
+ 
++	amd_pstate_init_prefcore(cpudata);
++
+ 	ret = amd_pstate_init_freq(cpudata);
+ 	if (ret)
+ 		goto free_cpudata1;
+@@ -1076,7 +992,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
+ 		policy->fast_switch_possible = true;
+ 
+ 	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
+-				   FREQ_QOS_MIN, policy->cpuinfo.min_freq);
++				   FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE);
+ 	if (ret < 0) {
+ 		dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
+ 		goto free_cpudata1;
+@@ -1281,11 +1197,21 @@ static int amd_pstate_register_driver(int mode)
+ 		return -EINVAL;
+ 
+ 	cppc_state = mode;
++
++	ret = amd_pstate_enable(true);
++	if (ret) {
++		pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n",
++		       ret);
++		amd_pstate_driver_cleanup();
++		return ret;
++	}
++
+ 	ret = cpufreq_register_driver(current_pstate_driver);
+ 	if (ret) {
+ 		amd_pstate_driver_cleanup();
+ 		return ret;
+ 	}
++
+ 	return 0;
+ }
+ 
+@@ -1496,12 +1422,12 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
+ 	cpudata->cpu = policy->cpu;
+ 	cpudata->epp_policy = 0;
+ 
+-	amd_pstate_init_prefcore(cpudata);
+-
+ 	ret = amd_pstate_init_perf(cpudata);
+ 	if (ret)
+ 		goto free_cpudata1;
+ 
++	amd_pstate_init_prefcore(cpudata);
++
+ 	ret = amd_pstate_init_freq(cpudata);
+ 	if (ret)
+ 		goto free_cpudata1;
+@@ -1571,23 +1497,13 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
+ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
+ {
+ 	struct amd_cpudata *cpudata = policy->driver_data;
+-	u32 max_perf, min_perf, min_limit_perf, max_limit_perf;
++	u32 max_perf, min_perf;
+ 	u64 value;
+ 	s16 epp;
+ 
+ 	max_perf = READ_ONCE(cpudata->highest_perf);
+ 	min_perf = READ_ONCE(cpudata->lowest_perf);
+-	max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
+-	min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
+-
+-	if (min_limit_perf < min_perf)
+-		min_limit_perf = min_perf;
+-
+-	if (max_limit_perf < min_limit_perf)
+-		max_limit_perf = min_limit_perf;
+-
+-	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
+-	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
++	amd_pstate_update_min_max_limit(policy);
+ 
+ 	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
+ 			cpudata->max_limit_perf);
+@@ -1624,12 +1540,6 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
+ 	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
+ 		epp = 0;
+ 
+-	/* Set initial EPP value */
+-	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
+-		value &= ~GENMASK_ULL(31, 24);
+-		value |= (u64)epp << 24;
+-	}
+-
+ 	WRITE_ONCE(cpudata->cppc_req_cached, value);
+ 	amd_pstate_set_epp(cpudata, epp);
+ }
+@@ -1737,13 +1647,6 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
+ 	return 0;
+ }
+ 
+-static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
+-{
+-	cpufreq_verify_within_cpu_limits(policy);
+-	pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
+-	return 0;
+-}
+-
+ static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
+ {
+ 	struct amd_cpudata *cpudata = policy->driver_data;
+@@ -1799,7 +1702,7 @@ static struct cpufreq_driver amd_pstate_driver = {
+ 
+ static struct cpufreq_driver amd_pstate_epp_driver = {
+ 	.flags		= CPUFREQ_CONST_LOOPS,
+-	.verify		= amd_pstate_epp_verify_policy,
++	.verify		= amd_pstate_verify,
+ 	.setpolicy	= amd_pstate_epp_set_policy,
+ 	.init		= amd_pstate_epp_cpu_init,
+ 	.exit		= amd_pstate_epp_cpu_exit,
+@@ -1832,7 +1735,7 @@ static int __init amd_pstate_set_driver(int mode_idx)
+ 	return -EINVAL;
+ }
+ 
+-/**
++/*
+  * CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F.
+  * show the debug message that helps to check if the CPU has CPPC support for loading issue.
+  */
+@@ -1955,9 +1858,15 @@ static int __init amd_pstate_init(void)
+ 			current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
+ 	} else {
+ 		pr_debug("AMD CPPC shared memory based functionality is supported\n");
+-		static_call_update(amd_pstate_enable, cppc_enable);
+-		static_call_update(amd_pstate_init_perf, cppc_init_perf);
+-		static_call_update(amd_pstate_update_perf, cppc_update_perf);
++		static_call_update(amd_pstate_enable, shmem_enable);
++		static_call_update(amd_pstate_init_perf, shmem_init_perf);
++		static_call_update(amd_pstate_update_perf, shmem_update_perf);
++	}
++
++	if (amd_pstate_prefcore) {
++		ret = amd_detect_prefcore(&amd_pstate_prefcore);
++		if (ret)
++			return ret;
+ 	}
+ 
+ 	/* enable amd pstate feature */
+diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
+index e1720d930666..76e44e102780 100644
+--- a/include/acpi/cppc_acpi.h
++++ b/include/acpi/cppc_acpi.h
+@@ -161,34 +161,37 @@ extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf);
+ extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable);
+ extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps);
+ extern int cppc_set_auto_sel(int cpu, bool enable);
++extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf);
++extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator);
++extern int amd_detect_prefcore(bool *detected);
+ #else /* !CONFIG_ACPI_CPPC_LIB */
+ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
+ {
+-	return -ENOTSUPP;
++	return -EOPNOTSUPP;
+ }
+ static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
+ {
+-	return -ENOTSUPP;
++	return -EOPNOTSUPP;
+ }
+ static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
+ {
+-	return -ENOTSUPP;
++	return -EOPNOTSUPP;
+ }
+ static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
+ {
+-	return -ENOTSUPP;
++	return -EOPNOTSUPP;
+ }
+ static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
+ {
+-	return -ENOTSUPP;
++	return -EOPNOTSUPP;
+ }
+ static inline int cppc_set_enable(int cpu, bool enable)
+ {
+-	return -ENOTSUPP;
++	return -EOPNOTSUPP;
+ }
+ static inline int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps)
+ {
+-	return -ENOTSUPP;
++	return -EOPNOTSUPP;
+ }
+ static inline bool cppc_perf_ctrs_in_pcc(void)
+ {
+@@ -212,27 +215,39 @@ static inline bool cpc_ffh_supported(void)
+ }
+ static inline int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val)
+ {
+-	return -ENOTSUPP;
++	return -EOPNOTSUPP;
+ }
+ static inline int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
+ {
+-	return -ENOTSUPP;
++	return -EOPNOTSUPP;
+ }
+ static inline int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable)
+ {
+-	return -ENOTSUPP;
++	return -EOPNOTSUPP;
+ }
+ static inline int cppc_get_epp_perf(int cpunum, u64 *epp_perf)
+ {
+-	return -ENOTSUPP;
++	return -EOPNOTSUPP;
+ }
+ static inline int cppc_set_auto_sel(int cpu, bool enable)
+ {
+-	return -ENOTSUPP;
++	return -EOPNOTSUPP;
+ }
+ static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps)
+ {
+-	return -ENOTSUPP;
++	return -EOPNOTSUPP;
++}
++static inline int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf)
++{
++	return -ENODEV;
++}
++static inline int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator)
++{
++	return -EOPNOTSUPP;
++}
++static inline int amd_detect_prefcore(bool *detected)
++{
++	return -ENODEV;
+ }
+ #endif /* !CONFIG_ACPI_CPPC_LIB */
+ 
+diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
+index dd4682857c12..23698d0f4bb4 100644
+--- a/tools/arch/x86/include/asm/cpufeatures.h
++++ b/tools/arch/x86/include/asm/cpufeatures.h
+@@ -472,7 +472,7 @@
+ #define X86_FEATURE_BHI_CTRL		(21*32+ 2) /* BHI_DIS_S HW control available */
+ #define X86_FEATURE_CLEAR_BHB_HW	(21*32+ 3) /* BHI_DIS_S HW control enabled */
+ #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
+-#define X86_FEATURE_FAST_CPPC		(21*32 + 5) /* AMD Fast CPPC */
++#define X86_FEATURE_AMD_FAST_CPPC		(21*32 + 5) /* AMD Fast CPPC */
+ 
+ /*
+  * BUG word(s)
+-- 
+2.47.0.rc0
+
+From f3e882f80066e4cfda6767e245e95ca280db8bc0 Mon Sep 17 00:00:00 2001
+From: Eric Naim <dnaim@cachyos.org>
+Date: Tue, 22 Oct 2024 22:52:00 +0800
+Subject: [PATCH 04/13] bbr3
+
+Signed-off-by: Eric Naim <dnaim@cachyos.org>
+---
+ include/linux/tcp.h                |    4 +-
+ include/net/inet_connection_sock.h |    4 +-
+ include/net/tcp.h                  |   72 +-
+ include/uapi/linux/inet_diag.h     |   23 +
+ include/uapi/linux/rtnetlink.h     |    4 +-
+ include/uapi/linux/tcp.h           |    1 +
+ net/ipv4/Kconfig                   |   21 +-
+ net/ipv4/bpf_tcp_ca.c              |    9 +-
+ net/ipv4/tcp.c                     |    3 +
+ net/ipv4/tcp_bbr.c                 | 2230 +++++++++++++++++++++-------
+ net/ipv4/tcp_cong.c                |    1 +
+ net/ipv4/tcp_input.c               |   40 +-
+ net/ipv4/tcp_minisocks.c           |    2 +
+ net/ipv4/tcp_output.c              |   48 +-
+ net/ipv4/tcp_rate.c                |   30 +-
+ net/ipv4/tcp_timer.c               |    1 +
+ 16 files changed, 1940 insertions(+), 553 deletions(-)
+
+diff --git a/include/linux/tcp.h b/include/linux/tcp.h
+index 6a5e08b937b3..27aab715490e 100644
+--- a/include/linux/tcp.h
++++ b/include/linux/tcp.h
+@@ -369,7 +369,9 @@ struct tcp_sock {
+ 	u8	compressed_ack;
+ 	u8	dup_ack_counter:2,
+ 		tlp_retrans:1,	/* TLP is a retransmission */
+-		unused:5;
++		fast_ack_mode:2, /* which fast ack mode? */
++		tlp_orig_data_app_limited:1, /* app-limited before TLP rtx? */
++		unused:2;
+ 	u8	thin_lto    : 1,/* Use linear timeouts for thin streams */
+ 		fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
+ 		fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
+diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
+index c0deaafebfdc..d53f042d936e 100644
+--- a/include/net/inet_connection_sock.h
++++ b/include/net/inet_connection_sock.h
+@@ -137,8 +137,8 @@ struct inet_connection_sock {
+ 	u32			  icsk_probes_tstamp;
+ 	u32			  icsk_user_timeout;
+ 
+-	u64			  icsk_ca_priv[104 / sizeof(u64)];
+-#define ICSK_CA_PRIV_SIZE	  sizeof_field(struct inet_connection_sock, icsk_ca_priv)
++#define ICSK_CA_PRIV_SIZE      (144)
++	u64			  icsk_ca_priv[ICSK_CA_PRIV_SIZE / sizeof(u64)];
+ };
+ 
+ #define ICSK_TIME_RETRANS	1	/* Retransmit timer */
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 196c148fce8a..f37256b8abfd 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -375,6 +375,8 @@ static inline void tcp_dec_quickack_mode(struct sock *sk)
+ #define	TCP_ECN_QUEUE_CWR	2
+ #define	TCP_ECN_DEMAND_CWR	4
+ #define	TCP_ECN_SEEN		8
++#define	TCP_ECN_LOW		16
++#define	TCP_ECN_ECT_PERMANENT	32
+ 
+ enum tcp_tw_status {
+ 	TCP_TW_SUCCESS = 0,
+@@ -779,6 +781,15 @@ static inline void tcp_fast_path_check(struct sock *sk)
+ 
+ u32 tcp_delack_max(const struct sock *sk);
+ 
++static inline void tcp_set_ecn_low_from_dst(struct sock *sk,
++					    const struct dst_entry *dst)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++
++	if (dst_feature(dst, RTAX_FEATURE_ECN_LOW))
++		tp->ecn_flags |= TCP_ECN_LOW;
++}
++
+ /* Compute the actual rto_min value */
+ static inline u32 tcp_rto_min(const struct sock *sk)
+ {
+@@ -884,6 +895,11 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
+ 	return max_t(s64, t1 - t0, 0);
+ }
+ 
++static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0)
++{
++	return max_t(s32, t1 - t0, 0);
++}
++
+ /* provide the departure time in us unit */
+ static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
+ {
+@@ -973,9 +989,14 @@ struct tcp_skb_cb {
+ 			/* pkts S/ACKed so far upon tx of skb, incl retrans: */
+ 			__u32 delivered;
+ 			/* start of send pipeline phase */
+-			u64 first_tx_mstamp;
++			u32 first_tx_mstamp;
+ 			/* when we reached the "delivered" count */
+-			u64 delivered_mstamp;
++			u32 delivered_mstamp;
++#define TCPCB_IN_FLIGHT_BITS 20
++#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
++			u32 in_flight:20,   /* packets in flight at transmit */
++			    unused2:12;
++			u32 lost;	/* packets lost so far upon tx of skb */
+ 		} tx;   /* only used for outgoing skbs */
+ 		union {
+ 			struct inet_skb_parm	h4;
+@@ -1087,6 +1108,7 @@ enum tcp_ca_event {
+ 	CA_EVENT_LOSS,		/* loss timeout */
+ 	CA_EVENT_ECN_NO_CE,	/* ECT set, but not CE marked */
+ 	CA_EVENT_ECN_IS_CE,	/* received CE marked IP packet */
++	CA_EVENT_TLP_RECOVERY,	/* a lost segment was repaired by TLP probe */
+ };
+ 
+ /* Information about inbound ACK, passed to cong_ops->in_ack_event() */
+@@ -1109,7 +1131,11 @@ enum tcp_ca_ack_event_flags {
+ #define TCP_CONG_NON_RESTRICTED 0x1
+ /* Requires ECN/ECT set on all packets */
+ #define TCP_CONG_NEEDS_ECN	0x2
+-#define TCP_CONG_MASK	(TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
++/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). */
++#define TCP_CONG_WANTS_CE_EVENTS	0x4
++#define TCP_CONG_MASK	(TCP_CONG_NON_RESTRICTED | \
++			 TCP_CONG_NEEDS_ECN | \
++			 TCP_CONG_WANTS_CE_EVENTS)
+ 
+ union tcp_cc_info;
+ 
+@@ -1129,10 +1155,13 @@ struct ack_sample {
+  */
+ struct rate_sample {
+ 	u64  prior_mstamp; /* starting timestamp for interval */
++	u32  prior_lost;	/* tp->lost at "prior_mstamp" */
+ 	u32  prior_delivered;	/* tp->delivered at "prior_mstamp" */
+ 	u32  prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
++	u32 tx_in_flight;	/* packets in flight at starting timestamp */
++	s32  lost;		/* number of packets lost over interval */
+ 	s32  delivered;		/* number of packets delivered over interval */
+-	s32  delivered_ce;	/* number of packets delivered w/ CE marks*/
++	s32  delivered_ce;	/* packets delivered w/ CE mark over interval */
+ 	long interval_us;	/* time for tp->delivered to incr "delivered" */
+ 	u32 snd_interval_us;	/* snd interval for delivered packets */
+ 	u32 rcv_interval_us;	/* rcv interval for delivered packets */
+@@ -1143,7 +1172,9 @@ struct rate_sample {
+ 	u32  last_end_seq;	/* end_seq of most recently ACKed packet */
+ 	bool is_app_limited;	/* is sample from packet with bubble in pipe? */
+ 	bool is_retrans;	/* is sample from retransmission? */
++	bool is_acking_tlp_retrans_seq;  /* ACKed a TLP retransmit sequence? */
+ 	bool is_ack_delayed;	/* is this (likely) a delayed ACK? */
++	bool is_ece;		/* did this ACK have ECN marked? */
+ };
+ 
+ struct tcp_congestion_ops {
+@@ -1167,8 +1198,11 @@ struct tcp_congestion_ops {
+ 	/* hook for packet ack accounting (optional) */
+ 	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
+ 
+-	/* override sysctl_tcp_min_tso_segs */
+-	u32 (*min_tso_segs)(struct sock *sk);
++	/* pick target number of segments per TSO/GSO skb (optional): */
++	u32 (*tso_segs)(struct sock *sk, unsigned int mss_now);
++
++	/* react to a specific lost skb (optional) */
++	void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
+ 
+ 	/* call when packets are delivered to update cwnd and pacing rate,
+ 	 * after all the ca_state processing. (optional)
+@@ -1234,6 +1268,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer)
+ }
+ #endif
+ 
++static inline bool tcp_ca_wants_ce_events(const struct sock *sk)
++{
++	const struct inet_connection_sock *icsk = inet_csk(sk);
++
++	return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN |
++					   TCP_CONG_WANTS_CE_EVENTS);
++}
++
+ static inline bool tcp_ca_needs_ecn(const struct sock *sk)
+ {
+ 	const struct inet_connection_sock *icsk = inet_csk(sk);
+@@ -1253,6 +1295,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
+ void tcp_set_ca_state(struct sock *sk, const u8 ca_state);
+ 
+ /* From tcp_rate.c */
++void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb);
+ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
+ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
+ 			    struct rate_sample *rs);
+@@ -1265,6 +1308,21 @@ static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
+ 	return t1 > t2 || (t1 == t2 && after(seq1, seq2));
+ }
+ 
++/* If a retransmit failed due to local qdisc congestion or other local issues,
++ * then we may have called tcp_set_skb_tso_segs() to increase the number of
++ * segments in the skb without increasing the tx.in_flight. In all other cases,
++ * the tx.in_flight should be at least as big as the pcount of the sk_buff.  We
++ * do not have the state to know whether a retransmit failed due to local qdisc
++ * congestion or other local issues, so to avoid spurious warnings we consider
++ * that any skb marked lost may have suffered that fate.
++ */
++static inline bool tcp_skb_tx_in_flight_is_suspicious(u32 skb_pcount,
++						      u32 skb_sacked_flags,
++						      u32 tx_in_flight)
++{
++	return (skb_pcount > tx_in_flight) && !(skb_sacked_flags & TCPCB_LOST);
++}
++
+ /* These functions determine how the current flow behaves in respect of SACK
+  * handling. SACK is negotiated with the peer, and therefore it can vary
+  * between different flows.
+@@ -2416,7 +2474,7 @@ struct tcp_plb_state {
+ 	u8	consec_cong_rounds:5, /* consecutive congested rounds */
+ 		unused:3;
+ 	u32	pause_until; /* jiffies32 when PLB can resume rerouting */
+-};
++} __attribute__ ((__packed__));
+ 
+ static inline void tcp_plb_init(const struct sock *sk,
+ 				struct tcp_plb_state *plb)
+diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
+index 50655de04c9b..82f8bd8f0d16 100644
+--- a/include/uapi/linux/inet_diag.h
++++ b/include/uapi/linux/inet_diag.h
+@@ -229,6 +229,29 @@ struct tcp_bbr_info {
+ 	__u32	bbr_min_rtt;		/* min-filtered RTT in uSec */
+ 	__u32	bbr_pacing_gain;	/* pacing gain shifted left 8 bits */
+ 	__u32	bbr_cwnd_gain;		/* cwnd gain shifted left 8 bits */
++	__u32	bbr_bw_hi_lsb;		/* lower 32 bits of bw_hi */
++	__u32	bbr_bw_hi_msb;		/* upper 32 bits of bw_hi */
++	__u32	bbr_bw_lo_lsb;		/* lower 32 bits of bw_lo */
++	__u32	bbr_bw_lo_msb;		/* upper 32 bits of bw_lo */
++	__u8	bbr_mode;		/* current bbr_mode in state machine */
++	__u8	bbr_phase;		/* current state machine phase */
++	__u8	unused1;		/* alignment padding; not used yet */
++	__u8	bbr_version;		/* BBR algorithm version */
++	__u32	bbr_inflight_lo;	/* lower short-term data volume bound */
++	__u32	bbr_inflight_hi;	/* higher long-term data volume bound */
++	__u32	bbr_extra_acked;	/* max excess packets ACKed in epoch */
++};
++
++/* TCP BBR congestion control bbr_phase as reported in netlink/ss stats. */
++enum tcp_bbr_phase {
++	BBR_PHASE_INVALID		= 0,
++	BBR_PHASE_STARTUP		= 1,
++	BBR_PHASE_DRAIN			= 2,
++	BBR_PHASE_PROBE_RTT		= 3,
++	BBR_PHASE_PROBE_BW_UP		= 4,
++	BBR_PHASE_PROBE_BW_DOWN		= 5,
++	BBR_PHASE_PROBE_BW_CRUISE	= 6,
++	BBR_PHASE_PROBE_BW_REFILL	= 7,
+ };
+ 
+ union tcp_cc_info {
+diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
+index 3b687d20c9ed..a7c30c243b54 100644
+--- a/include/uapi/linux/rtnetlink.h
++++ b/include/uapi/linux/rtnetlink.h
+@@ -507,12 +507,14 @@ enum {
+ #define RTAX_FEATURE_TIMESTAMP		(1 << 2) /* unused */
+ #define RTAX_FEATURE_ALLFRAG		(1 << 3) /* unused */
+ #define RTAX_FEATURE_TCP_USEC_TS	(1 << 4)
++#define RTAX_FEATURE_ECN_LOW		(1 << 5)
+ 
+ #define RTAX_FEATURE_MASK	(RTAX_FEATURE_ECN |		\
+ 				 RTAX_FEATURE_SACK |		\
+ 				 RTAX_FEATURE_TIMESTAMP |	\
+ 				 RTAX_FEATURE_ALLFRAG |		\
+-				 RTAX_FEATURE_TCP_USEC_TS)
++				 RTAX_FEATURE_TCP_USEC_TS |	\
++				 RTAX_FEATURE_ECN_LOW)
+ 
+ struct rta_session {
+ 	__u8	proto;
+diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
+index dbf896f3146c..4702cd2f1ffc 100644
+--- a/include/uapi/linux/tcp.h
++++ b/include/uapi/linux/tcp.h
+@@ -178,6 +178,7 @@ enum tcp_fastopen_client_fail {
+ #define TCPI_OPT_ECN_SEEN	16 /* we received at least one packet with ECT */
+ #define TCPI_OPT_SYN_DATA	32 /* SYN-ACK acked data in SYN sent or rcvd */
+ #define TCPI_OPT_USEC_TS	64 /* usec timestamps */
++#define TCPI_OPT_ECN_LOW	128 /* Low-latency ECN configured at init */
+ 
+ /*
+  * Sender's congestion state indicating normal or abnormal situations
+diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
+index 8e94ed7c56a0..50dc9970cad2 100644
+--- a/net/ipv4/Kconfig
++++ b/net/ipv4/Kconfig
+@@ -668,15 +668,18 @@ config TCP_CONG_BBR
+ 	default n
+ 	help
+ 
+-	  BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to
+-	  maximize network utilization and minimize queues. It builds an explicit
+-	  model of the bottleneck delivery rate and path round-trip propagation
+-	  delay. It tolerates packet loss and delay unrelated to congestion. It
+-	  can operate over LAN, WAN, cellular, wifi, or cable modem links. It can
+-	  coexist with flows that use loss-based congestion control, and can
+-	  operate with shallow buffers, deep buffers, bufferbloat, policers, or
+-	  AQM schemes that do not provide a delay signal. It requires the fq
+-	  ("Fair Queue") pacing packet scheduler.
++	  BBR (Bottleneck Bandwidth and RTT) TCP congestion control is a
++	  model-based congestion control algorithm that aims to maximize
++	  network utilization, keep queues and retransmit rates low, and to be
++	  able to coexist with Reno/CUBIC in common scenarios. It builds an
++	  explicit model of the network path.  It tolerates a targeted degree
++	  of random packet loss and delay. It can operate over LAN, WAN,
++	  cellular, wifi, or cable modem links, and can use shallow-threshold
++	  ECN signals. It can coexist to some degree with flows that use
++	  loss-based congestion control, and can operate with shallow buffers,
++	  deep buffers, bufferbloat, policers, or AQM schemes that do not
++	  provide a delay signal. It requires pacing, using either TCP internal
++	  pacing or the fq ("Fair Queue") pacing packet scheduler.
+ 
+ choice
+ 	prompt "Default TCP congestion control"
+diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
+index 3f88d0961e5b..4273cac333f6 100644
+--- a/net/ipv4/bpf_tcp_ca.c
++++ b/net/ipv4/bpf_tcp_ca.c
+@@ -305,11 +305,15 @@ static void bpf_tcp_ca_pkts_acked(struct sock *sk, const struct ack_sample *samp
+ {
+ }
+ 
+-static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk)
++static u32 bpf_tcp_ca_tso_segs(struct sock *sk, unsigned int mss_now)
+ {
+ 	return 0;
+ }
+ 
++static void bpf_tcp_ca_skb_marked_lost(struct sock *sk, const struct sk_buff *skb)
++{
++}
++
+ static void bpf_tcp_ca_cong_control(struct sock *sk, u32 ack, int flag,
+ 				    const struct rate_sample *rs)
+ {
+@@ -340,7 +344,8 @@ static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = {
+ 	.cwnd_event = bpf_tcp_ca_cwnd_event,
+ 	.in_ack_event = bpf_tcp_ca_in_ack_event,
+ 	.pkts_acked = bpf_tcp_ca_pkts_acked,
+-	.min_tso_segs = bpf_tcp_ca_min_tso_segs,
++	.tso_segs = bpf_tcp_ca_tso_segs,
++	.skb_marked_lost = bpf_tcp_ca_skb_marked_lost,
+ 	.cong_control = bpf_tcp_ca_cong_control,
+ 	.undo_cwnd = bpf_tcp_ca_undo_cwnd,
+ 	.sndbuf_expand = bpf_tcp_ca_sndbuf_expand,
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 831a18dc7aa6..d9faa8fef55e 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3123,6 +3123,7 @@ int tcp_disconnect(struct sock *sk, int flags)
+ 	tp->rx_opt.dsack = 0;
+ 	tp->rx_opt.num_sacks = 0;
+ 	tp->rcv_ooopack = 0;
++	tp->fast_ack_mode = 0;
+ 
+ 
+ 	/* Clean up fastopen related fields */
+@@ -3849,6 +3850,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
+ 		info->tcpi_options |= TCPI_OPT_ECN;
+ 	if (tp->ecn_flags & TCP_ECN_SEEN)
+ 		info->tcpi_options |= TCPI_OPT_ECN_SEEN;
++	if (tp->ecn_flags & TCP_ECN_LOW)
++		info->tcpi_options |= TCPI_OPT_ECN_LOW;
+ 	if (tp->syn_data_acked)
+ 		info->tcpi_options |= TCPI_OPT_SYN_DATA;
+ 	if (tp->tcp_usec_ts)
+diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
+index 760941e55153..a180fa648d5e 100644
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -1,18 +1,19 @@
+-/* Bottleneck Bandwidth and RTT (BBR) congestion control
++/* BBR (Bottleneck Bandwidth and RTT) congestion control
+  *
+- * BBR congestion control computes the sending rate based on the delivery
+- * rate (throughput) estimated from ACKs. In a nutshell:
++ * BBR is a model-based congestion control algorithm that aims for low queues,
++ * low loss, and (bounded) Reno/CUBIC coexistence. To maintain a model of the
++ * network path, it uses measurements of bandwidth and RTT, as well as (if they
++ * occur) packet loss and/or shallow-threshold ECN signals. Note that although
++ * it can use ECN or loss signals explicitly, it does not require either; it
++ * can bound its in-flight data based on its estimate of the BDP.
+  *
+- *   On each ACK, update our model of the network path:
+- *      bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips)
+- *      min_rtt = windowed_min(rtt, 10 seconds)
+- *   pacing_rate = pacing_gain * bottleneck_bandwidth
+- *   cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4)
+- *
+- * The core algorithm does not react directly to packet losses or delays,
+- * although BBR may adjust the size of next send per ACK when loss is
+- * observed, or adjust the sending rate if it estimates there is a
+- * traffic policer, in order to keep the drop rate reasonable.
++ * The model has both higher and lower bounds for the operating range:
++ *   lo: bw_lo, inflight_lo: conservative short-term lower bound
++ *   hi: bw_hi, inflight_hi: robust long-term upper bound
++ * The bandwidth-probing time scale is (a) extended dynamically based on
++ * estimated BDP to improve coexistence with Reno/CUBIC; (b) bounded by
++ * an interactive wall-clock time-scale to be more scalable and responsive
++ * than Reno and CUBIC.
+  *
+  * Here is a state transition diagram for BBR:
+  *
+@@ -65,6 +66,13 @@
+ #include <linux/random.h>
+ #include <linux/win_minmax.h>
+ 
++#include <trace/events/tcp.h>
++#include "tcp_dctcp.h"
++
++#define BBR_VERSION		3
++
++#define bbr_param(sk,name)	(bbr_ ## name)
++
+ /* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth
+  * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps.
+  * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32.
+@@ -85,36 +93,41 @@ enum bbr_mode {
+ 	BBR_PROBE_RTT,	/* cut inflight to min to probe min_rtt */
+ };
+ 
++/* How does the incoming ACK stream relate to our bandwidth probing? */
++enum bbr_ack_phase {
++	BBR_ACKS_INIT,		  /* not probing; not getting probe feedback */
++	BBR_ACKS_REFILLING,	  /* sending at est. bw to fill pipe */
++	BBR_ACKS_PROBE_STARTING,  /* inflight rising to probe bw */
++	BBR_ACKS_PROBE_FEEDBACK,  /* getting feedback from bw probing */
++	BBR_ACKS_PROBE_STOPPING,  /* stopped probing; still getting feedback */
++};
++
+ /* BBR congestion control block */
+ struct bbr {
+ 	u32	min_rtt_us;	        /* min RTT in min_rtt_win_sec window */
+ 	u32	min_rtt_stamp;	        /* timestamp of min_rtt_us */
+ 	u32	probe_rtt_done_stamp;   /* end time for BBR_PROBE_RTT mode */
+-	struct minmax bw;	/* Max recent delivery rate in pkts/uS << 24 */
+-	u32	rtt_cnt;	    /* count of packet-timed rounds elapsed */
++	u32	probe_rtt_min_us;	/* min RTT in probe_rtt_win_ms win */
++	u32	probe_rtt_min_stamp;	/* timestamp of probe_rtt_min_us */
+ 	u32     next_rtt_delivered; /* scb->tx.delivered at end of round */
+ 	u64	cycle_mstamp;	     /* time of this cycle phase start */
+-	u32     mode:3,		     /* current bbr_mode in state machine */
++	u32     mode:2,		     /* current bbr_mode in state machine */
+ 		prev_ca_state:3,     /* CA state on previous ACK */
+-		packet_conservation:1,  /* use packet conservation? */
+ 		round_start:1,	     /* start of packet-timed tx->ack round? */
++		ce_state:1,          /* If most recent data has CE bit set */
++		bw_probe_up_rounds:5,   /* cwnd-limited rounds in PROBE_UP */
++		try_fast_path:1,	/* can we take fast path? */
+ 		idle_restart:1,	     /* restarting after idle? */
+ 		probe_rtt_round_done:1,  /* a BBR_PROBE_RTT round at 4 pkts? */
+-		unused:13,
+-		lt_is_sampling:1,    /* taking long-term ("LT") samples now? */
+-		lt_rtt_cnt:7,	     /* round trips in long-term interval */
+-		lt_use_bw:1;	     /* use lt_bw as our bw estimate? */
+-	u32	lt_bw;		     /* LT est delivery rate in pkts/uS << 24 */
+-	u32	lt_last_delivered;   /* LT intvl start: tp->delivered */
+-	u32	lt_last_stamp;	     /* LT intvl start: tp->delivered_mstamp */
+-	u32	lt_last_lost;	     /* LT intvl start: tp->lost */
++		init_cwnd:7,         /* initial cwnd */
++		unused_1:10;
+ 	u32	pacing_gain:10,	/* current gain for setting pacing rate */
+ 		cwnd_gain:10,	/* current gain for setting cwnd */
+ 		full_bw_reached:1,   /* reached full bw in Startup? */
+ 		full_bw_cnt:2,	/* number of rounds without large bw gains */
+-		cycle_idx:3,	/* current index in pacing_gain cycle array */
++		cycle_idx:2,	/* current index in pacing_gain cycle array */
+ 		has_seen_rtt:1, /* have we seen an RTT sample yet? */
+-		unused_b:5;
++		unused_2:6;
+ 	u32	prior_cwnd;	/* prior cwnd upon entering loss recovery */
+ 	u32	full_bw;	/* recent bw, to estimate if pipe is full */
+ 
+@@ -124,19 +137,67 @@ struct bbr {
+ 	u32	ack_epoch_acked:20,	/* packets (S)ACKed in sampling epoch */
+ 		extra_acked_win_rtts:5,	/* age of extra_acked, in round trips */
+ 		extra_acked_win_idx:1,	/* current index in extra_acked array */
+-		unused_c:6;
++	/* BBR v3 state: */
++		full_bw_now:1,		/* recently reached full bw plateau? */
++		startup_ecn_rounds:2,	/* consecutive hi ECN STARTUP rounds */
++		loss_in_cycle:1,	/* packet loss in this cycle? */
++		ecn_in_cycle:1,		/* ECN in this cycle? */
++		unused_3:1;
++	u32	loss_round_delivered; /* scb->tx.delivered ending loss round */
++	u32	undo_bw_lo;	     /* bw_lo before latest losses */
++	u32	undo_inflight_lo;    /* inflight_lo before latest losses */
++	u32	undo_inflight_hi;    /* inflight_hi before latest losses */
++	u32	bw_latest;	 /* max delivered bw in last round trip */
++	u32	bw_lo;		 /* lower bound on sending bandwidth */
++	u32	bw_hi[2];	 /* max recent measured bw sample */
++	u32	inflight_latest; /* max delivered data in last round trip */
++	u32	inflight_lo;	 /* lower bound of inflight data range */
++	u32	inflight_hi;	 /* upper bound of inflight data range */
++	u32	bw_probe_up_cnt; /* packets delivered per inflight_hi incr */
++	u32	bw_probe_up_acks;  /* packets (S)ACKed since inflight_hi incr */
++	u32	probe_wait_us;	 /* PROBE_DOWN until next clock-driven probe */
++	u32	prior_rcv_nxt;	/* tp->rcv_nxt when CE state last changed */
++	u32	ecn_eligible:1,	/* sender can use ECN (RTT, handshake)? */
++		ecn_alpha:9,	/* EWMA delivered_ce/delivered; 0..256 */
++		bw_probe_samples:1,    /* rate samples reflect bw probing? */
++		prev_probe_too_high:1, /* did last PROBE_UP go too high? */
++		stopped_risky_probe:1, /* last PROBE_UP stopped due to risk? */
++		rounds_since_probe:8,  /* packet-timed rounds since probed bw */
++		loss_round_start:1,    /* loss_round_delivered round trip? */
++		loss_in_round:1,       /* loss marked in this round trip? */
++		ecn_in_round:1,	       /* ECN marked in this round trip? */
++		ack_phase:3,	       /* bbr_ack_phase: meaning of ACKs */
++		loss_events_in_round:4,/* losses in STARTUP round */
++		initialized:1;	       /* has bbr_init() been called? */
++	u32	alpha_last_delivered;	 /* tp->delivered    at alpha update */
++	u32	alpha_last_delivered_ce; /* tp->delivered_ce at alpha update */
++
++	u8	unused_4;		/* to preserve alignment */
++	struct tcp_plb_state plb;
+ };
+ 
+-#define CYCLE_LEN	8	/* number of phases in a pacing gain cycle */
++struct bbr_context {
++	u32 sample_bw;
++};
+ 
+-/* Window length of bw filter (in rounds): */
+-static const int bbr_bw_rtts = CYCLE_LEN + 2;
+ /* Window length of min_rtt filter (in sec): */
+ static const u32 bbr_min_rtt_win_sec = 10;
+ /* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */
+ static const u32 bbr_probe_rtt_mode_ms = 200;
+-/* Skip TSO below the following bandwidth (bits/sec): */
+-static const int bbr_min_tso_rate = 1200000;
++/* Window length of probe_rtt_min_us filter (in ms), and consequently the
++ * typical interval between PROBE_RTT mode entries. The default is 5000ms.
++ * Note that bbr_probe_rtt_win_ms must be <= bbr_min_rtt_win_sec * MSEC_PER_SEC
++ */
++static const u32 bbr_probe_rtt_win_ms = 5000;
++/* Proportion of cwnd to estimated BDP in PROBE_RTT, in units of BBR_UNIT: */
++static const u32 bbr_probe_rtt_cwnd_gain = BBR_UNIT * 1 / 2;
++
++/* Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting
++ * in bigger TSO bursts. We cut the RTT-based allowance in half
++ * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance
++ * is below 1500 bytes after 6 * ~500 usec = 3ms.
++ */
++static const u32 bbr_tso_rtt_shift = 9;
+ 
+ /* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck.
+  * In order to help drive the network toward lower queues and low latency while
+@@ -146,13 +207,15 @@ static const int bbr_min_tso_rate = 1200000;
+  */
+ static const int bbr_pacing_margin_percent = 1;
+ 
+-/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
++/* We use a startup_pacing_gain of 4*ln(2) because it's the smallest value
+  * that will allow a smoothly increasing pacing rate that will double each RTT
+  * and send the same number of packets per RTT that an un-paced, slow-starting
+  * Reno or CUBIC flow would:
+  */
+-static const int bbr_high_gain  = BBR_UNIT * 2885 / 1000 + 1;
+-/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain
++static const int bbr_startup_pacing_gain = BBR_UNIT * 277 / 100 + 1;
++/* The gain for deriving startup cwnd: */
++static const int bbr_startup_cwnd_gain = BBR_UNIT * 2;
++/* The pacing gain in BBR_DRAIN is calculated to typically drain
+  * the queue created in BBR_STARTUP in a single round:
+  */
+ static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885;
+@@ -160,13 +223,17 @@ static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885;
+ static const int bbr_cwnd_gain  = BBR_UNIT * 2;
+ /* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */
+ static const int bbr_pacing_gain[] = {
+-	BBR_UNIT * 5 / 4,	/* probe for more available bw */
+-	BBR_UNIT * 3 / 4,	/* drain queue and/or yield bw to other flows */
+-	BBR_UNIT, BBR_UNIT, BBR_UNIT,	/* cruise at 1.0*bw to utilize pipe, */
+-	BBR_UNIT, BBR_UNIT, BBR_UNIT	/* without creating excess queue... */
++	BBR_UNIT * 5 / 4,	/* UP: probe for more available bw */
++	BBR_UNIT * 91 / 100,	/* DOWN: drain queue and/or yield bw */
++	BBR_UNIT,		/* CRUISE: try to use pipe w/ some headroom */
++	BBR_UNIT,		/* REFILL: refill pipe to estimated 100% */
++};
++enum bbr_pacing_gain_phase {
++	BBR_BW_PROBE_UP		= 0,  /* push up inflight to probe for bw/vol */
++	BBR_BW_PROBE_DOWN	= 1,  /* drain excess inflight from the queue */
++	BBR_BW_PROBE_CRUISE	= 2,  /* use pipe, w/ headroom in queue/pipe */
++	BBR_BW_PROBE_REFILL	= 3,  /* v2: refill the pipe again to 100% */
+ };
+-/* Randomize the starting gain cycling phase over N phases: */
+-static const u32 bbr_cycle_rand = 7;
+ 
+ /* Try to keep at least this many packets in flight, if things go smoothly. For
+  * smooth functioning, a sliding window protocol ACKing every other packet
+@@ -174,24 +241,12 @@ static const u32 bbr_cycle_rand = 7;
+  */
+ static const u32 bbr_cwnd_min_target = 4;
+ 
+-/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */
++/* To estimate if BBR_STARTUP or BBR_BW_PROBE_UP has filled pipe... */
+ /* If bw has increased significantly (1.25x), there may be more bw available: */
+ static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4;
+ /* But after 3 rounds w/o significant bw growth, estimate pipe is full: */
+ static const u32 bbr_full_bw_cnt = 3;
+ 
+-/* "long-term" ("LT") bandwidth estimator parameters... */
+-/* The minimum number of rounds in an LT bw sampling interval: */
+-static const u32 bbr_lt_intvl_min_rtts = 4;
+-/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */
+-static const u32 bbr_lt_loss_thresh = 50;
+-/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */
+-static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8;
+-/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */
+-static const u32 bbr_lt_bw_diff = 4000 / 8;
+-/* If we estimate we're policed, use lt_bw for this many round trips: */
+-static const u32 bbr_lt_bw_max_rtts = 48;
+-
+ /* Gain factor for adding extra_acked to target cwnd: */
+ static const int bbr_extra_acked_gain = BBR_UNIT;
+ /* Window length of extra_acked window. */
+@@ -201,8 +256,121 @@ static const u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20;
+ /* Time period for clamping cwnd increment due to ack aggregation */
+ static const u32 bbr_extra_acked_max_us = 100 * 1000;
+ 
++/* Flags to control BBR ECN-related behavior... */
++
++/* Ensure ACKs only ACK packets with consistent ECN CE status? */
++static const bool bbr_precise_ece_ack = true;
++
++/* Max RTT (in usec) at which to use sender-side ECN logic.
++ * Disabled when 0 (ECN allowed at any RTT).
++ */
++static const u32 bbr_ecn_max_rtt_us = 5000;
++
++/* On losses, scale down inflight and pacing rate by beta scaled by BBR_SCALE.
++ * No loss response when 0.
++ */
++static const u32 bbr_beta = BBR_UNIT * 30 / 100;
++
++/* Gain factor for ECN mark ratio samples, scaled by BBR_SCALE (1/16 = 6.25%) */
++static const u32 bbr_ecn_alpha_gain = BBR_UNIT * 1 / 16;
++
++/* The initial value for ecn_alpha; 1.0 allows a flow to respond quickly
++ * to congestion if the bottleneck is congested when the flow starts up.
++ */
++static const u32 bbr_ecn_alpha_init = BBR_UNIT;
++
++/* On ECN, cut inflight_lo to (1 - ecn_factor * ecn_alpha) scaled by BBR_SCALE.
++ * No ECN based bounding when 0.
++ */
++static const u32 bbr_ecn_factor = BBR_UNIT * 1 / 3;	 /* 1/3 = 33% */
++
++/* Estimate bw probing has gone too far if CE ratio exceeds this threshold.
++ * Scaled by BBR_SCALE. Disabled when 0.
++ */
++static const u32 bbr_ecn_thresh = BBR_UNIT * 1 / 2;  /* 1/2 = 50% */
++
++/* If non-zero: in a cycle that saw some ECN marks but no losses, once ECN
++ * marks clear, make the first round's increment to inflight_hi the following
++ * fraction of inflight_hi.
++ */
++static const u32 bbr_ecn_reprobe_gain = BBR_UNIT * 1 / 2;
++
++/* Estimate bw probing has gone too far if loss rate exceeds this level. */
++static const u32 bbr_loss_thresh = BBR_UNIT * 2 / 100;  /* 2% loss */
++
++/* Slow down for a packet loss recovered by TLP? */
++static const bool bbr_loss_probe_recovery = true;
++
++/* Exit STARTUP if number of loss marking events in a Recovery round is >= N,
++ * and loss rate is higher than bbr_loss_thresh.
++ * Disabled if 0.
++ */
++static const u32 bbr_full_loss_cnt = 6;
++
++/* Exit STARTUP if number of round trips with ECN mark rate above ecn_thresh
++ * meets this count.
++ */
++static const u32 bbr_full_ecn_cnt = 2;
++
++/* Fraction of unutilized headroom to try to leave in path upon high loss. */
++static const u32 bbr_inflight_headroom = BBR_UNIT * 15 / 100;
++
++/* How much do we increase cwnd_gain when probing for bandwidth in
++ * BBR_BW_PROBE_UP? This specifies the increment in units of
++ * BBR_UNIT/4. The default is 1, meaning 0.25.
++ * The min value is 0 (meaning 0.0); max is 3 (meaning 0.75).
++ */
++static const u32 bbr_bw_probe_cwnd_gain = 1;
++
++/* Max number of packet-timed rounds to wait before probing for bandwidth.  If
++ * we want to tolerate 1% random loss per round, and not have this cut our
++ * inflight too much, we must probe for bw periodically on roughly this scale.
++ * If low, limits Reno/CUBIC coexistence; if high, limits loss tolerance.
++ * We aim to be fair with Reno/CUBIC up to a BDP of at least:
++ *  BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets
++ */
++static const u32 bbr_bw_probe_max_rounds = 63;
++
++/* Max amount of randomness to inject in round counting for Reno-coexistence.
++ */
++static const u32 bbr_bw_probe_rand_rounds = 2;
++
++/* Use BBR-native probe time scale starting at this many usec.
++ * We aim to be fair with Reno/CUBIC up to an inter-loss time epoch of at least:
++ *  BDP*RTT = 25Mbps * .030sec /(1514bytes) * 0.030sec = 1.9 secs
++ */
++static const u32 bbr_bw_probe_base_us = 2 * USEC_PER_SEC;  /* 2 secs */
++
++/* Use BBR-native probes spread over this many usec: */
++static const u32 bbr_bw_probe_rand_us = 1 * USEC_PER_SEC;  /* 1 sec */
++
++/* Use fast path if app-limited, no loss/ECN, and target cwnd was reached? */
++static const bool bbr_fast_path = true;
++
++/* Use fast ack mode? */
++static const bool bbr_fast_ack_mode = true;
++
++static u32 bbr_max_bw(const struct sock *sk);
++static u32 bbr_bw(const struct sock *sk);
++static void bbr_exit_probe_rtt(struct sock *sk);
++static void bbr_reset_congestion_signals(struct sock *sk);
++static void bbr_run_loss_probe_recovery(struct sock *sk);
++
+ static void bbr_check_probe_rtt_done(struct sock *sk);
+ 
++/* This connection can use ECN if both endpoints have signaled ECN support in
++ * the handshake and the per-route settings indicated this is a
++ * shallow-threshold ECN environment, meaning both:
++ *  (a) ECN CE marks indicate low-latency/shallow-threshold congestion, and
++ *  (b) TCP endpoints provide precise ACKs that only ACK data segments
++ *      with consistent ECN CE status
++ */
++static bool bbr_can_use_ecn(const struct sock *sk)
++{
++	return (tcp_sk(sk)->ecn_flags & TCP_ECN_OK) &&
++	       (tcp_sk(sk)->ecn_flags & TCP_ECN_LOW);
++}
++
+ /* Do we estimate that STARTUP filled the pipe? */
+ static bool bbr_full_bw_reached(const struct sock *sk)
+ {
+@@ -214,17 +382,17 @@ static bool bbr_full_bw_reached(const struct sock *sk)
+ /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
+ static u32 bbr_max_bw(const struct sock *sk)
+ {
+-	struct bbr *bbr = inet_csk_ca(sk);
++	const struct bbr *bbr = inet_csk_ca(sk);
+ 
+-	return minmax_get(&bbr->bw);
++	return max(bbr->bw_hi[0], bbr->bw_hi[1]);
+ }
+ 
+ /* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */
+ static u32 bbr_bw(const struct sock *sk)
+ {
+-	struct bbr *bbr = inet_csk_ca(sk);
++	const struct bbr *bbr = inet_csk_ca(sk);
+ 
+-	return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk);
++	return min(bbr_max_bw(sk), bbr->bw_lo);
+ }
+ 
+ /* Return maximum extra acked in past k-2k round trips,
+@@ -241,15 +409,23 @@ static u16 bbr_extra_acked(const struct sock *sk)
+  * The order here is chosen carefully to avoid overflow of u64. This should
+  * work for input rates of up to 2.9Tbit/sec and gain of 2.89x.
+  */
+-static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
++static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain,
++				  int margin)
+ {
+ 	unsigned int mss = tcp_sk(sk)->mss_cache;
+ 
+ 	rate *= mss;
+ 	rate *= gain;
+ 	rate >>= BBR_SCALE;
+-	rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_margin_percent);
+-	return rate >> BW_SCALE;
++	rate *= USEC_PER_SEC / 100 * (100 - margin);
++	rate >>= BW_SCALE;
++	rate = max(rate, 1ULL);
++	return rate;
++}
++
++static u64 bbr_bw_bytes_per_sec(struct sock *sk, u64 rate)
++{
++	return bbr_rate_bytes_per_sec(sk, rate, BBR_UNIT, 0);
+ }
+ 
+ /* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
+@@ -257,12 +433,13 @@ static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
+ {
+ 	u64 rate = bw;
+ 
+-	rate = bbr_rate_bytes_per_sec(sk, rate, gain);
++	rate = bbr_rate_bytes_per_sec(sk, rate, gain,
++				      bbr_pacing_margin_percent);
+ 	rate = min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate));
+ 	return rate;
+ }
+ 
+-/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
++/* Initialize pacing rate to: startup_pacing_gain * init_cwnd / RTT. */
+ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
+ {
+ 	struct tcp_sock *tp = tcp_sk(sk);
+@@ -279,7 +456,7 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
+ 	bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT;
+ 	do_div(bw, rtt_us);
+ 	WRITE_ONCE(sk->sk_pacing_rate,
+-		   bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain));
++		   bbr_bw_to_pacing_rate(sk, bw, bbr_param(sk, startup_pacing_gain)));
+ }
+ 
+ /* Pace using current bw estimate and a gain factor. */
+@@ -295,26 +472,48 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
+ 		WRITE_ONCE(sk->sk_pacing_rate, rate);
+ }
+ 
+-/* override sysctl_tcp_min_tso_segs */
+-__bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk)
++/* Return the number of segments BBR would like in a TSO/GSO skb, given a
++ * particular max gso size as a constraint. TODO: make this simpler and more
++ * consistent by switching bbr to just call tcp_tso_autosize().
++ */
++static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now,
++				u32 gso_max_size)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++	u32 segs, r;
++	u64 bytes;
++
++	/* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */
++	bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift);
++
++	/* Budget a TSO/GSO burst size allowance based on min_rtt. For every
++	 * K = 2^tso_rtt_shift microseconds of min_rtt, halve the burst.
++	 * The min_rtt-based burst allowance is: 64 KBytes / 2^(min_rtt/K)
++	 */
++	if (bbr_param(sk, tso_rtt_shift)) {
++		r = bbr->min_rtt_us >> bbr_param(sk, tso_rtt_shift);
++		if (r < BITS_PER_TYPE(u32))   /* prevent undefined behavior */
++			bytes += GSO_LEGACY_MAX_SIZE >> r;
++	}
++
++	bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER);
++	segs = max_t(u32, bytes / mss_now,
++		     sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
++	return segs;
++}
++
++/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */
++__bpf_kfunc static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now)
+ {
+-	return READ_ONCE(sk->sk_pacing_rate) < (bbr_min_tso_rate >> 3) ? 1 : 2;
++	return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size);
+ }
+ 
++/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */
+ static u32 bbr_tso_segs_goal(struct sock *sk)
+ {
+ 	struct tcp_sock *tp = tcp_sk(sk);
+-	u32 segs, bytes;
+-
+-	/* Sort of tcp_tso_autosize() but ignoring
+-	 * driver provided sk_gso_max_size.
+-	 */
+-	bytes = min_t(unsigned long,
+-		      READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift),
+-		      GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER);
+-	segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
+ 
+-	return min(segs, 0x7FU);
++	return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_LEGACY_MAX_SIZE);
+ }
+ 
+ /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
+@@ -334,7 +533,9 @@ __bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+ 	struct tcp_sock *tp = tcp_sk(sk);
+ 	struct bbr *bbr = inet_csk_ca(sk);
+ 
+-	if (event == CA_EVENT_TX_START && tp->app_limited) {
++	if (event == CA_EVENT_TX_START) {
++		if (!tp->app_limited)
++			return;
+ 		bbr->idle_restart = 1;
+ 		bbr->ack_epoch_mstamp = tp->tcp_mstamp;
+ 		bbr->ack_epoch_acked = 0;
+@@ -345,6 +546,16 @@ __bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+ 			bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT);
+ 		else if (bbr->mode == BBR_PROBE_RTT)
+ 			bbr_check_probe_rtt_done(sk);
++	} else if ((event == CA_EVENT_ECN_IS_CE ||
++		    event == CA_EVENT_ECN_NO_CE) &&
++		   bbr_can_use_ecn(sk) &&
++		   bbr_param(sk, precise_ece_ack)) {
++		u32 state = bbr->ce_state;
++		dctcp_ece_ack_update(sk, event, &bbr->prior_rcv_nxt, &state);
++		bbr->ce_state = state;
++	} else if (event == CA_EVENT_TLP_RECOVERY &&
++		   bbr_param(sk, loss_probe_recovery)) {
++		bbr_run_loss_probe_recovery(sk);
+ 	}
+ }
+ 
+@@ -367,10 +578,10 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain)
+ 	 * default. This should only happen when the connection is not using TCP
+ 	 * timestamps and has retransmitted all of the SYN/SYNACK/data packets
+ 	 * ACKed so far. In this case, an RTO can cut cwnd to 1, in which
+-	 * case we need to slow-start up toward something safe: TCP_INIT_CWND.
++	 * case we need to slow-start up toward something safe: initial cwnd.
+ 	 */
+ 	if (unlikely(bbr->min_rtt_us == ~0U))	 /* no valid RTT samples yet? */
+-		return TCP_INIT_CWND;  /* be safe: cap at default initial cwnd*/
++		return bbr->init_cwnd;  /* be safe: cap at initial cwnd */
+ 
+ 	w = (u64)bw * bbr->min_rtt_us;
+ 
+@@ -387,23 +598,23 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain)
+  *   - one skb in sending host Qdisc,
+  *   - one skb in sending host TSO/GSO engine
+  *   - one skb being received by receiver host LRO/GRO/delayed-ACK engine
+- * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because
+- * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets,
++ * Don't worry, at low rates this won't bloat cwnd because
++ * in such cases tso_segs_goal is small. The minimum cwnd is 4 packets,
+  * which allows 2 outstanding 2-packet sequences, to try to keep pipe
+  * full even with ACK-every-other-packet delayed ACKs.
+  */
+ static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd)
+ {
+ 	struct bbr *bbr = inet_csk_ca(sk);
++	u32 tso_segs_goal;
+ 
+-	/* Allow enough full-sized skbs in flight to utilize end systems. */
+-	cwnd += 3 * bbr_tso_segs_goal(sk);
+-
+-	/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
+-	cwnd = (cwnd + 1) & ~1U;
++	tso_segs_goal = 3 * bbr_tso_segs_goal(sk);
+ 
++	/* Allow enough full-sized skbs in flight to utilize end systems. */
++	cwnd = max_t(u32, cwnd, tso_segs_goal);
++	cwnd = max_t(u32, cwnd, bbr_param(sk, cwnd_min_target));
+ 	/* Ensure gain cycling gets inflight above BDP even for small BDPs. */
+-	if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == 0)
++	if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP)
+ 		cwnd += 2;
+ 
+ 	return cwnd;
+@@ -458,10 +669,10 @@ static u32 bbr_ack_aggregation_cwnd(struct sock *sk)
+ {
+ 	u32 max_aggr_cwnd, aggr_cwnd = 0;
+ 
+-	if (bbr_extra_acked_gain && bbr_full_bw_reached(sk)) {
++	if (bbr_param(sk, extra_acked_gain)) {
+ 		max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us)
+ 				/ BW_UNIT;
+-		aggr_cwnd = (bbr_extra_acked_gain * bbr_extra_acked(sk))
++		aggr_cwnd = (bbr_param(sk, extra_acked_gain) * bbr_extra_acked(sk))
+ 			     >> BBR_SCALE;
+ 		aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd);
+ 	}
+@@ -469,66 +680,27 @@ static u32 bbr_ack_aggregation_cwnd(struct sock *sk)
+ 	return aggr_cwnd;
+ }
+ 
+-/* An optimization in BBR to reduce losses: On the first round of recovery, we
+- * follow the packet conservation principle: send P packets per P packets acked.
+- * After that, we slow-start and send at most 2*P packets per P packets acked.
+- * After recovery finishes, or upon undo, we restore the cwnd we had when
+- * recovery started (capped by the target cwnd based on estimated BDP).
+- *
+- * TODO(ycheng/ncardwell): implement a rate-based approach.
+- */
+-static bool bbr_set_cwnd_to_recover_or_restore(
+-	struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd)
++/* Returns the cwnd for PROBE_RTT mode. */
++static u32 bbr_probe_rtt_cwnd(struct sock *sk)
+ {
+-	struct tcp_sock *tp = tcp_sk(sk);
+-	struct bbr *bbr = inet_csk_ca(sk);
+-	u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
+-	u32 cwnd = tcp_snd_cwnd(tp);
+-
+-	/* An ACK for P pkts should release at most 2*P packets. We do this
+-	 * in two steps. First, here we deduct the number of lost packets.
+-	 * Then, in bbr_set_cwnd() we slow start up toward the target cwnd.
+-	 */
+-	if (rs->losses > 0)
+-		cwnd = max_t(s32, cwnd - rs->losses, 1);
+-
+-	if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) {
+-		/* Starting 1st round of Recovery, so do packet conservation. */
+-		bbr->packet_conservation = 1;
+-		bbr->next_rtt_delivered = tp->delivered;  /* start round now */
+-		/* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */
+-		cwnd = tcp_packets_in_flight(tp) + acked;
+-	} else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) {
+-		/* Exiting loss recovery; restore cwnd saved before recovery. */
+-		cwnd = max(cwnd, bbr->prior_cwnd);
+-		bbr->packet_conservation = 0;
+-	}
+-	bbr->prev_ca_state = state;
+-
+-	if (bbr->packet_conservation) {
+-		*new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked);
+-		return true;	/* yes, using packet conservation */
+-	}
+-	*new_cwnd = cwnd;
+-	return false;
++	return max_t(u32, bbr_param(sk, cwnd_min_target),
++		     bbr_bdp(sk, bbr_bw(sk), bbr_param(sk, probe_rtt_cwnd_gain)));
+ }
+ 
+ /* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss
+  * has drawn us down below target), or snap down to target if we're above it.
+  */
+ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
+-			 u32 acked, u32 bw, int gain)
++			 u32 acked, u32 bw, int gain, u32 cwnd,
++			 struct bbr_context *ctx)
+ {
+ 	struct tcp_sock *tp = tcp_sk(sk);
+ 	struct bbr *bbr = inet_csk_ca(sk);
+-	u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0;
++	u32 target_cwnd = 0;
+ 
+ 	if (!acked)
+ 		goto done;  /* no packet fully ACKed; just apply caps */
+ 
+-	if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd))
+-		goto done;
+-
+ 	target_cwnd = bbr_bdp(sk, bw, gain);
+ 
+ 	/* Increment the cwnd to account for excess ACKed data that seems
+@@ -537,74 +709,26 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
+ 	target_cwnd += bbr_ack_aggregation_cwnd(sk);
+ 	target_cwnd = bbr_quantization_budget(sk, target_cwnd);
+ 
+-	/* If we're below target cwnd, slow start cwnd toward target cwnd. */
+-	if (bbr_full_bw_reached(sk))  /* only cut cwnd if we filled the pipe */
+-		cwnd = min(cwnd + acked, target_cwnd);
+-	else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND)
+-		cwnd = cwnd + acked;
+-	cwnd = max(cwnd, bbr_cwnd_min_target);
++	/* Update cwnd and enable fast path if cwnd reaches target_cwnd. */
++	bbr->try_fast_path = 0;
++	if (bbr_full_bw_reached(sk)) { /* only cut cwnd if we filled the pipe */
++		cwnd += acked;
++		if (cwnd >= target_cwnd) {
++			cwnd = target_cwnd;
++			bbr->try_fast_path = 1;
++		}
++	} else if (cwnd < target_cwnd || cwnd  < 2 * bbr->init_cwnd) {
++		cwnd += acked;
++	} else {
++		bbr->try_fast_path = 1;
++	}
+ 
++	cwnd = max_t(u32, cwnd, bbr_param(sk, cwnd_min_target));
+ done:
+-	tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp));	/* apply global cap */
++	tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp));  /* global cap */
+ 	if (bbr->mode == BBR_PROBE_RTT)  /* drain queue, refresh min_rtt */
+-		tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target));
+-}
+-
+-/* End cycle phase if it's time and/or we hit the phase's in-flight target. */
+-static bool bbr_is_next_cycle_phase(struct sock *sk,
+-				    const struct rate_sample *rs)
+-{
+-	struct tcp_sock *tp = tcp_sk(sk);
+-	struct bbr *bbr = inet_csk_ca(sk);
+-	bool is_full_length =
+-		tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) >
+-		bbr->min_rtt_us;
+-	u32 inflight, bw;
+-
+-	/* The pacing_gain of 1.0 paces at the estimated bw to try to fully
+-	 * use the pipe without increasing the queue.
+-	 */
+-	if (bbr->pacing_gain == BBR_UNIT)
+-		return is_full_length;		/* just use wall clock time */
+-
+-	inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight);
+-	bw = bbr_max_bw(sk);
+-
+-	/* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at
+-	 * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is
+-	 * small (e.g. on a LAN). We do not persist if packets are lost, since
+-	 * a path with small buffers may not hold that much.
+-	 */
+-	if (bbr->pacing_gain > BBR_UNIT)
+-		return is_full_length &&
+-			(rs->losses ||  /* perhaps pacing_gain*BDP won't fit */
+-			 inflight >= bbr_inflight(sk, bw, bbr->pacing_gain));
+-
+-	/* A pacing_gain < 1.0 tries to drain extra queue we added if bw
+-	 * probing didn't find more bw. If inflight falls to match BDP then we
+-	 * estimate queue is drained; persisting would underutilize the pipe.
+-	 */
+-	return is_full_length ||
+-		inflight <= bbr_inflight(sk, bw, BBR_UNIT);
+-}
+-
+-static void bbr_advance_cycle_phase(struct sock *sk)
+-{
+-	struct tcp_sock *tp = tcp_sk(sk);
+-	struct bbr *bbr = inet_csk_ca(sk);
+-
+-	bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
+-	bbr->cycle_mstamp = tp->delivered_mstamp;
+-}
+-
+-/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
+-static void bbr_update_cycle_phase(struct sock *sk,
+-				   const struct rate_sample *rs)
+-{
+-	struct bbr *bbr = inet_csk_ca(sk);
+-
+-	if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs))
+-		bbr_advance_cycle_phase(sk);
++		tcp_snd_cwnd_set(tp, min_t(u32, tcp_snd_cwnd(tp),
++					   bbr_probe_rtt_cwnd(sk)));
+ }
+ 
+ static void bbr_reset_startup_mode(struct sock *sk)
+@@ -614,191 +738,49 @@ static void bbr_reset_startup_mode(struct sock *sk)
+ 	bbr->mode = BBR_STARTUP;
+ }
+ 
+-static void bbr_reset_probe_bw_mode(struct sock *sk)
+-{
+-	struct bbr *bbr = inet_csk_ca(sk);
+-
+-	bbr->mode = BBR_PROBE_BW;
+-	bbr->cycle_idx = CYCLE_LEN - 1 - get_random_u32_below(bbr_cycle_rand);
+-	bbr_advance_cycle_phase(sk);	/* flip to next phase of gain cycle */
+-}
+-
+-static void bbr_reset_mode(struct sock *sk)
+-{
+-	if (!bbr_full_bw_reached(sk))
+-		bbr_reset_startup_mode(sk);
+-	else
+-		bbr_reset_probe_bw_mode(sk);
+-}
+-
+-/* Start a new long-term sampling interval. */
+-static void bbr_reset_lt_bw_sampling_interval(struct sock *sk)
+-{
+-	struct tcp_sock *tp = tcp_sk(sk);
+-	struct bbr *bbr = inet_csk_ca(sk);
+-
+-	bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC);
+-	bbr->lt_last_delivered = tp->delivered;
+-	bbr->lt_last_lost = tp->lost;
+-	bbr->lt_rtt_cnt = 0;
+-}
+-
+-/* Completely reset long-term bandwidth sampling. */
+-static void bbr_reset_lt_bw_sampling(struct sock *sk)
+-{
+-	struct bbr *bbr = inet_csk_ca(sk);
+-
+-	bbr->lt_bw = 0;
+-	bbr->lt_use_bw = 0;
+-	bbr->lt_is_sampling = false;
+-	bbr_reset_lt_bw_sampling_interval(sk);
+-}
+-
+-/* Long-term bw sampling interval is done. Estimate whether we're policed. */
+-static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw)
+-{
+-	struct bbr *bbr = inet_csk_ca(sk);
+-	u32 diff;
+-
+-	if (bbr->lt_bw) {  /* do we have bw from a previous interval? */
+-		/* Is new bw close to the lt_bw from the previous interval? */
+-		diff = abs(bw - bbr->lt_bw);
+-		if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) ||
+-		    (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <=
+-		     bbr_lt_bw_diff)) {
+-			/* All criteria are met; estimate we're policed. */
+-			bbr->lt_bw = (bw + bbr->lt_bw) >> 1;  /* avg 2 intvls */
+-			bbr->lt_use_bw = 1;
+-			bbr->pacing_gain = BBR_UNIT;  /* try to avoid drops */
+-			bbr->lt_rtt_cnt = 0;
+-			return;
+-		}
+-	}
+-	bbr->lt_bw = bw;
+-	bbr_reset_lt_bw_sampling_interval(sk);
+-}
+-
+-/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of
+- * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and
+- * explicitly models their policed rate, to reduce unnecessary losses. We
+- * estimate that we're policed if we see 2 consecutive sampling intervals with
+- * consistent throughput and high packet loss. If we think we're being policed,
+- * set lt_bw to the "long-term" average delivery rate from those 2 intervals.
++/* See if we have reached next round trip. Upon start of the new round,
++ * returns packets delivered since previous round start plus this ACK.
+  */
+-static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
+-{
+-	struct tcp_sock *tp = tcp_sk(sk);
+-	struct bbr *bbr = inet_csk_ca(sk);
+-	u32 lost, delivered;
+-	u64 bw;
+-	u32 t;
+-
+-	if (bbr->lt_use_bw) {	/* already using long-term rate, lt_bw? */
+-		if (bbr->mode == BBR_PROBE_BW && bbr->round_start &&
+-		    ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) {
+-			bbr_reset_lt_bw_sampling(sk);    /* stop using lt_bw */
+-			bbr_reset_probe_bw_mode(sk);  /* restart gain cycling */
+-		}
+-		return;
+-	}
+-
+-	/* Wait for the first loss before sampling, to let the policer exhaust
+-	 * its tokens and estimate the steady-state rate allowed by the policer.
+-	 * Starting samples earlier includes bursts that over-estimate the bw.
+-	 */
+-	if (!bbr->lt_is_sampling) {
+-		if (!rs->losses)
+-			return;
+-		bbr_reset_lt_bw_sampling_interval(sk);
+-		bbr->lt_is_sampling = true;
+-	}
+-
+-	/* To avoid underestimates, reset sampling if we run out of data. */
+-	if (rs->is_app_limited) {
+-		bbr_reset_lt_bw_sampling(sk);
+-		return;
+-	}
+-
+-	if (bbr->round_start)
+-		bbr->lt_rtt_cnt++;	/* count round trips in this interval */
+-	if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts)
+-		return;		/* sampling interval needs to be longer */
+-	if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) {
+-		bbr_reset_lt_bw_sampling(sk);  /* interval is too long */
+-		return;
+-	}
+-
+-	/* End sampling interval when a packet is lost, so we estimate the
+-	 * policer tokens were exhausted. Stopping the sampling before the
+-	 * tokens are exhausted under-estimates the policed rate.
+-	 */
+-	if (!rs->losses)
+-		return;
+-
+-	/* Calculate packets lost and delivered in sampling interval. */
+-	lost = tp->lost - bbr->lt_last_lost;
+-	delivered = tp->delivered - bbr->lt_last_delivered;
+-	/* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */
+-	if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered)
+-		return;
+-
+-	/* Find average delivery rate in this sampling interval. */
+-	t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp;
+-	if ((s32)t < 1)
+-		return;		/* interval is less than one ms, so wait */
+-	/* Check if can multiply without overflow */
+-	if (t >= ~0U / USEC_PER_MSEC) {
+-		bbr_reset_lt_bw_sampling(sk);  /* interval too long; reset */
+-		return;
+-	}
+-	t *= USEC_PER_MSEC;
+-	bw = (u64)delivered * BW_UNIT;
+-	do_div(bw, t);
+-	bbr_lt_bw_interval_done(sk, bw);
+-}
+-
+-/* Estimate the bandwidth based on how fast packets are delivered */
+-static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs)
++static u32 bbr_update_round_start(struct sock *sk,
++		const struct rate_sample *rs, struct bbr_context *ctx)
+ {
+ 	struct tcp_sock *tp = tcp_sk(sk);
+ 	struct bbr *bbr = inet_csk_ca(sk);
+-	u64 bw;
++	u32 round_delivered = 0;
+ 
+ 	bbr->round_start = 0;
+-	if (rs->delivered < 0 || rs->interval_us <= 0)
+-		return; /* Not a valid observation */
+ 
+ 	/* See if we've reached the next RTT */
+-	if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) {
++	if (rs->interval_us > 0 &&
++	    !before(rs->prior_delivered, bbr->next_rtt_delivered)) {
++		round_delivered = tp->delivered - bbr->next_rtt_delivered;
+ 		bbr->next_rtt_delivered = tp->delivered;
+-		bbr->rtt_cnt++;
+ 		bbr->round_start = 1;
+-		bbr->packet_conservation = 0;
+ 	}
++	return round_delivered;
++}
+ 
+-	bbr_lt_bw_sampling(sk, rs);
++/* Calculate the bandwidth based on how fast packets are delivered */
++static void bbr_calculate_bw_sample(struct sock *sk,
++			const struct rate_sample *rs, struct bbr_context *ctx)
++{
++	u64 bw = 0;
+ 
+ 	/* Divide delivered by the interval to find a (lower bound) bottleneck
+ 	 * bandwidth sample. Delivered is in packets and interval_us in uS and
+ 	 * ratio will be <<1 for most connections. So delivered is first scaled.
++	 * Round up to allow growth at low rates, even with integer division.
+ 	 */
+-	bw = div64_long((u64)rs->delivered * BW_UNIT, rs->interval_us);
+-
+-	/* If this sample is application-limited, it is likely to have a very
+-	 * low delivered count that represents application behavior rather than
+-	 * the available network rate. Such a sample could drag down estimated
+-	 * bw, causing needless slow-down. Thus, to continue to send at the
+-	 * last measured network rate, we filter out app-limited samples unless
+-	 * they describe the path bw at least as well as our bw model.
+-	 *
+-	 * So the goal during app-limited phase is to proceed with the best
+-	 * network rate no matter how long. We automatically leave this
+-	 * phase when app writes faster than the network can deliver :)
+-	 */
+-	if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) {
+-		/* Incorporate new sample into our max bw filter. */
+-		minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw);
++	if (rs->interval_us > 0) {
++		if (WARN_ONCE(rs->delivered < 0,
++			      "negative delivered: %d interval_us: %ld\n",
++			      rs->delivered, rs->interval_us))
++			return;
++
++		bw = DIV_ROUND_UP_ULL((u64)rs->delivered * BW_UNIT, rs->interval_us);
+ 	}
++
++	ctx->sample_bw = bw;
+ }
+ 
+ /* Estimates the windowed max degree of ack aggregation.
+@@ -812,7 +794,7 @@ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs)
+  *
+  * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms).
+  * Max filter is an approximate sliding window of 5-10 (packet timed) round
+- * trips.
++ * trips for non-startup phase, and 1-2 round trips for startup.
+  */
+ static void bbr_update_ack_aggregation(struct sock *sk,
+ 				       const struct rate_sample *rs)
+@@ -820,15 +802,19 @@ static void bbr_update_ack_aggregation(struct sock *sk,
+ 	u32 epoch_us, expected_acked, extra_acked;
+ 	struct bbr *bbr = inet_csk_ca(sk);
+ 	struct tcp_sock *tp = tcp_sk(sk);
++	u32 extra_acked_win_rtts_thresh = bbr_param(sk, extra_acked_win_rtts);
+ 
+-	if (!bbr_extra_acked_gain || rs->acked_sacked <= 0 ||
++	if (!bbr_param(sk, extra_acked_gain) || rs->acked_sacked <= 0 ||
+ 	    rs->delivered < 0 || rs->interval_us <= 0)
+ 		return;
+ 
+ 	if (bbr->round_start) {
+ 		bbr->extra_acked_win_rtts = min(0x1F,
+ 						bbr->extra_acked_win_rtts + 1);
+-		if (bbr->extra_acked_win_rtts >= bbr_extra_acked_win_rtts) {
++		if (!bbr_full_bw_reached(sk))
++			extra_acked_win_rtts_thresh = 1;
++		if (bbr->extra_acked_win_rtts >=
++		    extra_acked_win_rtts_thresh) {
+ 			bbr->extra_acked_win_rtts = 0;
+ 			bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ?
+ 						   0 : 1;
+@@ -862,49 +848,6 @@ static void bbr_update_ack_aggregation(struct sock *sk,
+ 		bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked;
+ }
+ 
+-/* Estimate when the pipe is full, using the change in delivery rate: BBR
+- * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by
+- * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited
+- * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the
+- * higher rwin, 3: we get higher delivery rate samples. Or transient
+- * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar
+- * design goal, but uses delay and inter-ACK spacing instead of bandwidth.
+- */
+-static void bbr_check_full_bw_reached(struct sock *sk,
+-				      const struct rate_sample *rs)
+-{
+-	struct bbr *bbr = inet_csk_ca(sk);
+-	u32 bw_thresh;
+-
+-	if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited)
+-		return;
+-
+-	bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE;
+-	if (bbr_max_bw(sk) >= bw_thresh) {
+-		bbr->full_bw = bbr_max_bw(sk);
+-		bbr->full_bw_cnt = 0;
+-		return;
+-	}
+-	++bbr->full_bw_cnt;
+-	bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt;
+-}
+-
+-/* If pipe is probably full, drain the queue and then enter steady-state. */
+-static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
+-{
+-	struct bbr *bbr = inet_csk_ca(sk);
+-
+-	if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
+-		bbr->mode = BBR_DRAIN;	/* drain queue we created */
+-		tcp_sk(sk)->snd_ssthresh =
+-				bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
+-	}	/* fall through to check if in-flight is already small: */
+-	if (bbr->mode == BBR_DRAIN &&
+-	    bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <=
+-	    bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT))
+-		bbr_reset_probe_bw_mode(sk);  /* we estimate queue is drained */
+-}
+-
+ static void bbr_check_probe_rtt_done(struct sock *sk)
+ {
+ 	struct tcp_sock *tp = tcp_sk(sk);
+@@ -914,9 +857,9 @@ static void bbr_check_probe_rtt_done(struct sock *sk)
+ 	      after(tcp_jiffies32, bbr->probe_rtt_done_stamp)))
+ 		return;
+ 
+-	bbr->min_rtt_stamp = tcp_jiffies32;  /* wait a while until PROBE_RTT */
++	bbr->probe_rtt_min_stamp = tcp_jiffies32; /* schedule next PROBE_RTT */
+ 	tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd));
+-	bbr_reset_mode(sk);
++	bbr_exit_probe_rtt(sk);
+ }
+ 
+ /* The goal of PROBE_RTT mode is to have BBR flows cooperatively and
+@@ -942,23 +885,35 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
+ {
+ 	struct tcp_sock *tp = tcp_sk(sk);
+ 	struct bbr *bbr = inet_csk_ca(sk);
+-	bool filter_expired;
++	bool probe_rtt_expired, min_rtt_expired;
++	u32 expire;
+ 
+-	/* Track min RTT seen in the min_rtt_win_sec filter window: */
+-	filter_expired = after(tcp_jiffies32,
+-			       bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
++	/* Track min RTT in probe_rtt_win_ms to time next PROBE_RTT state. */
++	expire = bbr->probe_rtt_min_stamp +
++		 msecs_to_jiffies(bbr_param(sk, probe_rtt_win_ms));
++	probe_rtt_expired = after(tcp_jiffies32, expire);
+ 	if (rs->rtt_us >= 0 &&
+-	    (rs->rtt_us < bbr->min_rtt_us ||
+-	     (filter_expired && !rs->is_ack_delayed))) {
+-		bbr->min_rtt_us = rs->rtt_us;
+-		bbr->min_rtt_stamp = tcp_jiffies32;
++	    (rs->rtt_us < bbr->probe_rtt_min_us ||
++	     (probe_rtt_expired && !rs->is_ack_delayed))) {
++		bbr->probe_rtt_min_us = rs->rtt_us;
++		bbr->probe_rtt_min_stamp = tcp_jiffies32;
++	}
++	/* Track min RTT seen in the min_rtt_win_sec filter window: */
++	expire = bbr->min_rtt_stamp + bbr_param(sk, min_rtt_win_sec) * HZ;
++	min_rtt_expired = after(tcp_jiffies32, expire);
++	if (bbr->probe_rtt_min_us <= bbr->min_rtt_us ||
++	    min_rtt_expired) {
++		bbr->min_rtt_us = bbr->probe_rtt_min_us;
++		bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp;
+ 	}
+ 
+-	if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
++	if (bbr_param(sk, probe_rtt_mode_ms) > 0 && probe_rtt_expired &&
+ 	    !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
+ 		bbr->mode = BBR_PROBE_RTT;  /* dip, drain queue */
+ 		bbr_save_cwnd(sk);  /* note cwnd so we can restore it */
+ 		bbr->probe_rtt_done_stamp = 0;
++		bbr->ack_phase = BBR_ACKS_PROBE_STOPPING;
++		bbr->next_rtt_delivered = tp->delivered;
+ 	}
+ 
+ 	if (bbr->mode == BBR_PROBE_RTT) {
+@@ -967,9 +922,9 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
+ 			(tp->delivered + tcp_packets_in_flight(tp)) ? : 1;
+ 		/* Maintain min packets in flight for max(200 ms, 1 round). */
+ 		if (!bbr->probe_rtt_done_stamp &&
+-		    tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) {
++		    tcp_packets_in_flight(tp) <= bbr_probe_rtt_cwnd(sk)) {
+ 			bbr->probe_rtt_done_stamp = tcp_jiffies32 +
+-				msecs_to_jiffies(bbr_probe_rtt_mode_ms);
++				msecs_to_jiffies(bbr_param(sk, probe_rtt_mode_ms));
+ 			bbr->probe_rtt_round_done = 0;
+ 			bbr->next_rtt_delivered = tp->delivered;
+ 		} else if (bbr->probe_rtt_done_stamp) {
+@@ -990,18 +945,20 @@ static void bbr_update_gains(struct sock *sk)
+ 
+ 	switch (bbr->mode) {
+ 	case BBR_STARTUP:
+-		bbr->pacing_gain = bbr_high_gain;
+-		bbr->cwnd_gain	 = bbr_high_gain;
++		bbr->pacing_gain = bbr_param(sk, startup_pacing_gain);
++		bbr->cwnd_gain	 = bbr_param(sk, startup_cwnd_gain);
+ 		break;
+ 	case BBR_DRAIN:
+-		bbr->pacing_gain = bbr_drain_gain;	/* slow, to drain */
+-		bbr->cwnd_gain	 = bbr_high_gain;	/* keep cwnd */
++		bbr->pacing_gain = bbr_param(sk, drain_gain);  /* slow, to drain */
++		bbr->cwnd_gain	 = bbr_param(sk, startup_cwnd_gain);  /* keep cwnd */
+ 		break;
+ 	case BBR_PROBE_BW:
+-		bbr->pacing_gain = (bbr->lt_use_bw ?
+-				    BBR_UNIT :
+-				    bbr_pacing_gain[bbr->cycle_idx]);
+-		bbr->cwnd_gain	 = bbr_cwnd_gain;
++		bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx];
++		bbr->cwnd_gain	 = bbr_param(sk, cwnd_gain);
++		if (bbr_param(sk, bw_probe_cwnd_gain) &&
++		    bbr->cycle_idx == BBR_BW_PROBE_UP)
++			bbr->cwnd_gain +=
++				BBR_UNIT * bbr_param(sk, bw_probe_cwnd_gain) / 4;
+ 		break;
+ 	case BBR_PROBE_RTT:
+ 		bbr->pacing_gain = BBR_UNIT;
+@@ -1013,144 +970,1387 @@ static void bbr_update_gains(struct sock *sk)
+ 	}
+ }
+ 
+-static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
++__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk)
+ {
+-	bbr_update_bw(sk, rs);
+-	bbr_update_ack_aggregation(sk, rs);
+-	bbr_update_cycle_phase(sk, rs);
+-	bbr_check_full_bw_reached(sk, rs);
+-	bbr_check_drain(sk, rs);
+-	bbr_update_min_rtt(sk, rs);
+-	bbr_update_gains(sk);
++	/* Provision 3 * cwnd since BBR may slow-start even during recovery. */
++	return 3;
+ }
+ 
+-__bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs)
++/* Incorporate a new bw sample into the current window of our max filter. */
++static void bbr_take_max_bw_sample(struct sock *sk, u32 bw)
+ {
+ 	struct bbr *bbr = inet_csk_ca(sk);
+-	u32 bw;
+-
+-	bbr_update_model(sk, rs);
+ 
+-	bw = bbr_bw(sk);
+-	bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
+-	bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
++	bbr->bw_hi[1] = max(bw, bbr->bw_hi[1]);
+ }
+ 
+-__bpf_kfunc static void bbr_init(struct sock *sk)
++/* Keep max of last 1-2 cycles. Each PROBE_BW cycle, flip filter window. */
++static void bbr_advance_max_bw_filter(struct sock *sk)
+ {
+-	struct tcp_sock *tp = tcp_sk(sk);
+ 	struct bbr *bbr = inet_csk_ca(sk);
+ 
+-	bbr->prior_cwnd = 0;
+-	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
+-	bbr->rtt_cnt = 0;
+-	bbr->next_rtt_delivered = tp->delivered;
+-	bbr->prev_ca_state = TCP_CA_Open;
+-	bbr->packet_conservation = 0;
+-
+-	bbr->probe_rtt_done_stamp = 0;
+-	bbr->probe_rtt_round_done = 0;
+-	bbr->min_rtt_us = tcp_min_rtt(tp);
+-	bbr->min_rtt_stamp = tcp_jiffies32;
+-
+-	minmax_reset(&bbr->bw, bbr->rtt_cnt, 0);  /* init max bw to 0 */
++	if (!bbr->bw_hi[1])
++		return;  /* no samples in this window; remember old window */
++	bbr->bw_hi[0] = bbr->bw_hi[1];
++	bbr->bw_hi[1] = 0;
++}
+ 
+-	bbr->has_seen_rtt = 0;
+-	bbr_init_pacing_rate_from_rtt(sk);
++/* Reset the estimator for reaching full bandwidth based on bw plateau. */
++static void bbr_reset_full_bw(struct sock *sk)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
+ 
+-	bbr->round_start = 0;
+-	bbr->idle_restart = 0;
+-	bbr->full_bw_reached = 0;
+ 	bbr->full_bw = 0;
+ 	bbr->full_bw_cnt = 0;
+-	bbr->cycle_mstamp = 0;
+-	bbr->cycle_idx = 0;
+-	bbr_reset_lt_bw_sampling(sk);
+-	bbr_reset_startup_mode(sk);
++	bbr->full_bw_now = 0;
++}
+ 
+-	bbr->ack_epoch_mstamp = tp->tcp_mstamp;
+-	bbr->ack_epoch_acked = 0;
+-	bbr->extra_acked_win_rtts = 0;
+-	bbr->extra_acked_win_idx = 0;
+-	bbr->extra_acked[0] = 0;
+-	bbr->extra_acked[1] = 0;
++/* How much do we want in flight? Our BDP, unless congestion cut cwnd. */
++static u32 bbr_target_inflight(struct sock *sk)
++{
++	u32 bdp = bbr_inflight(sk, bbr_bw(sk), BBR_UNIT);
+ 
+-	cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
++	return min(bdp, tcp_sk(sk)->snd_cwnd);
+ }
+ 
+-__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk)
++static bool bbr_is_probing_bandwidth(struct sock *sk)
+ {
+-	/* Provision 3 * cwnd since BBR may slow-start even during recovery. */
+-	return 3;
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	return (bbr->mode == BBR_STARTUP) ||
++		(bbr->mode == BBR_PROBE_BW &&
++		 (bbr->cycle_idx == BBR_BW_PROBE_REFILL ||
++		  bbr->cycle_idx == BBR_BW_PROBE_UP));
++}
++
++/* Has the given amount of time elapsed since we marked the phase start? */
++static bool bbr_has_elapsed_in_phase(const struct sock *sk, u32 interval_us)
++{
++	const struct tcp_sock *tp = tcp_sk(sk);
++	const struct bbr *bbr = inet_csk_ca(sk);
++
++	return tcp_stamp_us_delta(tp->tcp_mstamp,
++				  bbr->cycle_mstamp + interval_us) > 0;
++}
++
++static void bbr_handle_queue_too_high_in_startup(struct sock *sk)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++	u32 bdp;  /* estimated BDP in packets, with quantization budget */
++
++	bbr->full_bw_reached = 1;
++
++	bdp = bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
++	bbr->inflight_hi = max(bdp, bbr->inflight_latest);
++}
++
++/* Exit STARTUP upon N consecutive rounds with ECN mark rate > ecn_thresh. */
++static void bbr_check_ecn_too_high_in_startup(struct sock *sk, u32 ce_ratio)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	if (bbr_full_bw_reached(sk) || !bbr->ecn_eligible ||
++	    !bbr_param(sk, full_ecn_cnt) || !bbr_param(sk, ecn_thresh))
++		return;
++
++	if (ce_ratio >= bbr_param(sk, ecn_thresh))
++		bbr->startup_ecn_rounds++;
++	else
++		bbr->startup_ecn_rounds = 0;
++
++	if (bbr->startup_ecn_rounds >= bbr_param(sk, full_ecn_cnt)) {
++		bbr_handle_queue_too_high_in_startup(sk);
++		return;
++	}
++}
++
++/* Updates ecn_alpha and returns ce_ratio. -1 if not available. */
++static int bbr_update_ecn_alpha(struct sock *sk)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	struct net *net = sock_net(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
++	s32 delivered, delivered_ce;
++	u64 alpha, ce_ratio;
++	u32 gain;
++	bool want_ecn_alpha;
++
++	/* See if we should use ECN sender logic for this connection. */
++	if (!bbr->ecn_eligible && bbr_can_use_ecn(sk) &&
++	    bbr_param(sk, ecn_factor) &&
++	    (bbr->min_rtt_us <= bbr_ecn_max_rtt_us ||
++	     !bbr_ecn_max_rtt_us))
++		bbr->ecn_eligible = 1;
++
++	/* Skip updating alpha only if not ECN-eligible and PLB is disabled. */
++	want_ecn_alpha = (bbr->ecn_eligible ||
++			  (bbr_can_use_ecn(sk) &&
++			   READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled)));
++	if (!want_ecn_alpha)
++		return -1;
++
++	delivered = tp->delivered - bbr->alpha_last_delivered;
++	delivered_ce = tp->delivered_ce - bbr->alpha_last_delivered_ce;
++
++	if (delivered == 0 ||		/* avoid divide by zero */
++	    WARN_ON_ONCE(delivered < 0 || delivered_ce < 0))  /* backwards? */
++		return -1;
++
++	BUILD_BUG_ON(BBR_SCALE != TCP_PLB_SCALE);
++	ce_ratio = (u64)delivered_ce << BBR_SCALE;
++	do_div(ce_ratio, delivered);
++
++	gain = bbr_param(sk, ecn_alpha_gain);
++	alpha = ((BBR_UNIT - gain) * bbr->ecn_alpha) >> BBR_SCALE;
++	alpha += (gain * ce_ratio) >> BBR_SCALE;
++	bbr->ecn_alpha = min_t(u32, alpha, BBR_UNIT);
++
++	bbr->alpha_last_delivered = tp->delivered;
++	bbr->alpha_last_delivered_ce = tp->delivered_ce;
++
++	bbr_check_ecn_too_high_in_startup(sk, ce_ratio);
++	return (int)ce_ratio;
+ }
+ 
+-/* In theory BBR does not need to undo the cwnd since it does not
+- * always reduce cwnd on losses (see bbr_main()). Keep it for now.
++/* Protective Load Balancing (PLB). PLB rehashes outgoing data (to a new IPv6
++ * flow label) if it encounters sustained congestion in the form of ECN marks.
+  */
+-__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk)
++static void bbr_plb(struct sock *sk, const struct rate_sample *rs, int ce_ratio)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	if (bbr->round_start && ce_ratio >= 0)
++		tcp_plb_update_state(sk, &bbr->plb, ce_ratio);
++
++	tcp_plb_check_rehash(sk, &bbr->plb);
++}
++
++/* Each round trip of BBR_BW_PROBE_UP, double volume of probing data. */
++static void bbr_raise_inflight_hi_slope(struct sock *sk)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
++	u32 growth_this_round, cnt;
++
++	/* Calculate "slope": packets S/Acked per inflight_hi increment. */
++	growth_this_round = 1 << bbr->bw_probe_up_rounds;
++	bbr->bw_probe_up_rounds = min(bbr->bw_probe_up_rounds + 1, 30);
++	cnt = tcp_snd_cwnd(tp) / growth_this_round;
++	cnt = max(cnt, 1U);
++	bbr->bw_probe_up_cnt = cnt;
++}
++
++/* In BBR_BW_PROBE_UP, not seeing high loss/ECN/queue, so raise inflight_hi. */
++static void bbr_probe_inflight_hi_upward(struct sock *sk,
++					  const struct rate_sample *rs)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
++	u32 delta;
++
++	if (!tp->is_cwnd_limited || tcp_snd_cwnd(tp) < bbr->inflight_hi)
++		return;  /* not fully using inflight_hi, so don't grow it */
++
++	/* For each bw_probe_up_cnt packets ACKed, increase inflight_hi by 1. */
++	bbr->bw_probe_up_acks += rs->acked_sacked;
++	if (bbr->bw_probe_up_acks >=  bbr->bw_probe_up_cnt) {
++		delta = bbr->bw_probe_up_acks / bbr->bw_probe_up_cnt;
++		bbr->bw_probe_up_acks -= delta * bbr->bw_probe_up_cnt;
++		bbr->inflight_hi += delta;
++		bbr->try_fast_path = 0;  /* Need to update cwnd */
++	}
++
++	if (bbr->round_start)
++		bbr_raise_inflight_hi_slope(sk);
++}
++
++/* Does loss/ECN rate for this sample say inflight is "too high"?
++ * This is used by both the bbr_check_loss_too_high_in_startup() function,
++ * which can be used in either v1 or v2, and the PROBE_UP phase of v2, which
++ * uses it to notice when loss/ECN rates suggest inflight is too high.
++ */
++static bool bbr_is_inflight_too_high(const struct sock *sk,
++				      const struct rate_sample *rs)
++{
++	const struct bbr *bbr = inet_csk_ca(sk);
++	u32 loss_thresh, ecn_thresh;
++
++	if (rs->lost > 0 && rs->tx_in_flight) {
++		loss_thresh = (u64)rs->tx_in_flight * bbr_param(sk, loss_thresh) >>
++				BBR_SCALE;
++		if (rs->lost > loss_thresh) {
++			return true;
++		}
++	}
++
++	if (rs->delivered_ce > 0 && rs->delivered > 0 &&
++	    bbr->ecn_eligible && bbr_param(sk, ecn_thresh)) {
++		ecn_thresh = (u64)rs->delivered * bbr_param(sk, ecn_thresh) >>
++				BBR_SCALE;
++		if (rs->delivered_ce > ecn_thresh) {
++			return true;
++		}
++	}
++
++	return false;
++}
++
++/* Calculate the tx_in_flight level that corresponded to excessive loss.
++ * We find "lost_prefix" segs of the skb where loss rate went too high,
++ * by solving for "lost_prefix" in the following equation:
++ *   lost                     /  inflight                     >= loss_thresh
++ *  (lost_prev + lost_prefix) / (inflight_prev + lost_prefix) >= loss_thresh
++ * Then we take that equation, convert it to fixed point, and
++ * round up to the nearest packet.
++ */
++static u32 bbr_inflight_hi_from_lost_skb(const struct sock *sk,
++					  const struct rate_sample *rs,
++					  const struct sk_buff *skb)
++{
++	const struct tcp_sock *tp = tcp_sk(sk);
++	u32 loss_thresh  = bbr_param(sk, loss_thresh);
++	u32 pcount, divisor, inflight_hi;
++	s32 inflight_prev, lost_prev;
++	u64 loss_budget, lost_prefix;
++
++	pcount = tcp_skb_pcount(skb);
++
++	/* How much data was in flight before this skb? */
++	inflight_prev = rs->tx_in_flight - pcount;
++	if (inflight_prev < 0) {
++		WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious(
++				  pcount,
++				  TCP_SKB_CB(skb)->sacked,
++				  rs->tx_in_flight),
++			  "tx_in_flight: %u pcount: %u reneg: %u",
++			  rs->tx_in_flight, pcount, tcp_sk(sk)->is_sack_reneg);
++		return ~0U;
++	}
++
++	/* How much inflight data was marked lost before this skb? */
++	lost_prev = rs->lost - pcount;
++	if (WARN_ONCE(lost_prev < 0,
++		      "cwnd: %u ca: %d out: %u lost: %u pif: %u "
++		      "tx_in_flight: %u tx.lost: %u tp->lost: %u rs->lost: %d "
++		      "lost_prev: %d pcount: %d seq: %u end_seq: %u reneg: %u",
++		      tcp_snd_cwnd(tp), inet_csk(sk)->icsk_ca_state,
++		      tp->packets_out, tp->lost_out, tcp_packets_in_flight(tp),
++		      rs->tx_in_flight, TCP_SKB_CB(skb)->tx.lost, tp->lost,
++		      rs->lost, lost_prev, pcount,
++		      TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
++		      tp->is_sack_reneg))
++		return ~0U;
++
++	/* At what prefix of this lost skb did loss rate exceed loss_thresh? */
++	loss_budget = (u64)inflight_prev * loss_thresh + BBR_UNIT - 1;
++	loss_budget >>= BBR_SCALE;
++	if (lost_prev >= loss_budget) {
++		lost_prefix = 0;   /* previous losses crossed loss_thresh */
++	} else {
++		lost_prefix = loss_budget - lost_prev;
++		lost_prefix <<= BBR_SCALE;
++		divisor = BBR_UNIT - loss_thresh;
++		if (WARN_ON_ONCE(!divisor))  /* loss_thresh is 8 bits */
++			return ~0U;
++		do_div(lost_prefix, divisor);
++	}
++
++	inflight_hi = inflight_prev + lost_prefix;
++	return inflight_hi;
++}
++
++/* If loss/ECN rates during probing indicated we may have overfilled a
++ * buffer, return an operating point that tries to leave unutilized headroom in
++ * the path for other flows, for fairness convergence and lower RTTs and loss.
++ */
++static u32 bbr_inflight_with_headroom(const struct sock *sk)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++	u32 headroom, headroom_fraction;
++
++	if (bbr->inflight_hi == ~0U)
++		return ~0U;
++
++	headroom_fraction = bbr_param(sk, inflight_headroom);
++	headroom = ((u64)bbr->inflight_hi * headroom_fraction) >> BBR_SCALE;
++	headroom = max(headroom, 1U);
++	return max_t(s32, bbr->inflight_hi - headroom,
++		     bbr_param(sk, cwnd_min_target));
++}
++
++/* Bound cwnd to a sensible level, based on our current probing state
++ * machine phase and model of a good inflight level (inflight_lo, inflight_hi).
++ */
++static void bbr_bound_cwnd_for_inflight_model(struct sock *sk)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
++	u32 cap;
++
++	/* tcp_rcv_synsent_state_process() currently calls tcp_ack()
++	 * and thus cong_control() without first initializing us(!).
++	 */
++	if (!bbr->initialized)
++		return;
++
++	cap = ~0U;
++	if (bbr->mode == BBR_PROBE_BW &&
++	    bbr->cycle_idx != BBR_BW_PROBE_CRUISE) {
++		/* Probe to see if more packets fit in the path. */
++		cap = bbr->inflight_hi;
++	} else {
++		if (bbr->mode == BBR_PROBE_RTT ||
++		    (bbr->mode == BBR_PROBE_BW &&
++		     bbr->cycle_idx == BBR_BW_PROBE_CRUISE))
++			cap = bbr_inflight_with_headroom(sk);
++	}
++	/* Adapt to any loss/ECN since our last bw probe. */
++	cap = min(cap, bbr->inflight_lo);
++
++	cap = max_t(u32, cap, bbr_param(sk, cwnd_min_target));
++	tcp_snd_cwnd_set(tp, min(cap, tcp_snd_cwnd(tp)));
++}
++
++/* How should we multiplicatively cut bw or inflight limits based on ECN? */
++static u32 bbr_ecn_cut(struct sock *sk)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	return BBR_UNIT -
++		((bbr->ecn_alpha * bbr_param(sk, ecn_factor)) >> BBR_SCALE);
++}
++
++/* Init lower bounds if they have not been initialized yet. */
++static void bbr_init_lower_bounds(struct sock *sk, bool init_bw)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	if (init_bw && bbr->bw_lo == ~0U)
++		bbr->bw_lo = bbr_max_bw(sk);
++	if (bbr->inflight_lo == ~0U)
++		bbr->inflight_lo = tcp_snd_cwnd(tp);
++}
++
++/* Reduce bw and inflight to (1 - beta). */
++static void bbr_loss_lower_bounds(struct sock *sk, u32 *bw, u32 *inflight)
++{
++	struct bbr* bbr = inet_csk_ca(sk);
++	u32 loss_cut = BBR_UNIT - bbr_param(sk, beta);
++
++	*bw = max_t(u32, bbr->bw_latest,
++		    (u64)bbr->bw_lo * loss_cut >> BBR_SCALE);
++	*inflight = max_t(u32, bbr->inflight_latest,
++			  (u64)bbr->inflight_lo * loss_cut >> BBR_SCALE);
++}
++
++/* Reduce inflight to (1 - alpha*ecn_factor). */
++static void bbr_ecn_lower_bounds(struct sock *sk, u32 *inflight)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++	u32 ecn_cut = bbr_ecn_cut(sk);
++
++	*inflight = (u64)bbr->inflight_lo * ecn_cut >> BBR_SCALE;
++}
++
++/* Estimate a short-term lower bound on the capacity available now, based
++ * on measurements of the current delivery process and recent history. When we
++ * are seeing loss/ECN at times when we are not probing bw, then conservatively
++ * move toward flow balance by multiplicatively cutting our short-term
++ * estimated safe rate and volume of data (bw_lo and inflight_lo). We use a
++ * multiplicative decrease in order to converge to a lower capacity in time
++ * logarithmic in the magnitude of the decrease.
++ *
++ * However, we do not cut our short-term estimates lower than the current rate
++ * and volume of delivered data from this round trip, since from the current
++ * delivery process we can estimate the measured capacity available now.
++ *
++ * Anything faster than that approach would knowingly risk high loss, which can
++ * cause low bw for Reno/CUBIC and high loss recovery latency for
++ * request/response flows using any congestion control.
++ */
++static void bbr_adapt_lower_bounds(struct sock *sk,
++				    const struct rate_sample *rs)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++	u32 ecn_inflight_lo = ~0U;
++
++	/* We only use lower-bound estimates when not probing bw.
++	 * When probing we need to push inflight higher to probe bw.
++	 */
++	if (bbr_is_probing_bandwidth(sk))
++		return;
++
++	/* ECN response. */
++	if (bbr->ecn_in_round && bbr_param(sk, ecn_factor)) {
++		bbr_init_lower_bounds(sk, false);
++		bbr_ecn_lower_bounds(sk, &ecn_inflight_lo);
++	}
++
++	/* Loss response. */
++	if (bbr->loss_in_round) {
++		bbr_init_lower_bounds(sk, true);
++		bbr_loss_lower_bounds(sk, &bbr->bw_lo, &bbr->inflight_lo);
++	}
++
++	/* Adjust to the lower of the levels implied by loss/ECN. */
++	bbr->inflight_lo = min(bbr->inflight_lo, ecn_inflight_lo);
++	bbr->bw_lo = max(1U, bbr->bw_lo);
++}
++
++/* Reset any short-term lower-bound adaptation to congestion, so that we can
++ * push our inflight up.
++ */
++static void bbr_reset_lower_bounds(struct sock *sk)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	bbr->bw_lo = ~0U;
++	bbr->inflight_lo = ~0U;
++}
++
++/* After bw probing (STARTUP/PROBE_UP), reset signals before entering a state
++ * machine phase where we adapt our lower bound based on congestion signals.
++ */
++static void bbr_reset_congestion_signals(struct sock *sk)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	bbr->loss_in_round = 0;
++	bbr->ecn_in_round = 0;
++	bbr->loss_in_cycle = 0;
++	bbr->ecn_in_cycle = 0;
++	bbr->bw_latest = 0;
++	bbr->inflight_latest = 0;
++}
++
++static void bbr_exit_loss_recovery(struct sock *sk)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd));
++	bbr->try_fast_path = 0; /* bound cwnd using latest model */
++}
++
++/* Update rate and volume of delivered data from latest round trip. */
++static void bbr_update_latest_delivery_signals(
++	struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	bbr->loss_round_start = 0;
++	if (rs->interval_us <= 0 || !rs->acked_sacked)
++		return; /* Not a valid observation */
++
++	bbr->bw_latest       = max_t(u32, bbr->bw_latest,       ctx->sample_bw);
++	bbr->inflight_latest = max_t(u32, bbr->inflight_latest, rs->delivered);
++
++	if (!before(rs->prior_delivered, bbr->loss_round_delivered)) {
++		bbr->loss_round_delivered = tp->delivered;
++		bbr->loss_round_start = 1;  /* mark start of new round trip */
++	}
++}
++
++/* Once per round, reset filter for latest rate and volume of delivered data. */
++static void bbr_advance_latest_delivery_signals(
++	struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	/* If ACK matches a TLP retransmit, persist the filter. If we detect
++	 * that a TLP retransmit plugged a tail loss, we'll want to remember
++	 * how much data the path delivered before the tail loss.
++	 */
++	if (bbr->loss_round_start && !rs->is_acking_tlp_retrans_seq) {
++		bbr->bw_latest = ctx->sample_bw;
++		bbr->inflight_latest = rs->delivered;
++	}
++}
++
++/* Update (most of) our congestion signals: track the recent rate and volume of
++ * delivered data, presence of loss, and EWMA degree of ECN marking.
++ */
++static void bbr_update_congestion_signals(
++	struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx)
+ {
+ 	struct bbr *bbr = inet_csk_ca(sk);
++	u64 bw;
++
++	if (rs->interval_us <= 0 || !rs->acked_sacked)
++		return; /* Not a valid observation */
++	bw = ctx->sample_bw;
+ 
+-	bbr->full_bw = 0;   /* spurious slow-down; reset full pipe detection */
++	if (!rs->is_app_limited || bw >= bbr_max_bw(sk))
++		bbr_take_max_bw_sample(sk, bw);
++
++	bbr->loss_in_round |= (rs->losses > 0);
++
++	if (!bbr->loss_round_start)
++		return;		/* skip the per-round-trip updates */
++	/* Now do per-round-trip updates. */
++	bbr_adapt_lower_bounds(sk, rs);
++
++	bbr->loss_in_round = 0;
++	bbr->ecn_in_round  = 0;
++}
++
++/* Bandwidth probing can cause loss. To help coexistence with loss-based
++ * congestion control we spread out our probing in a Reno-conscious way. Due to
++ * the shape of the Reno sawtooth, the time required between loss epochs for an
++ * idealized Reno flow is a number of round trips that is the BDP of that
++ * flow. We count packet-timed round trips directly, since measured RTT can
++ * vary widely, and Reno is driven by packet-timed round trips.
++ */
++static bool bbr_is_reno_coexistence_probe_time(struct sock *sk)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++	u32 rounds;
++
++	/* Random loss can shave some small percentage off of our inflight
++	 * in each round. To survive this, flows need robust periodic probes.
++	 */
++	rounds = min_t(u32, bbr_param(sk, bw_probe_max_rounds), bbr_target_inflight(sk));
++	return bbr->rounds_since_probe >= rounds;
++}
++
++/* How long do we want to wait before probing for bandwidth (and risking
++ * loss)? We randomize the wait, for better mixing and fairness convergence.
++ *
++ * We bound the Reno-coexistence inter-bw-probe time to be 62-63 round trips.
++ * This is calculated to allow fairness with a 25Mbps, 30ms Reno flow,
++ * (eg 4K video to a broadband user):
++ *   BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets
++ *
++ * We bound the BBR-native inter-bw-probe wall clock time to be:
++ *  (a) higher than 2 sec: to try to avoid causing loss for a long enough time
++ *      to allow Reno at 30ms to get 4K video bw, the inter-bw-probe time must
++ *      be at least: 25Mbps * .030sec / (1514bytes) * 0.030sec = 1.9secs
++ *  (b) lower than 3 sec: to ensure flows can start probing in a reasonable
++ *      amount of time to discover unutilized bw on human-scale interactive
++ *      time-scales (e.g. perhaps traffic from a web page download that we
++ *      were competing with is now complete).
++ */
++static void bbr_pick_probe_wait(struct sock *sk)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	/* Decide the random round-trip bound for wait until probe: */
++	bbr->rounds_since_probe =
++		get_random_u32_below(bbr_param(sk, bw_probe_rand_rounds));
++	/* Decide the random wall clock bound for wait until probe: */
++	bbr->probe_wait_us = bbr_param(sk, bw_probe_base_us) +
++			     get_random_u32_below(bbr_param(sk, bw_probe_rand_us));
++}
++
++static void bbr_set_cycle_idx(struct sock *sk, int cycle_idx)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	bbr->cycle_idx = cycle_idx;
++	/* New phase, so need to update cwnd and pacing rate. */
++	bbr->try_fast_path = 0;
++}
++
++/* Send at estimated bw to fill the pipe, but not queue. We need this phase
++ * before PROBE_UP, because as soon as we send faster than the available bw
++ * we will start building a queue, and if the buffer is shallow we can cause
++ * loss. If we do not fill the pipe before we cause this loss, our bw_hi and
++ * inflight_hi estimates will underestimate.
++ */
++static void bbr_start_bw_probe_refill(struct sock *sk, u32 bw_probe_up_rounds)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	bbr_reset_lower_bounds(sk);
++	bbr->bw_probe_up_rounds = bw_probe_up_rounds;
++	bbr->bw_probe_up_acks = 0;
++	bbr->stopped_risky_probe = 0;
++	bbr->ack_phase = BBR_ACKS_REFILLING;
++	bbr->next_rtt_delivered = tp->delivered;
++	bbr_set_cycle_idx(sk, BBR_BW_PROBE_REFILL);
++}
++
++/* Now probe max deliverable data rate and volume. */
++static void bbr_start_bw_probe_up(struct sock *sk, struct bbr_context *ctx)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	bbr->ack_phase = BBR_ACKS_PROBE_STARTING;
++	bbr->next_rtt_delivered = tp->delivered;
++	bbr->cycle_mstamp = tp->tcp_mstamp;
++	bbr_reset_full_bw(sk);
++	bbr->full_bw = ctx->sample_bw;
++	bbr_set_cycle_idx(sk, BBR_BW_PROBE_UP);
++	bbr_raise_inflight_hi_slope(sk);
++}
++
++/* Start a new PROBE_BW probing cycle of some wall clock length. Pick a wall
++ * clock time at which to probe beyond an inflight that we think to be
++ * safe. This will knowingly risk packet loss, so we want to do this rarely, to
++ * keep packet loss rates low. Also start a round-trip counter, to probe faster
++ * if we estimate a Reno flow at our BDP would probe faster.
++ */
++static void bbr_start_bw_probe_down(struct sock *sk)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	bbr_reset_congestion_signals(sk);
++	bbr->bw_probe_up_cnt = ~0U;     /* not growing inflight_hi any more */
++	bbr_pick_probe_wait(sk);
++	bbr->cycle_mstamp = tp->tcp_mstamp;		/* start wall clock */
++	bbr->ack_phase = BBR_ACKS_PROBE_STOPPING;
++	bbr->next_rtt_delivered = tp->delivered;
++	bbr_set_cycle_idx(sk, BBR_BW_PROBE_DOWN);
++}
++
++/* Cruise: maintain what we estimate to be a neutral, conservative
++ * operating point, without attempting to probe up for bandwidth or down for
++ * RTT, and only reducing inflight in response to loss/ECN signals.
++ */
++static void bbr_start_bw_probe_cruise(struct sock *sk)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	if (bbr->inflight_lo != ~0U)
++		bbr->inflight_lo = min(bbr->inflight_lo, bbr->inflight_hi);
++
++	bbr_set_cycle_idx(sk, BBR_BW_PROBE_CRUISE);
++}
++
++/* Loss and/or ECN rate is too high while probing.
++ * Adapt (once per bw probe) by cutting inflight_hi and then restarting cycle.
++ */
++static void bbr_handle_inflight_too_high(struct sock *sk,
++					  const struct rate_sample *rs)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++	const u32 beta = bbr_param(sk, beta);
++
++	bbr->prev_probe_too_high = 1;
++	bbr->bw_probe_samples = 0;  /* only react once per probe */
++	/* If we are app-limited then we are not robustly
++	 * probing the max volume of inflight data we think
++	 * might be safe (analogous to how app-limited bw
++	 * samples are not known to be robustly probing bw).
++	 */
++	if (!rs->is_app_limited) {
++		bbr->inflight_hi = max_t(u32, rs->tx_in_flight,
++					 (u64)bbr_target_inflight(sk) *
++					 (BBR_UNIT - beta) >> BBR_SCALE);
++	}
++	if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP)
++		bbr_start_bw_probe_down(sk);
++}
++
++/* If we're seeing bw and loss samples reflecting our bw probing, adapt
++ * using the signals we see. If loss or ECN mark rate gets too high, then adapt
++ * inflight_hi downward. If we're able to push inflight higher without such
++ * signals, push higher: adapt inflight_hi upward.
++ */
++static bool bbr_adapt_upper_bounds(struct sock *sk,
++				    const struct rate_sample *rs,
++				    struct bbr_context *ctx)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	/* Track when we'll see bw/loss samples resulting from our bw probes. */
++	if (bbr->ack_phase == BBR_ACKS_PROBE_STARTING && bbr->round_start)
++		bbr->ack_phase = BBR_ACKS_PROBE_FEEDBACK;
++	if (bbr->ack_phase == BBR_ACKS_PROBE_STOPPING && bbr->round_start) {
++		/* End of samples from bw probing phase. */
++		bbr->bw_probe_samples = 0;
++		bbr->ack_phase = BBR_ACKS_INIT;
++		/* At this point in the cycle, our current bw sample is also
++		 * our best recent chance at finding the highest available bw
++		 * for this flow. So now is the best time to forget the bw
++		 * samples from the previous cycle, by advancing the window.
++		 */
++		if (bbr->mode == BBR_PROBE_BW && !rs->is_app_limited)
++			bbr_advance_max_bw_filter(sk);
++		/* If we had an inflight_hi, then probed and pushed inflight all
++		 * the way up to hit that inflight_hi without seeing any
++		 * high loss/ECN in all the resulting ACKs from that probing,
++		 * then probe up again, this time letting inflight persist at
++		 * inflight_hi for a round trip, then accelerating beyond.
++		 */
++		if (bbr->mode == BBR_PROBE_BW &&
++		    bbr->stopped_risky_probe && !bbr->prev_probe_too_high) {
++			bbr_start_bw_probe_refill(sk, 0);
++			return true;  /* yes, decided state transition */
++		}
++	}
++	if (bbr_is_inflight_too_high(sk, rs)) {
++		if (bbr->bw_probe_samples)  /*  sample is from bw probing? */
++			bbr_handle_inflight_too_high(sk, rs);
++	} else {
++		/* Loss/ECN rate is declared safe. Adjust upper bound upward. */
++
++		if (bbr->inflight_hi == ~0U)
++			return false;   /* no excess queue signals yet */
++
++		/* To be resilient to random loss, we must raise bw/inflight_hi
++		 * if we observe in any phase that a higher level is safe.
++		 */
++		if (rs->tx_in_flight > bbr->inflight_hi) {
++			bbr->inflight_hi = rs->tx_in_flight;
++		}
++
++		if (bbr->mode == BBR_PROBE_BW &&
++		    bbr->cycle_idx == BBR_BW_PROBE_UP)
++			bbr_probe_inflight_hi_upward(sk, rs);
++	}
++
++	return false;
++}
++
++/* Check if it's time to probe for bandwidth now, and if so, kick it off. */
++static bool bbr_check_time_to_probe_bw(struct sock *sk,
++					const struct rate_sample *rs)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++	u32 n;
++
++	/* If we seem to be at an operating point where we are not seeing loss
++	 * but we are seeing ECN marks, then when the ECN marks cease we reprobe
++	 * quickly (in case cross-traffic has ceased and freed up bw).
++	 */
++	if (bbr_param(sk, ecn_reprobe_gain) && bbr->ecn_eligible &&
++	    bbr->ecn_in_cycle && !bbr->loss_in_cycle &&
++	    inet_csk(sk)->icsk_ca_state == TCP_CA_Open) {
++		/* Calculate n so that when bbr_raise_inflight_hi_slope()
++		 * computes growth_this_round as 2^n it will be roughly the
++		 * desired volume of data (inflight_hi*ecn_reprobe_gain).
++		 */
++		n = ilog2((((u64)bbr->inflight_hi *
++			    bbr_param(sk, ecn_reprobe_gain)) >> BBR_SCALE));
++		bbr_start_bw_probe_refill(sk, n);
++		return true;
++	}
++
++	if (bbr_has_elapsed_in_phase(sk, bbr->probe_wait_us) ||
++	    bbr_is_reno_coexistence_probe_time(sk)) {
++		bbr_start_bw_probe_refill(sk, 0);
++		return true;
++	}
++	return false;
++}
++
++/* Is it time to transition from PROBE_DOWN to PROBE_CRUISE? */
++static bool bbr_check_time_to_cruise(struct sock *sk, u32 inflight, u32 bw)
++{
++	/* Always need to pull inflight down to leave headroom in queue. */
++	if (inflight > bbr_inflight_with_headroom(sk))
++		return false;
++
++	return inflight <= bbr_inflight(sk, bw, BBR_UNIT);
++}
++
++/* PROBE_BW state machine: cruise, refill, probe for bw, or drain? */
++static void bbr_update_cycle_phase(struct sock *sk,
++				    const struct rate_sample *rs,
++				    struct bbr_context *ctx)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
++	bool is_bw_probe_done = false;
++	u32 inflight, bw;
++
++	if (!bbr_full_bw_reached(sk))
++		return;
++
++	/* In DRAIN, PROBE_BW, or PROBE_RTT, adjust upper bounds. */
++	if (bbr_adapt_upper_bounds(sk, rs, ctx))
++		return;		/* already decided state transition */
++
++	if (bbr->mode != BBR_PROBE_BW)
++		return;
++
++	inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight);
++	bw = bbr_max_bw(sk);
++
++	switch (bbr->cycle_idx) {
++	/* First we spend most of our time cruising with a pacing_gain of 1.0,
++	 * which paces at the estimated bw, to try to fully use the pipe
++	 * without building queue. If we encounter loss/ECN marks, we adapt
++	 * by slowing down.
++	 */
++	case BBR_BW_PROBE_CRUISE:
++		if (bbr_check_time_to_probe_bw(sk, rs))
++			return;		/* already decided state transition */
++		break;
++
++	/* After cruising, when it's time to probe, we first "refill": we send
++	 * at the estimated bw to fill the pipe, before probing higher and
++	 * knowingly risking overflowing the bottleneck buffer (causing loss).
++	 */
++	case BBR_BW_PROBE_REFILL:
++		if (bbr->round_start) {
++			/* After one full round trip of sending in REFILL, we
++			 * start to see bw samples reflecting our REFILL, which
++			 * may be putting too much data in flight.
++			 */
++			bbr->bw_probe_samples = 1;
++			bbr_start_bw_probe_up(sk, ctx);
++		}
++		break;
++
++	/* After we refill the pipe, we probe by using a pacing_gain > 1.0, to
++	 * probe for bw. If we have not seen loss/ECN, we try to raise inflight
++	 * to at least pacing_gain*BDP; note that this may take more than
++	 * min_rtt if min_rtt is small (e.g. on a LAN).
++	 *
++	 * We terminate PROBE_UP bandwidth probing upon any of the following:
++	 *
++	 * (1) We've pushed inflight up to hit the inflight_hi target set in the
++	 *     most recent previous bw probe phase. Thus we want to start
++	 *     draining the queue immediately because it's very likely the most
++	 *     recently sent packets will fill the queue and cause drops.
++	 * (2) If inflight_hi has not limited bandwidth growth recently, and
++	 *     yet delivered bandwidth has not increased much recently
++	 *     (bbr->full_bw_now).
++	 * (3) Loss filter says loss rate is "too high".
++	 * (4) ECN filter says ECN mark rate is "too high".
++	 *
++	 * (1) (2) checked here, (3) (4) checked in bbr_is_inflight_too_high()
++	 */
++	case BBR_BW_PROBE_UP:
++		if (bbr->prev_probe_too_high &&
++		    inflight >= bbr->inflight_hi) {
++			bbr->stopped_risky_probe = 1;
++			is_bw_probe_done = true;
++		} else {
++			if (tp->is_cwnd_limited &&
++			    tcp_snd_cwnd(tp) >= bbr->inflight_hi) {
++				/* inflight_hi is limiting bw growth */
++				bbr_reset_full_bw(sk);
++				bbr->full_bw = ctx->sample_bw;
++			} else if (bbr->full_bw_now) {
++				/* Plateau in estimated bw. Pipe looks full. */
++				is_bw_probe_done = true;
++			}
++		}
++		if (is_bw_probe_done) {
++			bbr->prev_probe_too_high = 0;  /* no loss/ECN (yet) */
++			bbr_start_bw_probe_down(sk);  /* restart w/ down */
++		}
++		break;
++
++	/* After probing in PROBE_UP, we have usually accumulated some data in
++	 * the bottleneck buffer (if bw probing didn't find more bw). We next
++	 * enter PROBE_DOWN to try to drain any excess data from the queue. To
++	 * do this, we use a pacing_gain < 1.0. We hold this pacing gain until
++	 * our inflight is less than that target cruising point, which is the
++	 * minimum of (a) the amount needed to leave headroom, and (b) the
++	 * estimated BDP. Once inflight falls to match the target, we estimate
++	 * the queue is drained; persisting would underutilize the pipe.
++	 */
++	case BBR_BW_PROBE_DOWN:
++		if (bbr_check_time_to_probe_bw(sk, rs))
++			return;		/* already decided state transition */
++		if (bbr_check_time_to_cruise(sk, inflight, bw))
++			bbr_start_bw_probe_cruise(sk);
++		break;
++
++	default:
++		WARN_ONCE(1, "BBR invalid cycle index %u\n", bbr->cycle_idx);
++	}
++}
++
++/* Exiting PROBE_RTT, so return to bandwidth probing in STARTUP or PROBE_BW. */
++static void bbr_exit_probe_rtt(struct sock *sk)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	bbr_reset_lower_bounds(sk);
++	if (bbr_full_bw_reached(sk)) {
++		bbr->mode = BBR_PROBE_BW;
++		/* Raising inflight after PROBE_RTT may cause loss, so reset
++		 * the PROBE_BW clock and schedule the next bandwidth probe for
++		 * a friendly and randomized future point in time.
++		 */
++		bbr_start_bw_probe_down(sk);
++		/* Since we are exiting PROBE_RTT, we know inflight is
++		 * below our estimated BDP, so it is reasonable to cruise.
++		 */
++		bbr_start_bw_probe_cruise(sk);
++	} else {
++		bbr->mode = BBR_STARTUP;
++	}
++}
++
++/* Exit STARTUP based on loss rate > 1% and loss gaps in round >= N. Wait until
++ * the end of the round in recovery to get a good estimate of how many packets
++ * have been lost, and how many we need to drain with a low pacing rate.
++ */
++static void bbr_check_loss_too_high_in_startup(struct sock *sk,
++						const struct rate_sample *rs)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	if (bbr_full_bw_reached(sk))
++		return;
++
++	/* For STARTUP exit, check the loss rate at the end of each round trip
++	 * of Recovery episodes in STARTUP. We check the loss rate at the end
++	 * of the round trip to filter out noisy/low loss and have a better
++	 * sense of inflight (extent of loss), so we can drain more accurately.
++	 */
++	if (rs->losses && bbr->loss_events_in_round < 0xf)
++		bbr->loss_events_in_round++;  /* update saturating counter */
++	if (bbr_param(sk, full_loss_cnt) && bbr->loss_round_start &&
++	    inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery &&
++	    bbr->loss_events_in_round >= bbr_param(sk, full_loss_cnt) &&
++	    bbr_is_inflight_too_high(sk, rs)) {
++		bbr_handle_queue_too_high_in_startup(sk);
++		return;
++	}
++	if (bbr->loss_round_start)
++		bbr->loss_events_in_round = 0;
++}
++
++/* Estimate when the pipe is full, using the change in delivery rate: BBR
++ * estimates bw probing filled the pipe if the estimated bw hasn't changed by
++ * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited
++ * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the
++ * higher rwin, 3: we get higher delivery rate samples. Or transient
++ * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar
++ * design goal, but uses delay and inter-ACK spacing instead of bandwidth.
++ */
++static void bbr_check_full_bw_reached(struct sock *sk,
++				       const struct rate_sample *rs,
++				       struct bbr_context *ctx)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++	u32 bw_thresh, full_cnt, thresh;
++
++	if (bbr->full_bw_now || rs->is_app_limited)
++		return;
++
++	thresh = bbr_param(sk, full_bw_thresh);
++	full_cnt = bbr_param(sk, full_bw_cnt);
++	bw_thresh = (u64)bbr->full_bw * thresh >> BBR_SCALE;
++	if (ctx->sample_bw >= bw_thresh) {
++		bbr_reset_full_bw(sk);
++		bbr->full_bw = ctx->sample_bw;
++		return;
++	}
++	if (!bbr->round_start)
++		return;
++	++bbr->full_bw_cnt;
++	bbr->full_bw_now = bbr->full_bw_cnt >= full_cnt;
++	bbr->full_bw_reached |= bbr->full_bw_now;
++}
++
++/* If pipe is probably full, drain the queue and then enter steady-state. */
++static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs,
++			    struct bbr_context *ctx)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
++		bbr->mode = BBR_DRAIN;	/* drain queue we created */
++		/* Set ssthresh to export purely for monitoring, to signal
++		 * completion of initial STARTUP by setting to a non-
++		 * TCP_INFINITE_SSTHRESH value (ssthresh is not used by BBR).
++		 */
++		tcp_sk(sk)->snd_ssthresh =
++				bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
++		bbr_reset_congestion_signals(sk);
++	}	/* fall through to check if in-flight is already small: */
++	if (bbr->mode == BBR_DRAIN &&
++	    bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <=
++	    bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) {
++		bbr->mode = BBR_PROBE_BW;
++		bbr_start_bw_probe_down(sk);
++	}
++}
++
++static void bbr_update_model(struct sock *sk, const struct rate_sample *rs,
++			      struct bbr_context *ctx)
++{
++	bbr_update_congestion_signals(sk, rs, ctx);
++	bbr_update_ack_aggregation(sk, rs);
++	bbr_check_loss_too_high_in_startup(sk, rs);
++	bbr_check_full_bw_reached(sk, rs, ctx);
++	bbr_check_drain(sk, rs, ctx);
++	bbr_update_cycle_phase(sk, rs, ctx);
++	bbr_update_min_rtt(sk, rs);
++}
++
++/* Fast path for app-limited case.
++ *
++ * On each ack, we execute bbr state machine, which primarily consists of:
++ * 1) update model based on new rate sample, and
++ * 2) update control based on updated model or state change.
++ *
++ * There are certain workloads/scenarios, e.g. the app-limited case, where
++ * we can skip either the model update alone or both the model and control
++ * updates. This provides significant softirq cpu savings for
++ * processing incoming acks.
++ *
++ * In case of app-limited, if there is no congestion (loss/ecn) and
++ * if observed bw sample is less than current estimated bw, then we can
++ * skip some of the computation in bbr state processing:
++ *
++ * - if there is no rtt/mode/phase change: In this case, since all the
++ *   parameters of the network model are constant, we can skip model
++ *   as well as control update.
++ *
++ * - else we can skip rest of the model update. But we still need to
++ *   update the control to account for the new rtt/mode/phase.
++ *
++ * Returns whether we can take fast path or not.
++ */
++static bool bbr_run_fast_path(struct sock *sk, bool *update_model,
++		const struct rate_sample *rs, struct bbr_context *ctx)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++	u32 prev_min_rtt_us, prev_mode;
++
++	if (bbr_param(sk, fast_path) && bbr->try_fast_path &&
++	    rs->is_app_limited && ctx->sample_bw < bbr_max_bw(sk) &&
++	    !bbr->loss_in_round && !bbr->ecn_in_round ) {
++		prev_mode = bbr->mode;
++		prev_min_rtt_us = bbr->min_rtt_us;
++		bbr_check_drain(sk, rs, ctx);
++		bbr_update_cycle_phase(sk, rs, ctx);
++		bbr_update_min_rtt(sk, rs);
++
++		if (bbr->mode == prev_mode &&
++		    bbr->min_rtt_us == prev_min_rtt_us &&
++		    bbr->try_fast_path) {
++			return true;
++		}
++
++		/* Skip model update, but control still needs to be updated */
++		*update_model = false;
++	}
++	return false;
++}
++
++__bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
++	struct bbr_context ctx = { 0 };
++	bool update_model = true;
++	u32 bw, round_delivered;
++	int ce_ratio = -1;
++
++	round_delivered = bbr_update_round_start(sk, rs, &ctx);
++	if (bbr->round_start) {
++		bbr->rounds_since_probe =
++			min_t(s32, bbr->rounds_since_probe + 1, 0xFF);
++		ce_ratio = bbr_update_ecn_alpha(sk);
++	}
++	bbr_plb(sk, rs, ce_ratio);
++
++	bbr->ecn_in_round  |= (bbr->ecn_eligible && rs->is_ece);
++	bbr_calculate_bw_sample(sk, rs, &ctx);
++	bbr_update_latest_delivery_signals(sk, rs, &ctx);
++
++	if (bbr_run_fast_path(sk, &update_model, rs, &ctx))
++		goto out;
++
++	if (update_model)
++		bbr_update_model(sk, rs, &ctx);
++
++	bbr_update_gains(sk);
++	bw = bbr_bw(sk);
++	bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
++	bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain,
++		     tcp_snd_cwnd(tp), &ctx);
++	bbr_bound_cwnd_for_inflight_model(sk);
++
++out:
++	bbr_advance_latest_delivery_signals(sk, rs, &ctx);
++	bbr->prev_ca_state = inet_csk(sk)->icsk_ca_state;
++	bbr->loss_in_cycle |= rs->lost > 0;
++	bbr->ecn_in_cycle  |= rs->delivered_ce > 0;
++}
++
++__bpf_kfunc static void bbr_init(struct sock *sk)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	bbr->initialized = 1;
++
++	bbr->init_cwnd = min(0x7FU, tcp_snd_cwnd(tp));
++	bbr->prior_cwnd = tp->prior_cwnd;
++	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
++	bbr->next_rtt_delivered = tp->delivered;
++	bbr->prev_ca_state = TCP_CA_Open;
++
++	bbr->probe_rtt_done_stamp = 0;
++	bbr->probe_rtt_round_done = 0;
++	bbr->probe_rtt_min_us = tcp_min_rtt(tp);
++	bbr->probe_rtt_min_stamp = tcp_jiffies32;
++	bbr->min_rtt_us = tcp_min_rtt(tp);
++	bbr->min_rtt_stamp = tcp_jiffies32;
++
++	bbr->has_seen_rtt = 0;
++	bbr_init_pacing_rate_from_rtt(sk);
++
++	bbr->round_start = 0;
++	bbr->idle_restart = 0;
++	bbr->full_bw_reached = 0;
++	bbr->full_bw = 0;
+ 	bbr->full_bw_cnt = 0;
+-	bbr_reset_lt_bw_sampling(sk);
+-	return tcp_snd_cwnd(tcp_sk(sk));
++	bbr->cycle_mstamp = 0;
++	bbr->cycle_idx = 0;
++
++	bbr_reset_startup_mode(sk);
++
++	bbr->ack_epoch_mstamp = tp->tcp_mstamp;
++	bbr->ack_epoch_acked = 0;
++	bbr->extra_acked_win_rtts = 0;
++	bbr->extra_acked_win_idx = 0;
++	bbr->extra_acked[0] = 0;
++	bbr->extra_acked[1] = 0;
++
++	bbr->ce_state = 0;
++	bbr->prior_rcv_nxt = tp->rcv_nxt;
++	bbr->try_fast_path = 0;
++
++	cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
++
++	/* Start sampling ECN mark rate after first full flight is ACKed: */
++	bbr->loss_round_delivered = tp->delivered + 1;
++	bbr->loss_round_start = 0;
++	bbr->undo_bw_lo = 0;
++	bbr->undo_inflight_lo = 0;
++	bbr->undo_inflight_hi = 0;
++	bbr->loss_events_in_round = 0;
++	bbr->startup_ecn_rounds = 0;
++	bbr_reset_congestion_signals(sk);
++	bbr->bw_lo = ~0U;
++	bbr->bw_hi[0] = 0;
++	bbr->bw_hi[1] = 0;
++	bbr->inflight_lo = ~0U;
++	bbr->inflight_hi = ~0U;
++	bbr_reset_full_bw(sk);
++	bbr->bw_probe_up_cnt = ~0U;
++	bbr->bw_probe_up_acks = 0;
++	bbr->bw_probe_up_rounds = 0;
++	bbr->probe_wait_us = 0;
++	bbr->stopped_risky_probe = 0;
++	bbr->ack_phase = BBR_ACKS_INIT;
++	bbr->rounds_since_probe = 0;
++	bbr->bw_probe_samples = 0;
++	bbr->prev_probe_too_high = 0;
++	bbr->ecn_eligible = 0;
++	bbr->ecn_alpha = bbr_param(sk, ecn_alpha_init);
++	bbr->alpha_last_delivered = 0;
++	bbr->alpha_last_delivered_ce = 0;
++	bbr->plb.pause_until = 0;
++
++	tp->fast_ack_mode = bbr_fast_ack_mode ? 1 : 0;
++
++	if (bbr_can_use_ecn(sk))
++		tp->ecn_flags |= TCP_ECN_ECT_PERMANENT;
++}
++
++/* BBR marks the current round trip as a loss round. */
++static void bbr_note_loss(struct sock *sk)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	/* Capture "current" data over the full round trip of loss, to
++	 * have a better chance of observing the full capacity of the path.
++	 */
++	if (!bbr->loss_in_round)  /* first loss in this round trip? */
++		bbr->loss_round_delivered = tp->delivered;  /* set round trip */
++	bbr->loss_in_round = 1;
++	bbr->loss_in_cycle = 1;
+ }
+ 
+-/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
++/* Core TCP stack informs us that the given skb was just marked lost. */
++__bpf_kfunc static void bbr_skb_marked_lost(struct sock *sk,
++					    const struct sk_buff *skb)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
++	struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
++	struct rate_sample rs = {};
++
++	bbr_note_loss(sk);
++
++	if (!bbr->bw_probe_samples)
++		return;  /* not an skb sent while probing for bandwidth */
++	if (unlikely(!scb->tx.delivered_mstamp))
++		return;  /* skb was SACKed, reneged, marked lost; ignore it */
++	/* We are probing for bandwidth. Construct a rate sample that
++	 * estimates what happened in the flight leading up to this lost skb,
++	 * then see if the loss rate went too high, and if so at which packet.
++	 */
++	rs.tx_in_flight = scb->tx.in_flight;
++	rs.lost = tp->lost - scb->tx.lost;
++	rs.is_app_limited = scb->tx.is_app_limited;
++	if (bbr_is_inflight_too_high(sk, &rs)) {
++		rs.tx_in_flight = bbr_inflight_hi_from_lost_skb(sk, &rs, skb);
++		bbr_handle_inflight_too_high(sk, &rs);
++	}
++}
++
++static void bbr_run_loss_probe_recovery(struct sock *sk)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
++	struct rate_sample rs = {0};
++
++	bbr_note_loss(sk);
++
++	if (!bbr->bw_probe_samples)
++		return;  /* not sent while probing for bandwidth */
++	/* We are probing for bandwidth. Construct a rate sample that
++	 * estimates what happened in the flight leading up to this
++	 * loss, then see if the loss rate went too high.
++	 */
++	rs.lost = 1;	/* TLP probe repaired loss of a single segment */
++	rs.tx_in_flight = bbr->inflight_latest + rs.lost;
++	rs.is_app_limited = tp->tlp_orig_data_app_limited;
++	if (bbr_is_inflight_too_high(sk, &rs))
++		bbr_handle_inflight_too_high(sk, &rs);
++}
++
++/* Revert short-term model if current loss recovery event was spurious. */
++__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk)
++{
++	struct bbr *bbr = inet_csk_ca(sk);
++
++	bbr_reset_full_bw(sk); /* spurious slow-down; reset full bw detector */
++	bbr->loss_in_round = 0;
++
++	/* Revert to cwnd and other state saved before loss episode. */
++	bbr->bw_lo = max(bbr->bw_lo, bbr->undo_bw_lo);
++	bbr->inflight_lo = max(bbr->inflight_lo, bbr->undo_inflight_lo);
++	bbr->inflight_hi = max(bbr->inflight_hi, bbr->undo_inflight_hi);
++	bbr->try_fast_path = 0;  /* take slow path to set proper cwnd, pacing */
++	return bbr->prior_cwnd;
++}
++
++/* Entering loss recovery, so save state for when we undo recovery. */
+ __bpf_kfunc static u32 bbr_ssthresh(struct sock *sk)
+ {
++	struct bbr *bbr = inet_csk_ca(sk);
++
+ 	bbr_save_cwnd(sk);
++	/* For undo, save state that adapts based on loss signal. */
++	bbr->undo_bw_lo		= bbr->bw_lo;
++	bbr->undo_inflight_lo	= bbr->inflight_lo;
++	bbr->undo_inflight_hi	= bbr->inflight_hi;
+ 	return tcp_sk(sk)->snd_ssthresh;
+ }
+ 
++static enum tcp_bbr_phase bbr_get_phase(struct bbr *bbr)
++{
++	switch (bbr->mode) {
++	case BBR_STARTUP:
++		return BBR_PHASE_STARTUP;
++	case BBR_DRAIN:
++		return BBR_PHASE_DRAIN;
++	case BBR_PROBE_BW:
++		break;
++	case BBR_PROBE_RTT:
++		return BBR_PHASE_PROBE_RTT;
++	default:
++		return BBR_PHASE_INVALID;
++	}
++	switch (bbr->cycle_idx) {
++	case BBR_BW_PROBE_UP:
++		return BBR_PHASE_PROBE_BW_UP;
++	case BBR_BW_PROBE_DOWN:
++		return BBR_PHASE_PROBE_BW_DOWN;
++	case BBR_BW_PROBE_CRUISE:
++		return BBR_PHASE_PROBE_BW_CRUISE;
++	case BBR_BW_PROBE_REFILL:
++		return BBR_PHASE_PROBE_BW_REFILL;
++	default:
++		return BBR_PHASE_INVALID;
++	}
++}
++
+ static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
+-			   union tcp_cc_info *info)
++			    union tcp_cc_info *info)
+ {
+ 	if (ext & (1 << (INET_DIAG_BBRINFO - 1)) ||
+ 	    ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
+-		struct tcp_sock *tp = tcp_sk(sk);
+ 		struct bbr *bbr = inet_csk_ca(sk);
+-		u64 bw = bbr_bw(sk);
+-
+-		bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE;
+-		memset(&info->bbr, 0, sizeof(info->bbr));
+-		info->bbr.bbr_bw_lo		= (u32)bw;
+-		info->bbr.bbr_bw_hi		= (u32)(bw >> 32);
+-		info->bbr.bbr_min_rtt		= bbr->min_rtt_us;
+-		info->bbr.bbr_pacing_gain	= bbr->pacing_gain;
+-		info->bbr.bbr_cwnd_gain		= bbr->cwnd_gain;
++		u64 bw = bbr_bw_bytes_per_sec(sk, bbr_bw(sk));
++		u64 bw_hi = bbr_bw_bytes_per_sec(sk, bbr_max_bw(sk));
++		u64 bw_lo = bbr->bw_lo == ~0U ?
++			~0ULL : bbr_bw_bytes_per_sec(sk, bbr->bw_lo);
++		struct tcp_bbr_info *bbr_info = &info->bbr;
++
++		memset(bbr_info, 0, sizeof(*bbr_info));
++		bbr_info->bbr_bw_lo		= (u32)bw;
++		bbr_info->bbr_bw_hi		= (u32)(bw >> 32);
++		bbr_info->bbr_min_rtt		= bbr->min_rtt_us;
++		bbr_info->bbr_pacing_gain	= bbr->pacing_gain;
++		bbr_info->bbr_cwnd_gain		= bbr->cwnd_gain;
++		bbr_info->bbr_bw_hi_lsb		= (u32)bw_hi;
++		bbr_info->bbr_bw_hi_msb		= (u32)(bw_hi >> 32);
++		bbr_info->bbr_bw_lo_lsb		= (u32)bw_lo;
++		bbr_info->bbr_bw_lo_msb		= (u32)(bw_lo >> 32);
++		bbr_info->bbr_mode		= bbr->mode;
++		bbr_info->bbr_phase		= (__u8)bbr_get_phase(bbr);
++		bbr_info->bbr_version		= (__u8)BBR_VERSION;
++		bbr_info->bbr_inflight_lo	= bbr->inflight_lo;
++		bbr_info->bbr_inflight_hi	= bbr->inflight_hi;
++		bbr_info->bbr_extra_acked	= bbr_extra_acked(sk);
+ 		*attr = INET_DIAG_BBRINFO;
+-		return sizeof(info->bbr);
++		return sizeof(*bbr_info);
+ 	}
+ 	return 0;
+ }
+ 
+ __bpf_kfunc static void bbr_set_state(struct sock *sk, u8 new_state)
+ {
++	struct tcp_sock *tp = tcp_sk(sk);
+ 	struct bbr *bbr = inet_csk_ca(sk);
+ 
+ 	if (new_state == TCP_CA_Loss) {
+-		struct rate_sample rs = { .losses = 1 };
+ 
+ 		bbr->prev_ca_state = TCP_CA_Loss;
+-		bbr->full_bw = 0;
+-		bbr->round_start = 1;	/* treat RTO like end of a round */
+-		bbr_lt_bw_sampling(sk, &rs);
++		tcp_plb_update_state_upon_rto(sk, &bbr->plb);
++		/* The tcp_write_timeout() call to sk_rethink_txhash() likely
++		 * repathed this flow, so restart full-bandwidth detection on
++		 * the new path:
++		 */
++		bbr_reset_full_bw(sk);
++		if (!bbr_is_probing_bandwidth(sk) && bbr->inflight_lo == ~0U) {
++			/* bbr_adapt_lower_bounds() needs cwnd before
++			 * we suffered an RTO, to update inflight_lo:
++			 */
++			bbr->inflight_lo =
++				max(tcp_snd_cwnd(tp), bbr->prior_cwnd);
++		}
++	} else if (bbr->prev_ca_state == TCP_CA_Loss &&
++		   new_state != TCP_CA_Loss) {
++		bbr_exit_loss_recovery(sk);
+ 	}
+ }
+ 
++
+ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
+-	.flags		= TCP_CONG_NON_RESTRICTED,
++	.flags		= TCP_CONG_NON_RESTRICTED | TCP_CONG_WANTS_CE_EVENTS,
+ 	.name		= "bbr",
+ 	.owner		= THIS_MODULE,
+ 	.init		= bbr_init,
+ 	.cong_control	= bbr_main,
+ 	.sndbuf_expand	= bbr_sndbuf_expand,
++	.skb_marked_lost = bbr_skb_marked_lost,
+ 	.undo_cwnd	= bbr_undo_cwnd,
+ 	.cwnd_event	= bbr_cwnd_event,
+ 	.ssthresh	= bbr_ssthresh,
+-	.min_tso_segs	= bbr_min_tso_segs,
++	.tso_segs	= bbr_tso_segs,
+ 	.get_info	= bbr_get_info,
+ 	.set_state	= bbr_set_state,
+ };
+@@ -1159,10 +2359,11 @@ BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids)
+ BTF_ID_FLAGS(func, bbr_init)
+ BTF_ID_FLAGS(func, bbr_main)
+ BTF_ID_FLAGS(func, bbr_sndbuf_expand)
++BTF_ID_FLAGS(func, bbr_skb_marked_lost)
+ BTF_ID_FLAGS(func, bbr_undo_cwnd)
+ BTF_ID_FLAGS(func, bbr_cwnd_event)
+ BTF_ID_FLAGS(func, bbr_ssthresh)
+-BTF_ID_FLAGS(func, bbr_min_tso_segs)
++BTF_ID_FLAGS(func, bbr_tso_segs)
+ BTF_ID_FLAGS(func, bbr_set_state)
+ BTF_KFUNCS_END(tcp_bbr_check_kfunc_ids)
+ 
+@@ -1195,5 +2396,12 @@ MODULE_AUTHOR("Van Jacobson <vanj@google.com>");
+ MODULE_AUTHOR("Neal Cardwell <ncardwell@google.com>");
+ MODULE_AUTHOR("Yuchung Cheng <ycheng@google.com>");
+ MODULE_AUTHOR("Soheil Hassas Yeganeh <soheil@google.com>");
++MODULE_AUTHOR("Priyaranjan Jha <priyarjha@google.com>");
++MODULE_AUTHOR("Yousuk Seung <ysseung@google.com>");
++MODULE_AUTHOR("Kevin Yang <yyd@google.com>");
++MODULE_AUTHOR("Arjun Roy <arjunroy@google.com>");
++MODULE_AUTHOR("David Morley <morleyd@google.com>");
++
+ MODULE_LICENSE("Dual BSD/GPL");
+ MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)");
++MODULE_VERSION(__stringify(BBR_VERSION));
+diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
+index 0306d257fa64..28f581c0dab7 100644
+--- a/net/ipv4/tcp_cong.c
++++ b/net/ipv4/tcp_cong.c
+@@ -237,6 +237,7 @@ void tcp_init_congestion_control(struct sock *sk)
+ 	struct inet_connection_sock *icsk = inet_csk(sk);
+ 
+ 	tcp_sk(sk)->prior_ssthresh = 0;
++	tcp_sk(sk)->fast_ack_mode = 0;
+ 	if (icsk->icsk_ca_ops->init)
+ 		icsk->icsk_ca_ops->init(sk);
+ 	if (tcp_ca_needs_ecn(sk))
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 889db23bfc05..b924a852f108 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -370,7 +370,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
+ 			tcp_enter_quickack_mode(sk, 2);
+ 		break;
+ 	case INET_ECN_CE:
+-		if (tcp_ca_needs_ecn(sk))
++		if (tcp_ca_wants_ce_events(sk))
+ 			tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
+ 
+ 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+@@ -381,7 +381,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
+ 		tp->ecn_flags |= TCP_ECN_SEEN;
+ 		break;
+ 	default:
+-		if (tcp_ca_needs_ecn(sk))
++		if (tcp_ca_wants_ce_events(sk))
+ 			tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
+ 		tp->ecn_flags |= TCP_ECN_SEEN;
+ 		break;
+@@ -1120,7 +1120,12 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
+  */
+ static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
+ {
++	struct sock *sk = (struct sock *)tp;
++	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
++
+ 	tp->lost += tcp_skb_pcount(skb);
++	if (ca_ops->skb_marked_lost)
++		ca_ops->skb_marked_lost(sk, skb);
+ }
+ 
+ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
+@@ -1501,6 +1506,17 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
+ 	WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
+ 	tcp_skb_pcount_add(skb, -pcount);
+ 
++	/* Adjust tx.in_flight as pcount is shifted from skb to prev. */
++	if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount,
++		      "prev in_flight: %u skb in_flight: %u pcount: %u",
++		      TCP_SKB_CB(prev)->tx.in_flight,
++		      TCP_SKB_CB(skb)->tx.in_flight,
++		      pcount))
++		TCP_SKB_CB(skb)->tx.in_flight = 0;
++	else
++		TCP_SKB_CB(skb)->tx.in_flight -= pcount;
++	TCP_SKB_CB(prev)->tx.in_flight += pcount;
++
+ 	/* When we're adding to gso_segs == 1, gso_size will be zero,
+ 	 * in theory this shouldn't be necessary but as long as DSACK
+ 	 * code can come after this skb later on it's better to keep
+@@ -3826,7 +3842,8 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
+ /* This routine deals with acks during a TLP episode and ends an episode by
+  * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
+  */
+-static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
++static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag,
++				struct rate_sample *rs)
+ {
+ 	struct tcp_sock *tp = tcp_sk(sk);
+ 
+@@ -3843,6 +3860,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
+ 		/* ACK advances: there was a loss, so reduce cwnd. Reset
+ 		 * tlp_high_seq in tcp_init_cwnd_reduction()
+ 		 */
++		tcp_ca_event(sk, CA_EVENT_TLP_RECOVERY);
+ 		tcp_init_cwnd_reduction(sk);
+ 		tcp_set_ca_state(sk, TCP_CA_CWR);
+ 		tcp_end_cwnd_reduction(sk);
+@@ -3853,6 +3871,11 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
+ 			     FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
+ 		/* Pure dupack: original and TLP probe arrived; no loss */
+ 		tp->tlp_high_seq = 0;
++	} else {
++		/* This ACK matches a TLP retransmit. We cannot yet tell if
++		 * this ACK is for the original or the TLP retransmit.
++		 */
++		rs->is_acking_tlp_retrans_seq = 1;
+ 	}
+ }
+ 
+@@ -3961,6 +3984,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
+ 
+ 	prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
+ 	rs.prior_in_flight = tcp_packets_in_flight(tp);
++	tcp_rate_check_app_limited(sk);
+ 
+ 	/* ts_recent update must be made after we are sure that the packet
+ 	 * is in window.
+@@ -4035,7 +4059,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
+ 	tcp_rack_update_reo_wnd(sk, &rs);
+ 
+ 	if (tp->tlp_high_seq)
+-		tcp_process_tlp_ack(sk, ack, flag);
++		tcp_process_tlp_ack(sk, ack, flag, &rs);
+ 
+ 	if (tcp_ack_is_dubious(sk, flag)) {
+ 		if (!(flag & (FLAG_SND_UNA_ADVANCED |
+@@ -4059,6 +4083,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
+ 	delivered = tcp_newly_delivered(sk, delivered, flag);
+ 	lost = tp->lost - lost;			/* freshly marked lost */
+ 	rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
++	rs.is_ece = !!(flag & FLAG_ECE);
+ 	tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
+ 	tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
+ 	tcp_xmit_recovery(sk, rexmit);
+@@ -4078,7 +4103,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
+ 	tcp_ack_probe(sk);
+ 
+ 	if (tp->tlp_high_seq)
+-		tcp_process_tlp_ack(sk, ack, flag);
++		tcp_process_tlp_ack(sk, ack, flag, &rs);
+ 	return 1;
+ 
+ old_ack:
+@@ -5745,13 +5770,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
+ 
+ 	    /* More than one full frame received... */
+ 	if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
++	     (tp->fast_ack_mode == 1 ||
+ 	     /* ... and right edge of window advances far enough.
+ 	      * (tcp_recvmsg() will send ACK otherwise).
+ 	      * If application uses SO_RCVLOWAT, we want send ack now if
+ 	      * we have not received enough bytes to satisfy the condition.
+ 	      */
+-	    (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
+-	     __tcp_select_window(sk) >= tp->rcv_wnd)) ||
++	      (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
++	       __tcp_select_window(sk) >= tp->rcv_wnd))) ||
+ 	    /* We ACK each frame or... */
+ 	    tcp_in_quickack_mode(sk) ||
+ 	    /* Protocol state mandates a one-time immediate ACK */
+diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
+index a19a9dbd3409..e0ef8406a326 100644
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -459,6 +459,8 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
+ 	u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
+ 	bool ca_got_dst = false;
+ 
++	tcp_set_ecn_low_from_dst(sk, dst);
++
+ 	if (ca_key != TCP_CA_UNSPEC) {
+ 		const struct tcp_congestion_ops *ca;
+ 
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 8f67eea34779..f497c6c4a609 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -336,10 +336,9 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
+ 	bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
+ 	bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
+ 		tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
++	const struct dst_entry *dst = __sk_dst_get(sk);
+ 
+ 	if (!use_ecn) {
+-		const struct dst_entry *dst = __sk_dst_get(sk);
+-
+ 		if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
+ 			use_ecn = true;
+ 	}
+@@ -351,6 +350,9 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
+ 		tp->ecn_flags = TCP_ECN_OK;
+ 		if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
+ 			INET_ECN_xmit(sk);
++
++		if (dst)
++			tcp_set_ecn_low_from_dst(sk, dst);
+ 	}
+ }
+ 
+@@ -388,7 +390,8 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
+ 				th->cwr = 1;
+ 				skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+ 			}
+-		} else if (!tcp_ca_needs_ecn(sk)) {
++		} else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) &&
++			!tcp_ca_needs_ecn(sk)) {
+ 			/* ACK or retransmitted segment: clear ECT|CE */
+ 			INET_ECN_dontxmit(sk);
+ 		}
+@@ -1601,7 +1604,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+ {
+ 	struct tcp_sock *tp = tcp_sk(sk);
+ 	struct sk_buff *buff;
+-	int old_factor;
++	int old_factor, inflight_prev;
+ 	long limit;
+ 	int nlen;
+ 	u8 flags;
+@@ -1676,6 +1679,30 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+ 
+ 		if (diff)
+ 			tcp_adjust_pcount(sk, skb, diff);
++
++		inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor;
++		if (inflight_prev < 0) {
++			WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious(
++					  old_factor,
++					  TCP_SKB_CB(skb)->sacked,
++					  TCP_SKB_CB(skb)->tx.in_flight),
++				  "inconsistent: tx.in_flight: %u "
++				  "old_factor: %d mss: %u sacked: %u "
++				  "1st pcount: %d 2nd pcount: %d "
++				  "1st len: %u 2nd len: %u ",
++				  TCP_SKB_CB(skb)->tx.in_flight, old_factor,
++				  mss_now, TCP_SKB_CB(skb)->sacked,
++				  tcp_skb_pcount(skb), tcp_skb_pcount(buff),
++				  skb->len, buff->len);
++			inflight_prev = 0;
++		}
++		/* Set 1st tx.in_flight as if 1st were sent by itself: */
++		TCP_SKB_CB(skb)->tx.in_flight = inflight_prev +
++						 tcp_skb_pcount(skb);
++		/* Set 2nd tx.in_flight with new 1st and 2nd pcounts: */
++		TCP_SKB_CB(buff)->tx.in_flight = inflight_prev +
++						 tcp_skb_pcount(skb) +
++						 tcp_skb_pcount(buff);
+ 	}
+ 
+ 	/* Link BUFF into the send queue. */
+@@ -2033,13 +2060,12 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+ {
+ 	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
+-	u32 min_tso, tso_segs;
+-
+-	min_tso = ca_ops->min_tso_segs ?
+-			ca_ops->min_tso_segs(sk) :
+-			READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
++	u32 tso_segs;
+ 
+-	tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
++	tso_segs = ca_ops->tso_segs ?
++		ca_ops->tso_segs(sk, mss_now) :
++		tcp_tso_autosize(sk, mss_now,
++			READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs));
+ 	return min_t(u32, tso_segs, sk->sk_gso_max_segs);
+ }
+ 
+@@ -2765,6 +2791,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
+ 			skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC);
+ 			list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
+ 			tcp_init_tso_segs(skb, mss_now);
++			tcp_set_tx_in_flight(sk, skb);
+ 			goto repair; /* Skip network transmission */
+ 		}
+ 
+@@ -2979,6 +3006,7 @@ void tcp_send_loss_probe(struct sock *sk)
+ 	if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
+ 		goto rearm_timer;
+ 
++	tp->tlp_orig_data_app_limited = TCP_SKB_CB(skb)->tx.is_app_limited;
+ 	if (__tcp_retransmit_skb(sk, skb, 1))
+ 		goto rearm_timer;
+ 
+diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
+index a8f6d9d06f2e..8737f2134648 100644
+--- a/net/ipv4/tcp_rate.c
++++ b/net/ipv4/tcp_rate.c
+@@ -34,6 +34,24 @@
+  * ready to send in the write queue.
+  */
+ 
++void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++	u32 in_flight;
++
++	/* Check, sanitize, and record packets in flight after skb was sent. */
++	in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb);
++	if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX,
++		      "insane in_flight %u cc %s mss %u "
++		      "cwnd %u pif %u %u %u %u\n",
++		      in_flight, inet_csk(sk)->icsk_ca_ops->name,
++		      tp->mss_cache, tp->snd_cwnd,
++		      tp->packets_out, tp->retrans_out,
++		      tp->sacked_out, tp->lost_out))
++		in_flight = TCPCB_IN_FLIGHT_MAX;
++	TCP_SKB_CB(skb)->tx.in_flight = in_flight;
++}
++
+ /* Snapshot the current delivery information in the skb, to generate
+  * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered().
+  */
+@@ -66,7 +84,9 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
+ 	TCP_SKB_CB(skb)->tx.delivered_mstamp	= tp->delivered_mstamp;
+ 	TCP_SKB_CB(skb)->tx.delivered		= tp->delivered;
+ 	TCP_SKB_CB(skb)->tx.delivered_ce	= tp->delivered_ce;
++	TCP_SKB_CB(skb)->tx.lost		= tp->lost;
+ 	TCP_SKB_CB(skb)->tx.is_app_limited	= tp->app_limited ? 1 : 0;
++	tcp_set_tx_in_flight(sk, skb);
+ }
+ 
+ /* When an skb is sacked or acked, we fill in the rate sample with the (prior)
+@@ -91,18 +111,21 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
+ 	if (!rs->prior_delivered ||
+ 	    tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
+ 			       scb->end_seq, rs->last_end_seq)) {
++		rs->prior_lost	     = scb->tx.lost;
+ 		rs->prior_delivered_ce  = scb->tx.delivered_ce;
+ 		rs->prior_delivered  = scb->tx.delivered;
+ 		rs->prior_mstamp     = scb->tx.delivered_mstamp;
+ 		rs->is_app_limited   = scb->tx.is_app_limited;
+ 		rs->is_retrans	     = scb->sacked & TCPCB_RETRANS;
++		rs->tx_in_flight     = scb->tx.in_flight;
+ 		rs->last_end_seq     = scb->end_seq;
+ 
+ 		/* Record send time of most recently ACKed packet: */
+ 		tp->first_tx_mstamp  = tx_tstamp;
+ 		/* Find the duration of the "send phase" of this window: */
+-		rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
+-						     scb->tx.first_tx_mstamp);
++		rs->interval_us      = tcp_stamp32_us_delta(
++						tp->first_tx_mstamp,
++						scb->tx.first_tx_mstamp);
+ 
+ 	}
+ 	/* Mark off the skb delivered once it's sacked to avoid being
+@@ -144,6 +167,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
+ 		return;
+ 	}
+ 	rs->delivered   = tp->delivered - rs->prior_delivered;
++	rs->lost        = tp->lost - rs->prior_lost;
+ 
+ 	rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
+ 	/* delivered_ce occupies less than 32 bits in the skb control block */
+@@ -155,7 +179,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
+ 	 * longer phase.
+ 	 */
+ 	snd_us = rs->interval_us;				/* send phase */
+-	ack_us = tcp_stamp_us_delta(tp->tcp_mstamp,
++	ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp,
+ 				    rs->prior_mstamp); /* ack phase */
+ 	rs->interval_us = max(snd_us, ack_us);
+ 
+diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
+index 4d40615dc8fc..f27941201ef2 100644
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -689,6 +689,7 @@ void tcp_write_timer_handler(struct sock *sk)
+ 		return;
+ 	}
+ 
++	tcp_rate_check_app_limited(sk);
+ 	tcp_mstamp_refresh(tcp_sk(sk));
+ 	event = icsk->icsk_pending;
+ 
+-- 
+2.47.0.rc0
+
+From 2a2f186f1c8c99bdd1183fd28527bf95f781166c Mon Sep 17 00:00:00 2001
+From: Eric Naim <dnaim@cachyos.org>
+Date: Tue, 22 Oct 2024 22:52:15 +0800
+Subject: [PATCH 05/13] cachy
+
+Signed-off-by: Eric Naim <dnaim@cachyos.org>
+---
+ .../admin-guide/kernel-parameters.txt         |   12 +
+ Makefile                                      |    8 +
+ arch/x86/Kconfig.cpu                          |  359 +-
+ arch/x86/Makefile                             |   91 +-
+ arch/x86/include/asm/pci.h                    |    6 +
+ arch/x86/include/asm/vermagic.h               |   70 +
+ arch/x86/pci/common.c                         |    7 +-
+ block/bfq-iosched.c                           |    6 +
+ block/elevator.c                              |   10 +
+ drivers/Makefile                              |   13 +-
+ drivers/ata/ahci.c                            |   23 +-
+ drivers/cpufreq/Kconfig.x86                   |    2 -
+ drivers/cpufreq/cpufreq.c                     |   27 +-
+ drivers/cpufreq/intel_pstate.c                |    2 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h           |    1 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |   10 +
+ drivers/gpu/drm/amd/display/Kconfig           |    6 +
+ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |    2 +-
+ .../amd/display/amdgpu_dm/amdgpu_dm_color.c   |    2 +-
+ .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c    |    6 +-
+ .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   |    6 +-
+ drivers/gpu/drm/amd/pm/amdgpu_pm.c            |    3 +
+ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     |   14 +-
+ drivers/i2c/busses/Kconfig                    |    9 +
+ drivers/i2c/busses/Makefile                   |    1 +
+ drivers/i2c/busses/i2c-nct6775.c              |  648 ++++
+ drivers/i2c/busses/i2c-piix4.c                |    4 +-
+ drivers/input/evdev.c                         |   19 +-
+ drivers/md/dm-crypt.c                         |    5 +
+ drivers/media/v4l2-core/Kconfig               |    5 +
+ drivers/media/v4l2-core/Makefile              |    2 +
+ drivers/media/v4l2-core/v4l2loopback.c        | 3184 +++++++++++++++++
+ drivers/media/v4l2-core/v4l2loopback.h        |   98 +
+ .../media/v4l2-core/v4l2loopback_formats.h    |  445 +++
+ drivers/pci/controller/Makefile               |    6 +
+ drivers/pci/controller/intel-nvme-remap.c     |  462 +++
+ drivers/pci/quirks.c                          |  101 +
+ include/linux/cpufreq.h                       |    6 -
+ include/linux/pagemap.h                       |    2 +-
+ include/linux/user_namespace.h                |    4 +
+ include/linux/wait.h                          |    2 +
+ init/Kconfig                                  |   26 +
+ kernel/Kconfig.hz                             |   24 +
+ kernel/fork.c                                 |   14 +
+ kernel/locking/rwsem.c                        |    4 +-
+ kernel/sched/fair.c                           |   13 +
+ kernel/sched/sched.h                          |    2 +-
+ kernel/sched/wait.c                           |   24 +
+ kernel/sysctl.c                               |   12 +
+ kernel/user_namespace.c                       |    7 +
+ mm/Kconfig                                    |    2 +-
+ mm/compaction.c                               |    4 +
+ mm/huge_memory.c                              |    4 +
+ mm/page-writeback.c                           |    8 +
+ mm/page_alloc.c                               |    4 +
+ mm/swap.c                                     |    5 +
+ mm/vmpressure.c                               |    4 +
+ mm/vmscan.c                                   |    8 +
+ net/ipv4/inet_connection_sock.c               |    2 +-
+ scripts/Makefile.package                      |    3 +-
+ scripts/package/PKGBUILD                      |   52 +-
+ 61 files changed, 5798 insertions(+), 113 deletions(-)
+ create mode 100644 drivers/i2c/busses/i2c-nct6775.c
+ create mode 100644 drivers/media/v4l2-core/v4l2loopback.c
+ create mode 100644 drivers/media/v4l2-core/v4l2loopback.h
+ create mode 100644 drivers/media/v4l2-core/v4l2loopback_formats.h
+ create mode 100644 drivers/pci/controller/intel-nvme-remap.c
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index be010fec7654..900113802ffc 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -2231,6 +2231,9 @@
+ 			disable
+ 			  Do not enable intel_pstate as the default
+ 			  scaling driver for the supported processors
++			enable
++			  Enable intel_pstate in case "disable" was passed
++			  previously in the kernel boot parameters
+                         active
+                           Use intel_pstate driver to bypass the scaling
+                           governors layer of cpufreq and provides it own
+@@ -4412,6 +4415,15 @@
+ 		nomsi		[MSI] If the PCI_MSI kernel config parameter is
+ 				enabled, this kernel boot option can be used to
+ 				disable the use of MSI interrupts system-wide.
++		pcie_acs_override =
++					[PCIE] Override missing PCIe ACS support for:
++				downstream
++					All downstream ports - full ACS capabilities
++				multifunction
++					All multifunction devices - multifunction ACS subset
++				id:nnnn:nnnn
++					Specific device - full ACS capabilities
++					Specified as vid:did (vendor/device ID) in hex
+ 		noioapicquirk	[APIC] Disable all boot interrupt quirks.
+ 				Safety option to keep boot IRQs enabled. This
+ 				should never be necessary.
+diff --git a/Makefile b/Makefile
+index 687ce7aee67a..7c3cbfb2f6b5 100644
+--- a/Makefile
++++ b/Makefile
+@@ -803,11 +803,19 @@ KBUILD_CFLAGS	+= -fno-delete-null-pointer-checks
+ ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
+ KBUILD_CFLAGS += -O2
+ KBUILD_RUSTFLAGS += -Copt-level=2
++else ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3
++KBUILD_CFLAGS += -O3
++KBUILD_RUSTFLAGS += -Copt-level=3
+ else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
+ KBUILD_CFLAGS += -Os
+ KBUILD_RUSTFLAGS += -Copt-level=s
+ endif
+ 
++# Perform swing modulo scheduling immediately before the first scheduling pass.
++# This pass looks at innermost loops and reorders their instructions by
++# overlapping different iterations.
++KBUILD_CFLAGS += $(call cc-option,-fmodulo-sched -fmodulo-sched-allow-regmoves -fivopts -fmodulo-sched)
++
+ # Always set `debug-assertions` and `overflow-checks` because their default
+ # depends on `opt-level` and `debug-assertions`, respectively.
+ KBUILD_RUSTFLAGS += -Cdebug-assertions=$(if $(CONFIG_RUST_DEBUG_ASSERTIONS),y,n)
+diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
+index 2a7279d80460..f5849153b385 100644
+--- a/arch/x86/Kconfig.cpu
++++ b/arch/x86/Kconfig.cpu
+@@ -155,9 +155,8 @@ config MPENTIUM4
+ 		-Paxville
+ 		-Dempsey
+ 
+-
+ config MK6
+-	bool "K6/K6-II/K6-III"
++	bool "AMD K6/K6-II/K6-III"
+ 	depends on X86_32
+ 	help
+ 	  Select this for an AMD K6-family processor.  Enables use of
+@@ -165,7 +164,7 @@ config MK6
+ 	  flags to GCC.
+ 
+ config MK7
+-	bool "Athlon/Duron/K7"
++	bool "AMD Athlon/Duron/K7"
+ 	depends on X86_32
+ 	help
+ 	  Select this for an AMD Athlon K7-family processor.  Enables use of
+@@ -173,12 +172,114 @@ config MK7
+ 	  flags to GCC.
+ 
+ config MK8
+-	bool "Opteron/Athlon64/Hammer/K8"
++	bool "AMD Opteron/Athlon64/Hammer/K8"
+ 	help
+ 	  Select this for an AMD Opteron or Athlon64 Hammer-family processor.
+ 	  Enables use of some extended instructions, and passes appropriate
+ 	  optimization flags to GCC.
+ 
++config MK8SSE3
++	bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3"
++	help
++	  Select this for improved AMD Opteron or Athlon64 Hammer-family processors.
++	  Enables use of some extended instructions, and passes appropriate
++	  optimization flags to GCC.
++
++config MK10
++	bool "AMD 61xx/7x50/PhenomX3/X4/II/K10"
++	help
++	  Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50,
++	  Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor.
++	  Enables use of some extended instructions, and passes appropriate
++	  optimization flags to GCC.
++
++config MBARCELONA
++	bool "AMD Barcelona"
++	help
++	  Select this for AMD Family 10h Barcelona processors.
++
++	  Enables -march=barcelona
++
++config MBOBCAT
++	bool "AMD Bobcat"
++	help
++	  Select this for AMD Family 14h Bobcat processors.
++
++	  Enables -march=btver1
++
++config MJAGUAR
++	bool "AMD Jaguar"
++	help
++	  Select this for AMD Family 16h Jaguar processors.
++
++	  Enables -march=btver2
++
++config MBULLDOZER
++	bool "AMD Bulldozer"
++	help
++	  Select this for AMD Family 15h Bulldozer processors.
++
++	  Enables -march=bdver1
++
++config MPILEDRIVER
++	bool "AMD Piledriver"
++	help
++	  Select this for AMD Family 15h Piledriver processors.
++
++	  Enables -march=bdver2
++
++config MSTEAMROLLER
++	bool "AMD Steamroller"
++	help
++	  Select this for AMD Family 15h Steamroller processors.
++
++	  Enables -march=bdver3
++
++config MEXCAVATOR
++	bool "AMD Excavator"
++	help
++	  Select this for AMD Family 15h Excavator processors.
++
++	  Enables -march=bdver4
++
++config MZEN
++	bool "AMD Zen"
++	help
++	  Select this for AMD Family 17h Zen processors.
++
++	  Enables -march=znver1
++
++config MZEN2
++	bool "AMD Zen 2"
++	help
++	  Select this for AMD Family 17h Zen 2 processors.
++
++	  Enables -march=znver2
++
++config MZEN3
++	bool "AMD Zen 3"
++	depends on (CC_IS_GCC && GCC_VERSION >= 100300) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
++	help
++	  Select this for AMD Family 19h Zen 3 processors.
++
++	  Enables -march=znver3
++
++config MZEN4
++	bool "AMD Zen 4"
++	depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 160000)
++	help
++	  Select this for AMD Family 19h Zen 4 processors.
++
++	  Enables -march=znver4
++
++config MZEN5
++	bool "AMD Zen 5"
++	depends on (CC_IS_GCC && GCC_VERSION > 140000) || (CC_IS_CLANG && CLANG_VERSION >= 191000)
++	help
++	  Select this for AMD Family 1Ah Zen 5 processors.
++
++	  Enables -march=znver5
++
+ config MCRUSOE
+ 	bool "Crusoe"
+ 	depends on X86_32
+@@ -269,8 +370,17 @@ config MPSC
+ 	  using the cpu family field
+ 	  in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
+ 
++config MATOM
++	bool "Intel Atom"
++	help
++
++	  Select this for the Intel Atom platform. Intel Atom CPUs have an
++	  in-order pipelining architecture and thus can benefit from
++	  accordingly optimized code. Use a recent GCC with specific Atom
++	  support in order to fully benefit from selecting this option.
++
+ config MCORE2
+-	bool "Core 2/newer Xeon"
++	bool "Intel Core 2"
+ 	help
+ 
+ 	  Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
+@@ -278,14 +388,191 @@ config MCORE2
+ 	  family in /proc/cpuinfo. Newer ones have 6 and older ones 15
+ 	  (not a typo)
+ 
+-config MATOM
+-	bool "Intel Atom"
++	  Enables -march=core2
++
++config MNEHALEM
++	bool "Intel Nehalem"
+ 	help
+ 
+-	  Select this for the Intel Atom platform. Intel Atom CPUs have an
+-	  in-order pipelining architecture and thus can benefit from
+-	  accordingly optimized code. Use a recent GCC with specific Atom
+-	  support in order to fully benefit from selecting this option.
++	  Select this for 1st Gen Core processors in the Nehalem family.
++
++	  Enables -march=nehalem
++
++config MWESTMERE
++	bool "Intel Westmere"
++	help
++
++	  Select this for the Intel Westmere (formerly Nehalem-C) family.
++
++	  Enables -march=westmere
++
++config MSILVERMONT
++	bool "Intel Silvermont"
++	help
++
++	  Select this for the Intel Silvermont platform.
++
++	  Enables -march=silvermont
++
++config MGOLDMONT
++	bool "Intel Goldmont"
++	help
++
++	  Select this for the Intel Goldmont platform including Apollo Lake and Denverton.
++
++	  Enables -march=goldmont
++
++config MGOLDMONTPLUS
++	bool "Intel Goldmont Plus"
++	help
++
++	  Select this for the Intel Goldmont Plus platform including Gemini Lake.
++
++	  Enables -march=goldmont-plus
++
++config MSANDYBRIDGE
++	bool "Intel Sandy Bridge"
++	help
++
++	  Select this for 2nd Gen Core processors in the Sandy Bridge family.
++
++	  Enables -march=sandybridge
++
++config MIVYBRIDGE
++	bool "Intel Ivy Bridge"
++	help
++
++	  Select this for 3rd Gen Core processors in the Ivy Bridge family.
++
++	  Enables -march=ivybridge
++
++config MHASWELL
++	bool "Intel Haswell"
++	help
++
++	  Select this for 4th Gen Core processors in the Haswell family.
++
++	  Enables -march=haswell
++
++config MBROADWELL
++	bool "Intel Broadwell"
++	help
++
++	  Select this for 5th Gen Core processors in the Broadwell family.
++
++	  Enables -march=broadwell
++
++config MSKYLAKE
++	bool "Intel Skylake"
++	help
++
++	  Select this for 6th Gen Core processors in the Skylake family.
++
++	  Enables -march=skylake
++
++config MSKYLAKEX
++	bool "Intel Skylake X"
++	help
++
++	  Select this for 6th Gen Core processors in the Skylake X family.
++
++	  Enables -march=skylake-avx512
++
++config MCANNONLAKE
++	bool "Intel Cannon Lake"
++	help
++
++	  Select this for 8th Gen Core processors in the Cannon Lake family.
++
++	  Enables -march=cannonlake
++
++config MICELAKE
++	bool "Intel Ice Lake"
++	help
++
++	  Select this for 10th Gen Core processors in the Ice Lake family.
++
++	  Enables -march=icelake-client
++
++config MCASCADELAKE
++	bool "Intel Cascade Lake"
++	help
++
++	  Select this for Xeon processors in the Cascade Lake family.
++
++	  Enables -march=cascadelake
++
++config MCOOPERLAKE
++	bool "Intel Cooper Lake"
++	depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000)
++	help
++
++	  Select this for Xeon processors in the Cooper Lake family.
++
++	  Enables -march=cooperlake
++
++config MTIGERLAKE
++	bool "Intel Tiger Lake"
++	depends on  (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000)
++	help
++
++	  Select this for third-generation 10 nm process processors in the Tiger Lake family.
++
++	  Enables -march=tigerlake
++
++config MSAPPHIRERAPIDS
++	bool "Intel Sapphire Rapids"
++	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
++	help
++
++	  Select this for fourth-generation 10 nm process processors in the Sapphire Rapids family.
++
++	  Enables -march=sapphirerapids
++
++config MROCKETLAKE
++	bool "Intel Rocket Lake"
++	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
++	help
++
++	  Select this for eleventh-generation processors in the Rocket Lake family.
++
++	  Enables -march=rocketlake
++
++config MALDERLAKE
++	bool "Intel Alder Lake"
++	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
++	help
++
++	  Select this for twelfth-generation processors in the Alder Lake family.
++
++	  Enables -march=alderlake
++
++config MRAPTORLAKE
++	bool "Intel Raptor Lake"
++	depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500)
++	help
++
++	  Select this for thirteenth-generation processors in the Raptor Lake family.
++
++	  Enables -march=raptorlake
++
++config MMETEORLAKE
++	bool "Intel Meteor Lake"
++	depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500)
++	help
++
++	  Select this for fourteenth-generation processors in the Meteor Lake family.
++
++	  Enables -march=meteorlake
++
++config MEMERALDRAPIDS
++	bool "Intel Emerald Rapids"
++	depends on (CC_IS_GCC && GCC_VERSION > 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500)
++	help
++
++	  Select this for fifth-generation 10 nm process processors in the Emerald Rapids family.
++
++	  Enables -march=emeraldrapids
+ 
+ config GENERIC_CPU
+ 	bool "Generic-x86-64"
+@@ -294,6 +581,26 @@ config GENERIC_CPU
+ 	  Generic x86-64 CPU.
+ 	  Run equally well on all x86-64 CPUs.
+ 
++config MNATIVE_INTEL
++	bool "Intel-Native optimizations autodetected by the compiler"
++	help
++
++	  Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects
++	  the optimum settings to use based on your processor. Do NOT use this
++	  for AMD CPUs.  Intel Only!
++
++	  Enables -march=native
++
++config MNATIVE_AMD
++	bool "AMD-Native optimizations autodetected by the compiler"
++	help
++
++	  Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects
++	  the optimum settings to use based on your processor. Do NOT use this
++	  for Intel CPUs.  AMD Only!
++
++	  Enables -march=native
++
+ endchoice
+ 
+ config X86_GENERIC
+@@ -308,6 +615,30 @@ config X86_GENERIC
+ 	  This is really intended for distributors who need more
+ 	  generic optimizations.
+ 
++config X86_64_VERSION
++	int "x86-64 compiler ISA level"
++	range 1 4
++	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
++	depends on X86_64 && GENERIC_CPU
++	help
++	  Specify a specific x86-64 compiler ISA level.
++
++	  There are three x86-64 ISA levels that work on top of
++	  the x86-64 baseline, namely: x86-64-v2, x86-64-v3, and x86-64-v4.
++
++	  x86-64-v2 brings support for vector instructions up to Streaming SIMD
++	  Extensions 4.2 (SSE4.2) and Supplemental Streaming SIMD Extensions 3
++	  (SSSE3), the POPCNT instruction, and CMPXCHG16B.
++
++	  x86-64-v3 adds vector instructions up to AVX2, MOVBE, and additional
++	  bit-manipulation instructions.
++
++	  x86-64-v4 is not included since the kernel does not use AVX512 instructions
++
++	  You can find the best version for your CPU by running one of the following:
++	  /lib/ld-linux-x86-64.so.2 --help | grep supported
++	  /lib64/ld-linux-x86-64.so.2 --help | grep supported
++
+ #
+ # Define implied options from the CPU selection here
+ config X86_INTERNODE_CACHE_SHIFT
+@@ -318,7 +649,7 @@ config X86_INTERNODE_CACHE_SHIFT
+ config X86_L1_CACHE_SHIFT
+ 	int
+ 	default "7" if MPENTIUM4 || MPSC
+-	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
++	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD
+ 	default "4" if MELAN || M486SX || M486 || MGEODEGX1
+ 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
+ 
+@@ -336,11 +667,11 @@ config X86_ALIGNMENT_16
+ 
+ config X86_INTEL_USERCOPY
+ 	def_bool y
+-	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
++	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL
+ 
+ config X86_USE_PPRO_CHECKSUM
+ 	def_bool y
+-	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
++	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD
+ 
+ #
+ # P6_NOPs are a relatively minor optimization that require a family >=
+diff --git a/arch/x86/Makefile b/arch/x86/Makefile
+index 801fd85c3ef6..85d962aa68fe 100644
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -70,9 +70,9 @@ export BITS
+ #
+ #    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
+ #
+-KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
++KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -mno-avx512f -fno-tree-vectorize
+ KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
+-KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
++KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-avx512f
+ 
+ #
+ # CFLAGS for compiling floating point code inside the kernel.
+@@ -177,15 +177,96 @@ else
+         cflags-$(CONFIG_MK8)		+= -march=k8
+         cflags-$(CONFIG_MPSC)		+= -march=nocona
+         cflags-$(CONFIG_MCORE2)		+= -march=core2
+-        cflags-$(CONFIG_MATOM)		+= -march=atom
+-        cflags-$(CONFIG_GENERIC_CPU)	+= -mtune=generic
++        cflags-$(CONFIG_MATOM)		+= -march=bonnell
++        ifeq ($(CONFIG_X86_64_VERSION),1)
++          cflags-$(CONFIG_GENERIC_CPU)		+= -mtune=generic
++          rustflags-$(CONFIG_GENERIC_CPU)	+= -Ztune-cpu=generic
++        else
++          cflags-$(CONFIG_GENERIC_CPU)		+= -march=x86-64-v$(CONFIG_X86_64_VERSION)
++          rustflags-$(CONFIG_GENERIC_CPU)	+= -Ctarget-cpu=x86-64-v$(CONFIG_X86_64_VERSION)
++        endif
++        cflags-$(CONFIG_MK8SSE3)	+= -march=k8-sse3
++        cflags-$(CONFIG_MK10) 		+= -march=amdfam10
++        cflags-$(CONFIG_MBARCELONA) 	+= -march=barcelona
++        cflags-$(CONFIG_MBOBCAT) 	+= -march=btver1
++        cflags-$(CONFIG_MJAGUAR) 	+= -march=btver2
++        cflags-$(CONFIG_MBULLDOZER) 	+= -march=bdver1
++        cflags-$(CONFIG_MPILEDRIVER)	+= -march=bdver2 -mno-tbm
++        cflags-$(CONFIG_MSTEAMROLLER) 	+= -march=bdver3 -mno-tbm
++        cflags-$(CONFIG_MEXCAVATOR) 	+= -march=bdver4 -mno-tbm
++        cflags-$(CONFIG_MZEN) 		+= -march=znver1
++        cflags-$(CONFIG_MZEN2) 	+= -march=znver2
++        cflags-$(CONFIG_MZEN3) 	+= -march=znver3
++        cflags-$(CONFIG_MZEN4) 	+= -march=znver4
++        cflags-$(CONFIG_MZEN5) 	+= -march=znver5
++        cflags-$(CONFIG_MNATIVE_INTEL) += -march=native
++        cflags-$(CONFIG_MNATIVE_AMD) 	+= -march=native -mno-tbm
++        cflags-$(CONFIG_MNEHALEM) 	+= -march=nehalem
++        cflags-$(CONFIG_MWESTMERE) 	+= -march=westmere
++        cflags-$(CONFIG_MSILVERMONT) 	+= -march=silvermont
++        cflags-$(CONFIG_MGOLDMONT) 	+= -march=goldmont
++        cflags-$(CONFIG_MGOLDMONTPLUS) += -march=goldmont-plus
++        cflags-$(CONFIG_MSANDYBRIDGE) 	+= -march=sandybridge
++        cflags-$(CONFIG_MIVYBRIDGE) 	+= -march=ivybridge
++        cflags-$(CONFIG_MHASWELL) 	+= -march=haswell
++        cflags-$(CONFIG_MBROADWELL) 	+= -march=broadwell
++        cflags-$(CONFIG_MSKYLAKE) 	+= -march=skylake
++        cflags-$(CONFIG_MSKYLAKEX) 	+= -march=skylake-avx512
++        cflags-$(CONFIG_MCANNONLAKE) 	+= -march=cannonlake
++        cflags-$(CONFIG_MICELAKE) 	+= -march=icelake-client
++        cflags-$(CONFIG_MCASCADELAKE) 	+= -march=cascadelake
++        cflags-$(CONFIG_MCOOPERLAKE) 	+= -march=cooperlake
++        cflags-$(CONFIG_MTIGERLAKE) 	+= -march=tigerlake
++        cflags-$(CONFIG_MSAPPHIRERAPIDS) += -march=sapphirerapids
++        cflags-$(CONFIG_MROCKETLAKE) 	+= -march=rocketlake
++        cflags-$(CONFIG_MALDERLAKE) 	+= -march=alderlake
++        cflags-$(CONFIG_MRAPTORLAKE) 	+= -march=raptorlake
++        cflags-$(CONFIG_MMETEORLAKE) 	+= -march=meteorlake
++        cflags-$(CONFIG_MEMERALDRAPIDS)	+= -march=emeraldrapids
+         KBUILD_CFLAGS += $(cflags-y)
+ 
+         rustflags-$(CONFIG_MK8)		+= -Ctarget-cpu=k8
+         rustflags-$(CONFIG_MPSC)	+= -Ctarget-cpu=nocona
+         rustflags-$(CONFIG_MCORE2)	+= -Ctarget-cpu=core2
+         rustflags-$(CONFIG_MATOM)	+= -Ctarget-cpu=atom
+-        rustflags-$(CONFIG_GENERIC_CPU)	+= -Ztune-cpu=generic
++        rustflags-$(CONFIG_MK8SSE3)	+= -Ctarget-cpu=k8-sse3
++        rustflags-$(CONFIG_MK10) 		+= -Ctarget-cpu=amdfam10
++        rustflags-$(CONFIG_MBARCELONA) 	+= -Ctarget-cpu=barcelona
++        rustflags-$(CONFIG_MBOBCAT) 	+= -Ctarget-cpu=btver1
++        rustflags-$(CONFIG_MJAGUAR) 	+= -Ctarget-cpu=btver2
++        rustflags-$(CONFIG_MBULLDOZER) 	+= -Ctarget-cpu=bdver1
++        rustflags-$(CONFIG_MPILEDRIVER)	+= -Ctarget-cpu=bdver2
++        rustflags-$(CONFIG_MSTEAMROLLER) 	+= -Ctarget-cpu=bdver3
++        rustflags-$(CONFIG_MEXCAVATOR) 	+= -Ctarget-cpu=bdver4
++        rustflags-$(CONFIG_MZEN) 		+= -Ctarget-cpu=znver1
++        rustflags-$(CONFIG_MZEN2) 	+= -Ctarget-cpu=znver2
++        rustflags-$(CONFIG_MZEN3) 	+= -Ctarget-cpu=znver3
++        rustflags-$(CONFIG_MZEN4) 	+= -Ctarget-cpu=znver4
++        rustflags-$(CONFIG_MZEN5) 	+= -Ctarget-cpu=znver5
++        rustflags-$(CONFIG_MNATIVE_INTEL) += -Ctarget-cpu=native
++        rustflags-$(CONFIG_MNATIVE_AMD) 	+= -Ctarget-cpu=native
++        rustflags-$(CONFIG_MNEHALEM) 	+= -Ctarget-cpu=nehalem
++        rustflags-$(CONFIG_MWESTMERE) 	+= -Ctarget-cpu=westmere
++        rustflags-$(CONFIG_MSILVERMONT) 	+= -Ctarget-cpu=silvermont
++        rustflags-$(CONFIG_MGOLDMONT) 	+= -Ctarget-cpu=goldmont
++        rustflags-$(CONFIG_MGOLDMONTPLUS) += -Ctarget-cpu=goldmont-plus
++        rustflags-$(CONFIG_MSANDYBRIDGE) 	+= -Ctarget-cpu=sandybridge
++        rustflags-$(CONFIG_MIVYBRIDGE) 	+= -Ctarget-cpu=ivybridge
++        rustflags-$(CONFIG_MHASWELL) 	+= -Ctarget-cpu=haswell
++        rustflags-$(CONFIG_MBROADWELL) 	+= -Ctarget-cpu=broadwell
++        rustflags-$(CONFIG_MSKYLAKE) 	+= -Ctarget-cpu=skylake
++        rustflags-$(CONFIG_MSKYLAKEX) 	+= -Ctarget-cpu=skylake-avx512
++        rustflags-$(CONFIG_MCANNONLAKE) 	+= -Ctarget-cpu=cannonlake
++        rustflags-$(CONFIG_MICELAKE) 	+= -Ctarget-cpu=icelake-client
++        rustflags-$(CONFIG_MCASCADELAKE) 	+= -Ctarget-cpu=cascadelake
++        rustflags-$(CONFIG_MCOOPERLAKE) 	+= -Ctarget-cpu=cooperlake
++        rustflags-$(CONFIG_MTIGERLAKE) 	+= -Ctarget-cpu=tigerlake
++        rustflags-$(CONFIG_MSAPPHIRERAPIDS) += -Ctarget-cpu=sapphirerapids
++        rustflags-$(CONFIG_MROCKETLAKE) 	+= -Ctarget-cpu=rocketlake
++        rustflags-$(CONFIG_MALDERLAKE) 	+= -Ctarget-cpu=alderlake
++        rustflags-$(CONFIG_MRAPTORLAKE) 	+= -Ctarget-cpu=raptorlake
++        rustflags-$(CONFIG_MMETEORLAKE) 	+= -Ctarget-cpu=meteorlake
++        rustflags-$(CONFIG_MEMERALDRAPIDS)	+= -Ctarget-cpu=emeraldrapids
+         KBUILD_RUSTFLAGS += $(rustflags-y)
+ 
+         KBUILD_CFLAGS += -mno-red-zone
+diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
+index b3ab80a03365..5e883b397ff3 100644
+--- a/arch/x86/include/asm/pci.h
++++ b/arch/x86/include/asm/pci.h
+@@ -26,6 +26,7 @@ struct pci_sysdata {
+ #if IS_ENABLED(CONFIG_VMD)
+ 	struct pci_dev	*vmd_dev;	/* VMD Device if in Intel VMD domain */
+ #endif
++	struct pci_dev	*nvme_remap_dev;	/* AHCI Device if NVME remapped bus */
+ };
+ 
+ extern int pci_routeirq;
+@@ -69,6 +70,11 @@ static inline bool is_vmd(struct pci_bus *bus)
+ #define is_vmd(bus)		false
+ #endif /* CONFIG_VMD */
+ 
++static inline bool is_nvme_remap(struct pci_bus *bus)
++{
++	return to_pci_sysdata(bus)->nvme_remap_dev != NULL;
++}
++
+ /* Can be used to override the logic in pci_scan_bus for skipping
+    already-configured bus numbers - to be used for buggy BIOSes
+    or architectures with incomplete PCI setup by the loader */
+diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h
+index 75884d2cdec3..f4e29563473d 100644
+--- a/arch/x86/include/asm/vermagic.h
++++ b/arch/x86/include/asm/vermagic.h
+@@ -17,6 +17,54 @@
+ #define MODULE_PROC_FAMILY "586MMX "
+ #elif defined CONFIG_MCORE2
+ #define MODULE_PROC_FAMILY "CORE2 "
++#elif defined CONFIG_MNATIVE_INTEL
++#define MODULE_PROC_FAMILY "NATIVE_INTEL "
++#elif defined CONFIG_MNATIVE_AMD
++#define MODULE_PROC_FAMILY "NATIVE_AMD "
++#elif defined CONFIG_MNEHALEM
++#define MODULE_PROC_FAMILY "NEHALEM "
++#elif defined CONFIG_MWESTMERE
++#define MODULE_PROC_FAMILY "WESTMERE "
++#elif defined CONFIG_MSILVERMONT
++#define MODULE_PROC_FAMILY "SILVERMONT "
++#elif defined CONFIG_MGOLDMONT
++#define MODULE_PROC_FAMILY "GOLDMONT "
++#elif defined CONFIG_MGOLDMONTPLUS
++#define MODULE_PROC_FAMILY "GOLDMONTPLUS "
++#elif defined CONFIG_MSANDYBRIDGE
++#define MODULE_PROC_FAMILY "SANDYBRIDGE "
++#elif defined CONFIG_MIVYBRIDGE
++#define MODULE_PROC_FAMILY "IVYBRIDGE "
++#elif defined CONFIG_MHASWELL
++#define MODULE_PROC_FAMILY "HASWELL "
++#elif defined CONFIG_MBROADWELL
++#define MODULE_PROC_FAMILY "BROADWELL "
++#elif defined CONFIG_MSKYLAKE
++#define MODULE_PROC_FAMILY "SKYLAKE "
++#elif defined CONFIG_MSKYLAKEX
++#define MODULE_PROC_FAMILY "SKYLAKEX "
++#elif defined CONFIG_MCANNONLAKE
++#define MODULE_PROC_FAMILY "CANNONLAKE "
++#elif defined CONFIG_MICELAKE
++#define MODULE_PROC_FAMILY "ICELAKE "
++#elif defined CONFIG_MCASCADELAKE
++#define MODULE_PROC_FAMILY "CASCADELAKE "
++#elif defined CONFIG_MCOOPERLAKE
++#define MODULE_PROC_FAMILY "COOPERLAKE "
++#elif defined CONFIG_MTIGERLAKE
++#define MODULE_PROC_FAMILY "TIGERLAKE "
++#elif defined CONFIG_MSAPPHIRERAPIDS
++#define MODULE_PROC_FAMILY "SAPPHIRERAPIDS "
++#elif defined CONFIG_MROCKETLAKE
++#define MODULE_PROC_FAMILY "ROCKETLAKE "
++#elif defined CONFIG_MALDERLAKE
++#define MODULE_PROC_FAMILY "ALDERLAKE "
++#elif defined CONFIG_MRAPTORLAKE
++#define MODULE_PROC_FAMILY "RAPTORLAKE "
++#elif defined CONFIG_MMETEORLAKE
++#define MODULE_PROC_FAMILY "METEORLAKE "
++#elif defined CONFIG_MEMERALDRAPIDS
++#define MODULE_PROC_FAMILY "EMERALDRAPIDS "
+ #elif defined CONFIG_MATOM
+ #define MODULE_PROC_FAMILY "ATOM "
+ #elif defined CONFIG_M686
+@@ -35,6 +83,28 @@
+ #define MODULE_PROC_FAMILY "K7 "
+ #elif defined CONFIG_MK8
+ #define MODULE_PROC_FAMILY "K8 "
++#elif defined CONFIG_MK8SSE3
++#define MODULE_PROC_FAMILY "K8SSE3 "
++#elif defined CONFIG_MK10
++#define MODULE_PROC_FAMILY "K10 "
++#elif defined CONFIG_MBARCELONA
++#define MODULE_PROC_FAMILY "BARCELONA "
++#elif defined CONFIG_MBOBCAT
++#define MODULE_PROC_FAMILY "BOBCAT "
++#elif defined CONFIG_MBULLDOZER
++#define MODULE_PROC_FAMILY "BULLDOZER "
++#elif defined CONFIG_MPILEDRIVER
++#define MODULE_PROC_FAMILY "PILEDRIVER "
++#elif defined CONFIG_MSTEAMROLLER
++#define MODULE_PROC_FAMILY "STEAMROLLER "
++#elif defined CONFIG_MJAGUAR
++#define MODULE_PROC_FAMILY "JAGUAR "
++#elif defined CONFIG_MEXCAVATOR
++#define MODULE_PROC_FAMILY "EXCAVATOR "
++#elif defined CONFIG_MZEN
++#define MODULE_PROC_FAMILY "ZEN "
++#elif defined CONFIG_MZEN2
++#define MODULE_PROC_FAMILY "ZEN2 "
+ #elif defined CONFIG_MELAN
+ #define MODULE_PROC_FAMILY "ELAN "
+ #elif defined CONFIG_MCRUSOE
+diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
+index ddb798603201..7c20387d8202 100644
+--- a/arch/x86/pci/common.c
++++ b/arch/x86/pci/common.c
+@@ -723,12 +723,15 @@ int pci_ext_cfg_avail(void)
+ 		return 0;
+ }
+ 
+-#if IS_ENABLED(CONFIG_VMD)
+ struct pci_dev *pci_real_dma_dev(struct pci_dev *dev)
+ {
++#if IS_ENABLED(CONFIG_VMD)
+ 	if (is_vmd(dev->bus))
+ 		return to_pci_sysdata(dev->bus)->vmd_dev;
++#endif
++
++	if (is_nvme_remap(dev->bus))
++		return to_pci_sysdata(dev->bus)->nvme_remap_dev;
+ 
+ 	return dev;
+ }
+-#endif
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 1cc40a857fb8..c446fa6a6ad1 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -7656,6 +7656,7 @@ MODULE_ALIAS("bfq-iosched");
+ static int __init bfq_init(void)
+ {
+ 	int ret;
++	char msg[60] = "BFQ I/O-scheduler: BFQ-CachyOS v6.11";
+ 
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ 	ret = blkcg_policy_register(&blkcg_policy_bfq);
+@@ -7687,6 +7688,11 @@ static int __init bfq_init(void)
+ 	if (ret)
+ 		goto slab_kill;
+ 
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++	strlcat(msg, " (with cgroups support)", sizeof(msg));
++#endif
++	pr_info("%s\n", msg);
++
+ 	return 0;
+ 
+ slab_kill:
+diff --git a/block/elevator.c b/block/elevator.c
+index 4122026b11f1..cd630e991eae 100644
+--- a/block/elevator.c
++++ b/block/elevator.c
+@@ -567,9 +567,19 @@ static struct elevator_type *elevator_get_default(struct request_queue *q)
+ 
+ 	if (q->nr_hw_queues != 1 &&
+ 	    !blk_mq_is_shared_tags(q->tag_set->flags))
++#if defined(CONFIG_CACHY) && defined(CONFIG_MQ_IOSCHED_KYBER)
++		return elevator_find_get(q, "kyber");
++#elif defined(CONFIG_CACHY)
++		return elevator_find_get(q, "mq-deadline");
++#else
+ 		return NULL;
++#endif
+ 
++#if defined(CONFIG_CACHY) && defined(CONFIG_IOSCHED_BFQ)
++	return elevator_find_get(q, "bfq");
++#else
+ 	return elevator_find_get(q, "mq-deadline");
++#endif
+ }
+ 
+ /*
+diff --git a/drivers/Makefile b/drivers/Makefile
+index fe9ceb0d2288..b58955caf19b 100644
+--- a/drivers/Makefile
++++ b/drivers/Makefile
+@@ -61,14 +61,8 @@ obj-y				+= char/
+ # iommu/ comes before gpu as gpu are using iommu controllers
+ obj-y				+= iommu/
+ 
+-# gpu/ comes after char for AGP vs DRM startup and after iommu
+-obj-y				+= gpu/
+-
+ obj-$(CONFIG_CONNECTOR)		+= connector/
+ 
+-# i810fb depends on char/agp/
+-obj-$(CONFIG_FB_I810)           += video/fbdev/i810/
+-
+ obj-$(CONFIG_PARPORT)		+= parport/
+ obj-y				+= base/ block/ misc/ mfd/ nfc/
+ obj-$(CONFIG_LIBNVDIMM)		+= nvdimm/
+@@ -80,6 +74,13 @@ obj-y				+= macintosh/
+ obj-y				+= scsi/
+ obj-y				+= nvme/
+ obj-$(CONFIG_ATA)		+= ata/
++
++# gpu/ comes after char for AGP vs DRM startup and after iommu
++obj-y				+= gpu/
++
++# i810fb depends on char/agp/
++obj-$(CONFIG_FB_I810)           += video/fbdev/i810/
++
+ obj-$(CONFIG_TARGET_CORE)	+= target/
+ obj-$(CONFIG_MTD)		+= mtd/
+ obj-$(CONFIG_SPI)		+= spi/
+diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
+index a05c17249448..be4b54ff7e89 100644
+--- a/drivers/ata/ahci.c
++++ b/drivers/ata/ahci.c
+@@ -1618,7 +1618,7 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance)
+ }
+ #endif
+ 
+-static void ahci_remap_check(struct pci_dev *pdev, int bar,
++static int ahci_remap_check(struct pci_dev *pdev, int bar,
+ 		struct ahci_host_priv *hpriv)
+ {
+ 	int i;
+@@ -1631,7 +1631,7 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar,
+ 	    pci_resource_len(pdev, bar) < SZ_512K ||
+ 	    bar != AHCI_PCI_BAR_STANDARD ||
+ 	    !(readl(hpriv->mmio + AHCI_VSCAP) & 1))
+-		return;
++		return 0;
+ 
+ 	cap = readq(hpriv->mmio + AHCI_REMAP_CAP);
+ 	for (i = 0; i < AHCI_MAX_REMAP; i++) {
+@@ -1646,18 +1646,11 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar,
+ 	}
+ 
+ 	if (!hpriv->remapped_nvme)
+-		return;
+-
+-	dev_warn(&pdev->dev, "Found %u remapped NVMe devices.\n",
+-		 hpriv->remapped_nvme);
+-	dev_warn(&pdev->dev,
+-		 "Switch your BIOS from RAID to AHCI mode to use them.\n");
++		return 0;
+ 
+-	/*
+-	 * Don't rely on the msi-x capability in the remap case,
+-	 * share the legacy interrupt across ahci and remapped devices.
+-	 */
+-	hpriv->flags |= AHCI_HFLAG_NO_MSI;
++	/* Abort probe, allowing intel-nvme-remap to step in when available */
++	dev_info(&pdev->dev, "Device will be handled by intel-nvme-remap.\n");
++	return -ENODEV;
+ }
+ 
+ static int ahci_get_irq_vector(struct ata_host *host, int port)
+@@ -1896,7 +1889,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+ 	hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar];
+ 
+ 	/* detect remapped nvme devices */
+-	ahci_remap_check(pdev, ahci_pci_bar, hpriv);
++	rc = ahci_remap_check(pdev, ahci_pci_bar, hpriv);
++	if (rc)
++		return rc;
+ 
+ 	sysfs_add_file_to_group(&pdev->dev.kobj,
+ 				&dev_attr_remapped_nvme.attr,
+diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
+index 97c2d4f15d76..5a3af44d785a 100644
+--- a/drivers/cpufreq/Kconfig.x86
++++ b/drivers/cpufreq/Kconfig.x86
+@@ -9,7 +9,6 @@ config X86_INTEL_PSTATE
+ 	select ACPI_PROCESSOR if ACPI
+ 	select ACPI_CPPC_LIB if X86_64 && ACPI && SCHED_MC_PRIO
+ 	select CPU_FREQ_GOV_PERFORMANCE
+-	select CPU_FREQ_GOV_SCHEDUTIL if SMP
+ 	help
+ 	  This driver provides a P state for Intel core processors.
+ 	  The driver implements an internal governor and will become
+@@ -39,7 +38,6 @@ config X86_AMD_PSTATE
+ 	depends on X86 && ACPI
+ 	select ACPI_PROCESSOR
+ 	select ACPI_CPPC_LIB if X86_64
+-	select CPU_FREQ_GOV_SCHEDUTIL if SMP
+ 	help
+ 	  This driver adds a CPUFreq driver which utilizes a fine grain
+ 	  processor performance frequency control range instead of legacy
+diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
+index 04fc786dd2c0..f98c9438760c 100644
+--- a/drivers/cpufreq/cpufreq.c
++++ b/drivers/cpufreq/cpufreq.c
+@@ -575,30 +575,11 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy)
+ 		return policy->transition_delay_us;
+ 
+ 	latency = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
+-	if (latency) {
+-		unsigned int max_delay_us = 2 * MSEC_PER_SEC;
++	if (latency)
++		/* Give a 50% breathing room between updates */
++		return latency + (latency >> 1);
+ 
+-		/*
+-		 * If the platform already has high transition_latency, use it
+-		 * as-is.
+-		 */
+-		if (latency > max_delay_us)
+-			return latency;
+-
+-		/*
+-		 * For platforms that can change the frequency very fast (< 2
+-		 * us), the above formula gives a decent transition delay. But
+-		 * for platforms where transition_latency is in milliseconds, it
+-		 * ends up giving unrealistic values.
+-		 *
+-		 * Cap the default transition delay to 2 ms, which seems to be
+-		 * a reasonable amount of time after which we should reevaluate
+-		 * the frequency.
+-		 */
+-		return min(latency * LATENCY_MULTIPLIER, max_delay_us);
+-	}
+-
+-	return LATENCY_MULTIPLIER;
++	return USEC_PER_MSEC;
+ }
+ EXPORT_SYMBOL_GPL(cpufreq_policy_transition_delay_us);
+ 
+diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
+index 949ead440da9..348a330678bd 100644
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -3568,6 +3568,8 @@ static int __init intel_pstate_setup(char *str)
+ 
+ 	if (!strcmp(str, "disable"))
+ 		no_load = 1;
++	else if (!strcmp(str, "enable"))
++		no_load = 0;
+ 	else if (!strcmp(str, "active"))
+ 		default_driver = &intel_pstate;
+ 	else if (!strcmp(str, "passive"))
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index 137a88b8de45..233c17537492 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -162,6 +162,7 @@ struct amdgpu_watchdog_timer {
+  */
+ extern int amdgpu_modeset;
+ extern unsigned int amdgpu_vram_limit;
++extern int amdgpu_ignore_min_pcap;
+ extern int amdgpu_vis_vram_limit;
+ extern int amdgpu_gart_size;
+ extern int amdgpu_gtt_size;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+index e2382566af44..9c3b7b027485 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+@@ -135,6 +135,7 @@ enum AMDGPU_DEBUG_MASK {
+ };
+ 
+ unsigned int amdgpu_vram_limit = UINT_MAX;
++int amdgpu_ignore_min_pcap = 0; /* do not ignore by default */
+ int amdgpu_vis_vram_limit;
+ int amdgpu_gart_size = -1; /* auto */
+ int amdgpu_gtt_size = -1; /* auto */
+@@ -248,6 +249,15 @@ struct amdgpu_watchdog_timer amdgpu_watchdog_timer = {
+ 	.period = 0x0, /* default to 0x0 (timeout disable) */
+ };
+ 
++/**
++ * DOC: ignore_min_pcap (int)
++ * Ignore the minimum power cap.
++ * Useful on graphics cards where the minimum power cap is very high.
++ * The default is 0 (Do not ignore).
++ */
++MODULE_PARM_DESC(ignore_min_pcap, "Ignore the minimum power cap");
++module_param_named(ignore_min_pcap, amdgpu_ignore_min_pcap, int, 0600);
++
+ /**
+  * DOC: vramlimit (int)
+  * Restrict the total amount of VRAM in MiB for testing.  The default is 0 (Use full VRAM).
+diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
+index df17e79c45c7..e454488c1a31 100644
+--- a/drivers/gpu/drm/amd/display/Kconfig
++++ b/drivers/gpu/drm/amd/display/Kconfig
+@@ -53,4 +53,10 @@ config DRM_AMD_SECURE_DISPLAY
+ 	  This option enables the calculation of crc of specific region via
+ 	  debugfs. Cooperate with specific DMCU FW.
+ 
++config AMD_PRIVATE_COLOR
++	bool "Enable KMS color management by AMD for AMD"
++	default n
++	help
++	  This option extends the KMS color management API with AMD driver-specific properties to enhance the color management support on AMD Steam Deck.
++
+ endmenu
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+index 4f19e9736a67..575fdcfb138c 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -4445,7 +4445,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
+ 		return r;
+ 	}
+ 
+-#ifdef AMD_PRIVATE_COLOR
++#ifdef CONFIG_AMD_PRIVATE_COLOR
+ 	if (amdgpu_dm_create_color_properties(adev)) {
+ 		dc_state_release(state->context);
+ 		kfree(state);
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+index ebabfe3a512f..4d3ebcaacca1 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+@@ -97,7 +97,7 @@ static inline struct fixed31_32 amdgpu_dm_fixpt_from_s3132(__u64 x)
+ 	return val;
+ }
+ 
+-#ifdef AMD_PRIVATE_COLOR
++#ifdef CONFIG_AMD_PRIVATE_COLOR
+ /* Pre-defined Transfer Functions (TF)
+  *
+  * AMD driver supports pre-defined mathematical functions for transferring
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+index 99014339aaa3..222f72b4c44f 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+@@ -426,7 +426,7 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc)
+ }
+ #endif
+ 
+-#ifdef AMD_PRIVATE_COLOR
++#ifdef CONFIG_AMD_PRIVATE_COLOR
+ /**
+  * dm_crtc_additional_color_mgmt - enable additional color properties
+  * @crtc: DRM CRTC
+@@ -508,7 +508,7 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
+ #if defined(CONFIG_DEBUG_FS)
+ 	.late_register = amdgpu_dm_crtc_late_register,
+ #endif
+-#ifdef AMD_PRIVATE_COLOR
++#ifdef CONFIG_AMD_PRIVATE_COLOR
+ 	.atomic_set_property = amdgpu_dm_atomic_crtc_set_property,
+ 	.atomic_get_property = amdgpu_dm_atomic_crtc_get_property,
+ #endif
+@@ -687,7 +687,7 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
+ 
+ 	drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES);
+ 
+-#ifdef AMD_PRIVATE_COLOR
++#ifdef CONFIG_AMD_PRIVATE_COLOR
+ 	dm_crtc_additional_color_mgmt(&acrtc->base);
+ #endif
+ 	return 0;
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+index a573a6639898..52e0e42e26a5 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+@@ -1569,7 +1569,7 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane,
+ 	drm_atomic_helper_plane_destroy_state(plane, state);
+ }
+ 
+-#ifdef AMD_PRIVATE_COLOR
++#ifdef CONFIG_AMD_PRIVATE_COLOR
+ static void
+ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
+ 					     struct drm_plane *plane)
+@@ -1760,7 +1760,7 @@ static const struct drm_plane_funcs dm_plane_funcs = {
+ 	.atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state,
+ 	.atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state,
+ 	.format_mod_supported = amdgpu_dm_plane_format_mod_supported,
+-#ifdef AMD_PRIVATE_COLOR
++#ifdef CONFIG_AMD_PRIVATE_COLOR
+ 	.atomic_set_property = dm_atomic_plane_set_property,
+ 	.atomic_get_property = dm_atomic_plane_get_property,
+ #endif
+@@ -1853,7 +1853,7 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
+ 
+ 	drm_plane_helper_add(plane, &dm_plane_helper_funcs);
+ 
+-#ifdef AMD_PRIVATE_COLOR
++#ifdef CONFIG_AMD_PRIVATE_COLOR
+ 	dm_atomic_plane_attach_color_mgmt_properties(dm, plane);
+ #endif
+ 	/* Create (reset) the plane state */
+diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+index d5d6ab484e5a..dccba7bcdf97 100644
+--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
++++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+@@ -3272,6 +3272,9 @@ static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev,
+ 					 struct device_attribute *attr,
+ 					 char *buf)
+ {
++	if (amdgpu_ignore_min_pcap)
++		return sysfs_emit(buf, "%i\n", 0);
++
+ 	return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_MIN);
+ }
+ 
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+index 87672ca714de..21442469791c 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+@@ -2762,7 +2762,10 @@ int smu_get_power_limit(void *handle,
+ 			*limit = smu->max_power_limit;
+ 			break;
+ 		case SMU_PPT_LIMIT_MIN:
+-			*limit = smu->min_power_limit;
++			if (amdgpu_ignore_min_pcap)
++				*limit = 0;
++			else
++				*limit = smu->min_power_limit;
+ 			break;
+ 		default:
+ 			return -EINVAL;
+@@ -2786,7 +2789,14 @@ static int smu_set_power_limit(void *handle, uint32_t limit)
+ 		if (smu->ppt_funcs->set_power_limit)
+ 			return smu->ppt_funcs->set_power_limit(smu, limit_type, limit);
+ 
+-	if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) {
++	if (amdgpu_ignore_min_pcap) {
++		if ((limit > smu->max_power_limit)) {
++			dev_err(smu->adev->dev,
++				"New power limit (%d) is over the max allowed %d\n",
++				limit, smu->max_power_limit);
++			return -EINVAL;
++		}
++	} else if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) {
+ 		dev_err(smu->adev->dev,
+ 			"New power limit (%d) is out of range [%d,%d]\n",
+ 			limit, smu->min_power_limit, smu->max_power_limit);
+diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
+index a22f9125322a..44d863e0175e 100644
+--- a/drivers/i2c/busses/Kconfig
++++ b/drivers/i2c/busses/Kconfig
+@@ -240,6 +240,15 @@ config I2C_CHT_WC
+ 	  combined with a FUSB302 Type-C port-controller as such it is advised
+ 	  to also select CONFIG_TYPEC_FUSB302=m.
+ 
++config I2C_NCT6775
++	tristate "Nuvoton NCT6775 and compatible SMBus controller"
++	depends on HAS_IOPORT
++	help
++	  If you say yes to this option, support will be included for the
++	  Nuvoton NCT6775 and compatible SMBus controllers.
++	  This driver can also be built as a module.  If so, the module
++	  will be called i2c-nct6775.
++
+ config I2C_NFORCE2
+ 	tristate "Nvidia nForce2, nForce3 and nForce4"
+ 	depends on PCI && HAS_IOPORT
+diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
+index 78d0561339e5..9ea3a294f9f0 100644
+--- a/drivers/i2c/busses/Makefile
++++ b/drivers/i2c/busses/Makefile
+@@ -20,6 +20,7 @@ obj-$(CONFIG_I2C_CHT_WC)	+= i2c-cht-wc.o
+ obj-$(CONFIG_I2C_I801)		+= i2c-i801.o
+ obj-$(CONFIG_I2C_ISCH)		+= i2c-isch.o
+ obj-$(CONFIG_I2C_ISMT)		+= i2c-ismt.o
++obj-$(CONFIG_I2C_NCT6775)   += i2c-nct6775.o
+ obj-$(CONFIG_I2C_NFORCE2)	+= i2c-nforce2.o
+ obj-$(CONFIG_I2C_NFORCE2_S4985)	+= i2c-nforce2-s4985.o
+ obj-$(CONFIG_I2C_NVIDIA_GPU)	+= i2c-nvidia-gpu.o
+diff --git a/drivers/i2c/busses/i2c-nct6775.c b/drivers/i2c/busses/i2c-nct6775.c
+new file mode 100644
+index 000000000000..fdbd9a1c8d7a
+--- /dev/null
++++ b/drivers/i2c/busses/i2c-nct6775.c
+@@ -0,0 +1,648 @@
++/*
++ * i2c-nct6775 - Driver for the SMBus master functionality of
++ *	       Nuvoton NCT677x Super-I/O chips
++ *
++ * Copyright (C) 2019  Adam Honse <calcprogrammer1@gmail.com>
++ *
++ * Derived from nct6775 hwmon driver
++ * Copyright (C) 2012  Guenter Roeck <linux@roeck-us.net>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
++ *
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/jiffies.h>
++#include <linux/platform_device.h>
++#include <linux/hwmon.h>
++#include <linux/hwmon-sysfs.h>
++#include <linux/hwmon-vid.h>
++#include <linux/err.h>
++#include <linux/mutex.h>
++#include <linux/delay.h>
++#include <linux/ioport.h>
++#include <linux/i2c.h>
++#include <linux/acpi.h>
++#include <linux/bitops.h>
++#include <linux/dmi.h>
++#include <linux/io.h>
++#include <linux/nospec.h>
++
++#define DRVNAME "i2c-nct6775"
++
++/* Nuvoton SMBus address offsets */
++#define SMBHSTDAT       (0 + nuvoton_nct6793d_smba)
++#define SMBBLKSZ        (1 + nuvoton_nct6793d_smba)
++#define SMBHSTCMD       (2 + nuvoton_nct6793d_smba)
++#define SMBHSTIDX       (3 + nuvoton_nct6793d_smba)  //Index field is the Command field on other controllers
++#define SMBHSTCTL       (4 + nuvoton_nct6793d_smba)
++#define SMBHSTADD       (5 + nuvoton_nct6793d_smba)
++#define SMBHSTERR       (9 + nuvoton_nct6793d_smba)
++#define SMBHSTSTS       (0xE + nuvoton_nct6793d_smba)
++
++/* Command register */
++#define NCT6793D_READ_BYTE      0
++#define NCT6793D_READ_WORD      1
++#define NCT6793D_READ_BLOCK     2
++#define NCT6793D_BLOCK_WRITE_READ_PROC_CALL 3
++#define NCT6793D_PROC_CALL      4
++#define NCT6793D_WRITE_BYTE     8
++#define NCT6793D_WRITE_WORD     9
++#define NCT6793D_WRITE_BLOCK    10
++
++/* Control register */
++#define NCT6793D_MANUAL_START   128
++#define NCT6793D_SOFT_RESET     64
++
++/* Error register */
++#define NCT6793D_NO_ACK         32
++
++/* Status register */
++#define NCT6793D_FIFO_EMPTY     1
++#define NCT6793D_FIFO_FULL      2
++#define NCT6793D_MANUAL_ACTIVE  4
++
++#define NCT6775_LD_SMBUS		0x0B
++
++/* Other settings */
++#define MAX_RETRIES		400
++
++enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793,
++	     nct6795, nct6796, nct6798 };
++
++struct nct6775_sio_data {
++	int sioreg;
++	enum kinds kind;
++};
++
++/* used to set data->name = nct6775_device_names[data->sio_kind] */
++static const char * const nct6775_device_names[] = {
++	"nct6106",
++	"nct6775",
++	"nct6776",
++	"nct6779",
++	"nct6791",
++	"nct6792",
++	"nct6793",
++	"nct6795",
++	"nct6796",
++	"nct6798",
++};
++
++static const char * const nct6775_sio_names[] __initconst = {
++	"NCT6106D",
++	"NCT6775F",
++	"NCT6776D/F",
++	"NCT6779D",
++	"NCT6791D",
++	"NCT6792D",
++	"NCT6793D",
++	"NCT6795D",
++	"NCT6796D",
++	"NCT6798D",
++};
++
++#define SIO_REG_LDSEL		0x07	/* Logical device select */
++#define SIO_REG_DEVID		0x20	/* Device ID (2 bytes) */
++#define SIO_REG_SMBA		0x62	/* SMBus base address register */
++
++#define SIO_NCT6106_ID		0xc450
++#define SIO_NCT6775_ID		0xb470
++#define SIO_NCT6776_ID		0xc330
++#define SIO_NCT6779_ID		0xc560
++#define SIO_NCT6791_ID		0xc800
++#define SIO_NCT6792_ID		0xc910
++#define SIO_NCT6793_ID		0xd120
++#define SIO_NCT6795_ID		0xd350
++#define SIO_NCT6796_ID		0xd420
++#define SIO_NCT6798_ID		0xd428
++#define SIO_ID_MASK			0xFFF0
++
++static inline void
++superio_outb(int ioreg, int reg, int val)
++{
++	outb(reg, ioreg);
++	outb(val, ioreg + 1);
++}
++
++static inline int
++superio_inb(int ioreg, int reg)
++{
++	outb(reg, ioreg);
++	return inb(ioreg + 1);
++}
++
++static inline void
++superio_select(int ioreg, int ld)
++{
++	outb(SIO_REG_LDSEL, ioreg);
++	outb(ld, ioreg + 1);
++}
++
++static inline int
++superio_enter(int ioreg)
++{
++	/*
++	 * Try to reserve <ioreg> and <ioreg + 1> for exclusive access.
++	 */
++	if (!request_muxed_region(ioreg, 2, DRVNAME))
++		return -EBUSY;
++
++	outb(0x87, ioreg);
++	outb(0x87, ioreg);
++
++	return 0;
++}
++
++static inline void
++superio_exit(int ioreg)
++{
++	outb(0xaa, ioreg);
++	outb(0x02, ioreg);
++	outb(0x02, ioreg + 1);
++	release_region(ioreg, 2);
++}
++
++/*
++ * ISA constants
++ */
++
++#define IOREGION_ALIGNMENT	(~7)
++#define IOREGION_LENGTH		2
++#define ADDR_REG_OFFSET		0
++#define DATA_REG_OFFSET		1
++
++#define NCT6775_REG_BANK	0x4E
++#define NCT6775_REG_CONFIG	0x40
++
++static struct i2c_adapter *nct6775_adapter;
++
++struct i2c_nct6775_adapdata {
++	unsigned short smba;
++};
++
++/*
++ * Run one SMBus transaction on the Super-I/O SMBus master in manual mode.
++ * Block transfers are write-only here; a block read yields -EOPNOTSUPP.
++ * A timed-out status poll yields -ETIMEDOUT and a missing ACK -ENXIO.
++ * Returns 0 on success or a negative errno on error.
++ */
++static s32 nct6775_access(struct i2c_adapter * adap, u16 addr,
++		 unsigned short flags, char read_write,
++		 u8 command, int size, union i2c_smbus_data * data)
++{
++	struct i2c_nct6775_adapdata *adapdata = i2c_get_adapdata(adap);
++	unsigned short nuvoton_nct6793d_smba = adapdata->smba;
++	int i, len, cnt;
++	union i2c_smbus_data tmp_data;
++	int timeout = 0;
++
++	tmp_data.word = 0;
++	cnt = 0;
++	len = 0;
++
++	outb_p(NCT6793D_SOFT_RESET, SMBHSTCTL);
++
++	switch (size) {
++		case I2C_SMBUS_QUICK:
++			outb_p((addr << 1) | read_write,
++			       SMBHSTADD);
++			break;
++		case I2C_SMBUS_BYTE_DATA:
++			tmp_data.byte = data->byte;
++			fallthrough;
++		case I2C_SMBUS_BYTE:
++			outb_p((addr << 1) | read_write,
++			       SMBHSTADD);
++			outb_p(command, SMBHSTIDX);
++			if (read_write == I2C_SMBUS_WRITE) {
++				outb_p(tmp_data.byte, SMBHSTDAT);
++				outb_p(NCT6793D_WRITE_BYTE, SMBHSTCMD);
++			}
++			else {
++				outb_p(NCT6793D_READ_BYTE, SMBHSTCMD);
++			}
++			break;
++		case I2C_SMBUS_WORD_DATA:
++			outb_p((addr << 1) | read_write,
++			       SMBHSTADD);
++			outb_p(command, SMBHSTIDX);
++			if (read_write == I2C_SMBUS_WRITE) {
++				outb_p(data->word & 0xff, SMBHSTDAT);
++				outb_p((data->word & 0xff00) >> 8, SMBHSTDAT);
++				outb_p(NCT6793D_WRITE_WORD, SMBHSTCMD);
++			}
++			else {
++				outb_p(NCT6793D_READ_WORD, SMBHSTCMD);
++			}
++			break;
++		case I2C_SMBUS_BLOCK_DATA:
++			outb_p((addr << 1) | read_write,
++			       SMBHSTADD);
++			outb_p(command, SMBHSTIDX);
++			if (read_write == I2C_SMBUS_WRITE) {
++				len = data->block[0];
++				if (len == 0 || len > I2C_SMBUS_BLOCK_MAX)
++					return -EINVAL;
++				outb_p(len, SMBBLKSZ);
++
++				cnt = 1;
++				if (len >= 4) {
++					for (i = cnt; i <= 4; i++) {
++						outb_p(data->block[i], SMBHSTDAT);
++					}
++
++					len -= 4;
++					cnt += 4;
++				}
++				else {
++					for (i = cnt; i <= len; i++) {
++						outb_p(data->block[i], SMBHSTDAT);
++					}
++
++					len = 0;
++				}
++
++				outb_p(NCT6793D_WRITE_BLOCK, SMBHSTCMD);
++			}
++			else {
++				return -EOPNOTSUPP;
++			}
++			break;
++		default:
++			dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
++			return -EOPNOTSUPP;
++	}
++
++	outb_p(NCT6793D_MANUAL_START, SMBHSTCTL);
++
++	while ((size == I2C_SMBUS_BLOCK_DATA) && (len > 0)) {
++		if (read_write == I2C_SMBUS_WRITE) {
++			timeout = 0;
++			while ((inb_p(SMBHSTSTS) & NCT6793D_FIFO_EMPTY) == 0) {
++				if (timeout > MAX_RETRIES)
++					return -ETIMEDOUT;
++				usleep_range(250, 500);
++				timeout++;
++			}
++
++			/* Load the next chunk: 4 bytes, or the remainder */
++			if (len >= 4) {
++				for (i = cnt; i < (cnt + 4); i++) {
++					outb_p(data->block[i], SMBHSTDAT);
++				}
++
++				len -= 4;
++				cnt += 4;
++			}
++			else {
++				for (i = cnt; i < (cnt + len); i++) {
++					outb_p(data->block[i], SMBHSTDAT);
++				}
++
++				len = 0;
++			}
++		}
++		else {
++			return -EOPNOTSUPP;
++		}
++	}
++
++	//wait for manual mode to complete
++	timeout = 0;
++	while ((inb_p(SMBHSTSTS) & NCT6793D_MANUAL_ACTIVE) != 0) {
++		if (timeout > MAX_RETRIES)
++			return -ETIMEDOUT;
++		usleep_range(250, 500);
++		timeout++;
++	}
++
++	if ((inb_p(SMBHSTERR) & NCT6793D_NO_ACK) != 0) {
++		return -ENXIO;
++	}
++	else if ((read_write == I2C_SMBUS_WRITE) || (size == I2C_SMBUS_QUICK)) {
++		return 0;
++	}
++
++	switch (size) {
++		case I2C_SMBUS_BYTE:
++		case I2C_SMBUS_BYTE_DATA:
++			data->byte = inb_p(SMBHSTDAT);
++			break;
++		case I2C_SMBUS_WORD_DATA:
++			/* Sequenced reads: low byte first */
++			data->word = inb_p(SMBHSTDAT);
++			data->word |= inb_p(SMBHSTDAT) << 8;
++			break;
++	}
++	return 0;
++}
++
++/* Block reads are rejected by nct6775_access(), so advertise block writes only */
++static u32 nct6775_func(struct i2c_adapter *adapter)
++{
++	return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE | I2C_FUNC_SMBUS_BYTE_DATA |
++	       I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_WRITE_BLOCK_DATA;
++}
++
++static const struct i2c_algorithm smbus_algorithm = {
++	.smbus_xfer	= nct6775_access,
++	.functionality	= nct6775_func,
++};
++
++static int nct6775_add_adapter(unsigned short smba, const char *name, struct i2c_adapter **padap)
++{
++	struct i2c_adapter *adap;
++	struct i2c_nct6775_adapdata *adapdata;
++	int retval;
++
++	adap = kzalloc(sizeof(*adap), GFP_KERNEL);
++	if (adap == NULL) {
++		return -ENOMEM;
++	}
++
++	adap->owner = THIS_MODULE;
++	adap->class = I2C_CLASS_HWMON;
++	adap->algo = &smbus_algorithm;
++
++	adapdata = kzalloc(sizeof(*adapdata), GFP_KERNEL);
++	if (adapdata == NULL) {
++		kfree(adap);
++		return -ENOMEM;
++	}
++
++	adapdata->smba = smba;
++
++	snprintf(adap->name, sizeof(adap->name),
++		"SMBus NCT67xx adapter%s at %04x", name, smba);
++
++	i2c_set_adapdata(adap, adapdata);
++
++	retval = i2c_add_adapter(adap);
++	if (retval) {
++		kfree(adapdata);
++		kfree(adap);
++		return retval;
++	}
++
++	*padap = adap;
++	return 0;
++}
++
++static void nct6775_remove_adapter(struct i2c_adapter *adap)
++{
++	struct i2c_nct6775_adapdata *priv = i2c_get_adapdata(adap);
++
++	if (!priv->smba) /* no I/O base recorded: nothing is torn down */
++		return;
++	i2c_del_adapter(adap);
++	kfree(priv);
++	kfree(adap);
++}
++
++//static SIMPLE_DEV_PM_OPS(nct6775_dev_pm_ops, nct6775_suspend, nct6775_resume);
++
++/*
++ * when Super-I/O functions move to a separate file, the Super-I/O
++ * bus will manage the lifetime of the device and this module will only keep
++ * track of the nct6775 driver. But since we use platform_device_alloc(), we
++ * must keep track of the device
++ */
++static struct platform_device *pdev[2];
++
++static int nct6775_probe(struct platform_device *pdev)
++{
++	struct device *dev = &pdev->dev;
++	struct nct6775_sio_data *sio_data = dev_get_platdata(dev);
++	struct resource *res;
++
++	/* the I/O resource is attached by i2c_nct6775_init(); be defensive */
++	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
++	if (!res)
++		return -ENODEV;
++	if (!devm_request_region(dev, res->start, IOREGION_LENGTH, DRVNAME))
++		return -EBUSY;
++
++	switch (sio_data->kind) {
++	case nct6791:
++	case nct6792:
++	case nct6793:
++	case nct6795:
++	case nct6796:
++	case nct6798:
++		/* propagate adapter registration failures to the driver core */
++		return nct6775_add_adapter(res->start, "", &nct6775_adapter);
++	default:
++		return -ENODEV;
++	}
++}
++/*
++static void nct6791_enable_io_mapping(int sioaddr)
++{
++	int val;
++
++	val = superio_inb(sioaddr, NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE);
++	if (val & 0x10) {
++		pr_info("Enabling hardware monitor logical device mappings.\n");
++		superio_outb(sioaddr, NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE,
++			     val & ~0x10);
++	}
++}*/
++
++static struct platform_driver i2c_nct6775_driver = {
++	.driver = {
++		.name	= DRVNAME,
++//		.pm	= &nct6775_dev_pm_ops,
++	},
++	.probe		= nct6775_probe,
++};
++
++static void __exit i2c_nct6775_exit(void)
++{
++	int i;
++
++	if(nct6775_adapter)
++		nct6775_remove_adapter(nct6775_adapter);
++
++	for (i = 0; i < ARRAY_SIZE(pdev); i++) {
++		if (pdev[i])
++			platform_device_unregister(pdev[i]);
++	}
++	platform_driver_unregister(&i2c_nct6775_driver);
++}
++
++/* nct6775_find() looks for a supported NCT67xx in the Super-I/O config space */
++static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data)
++{
++	u16 val;
++	int err;
++	int addr;
++
++	err = superio_enter(sioaddr);
++	if (err)
++		return err;
++
++	val = (superio_inb(sioaddr, SIO_REG_DEVID) << 8) |
++		superio_inb(sioaddr, SIO_REG_DEVID + 1);
++
++	switch (val & SIO_ID_MASK) {
++	case SIO_NCT6106_ID:
++		sio_data->kind = nct6106;
++		break;
++	case SIO_NCT6775_ID:
++		sio_data->kind = nct6775;
++		break;
++	case SIO_NCT6776_ID:
++		sio_data->kind = nct6776;
++		break;
++	case SIO_NCT6779_ID:
++		sio_data->kind = nct6779;
++		break;
++	case SIO_NCT6791_ID:
++		sio_data->kind = nct6791;
++		break;
++	case SIO_NCT6792_ID:
++		sio_data->kind = nct6792;
++		break;
++	case SIO_NCT6793_ID:
++		sio_data->kind = nct6793;
++		break;
++	case SIO_NCT6795_ID:
++		sio_data->kind = nct6795;
++		break;
++	case SIO_NCT6796_ID:
++		sio_data->kind = nct6796;
++		break;
++	case SIO_NCT6798_ID:
++		sio_data->kind = nct6798;
++		break;
++	default:
++		if (val != 0xffff)
++			pr_debug("unsupported chip ID: 0x%04x\n", val);
++		superio_exit(sioaddr);
++		return -ENODEV;
++	}
++
++	/* We have a known chip, find the SMBus I/O address */
++	superio_select(sioaddr, NCT6775_LD_SMBUS);
++	val = (superio_inb(sioaddr, SIO_REG_SMBA) << 8)
++	    | superio_inb(sioaddr, SIO_REG_SMBA + 1);
++	addr = val & IOREGION_ALIGNMENT;
++	if (addr == 0) {
++		pr_err("Refusing to enable a Super-I/O device with a base I/O port 0\n");
++		superio_exit(sioaddr);
++		return -ENODEV;
++	}
++
++	//if (sio_data->kind == nct6791 || sio_data->kind == nct6792 ||
++	//    sio_data->kind == nct6793 || sio_data->kind == nct6795 ||
++	//    sio_data->kind == nct6796)
++	//	nct6791_enable_io_mapping(sioaddr);
++
++	superio_exit(sioaddr);
++	pr_info("Found %s or compatible chip at %#x:%#x\n",
++		nct6775_sio_names[sio_data->kind], sioaddr, addr);
++	sio_data->sioreg = sioaddr;
++
++	return addr;
++}
++
++static int __init i2c_nct6775_init(void)
++{
++	int i, err;
++	bool found = false;
++	int address;
++	struct resource res;
++	struct nct6775_sio_data sio_data;
++	int sioaddr[2] = { 0x2e, 0x4e };
++
++	err = platform_driver_register(&i2c_nct6775_driver);
++	if (err)
++		return err;
++
++	/*
++	 * initialize sio_data->kind and sio_data->sioreg.
++	 *
++	 * when Super-I/O functions move to a separate file, the Super-I/O
++	 * driver will probe 0x2e and 0x4e and auto-detect the presence of a
++	 * nct6775 hardware monitor, and call probe()
++	 */
++	for (i = 0; i < ARRAY_SIZE(pdev); i++) {
++		address = nct6775_find(sioaddr[i], &sio_data);
++		if (address <= 0)
++			continue;
++
++		found = true;
++
++		pdev[i] = platform_device_alloc(DRVNAME, address);
++		if (!pdev[i]) {
++			err = -ENOMEM;
++			goto exit_device_unregister;
++		}
++
++		err = platform_device_add_data(pdev[i], &sio_data,
++					       sizeof(struct nct6775_sio_data));
++		if (err)
++			goto exit_device_put;
++
++		memset(&res, 0, sizeof(res));
++		res.name = DRVNAME;
++		res.start = address;
++		res.end = address + IOREGION_LENGTH - 1;
++		res.flags = IORESOURCE_IO;
++
++		err = acpi_check_resource_conflict(&res);
++		if (err) {
++			platform_device_put(pdev[i]);
++			pdev[i] = NULL;
++			continue;
++		}
++
++		err = platform_device_add_resources(pdev[i], &res, 1);
++		if (err)
++			goto exit_device_put;
++
++		/* platform_device_add calls probe() */
++		err = platform_device_add(pdev[i]);
++		if (err)
++			goto exit_device_put;
++	}
++	if (!found) {
++		err = -ENODEV;
++		goto exit_unregister;
++	}
++
++	return 0;
++
++exit_device_put:
++	platform_device_put(pdev[i]);
++exit_device_unregister:
++	while (--i >= 0) {
++		if (pdev[i])
++			platform_device_unregister(pdev[i]);
++	}
++exit_unregister:
++	platform_driver_unregister(&i2c_nct6775_driver);
++	return err;
++}
++
++MODULE_AUTHOR("Adam Honse <calcprogrammer1@gmail.com>");
++MODULE_DESCRIPTION("SMBus driver for NCT6775F and compatible chips");
++MODULE_LICENSE("GPL");
++
++module_init(i2c_nct6775_init);
++module_exit(i2c_nct6775_exit);
+diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
+index 4e32d57ae0bf..a2deb7379904 100644
+--- a/drivers/i2c/busses/i2c-piix4.c
++++ b/drivers/i2c/busses/i2c-piix4.c
+@@ -569,11 +569,11 @@ static int piix4_transaction(struct i2c_adapter *piix4_adapter)
+ 	if (srvrworks_csb5_delay) /* Extra delay for SERVERWORKS_CSB5 */
+ 		usleep_range(2000, 2100);
+ 	else
+-		usleep_range(250, 500);
++		usleep_range(25, 50);
+ 
+ 	while ((++timeout < MAX_TIMEOUT) &&
+ 	       ((temp = inb_p(SMBHSTSTS)) & 0x01))
+-		usleep_range(250, 500);
++		usleep_range(25, 50);
+ 
+ 	/* If the SMBus is still busy, we give up */
+ 	if (timeout == MAX_TIMEOUT) {
+diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
+index a8ce3d140722..49729cf8c12f 100644
+--- a/drivers/input/evdev.c
++++ b/drivers/input/evdev.c
+@@ -46,6 +46,7 @@ struct evdev_client {
+ 	struct fasync_struct *fasync;
+ 	struct evdev *evdev;
+ 	struct list_head node;
++	struct rcu_head rcu;
+ 	enum input_clock_type clk_type;
+ 	bool revoked;
+ 	unsigned long *evmasks[EV_CNT];
+@@ -368,13 +369,22 @@ static void evdev_attach_client(struct evdev *evdev,
+ 	spin_unlock(&evdev->client_lock);
+ }
+ 
++static void evdev_reclaim_client(struct rcu_head *rp)
++{
++	struct evdev_client *client = container_of(rp, struct evdev_client, rcu);
++	unsigned int i;
++	for (i = 0; i < EV_CNT; ++i)
++		bitmap_free(client->evmasks[i]);
++	kvfree(client);
++}
++
+ static void evdev_detach_client(struct evdev *evdev,
+ 				struct evdev_client *client)
+ {
+ 	spin_lock(&evdev->client_lock);
+ 	list_del_rcu(&client->node);
+ 	spin_unlock(&evdev->client_lock);
+-	synchronize_rcu();
++	call_rcu(&client->rcu, evdev_reclaim_client);
+ }
+ 
+ static int evdev_open_device(struct evdev *evdev)
+@@ -427,7 +437,6 @@ static int evdev_release(struct inode *inode, struct file *file)
+ {
+ 	struct evdev_client *client = file->private_data;
+ 	struct evdev *evdev = client->evdev;
+-	unsigned int i;
+ 
+ 	mutex_lock(&evdev->mutex);
+ 
+@@ -439,11 +448,6 @@ static int evdev_release(struct inode *inode, struct file *file)
+ 
+ 	evdev_detach_client(evdev, client);
+ 
+-	for (i = 0; i < EV_CNT; ++i)
+-		bitmap_free(client->evmasks[i]);
+-
+-	kvfree(client);
+-
+ 	evdev_close_device(evdev);
+ 
+ 	return 0;
+@@ -486,7 +490,6 @@ static int evdev_open(struct inode *inode, struct file *file)
+ 
+  err_free_client:
+ 	evdev_detach_client(evdev, client);
+-	kvfree(client);
+ 	return error;
+ }
+ 
+diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
+index 348b4b26c272..708405b16687 100644
+--- a/drivers/md/dm-crypt.c
++++ b/drivers/md/dm-crypt.c
+@@ -3310,6 +3310,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+ 			goto bad;
+ 	}
+ 
++#ifdef CONFIG_CACHY
++	set_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
++	set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
++#endif
++
+ 	ret = crypt_ctr_cipher(ti, argv[0], argv[1]);
+ 	if (ret < 0)
+ 		goto bad;
+diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig
+index 331b8e535e5b..80dabeebf580 100644
+--- a/drivers/media/v4l2-core/Kconfig
++++ b/drivers/media/v4l2-core/Kconfig
+@@ -40,6 +40,11 @@ config VIDEO_TUNER
+ config V4L2_JPEG_HELPER
+ 	tristate
+ 
++config V4L2_LOOPBACK
++	tristate "V4L2 loopback device"
++	help
++	  V4L2 loopback device
++
+ # Used by drivers that need v4l2-h264.ko
+ config V4L2_H264
+ 	tristate
+diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile
+index 2177b9d63a8f..c179507cedc4 100644
+--- a/drivers/media/v4l2-core/Makefile
++++ b/drivers/media/v4l2-core/Makefile
+@@ -33,5 +33,7 @@ obj-$(CONFIG_V4L2_JPEG_HELPER) += v4l2-jpeg.o
+ obj-$(CONFIG_V4L2_MEM2MEM_DEV) += v4l2-mem2mem.o
+ obj-$(CONFIG_V4L2_VP9) += v4l2-vp9.o
+ 
++obj-$(CONFIG_V4L2_LOOPBACK) += v4l2loopback.o
++
+ obj-$(CONFIG_VIDEO_TUNER) += tuner.o
+ obj-$(CONFIG_VIDEO_DEV) += v4l2-dv-timings.o videodev.o
+diff --git a/drivers/media/v4l2-core/v4l2loopback.c b/drivers/media/v4l2-core/v4l2loopback.c
+new file mode 100644
+index 000000000000..25cb1beb26e5
+--- /dev/null
++++ b/drivers/media/v4l2-core/v4l2loopback.c
+@@ -0,0 +1,3184 @@
++/* -*- c-file-style: "linux" -*- */
++/*
++ * v4l2loopback.c  --  video4linux2 loopback driver
++ *
++ * Copyright (C) 2005-2009 Vasily Levin (vasaka@gmail.com)
++ * Copyright (C) 2010-2023 IOhannes m zmoelnig (zmoelnig@iem.at)
++ * Copyright (C) 2011 Stefan Diewald (stefan.diewald@mytum.de)
++ * Copyright (C) 2012 Anton Novikov (random.plant@gmail.com)
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ */
++#include <linux/version.h>
++#include <linux/vmalloc.h>
++#include <linux/mm.h>
++#include <linux/time.h>
++#include <linux/module.h>
++#include <linux/videodev2.h>
++#include <linux/sched.h>
++#include <linux/slab.h>
++#include <linux/fs.h>
++#include <linux/capability.h>
++#include <linux/eventpoll.h>
++#include <media/v4l2-ioctl.h>
++#include <media/v4l2-common.h>
++#include <media/v4l2-device.h>
++#include <media/v4l2-ctrls.h>
++#include <media/v4l2-event.h>
++
++#include <linux/miscdevice.h>
++#include "v4l2loopback.h"
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
++#error This module is not supported on kernels before 4.0.0.
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
++#define strscpy strlcpy
++#endif
++
++#if defined(timer_setup) && defined(from_timer)
++#define HAVE_TIMER_SETUP
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0)
++#define VFL_TYPE_VIDEO VFL_TYPE_GRABBER
++#endif
++
++#define V4L2LOOPBACK_VERSION_CODE                                              \
++	KERNEL_VERSION(V4L2LOOPBACK_VERSION_MAJOR, V4L2LOOPBACK_VERSION_MINOR, \
++		       V4L2LOOPBACK_VERSION_BUGFIX)
++
++MODULE_DESCRIPTION("V4L2 loopback video device");
++MODULE_AUTHOR("Vasily Levin, "
++	      "IOhannes m zmoelnig <zmoelnig@iem.at>, "
++	      "Stefan Diewald, "
++	      "Anton Novikov, "
++	      "et al.");
++#ifdef SNAPSHOT_VERSION
++MODULE_VERSION(__stringify(SNAPSHOT_VERSION));
++#else
++MODULE_VERSION("" __stringify(V4L2LOOPBACK_VERSION_MAJOR) "." __stringify(
++	V4L2LOOPBACK_VERSION_MINOR) "." __stringify(V4L2LOOPBACK_VERSION_BUGFIX));
++#endif
++MODULE_LICENSE("GPL");
++
++/*
++ * helpers
++ */
++#define dprintk(fmt, args...)                                          \
++	do {                                                           \
++		if (debug > 0) {                                       \
++			printk(KERN_INFO "v4l2-loopback[" __stringify( \
++				       __LINE__) "], pid(%d):  " fmt,  \
++			       task_pid_nr(current), ##args);          \
++		}                                                      \
++	} while (0)
++
++#define MARK()                                                             \
++	do {                                                               \
++		if (debug > 1) {                                           \
++			printk(KERN_INFO "%s:%d[%s], pid(%d)\n", __FILE__, \
++			       __LINE__, __func__, task_pid_nr(current));  \
++		}                                                          \
++	} while (0)
++
++#define dprintkrw(fmt, args...)                                        \
++	do {                                                           \
++		if (debug > 2) {                                       \
++			printk(KERN_INFO "v4l2-loopback[" __stringify( \
++				       __LINE__) "], pid(%d): " fmt,   \
++			       task_pid_nr(current), ##args);          \
++		}                                                      \
++	} while (0)
++
++static inline void v4l2l_get_timestamp(struct v4l2_buffer *b)
++{
++	struct timespec64 ts;
++	ktime_get_ts64(&ts);
++
++	b->timestamp.tv_sec = ts.tv_sec;
++	b->timestamp.tv_usec = (ts.tv_nsec / NSEC_PER_USEC);
++	b->flags |= V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
++}
++
++#if BITS_PER_LONG == 32
++#include <asm/div64.h> /* do_div() for 64bit division */
++static inline int v4l2l_mod64(const s64 A, const u32 B)
++{
++	u64 a = (u64)A;
++	u32 b = B;
++
++	if (A > 0)
++		return do_div(a, b);
++	a = -A;
++	return -do_div(a, b);
++}
++#else
++static inline int v4l2l_mod64(const s64 A, const u32 B)
++{
++	return A % B;
++}
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0)
++typedef unsigned __poll_t;
++#endif
++
++/* module constants
++ *  can be overridden during the build process using something like
++ *	make KCPPFLAGS="-DMAX_DEVICES=100"
++ */
++
++/* maximum number of v4l2loopback devices that can be created */
++#ifndef MAX_DEVICES
++#define MAX_DEVICES 8
++#endif
++
++/* whether the default is to announce capabilities exclusively or not */
++#ifndef V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS
++#define V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS 0
++#endif
++
++/* when a producer is considered to have gone stale */
++#ifndef MAX_TIMEOUT
++#define MAX_TIMEOUT (100 * 1000) /* in msecs */
++#endif
++
++/* max buffers that can be mapped, actually they
++ * are all mapped to max_buffers buffers */
++#ifndef MAX_BUFFERS
++#define MAX_BUFFERS 32
++#endif
++
++/* module parameters */
++static int debug = 0;
++module_param(debug, int, S_IRUGO | S_IWUSR);
++MODULE_PARM_DESC(debug, "debugging level (higher values == more verbose)");
++
++#define V4L2LOOPBACK_DEFAULT_MAX_BUFFERS 2
++static int max_buffers = V4L2LOOPBACK_DEFAULT_MAX_BUFFERS;
++module_param(max_buffers, int, S_IRUGO);
++MODULE_PARM_DESC(max_buffers,
++		 "how many buffers should be allocated [DEFAULT: " __stringify(
++			 V4L2LOOPBACK_DEFAULT_MAX_BUFFERS) "]");
++
++/* how many times a device can be opened
++ * the per-module default value can be overridden on a per-device basis using
++ * the /sys/devices interface
++ *
++ * note that max_openers should be at least 2 in order to get a working system:
++ *   one opener for the producer and one opener for the consumer
++ *   however, we leave that to the user
++ */
++#define V4L2LOOPBACK_DEFAULT_MAX_OPENERS 10
++static int max_openers = V4L2LOOPBACK_DEFAULT_MAX_OPENERS;
++module_param(max_openers, int, S_IRUGO | S_IWUSR);
++MODULE_PARM_DESC(
++	max_openers,
++	"how many users can open the loopback device [DEFAULT: " __stringify(
++		V4L2LOOPBACK_DEFAULT_MAX_OPENERS) "]");
++
++static int devices = -1;
++module_param(devices, int, 0);
++MODULE_PARM_DESC(devices, "how many devices should be created");
++
++static int video_nr[MAX_DEVICES] = { [0 ...(MAX_DEVICES - 1)] = -1 };
++module_param_array(video_nr, int, NULL, 0444);
++MODULE_PARM_DESC(video_nr,
++		 "video device numbers (-1=auto, 0=/dev/video0, etc.)");
++
++static char *card_label[MAX_DEVICES];
++module_param_array(card_label, charp, NULL, 0000);
++MODULE_PARM_DESC(card_label, "card labels for each device");
++
++static bool exclusive_caps[MAX_DEVICES] = {
++	[0 ...(MAX_DEVICES - 1)] = V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS
++};
++module_param_array(exclusive_caps, bool, NULL, 0444);
++/* FIXXME: wording */
++MODULE_PARM_DESC(
++	exclusive_caps,
++	"whether to announce OUTPUT/CAPTURE capabilities exclusively or not  [DEFAULT: " __stringify(
++		V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS) "]");
++
++/* format specifications */
++#define V4L2LOOPBACK_SIZE_MIN_WIDTH 2
++#define V4L2LOOPBACK_SIZE_MIN_HEIGHT 1
++#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH 8192
++#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT 8192
++
++#define V4L2LOOPBACK_SIZE_DEFAULT_WIDTH 640
++#define V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT 480
++
++static int max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH;
++module_param(max_width, int, S_IRUGO);
++MODULE_PARM_DESC(max_width,
++		 "maximum allowed frame width [DEFAULT: " __stringify(
++			 V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH) "]");
++static int max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT;
++module_param(max_height, int, S_IRUGO);
++MODULE_PARM_DESC(max_height,
++		 "maximum allowed frame height [DEFAULT: " __stringify(
++			 V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT) "]");
++
++static DEFINE_IDR(v4l2loopback_index_idr);
++static DEFINE_MUTEX(v4l2loopback_ctl_mutex);
++
++/* frame intervals */
++#define V4L2LOOPBACK_FPS_MIN 0
++#define V4L2LOOPBACK_FPS_MAX 1000
++
++/* control IDs */
++#define V4L2LOOPBACK_CID_BASE (V4L2_CID_USER_BASE | 0xf000)
++#define CID_KEEP_FORMAT (V4L2LOOPBACK_CID_BASE + 0)
++#define CID_SUSTAIN_FRAMERATE (V4L2LOOPBACK_CID_BASE + 1)
++#define CID_TIMEOUT (V4L2LOOPBACK_CID_BASE + 2)
++#define CID_TIMEOUT_IMAGE_IO (V4L2LOOPBACK_CID_BASE + 3)
++
++static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl);
++static const struct v4l2_ctrl_ops v4l2loopback_ctrl_ops = {
++	.s_ctrl = v4l2loopback_s_ctrl,
++};
++static const struct v4l2_ctrl_config v4l2loopback_ctrl_keepformat = {
++	// clang-format off
++	.ops	= &v4l2loopback_ctrl_ops,
++	.id	= CID_KEEP_FORMAT,
++	.name	= "keep_format",
++	.type	= V4L2_CTRL_TYPE_BOOLEAN,
++	.min	= 0,
++	.max	= 1,
++	.step	= 1,
++	.def	= 0,
++	// clang-format on
++};
++static const struct v4l2_ctrl_config v4l2loopback_ctrl_sustainframerate = {
++	// clang-format off
++	.ops	= &v4l2loopback_ctrl_ops,
++	.id	= CID_SUSTAIN_FRAMERATE,
++	.name	= "sustain_framerate",
++	.type	= V4L2_CTRL_TYPE_BOOLEAN,
++	.min	= 0,
++	.max	= 1,
++	.step	= 1,
++	.def	= 0,
++	// clang-format on
++};
++static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeout = {
++	// clang-format off
++	.ops	= &v4l2loopback_ctrl_ops,
++	.id	= CID_TIMEOUT,
++	.name	= "timeout",
++	.type	= V4L2_CTRL_TYPE_INTEGER,
++	.min	= 0,
++	.max	= MAX_TIMEOUT,
++	.step	= 1,
++	.def	= 0,
++	// clang-format on
++};
++static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeoutimageio = {
++	// clang-format off
++	.ops	= &v4l2loopback_ctrl_ops,
++	.id	= CID_TIMEOUT_IMAGE_IO,
++	.name	= "timeout_image_io",
++	.type	= V4L2_CTRL_TYPE_BUTTON,
++	.min	= 0,
++	.max	= 1,
++	.step	= 1,
++	.def	= 0,
++	// clang-format on
++};
++
++/* module structures */
++struct v4l2loopback_private {
++	int device_nr;
++};
++
++/* TODO(vasaka) use typenames which are common to kernel, but first find out if
++ * it is needed */
++/* struct keeping state and settings of loopback device */
++
++struct v4l2l_buffer {
++	struct v4l2_buffer buffer;
++	struct list_head list_head;
++	int use_count;
++};
++
++struct v4l2_loopback_device {
++	struct v4l2_device v4l2_dev;
++	struct v4l2_ctrl_handler ctrl_handler;
++	struct video_device *vdev;
++	/* pixel and stream format */
++	struct v4l2_pix_format pix_format;
++	bool pix_format_has_valid_sizeimage;
++	struct v4l2_captureparm capture_param;
++	unsigned long frame_jiffies;
++
++	/* ctrls */
++	int keep_format; /* CID_KEEP_FORMAT; stay ready_for_capture even when all
++			    openers close() the device */
++	int sustain_framerate; /* CID_SUSTAIN_FRAMERATE; duplicate frames to maintain
++				  (close to) nominal framerate */
++
++	/* buffers stuff */
++	u8 *image; /* pointer to actual buffers data */
++	unsigned long int imagesize; /* size of buffers data */
++	int buffers_number; /* should not be big, 4 is a good choice */
++	struct v4l2l_buffer buffers[MAX_BUFFERS]; /* inner driver buffers */
++	int used_buffers; /* number of the actually used buffers */
++	int max_openers; /* how many times can this device be opened */
++
++	s64 write_position; /* number of last written frame + 1 */
++	struct list_head outbufs_list; /* buffers in output DQBUF order */
++	int bufpos2index
++		[MAX_BUFFERS]; /* mapping of (read/write_position % used_buffers)
++                        * to inner buffer index */
++	long buffer_size;
++
++	/* sustain_framerate stuff */
++	struct timer_list sustain_timer;
++	unsigned int reread_count;
++
++	/* timeout stuff */
++	unsigned long timeout_jiffies; /* CID_TIMEOUT; 0 means disabled */
++	int timeout_image_io; /* CID_TIMEOUT_IMAGE_IO; next opener will
++			       * read/write to timeout_image */
++	u8 *timeout_image; /* copy of it will be captured when timeout passes */
++	struct v4l2l_buffer timeout_image_buffer;
++	struct timer_list timeout_timer;
++	int timeout_happened;
++
++	/* sync stuff */
++	atomic_t open_count;
++
++	int ready_for_capture; /* set to the number of writers that opened the
++                                * device and negotiated format. */
++	int ready_for_output; /* set to true when no writer is currently attached
++			       * this differs slightly from !ready_for_capture,
++			       * e.g. when using fallback images */
++	int active_readers; /* increase if any reader starts streaming */
++	int announce_all_caps; /* set to false, if device caps (OUTPUT/CAPTURE)
++                                * should only be announced if the resp. "ready"
++                                * flag is set; default=TRUE */
++
++	int min_width, max_width;
++	int min_height, max_height;
++
++	char card_label[32];
++
++	wait_queue_head_t read_event;
++	spinlock_t lock, list_lock;
++};
++
++/* types of opener shows what opener wants to do with loopback */
++enum opener_type {
++	// clang-format off
++	UNNEGOTIATED	= 0,
++	READER		= 1,
++	WRITER		= 2,
++	// clang-format on
++};
++
++/* struct keeping state and type of opener */
++struct v4l2_loopback_opener {
++	enum opener_type type;
++	s64 read_position; /* number of last processed frame + 1 or
++			    * write_position - 1 if reader went out of sync */
++	unsigned int reread_count;
++	struct v4l2_buffer *buffers;
++	int buffers_number; /* should not be big, 4 is a good choice */
++	int timeout_image_io;
++
++	struct v4l2_fh fh;
++};
++
++#define fh_to_opener(ptr) container_of((ptr), struct v4l2_loopback_opener, fh)
++
++/* this is heavily inspired by the bttv driver found in the linux kernel */
++struct v4l2l_format {
++	char *name;
++	int fourcc; /* video4linux 2 */
++	int depth; /* bit/pixel */
++	int flags;
++};
++/* set the v4l2l_format.flags to PLANAR for non-packed formats */
++#define FORMAT_FLAGS_PLANAR 0x01
++#define FORMAT_FLAGS_COMPRESSED 0x02
++
++#include "v4l2loopback_formats.h"
++
++#ifndef V4L2_TYPE_IS_CAPTURE
++#define V4L2_TYPE_IS_CAPTURE(type)                \
++	((type) == V4L2_BUF_TYPE_VIDEO_CAPTURE || \
++	 (type) == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
++#endif /* V4L2_TYPE_IS_CAPTURE */
++#ifndef V4L2_TYPE_IS_OUTPUT
++#define V4L2_TYPE_IS_OUTPUT(type)                \
++	((type) == V4L2_BUF_TYPE_VIDEO_OUTPUT || \
++	 (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
++#endif /* V4L2_TYPE_IS_OUTPUT */
++
++/* whether the format can be changed */
++/* the format is fixated if we
++   - have writers (ready_for_capture>0)
++   - and/or have readers (active_readers>0)
++*/
++#define V4L2LOOPBACK_IS_FIXED_FMT(device)                               \
++	(device->ready_for_capture > 0 || device->active_readers > 0 || \
++	 device->keep_format)
++
++static const unsigned int FORMATS = ARRAY_SIZE(formats);
++
++static char *fourcc2str(unsigned int fourcc, char buf[4])
++{
++	unsigned int i;
++
++	/* least significant byte first; no NUL terminator is written */
++	for (i = 0; i < 4; i++)
++		buf[i] = (fourcc >> (8 * i)) & 0xFF;
++	return buf;
++}
++
++static const struct v4l2l_format *format_by_fourcc(int fourcc)
++{
++	const struct v4l2l_format *entry;
++
++	/* linear scan over formats[]; NULL if the fourcc is not listed */
++	for (entry = formats; entry < formats + FORMATS; entry++)
++		if (entry->fourcc == fourcc)
++			return entry;
++
++	dprintk("unsupported format '%c%c%c%c'\n", (fourcc >> 0) & 0xFF,
++		(fourcc >> 8) & 0xFF, (fourcc >> 16) & 0xFF,
++		(fourcc >> 24) & 0xFF);
++	return NULL;
++}
++
++static void pix_format_set_size(struct v4l2_pix_format *f,
++				const struct v4l2l_format *fmt,
++				unsigned int width, unsigned int height)
++{
++	f->width = width;
++	f->height = height;
++
++	if (fmt->flags & FORMAT_FLAGS_PLANAR) {
++		f->bytesperline = width; /* Y plane */
++		f->sizeimage = (width * height * fmt->depth) >> 3;
++	} else if (fmt->flags & FORMAT_FLAGS_COMPRESSED) {
++		/* doesn't make sense for compressed formats */
++		f->bytesperline = 0;
++		f->sizeimage = (width * height * fmt->depth) >> 3;
++	} else {
++		f->bytesperline = (width * fmt->depth) >> 3;
++		f->sizeimage = height * f->bytesperline;
++	}
++}
++
++static int v4l2l_fill_format(struct v4l2_format *fmt, int capture,
++			     const u32 minwidth, const u32 maxwidth,
++			     const u32 minheight, const u32 maxheight)
++{
++	u32 width = fmt->fmt.pix.width, height = fmt->fmt.pix.height;
++	u32 pixelformat = fmt->fmt.pix.pixelformat;
++	struct v4l2_format fmt0 = *fmt;
++	u32 bytesperline = 0, sizeimage = 0;
++	if (!width)
++		width = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH;
++	if (!height)
++		height = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT;
++	if (width < minwidth)
++		width = minwidth;
++	if (width > maxwidth)
++		width = maxwidth;
++	if (height < minheight)
++		height = minheight;
++	if (height > maxheight)
++		height = maxheight;
++
++	/* sets: width,height,pixelformat,bytesperline,sizeimage */
++	if (!(V4L2_TYPE_IS_MULTIPLANAR(fmt0.type))) {
++		fmt0.fmt.pix.bytesperline = 0;
++		fmt0.fmt.pix.sizeimage = 0;
++	}
++
++	if (0) {
++		;
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
++	} else if (!v4l2_fill_pixfmt(&fmt0.fmt.pix, pixelformat, width,
++				     height)) {
++		;
++	} else if (!v4l2_fill_pixfmt_mp(&fmt0.fmt.pix_mp, pixelformat, width,
++					height)) {
++		;
++#endif
++	} else {
++		const struct v4l2l_format *format =
++			format_by_fourcc(pixelformat);
++		if (!format)
++			return -EINVAL;
++		pix_format_set_size(&fmt0.fmt.pix, format, width, height);
++		fmt0.fmt.pix.pixelformat = format->fourcc;
++	}
++
++	if (V4L2_TYPE_IS_MULTIPLANAR(fmt0.type)) {
++		*fmt = fmt0;
++
++		if ((fmt->fmt.pix_mp.colorspace == V4L2_COLORSPACE_DEFAULT) ||
++		    (fmt->fmt.pix_mp.colorspace > V4L2_COLORSPACE_DCI_P3))
++			fmt->fmt.pix_mp.colorspace = V4L2_COLORSPACE_SRGB;
++		if (V4L2_FIELD_ANY == fmt->fmt.pix_mp.field)
++			fmt->fmt.pix_mp.field = V4L2_FIELD_NONE;
++		if (capture)
++			fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
++		else
++			fmt->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
++	} else {
++		bytesperline = fmt->fmt.pix.bytesperline;
++		sizeimage = fmt->fmt.pix.sizeimage;
++
++		*fmt = fmt0;
++
++		if (!fmt->fmt.pix.bytesperline)
++			fmt->fmt.pix.bytesperline = bytesperline;
++		if (!fmt->fmt.pix.sizeimage)
++			fmt->fmt.pix.sizeimage = sizeimage;
++
++		if ((fmt->fmt.pix.colorspace == V4L2_COLORSPACE_DEFAULT) ||
++		    (fmt->fmt.pix.colorspace > V4L2_COLORSPACE_DCI_P3))
++			fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB;
++		if (V4L2_FIELD_ANY == fmt->fmt.pix.field)
++			fmt->fmt.pix.field = V4L2_FIELD_NONE;
++		if (capture)
++			fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
++		else
++			fmt->type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
++	}
++
++	return 0;
++}
++
++/* Checks if v4l2l_fill_format() has set a valid, fixed sizeimage val. */
++static bool v4l2l_pix_format_has_valid_sizeimage(struct v4l2_format *fmt)
++{
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
++	const struct v4l2_format_info *info;
++
++	info = v4l2_format_info(fmt->fmt.pix.pixelformat);
++	if (info && info->mem_planes == 1)
++		return true;
++#endif
++
++	return false;
++}
++
++static int pix_format_eq(const struct v4l2_pix_format *ref,
++			 const struct v4l2_pix_format *tgt, int strict)
++{
++	/* returns 1 if @tgt matches @ref, else 0; with !strict only width,
++	 * height and pixelformat are compared. In strict mode a @tgt field or
++	 * colorspace left at its ANY/DEFAULT value matches anything. */
++#define _pix_format_eq0(x)    \
++	if (ref->x != tgt->x) \
++	result = 0
++#define _pix_format_eq1(x, def)                              \
++	do {                                                 \
++		if ((def != tgt->x) && (ref->x != tgt->x)) { \
++			printk(KERN_INFO #x " failed\n");    \
++			result = 0;                          \
++		}                                            \
++	} while (0)
++	int result = 1;
++	_pix_format_eq0(width);
++	_pix_format_eq0(height);
++	_pix_format_eq0(pixelformat);
++	if (!strict)
++		return result;
++	_pix_format_eq1(field, V4L2_FIELD_ANY);
++	_pix_format_eq0(bytesperline);
++	_pix_format_eq0(sizeimage);
++	_pix_format_eq1(colorspace, V4L2_COLORSPACE_DEFAULT);
++	return result;
++}
++
++static struct v4l2_loopback_device *v4l2loopback_getdevice(struct file *f);
++/* validate/adjust @fmt against the device's current format and size limits;
++ * returns 0, -EBUSY (pinned format differs) or -EINVAL (unsupported format) */
++static int inner_try_setfmt(struct file *file, struct v4l2_format *fmt)
++{
++	int capture = V4L2_TYPE_IS_CAPTURE(fmt->type);
++	struct v4l2_loopback_device *dev;
++	int needschange = 0;
++	char buf[5]; /* fourcc2str() fills 4 chars and does not terminate */
++	buf[4] = 0;
++
++	dev = v4l2loopback_getdevice(file);
++
++	/* loose comparison: only width, height and pixelformat are checked */
++	needschange = !(pix_format_eq(&dev->pix_format, &fmt->fmt.pix, 0));
++	if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) {
++		/* format is pinned: hand the device's format to the caller */
++		fmt->fmt.pix = dev->pix_format;
++		if (needschange) {
++			if (dev->active_readers > 0 && capture) {
++				/* cannot call fmt_cap while there are readers */
++				return -EBUSY;
++			}
++			if (dev->ready_for_capture > 0 && !capture) {
++				/* cannot call fmt_out while there are writers */
++				return -EBUSY;
++			}
++		}
++	}
++	if (v4l2l_fill_format(fmt, capture, dev->min_width, dev->max_width,
++			      dev->min_height, dev->max_height) != 0) {
++		return -EINVAL;
++	}
++
++	/* debug aid; uses the buffer declared above instead of shadowing it */
++	dprintk("capFOURCC=%s\n",
++		fourcc2str(dev->pix_format.pixelformat, buf));
++	return 0;
++}
++
++static int set_timeperframe(struct v4l2_loopback_device *dev,
++			    struct v4l2_fract *tpf)
++{
++	if ((tpf->denominator < 1) || (tpf->numerator < 1)) {
++		return -EINVAL;
++	}
++	dev->capture_param.timeperframe = *tpf;
++	dev->frame_jiffies = max(1UL, msecs_to_jiffies(1000) * tpf->numerator /
++					      tpf->denominator);
++	return 0;
++}
++
++static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd);
++
++/* device attributes */
++/* available via sysfs: /sys/devices/virtual/video4linux/video* */
++
++static ssize_t attr_show_format(struct device *cd,
++				struct device_attribute *attr, char *buf)
++{
++	/* emits "FOURCC:WxH@fps", e.g. "YUYV:320x240@1000/30"; empty if not fixed */
++	struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd);
++	const struct v4l2_fract *tpf;
++	char buf4cc[5], buf_fps[32];
++
++	if (!dev || !V4L2LOOPBACK_IS_FIXED_FMT(dev))
++		return 0;
++	tpf = &dev->capture_param.timeperframe;
++
++	fourcc2str(dev->pix_format.pixelformat, buf4cc);
++	buf4cc[4] = 0;
++	if (tpf->numerator == 1) /* fraction fields are unsigned: use %u */
++		snprintf(buf_fps, sizeof(buf_fps), "%u", tpf->denominator);
++	else
++		snprintf(buf_fps, sizeof(buf_fps), "%u/%u", tpf->denominator,
++			 tpf->numerator);
++	return sprintf(buf, "%4s:%ux%u@%s\n", buf4cc, dev->pix_format.width,
++		       dev->pix_format.height, buf_fps);
++}
++
++/* sysfs "format" store: "@<fps>" or "@<num>/<den>"; only fps can be set */
++static ssize_t attr_store_format(struct device *cd,
++				 struct device_attribute *attr, const char *buf,
++				 size_t len)
++{
++	struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd);
++	int fps_num = 0, fps_den = 1;
++	struct v4l2_fract f;
++	int err;
++
++	if (!dev)
++		return -ENODEV;
++	/* negative numbers would wrap to huge __u32 values, so reject them */
++	if (sscanf(buf, "@%d/%d", &fps_num, &fps_den) < 1 || fps_num < 1 ||
++	    fps_den < 1)
++		return -EINVAL;
++	f.numerator = fps_den; /* time per frame is the inverse of fps */
++	f.denominator = fps_num;
++	err = set_timeperframe(dev, &f);
++	return err < 0 ? err : (ssize_t)len;
++}
++
++static DEVICE_ATTR(format, S_IRUGO | S_IWUSR, attr_show_format,
++		   attr_store_format);
++
++/* sysfs attribute "buffers" (read-only):
++ * prints dev->used_buffers as a decimal number, or fails with -ENODEV
++ */
++static ssize_t attr_show_buffers(struct device *cd,
++				 struct device_attribute *attr, char *buf)
++{
++	struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd);
++
++	return dev ? sprintf(buf, "%d\n", dev->used_buffers) : -ENODEV;
++}
++
++static DEVICE_ATTR(buffers, S_IRUGO, attr_show_buffers, NULL);
++
++/* sysfs "max_openers" show: prints dev->max_openers, or -ENODEV */
++static ssize_t attr_show_maxopeners(struct device *cd,
++				    struct device_attribute *attr, char *buf)
++{
++	struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd);
++
++	if (dev)
++		return sprintf(buf, "%d\n", dev->max_openers);
++	return -ENODEV;
++}
++
++/* sysfs "max_openers" store: parse a number and make it the opener limit */
++static ssize_t attr_store_maxopeners(struct device *cd,
++				     struct device_attribute *attr,
++				     const char *buf, size_t len)
++{
++	struct v4l2_loopback_device *dev = NULL;
++	unsigned long curr = 0;
++
++	if (kstrtoul(buf, 0, &curr))
++		return -EINVAL;
++
++	dev = v4l2loopback_cd2dev(cd);
++	if (!dev)
++		return -ENODEV;
++
++	/* range-check first so the int comparisons below cannot wrap */
++	if (curr > __INT_MAX__)
++		return -EINVAL;
++	if (dev->max_openers == (int)curr)
++		return len;
++	/* refuse a limit below the number of currently attached openers */
++	if (atomic_read(&dev->open_count) > (int)curr)
++		return -EINVAL;
++	dev->max_openers = (int)curr;
++	return len;
++}
++
++static DEVICE_ATTR(max_openers, S_IRUGO | S_IWUSR, attr_show_maxopeners,
++		   attr_store_maxopeners);
++
++static ssize_t attr_show_state(struct device *cd, struct device_attribute *attr,
++			       char *buf)
++{
++	struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd);
++
++	if (!dev)
++		return -ENODEV;
++	/* sysfs "state": "capture" takes precedence if both flags are set */
++	if (dev->ready_for_capture)
++		return sprintf(buf, "capture\n");
++	if (dev->ready_for_output)
++		return sprintf(buf, "output\n");
++	/* neither flag is set, so there is no state to report */
++	return -EAGAIN;
++}
++
++static DEVICE_ATTR(state, S_IRUGO, attr_show_state, NULL);
++
++static void v4l2loopback_remove_sysfs(struct video_device *vdev)
++{
++#define V4L2_SYSFS_DESTROY(x) device_remove_file(&vdev->dev, &dev_attr_##x)
++	/* removes the format/buffers/max_openers/state attributes; NULL is a no-op */
++	if (vdev) {
++		V4L2_SYSFS_DESTROY(format);
++		V4L2_SYSFS_DESTROY(buffers);
++		V4L2_SYSFS_DESTROY(max_openers);
++		V4L2_SYSFS_DESTROY(state);
++		/* ... */
++	}
++}
++
++static void v4l2loopback_create_sysfs(struct video_device *vdev)
++{
++	int res = 0;
++	/* NOTE: the macro expands to two statements and a bare 'break' */
++#define V4L2_SYSFS_CREATE(x)                                 \
++	res = device_create_file(&vdev->dev, &dev_attr_##x); \
++	if (res < 0)                                         \
++	break
++	if (!vdev)
++		return;
++	do { /* single-pass loop: gives the macro's 'break' a target */
++		V4L2_SYSFS_CREATE(format);
++		V4L2_SYSFS_CREATE(buffers);
++		V4L2_SYSFS_CREATE(max_openers);
++		V4L2_SYSFS_CREATE(state);
++		/* ... */
++	} while (0);
++	/* res is the last result; the first failure stops the sequence */
++	if (res >= 0)
++		return;
++	dev_err(&vdev->dev, "%s error: %d\n", __func__, res);
++}
++
++/* Event APIs */
++/* driver-private event id, built on top of V4L2_EVENT_PRIVATE_START */
++#define V4L2LOOPBACK_EVENT_BASE (V4L2_EVENT_PRIVATE_START)
++#define V4L2LOOPBACK_EVENT_OFFSET 0x08E00000
++#define V4L2_EVENT_PRI_CLIENT_USAGE \
++	(V4L2LOOPBACK_EVENT_BASE + V4L2LOOPBACK_EVENT_OFFSET + 1)
++/* presumably the payload of V4L2_EVENT_PRI_CLIENT_USAGE - TODO confirm */
++struct v4l2_event_client_usage {
++	__u32 count; /* NOTE(review): looks like a client count - confirm */
++};
++
++/* global module data */
++/* find a device based on its device-number (e.g. '3' for /dev/video3) */
++struct v4l2loopback_lookup_cb_data {
++	int device_nr; /* in: video node number to find; out: matching idr id */
++	struct v4l2_loopback_device *device; /* out: the matching device */
++};
++/* idr_for_each() callback: returns 1 (stop) on the matching device */
++static int v4l2loopback_lookup_cb(int id, void *ptr, void *data)
++{
++	struct v4l2loopback_lookup_cb_data *query = data;
++	struct v4l2_loopback_device *candidate = ptr;
++
++	if (!query || !candidate || !candidate->vdev ||
++	    candidate->vdev->num != query->device_nr)
++		return 0;
++	query->device = candidate;
++	query->device_nr = id; /* hand the idr id back to the caller */
++	return 1;
++}
++/* look up the device whose video node number is @device_nr
++ * (e.g. 3 for /dev/video3); returns its idr id and, if @device is
++ * non-NULL, stores the device there; -ENODEV when nothing matches
++ */
++static int v4l2loopback_lookup(int device_nr,
++			       struct v4l2_loopback_device **device)
++{
++	struct v4l2loopback_lookup_cb_data query = { .device_nr = device_nr };
++
++	if (idr_for_each(&v4l2loopback_index_idr, &v4l2loopback_lookup_cb,
++			 &query) != 1)
++		return -ENODEV;
++	if (device)
++		*device = query.device;
++	return query.device_nr;
++}
++static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd)
++{
++	/* sysfs device -> video_device -> driver data holding the idr id */
++	struct v4l2loopback_private *ptr =
++		video_get_drvdata(to_video_device(cd));
++
++	/* tolerate missing driver data: the sysfs callers check for NULL */
++	return ptr ? idr_find(&v4l2loopback_index_idr, ptr->device_nr) : NULL;
++}
++
++/* resolve an open file to its loopback device:
++ * the file's driver data carries the idr id of the device */
++static struct v4l2_loopback_device *v4l2loopback_getdevice(struct file *f)
++{
++	const struct v4l2loopback_private *priv = video_drvdata(f);
++	return idr_find(&v4l2loopback_index_idr, priv->device_nr);
++}
++
++/* forward declarations */
++static void client_usage_queue_event(struct video_device *vdev);
++static void init_buffers(struct v4l2_loopback_device *dev);
++static int allocate_buffers(struct v4l2_loopback_device *dev);
++static void free_buffers(struct v4l2_loopback_device *dev);
++static void try_free_buffers(struct v4l2_loopback_device *dev);
++static int allocate_timeout_image(struct v4l2_loopback_device *dev);
++static void check_timers(struct v4l2_loopback_device *dev);
++static const struct v4l2_file_operations v4l2_loopback_fops;
++static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops;
++
++/* Queue helpers */
++/* the helpers below only flip the QUEUED/DONE bits of buffer->buffer.flags */
++static inline void set_done(struct v4l2l_buffer *buffer)
++{
++	buffer->buffer.flags = (buffer->buffer.flags & ~V4L2_BUF_FLAG_QUEUED) |
++			       V4L2_BUF_FLAG_DONE;
++}
++
++static inline void set_queued(struct v4l2l_buffer *buffer)
++{
++	buffer->buffer.flags = (buffer->buffer.flags & ~V4L2_BUF_FLAG_DONE) |
++			       V4L2_BUF_FLAG_QUEUED; /* DONE off, QUEUED on */
++}
++
++static inline void unset_flags(struct v4l2l_buffer *buffer)
++{
++	/* clear both state bits in one go: neither queued nor done */
++	buffer->buffer.flags &= ~(V4L2_BUF_FLAG_QUEUED | V4L2_BUF_FLAG_DONE);
++}
++
++/* V4L2 ioctl caps and params calls */
++/* fills in driver name, card label, bus info and the capability flags
++ * called on VIDIOC_QUERYCAP
++ */
++static int vidioc_querycap(struct file *file, void *priv,
++			   struct v4l2_capability *cap)
++{
++	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
++	int device_nr =
++		((struct v4l2loopback_private *)video_get_drvdata(dev->vdev))
++			->device_nr;
++	__u32 capabilities = V4L2_CAP_STREAMING | V4L2_CAP_READWRITE;
++
++	strscpy(cap->driver, "v4l2 loopback", sizeof(cap->driver));
++	snprintf(cap->card, sizeof(cap->card), "%s", dev->card_label);
++	snprintf(cap->bus_info, sizeof(cap->bus_info),
++		 "platform:v4l2loopback-%03d", device_nr);
++	/* CAPTURE/OUTPUT: either both announced, or per the ready_* flags */
++	if (dev->announce_all_caps) {
++		capabilities |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT;
++	} else {
++		if (dev->ready_for_capture) {
++			capabilities |= V4L2_CAP_VIDEO_CAPTURE;
++		}
++		if (dev->ready_for_output) {
++			capabilities |= V4L2_CAP_VIDEO_OUTPUT;
++		}
++	}
++	/* one chained assignment; on >= 4.7 it also sets vdev->device_caps */
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
++	dev->vdev->device_caps =
++#endif /* >=linux-4.7.0 */
++		cap->device_caps = cap->capabilities = capabilities;
++
++	cap->capabilities |= V4L2_CAP_DEVICE_CAPS;
++
++	memset(cap->reserved, 0, sizeof(cap->reserved));
++	return 0;
++}
++
++/* VIDIOC_ENUM_FRAMESIZES: report the single frame size (or size range)
++ * supported for argp->pixel_format; only index 0 exists
++ */
++static int vidioc_enum_framesizes(struct file *file, void *fh,
++				  struct v4l2_frmsizeenum *argp)
++{
++	struct v4l2_loopback_device *dev;
++
++	/* there can be only one... */
++	if (argp->index)
++		return -EINVAL;
++
++	dev = v4l2loopback_getdevice(file);
++	if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) {
++		/* format has already been negotiated
++		 * cannot change during runtime
++		 */
++		if (argp->pixel_format != dev->pix_format.pixelformat)
++			return -EINVAL;
++
++		argp->type = V4L2_FRMSIZE_TYPE_DISCRETE;
++		argp->discrete.width = dev->pix_format.width;
++		argp->discrete.height = dev->pix_format.height;
++		return 0;
++	}
++
++	/* if the format has not been negotiated yet, we accept anything */
++	if (!format_by_fourcc(argp->pixel_format))
++		return -EINVAL;
++
++	if (dev->min_width != dev->max_width ||
++	    dev->min_height != dev->max_height) {
++		argp->type = V4L2_FRMSIZE_TYPE_CONTINUOUS;
++		argp->stepwise.min_width = dev->min_width;
++		argp->stepwise.min_height = dev->min_height;
++		argp->stepwise.max_width = dev->max_width;
++		argp->stepwise.max_height = dev->max_height;
++		argp->stepwise.step_width = 1;
++		argp->stepwise.step_height = 1;
++		return 0;
++	}
++
++	/* min == max in both directions: a single discrete size */
++	argp->type = V4L2_FRMSIZE_TYPE_DISCRETE;
++	argp->discrete.width = dev->min_width;
++	argp->discrete.height = dev->min_height;
++	return 0;
++}
++
++/* VIDIOC_ENUM_FRAMEINTERVALS: report the frame interval for a given size
++ * and pixel format; only index 0 exists
++ */
++static int vidioc_enum_frameintervals(struct file *file, void *fh,
++				      struct v4l2_frmivalenum *argp)
++{
++	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
++
++	/* there can be only one... */
++	if (argp->index)
++		return -EINVAL;
++
++	if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) {
++		/* fixed format: only the negotiated size/format is valid */
++		if (argp->width != dev->pix_format.width ||
++		    argp->height != dev->pix_format.height ||
++		    argp->pixel_format != dev->pix_format.pixelformat)
++			return -EINVAL;
++
++		argp->type = V4L2_FRMIVAL_TYPE_DISCRETE;
++		argp->discrete = dev->capture_param.timeperframe;
++		return 0;
++	}
++
++	/* otherwise any size within the limits and any known format is OK */
++	if (argp->width < dev->min_width || argp->width > dev->max_width ||
++	    argp->height < dev->min_height || argp->height > dev->max_height ||
++	    !format_by_fourcc(argp->pixel_format))
++		return -EINVAL;
++
++	argp->type = V4L2_FRMIVAL_TYPE_CONTINUOUS;
++	argp->stepwise.min.numerator = 1;
++	argp->stepwise.min.denominator = V4L2LOOPBACK_FPS_MAX;
++	argp->stepwise.max.numerator = 1;
++	argp->stepwise.max.denominator = V4L2LOOPBACK_FPS_MIN;
++	argp->stepwise.step.numerator = 1;
++	argp->stepwise.step.denominator = 1;
++	return 0;
++}
++
++/* ------------------ CAPTURE ----------------------- */
++
++/* VIDIOC_ENUM_FMT for V4L2_BUF_TYPE_VIDEO_CAPTURE:
++ * describes the one fixed format at index 0; fails with -EINVAL for any
++ * other index or while no format has been fixed yet
++ */
++static int vidioc_enum_fmt_cap(struct file *file, void *fh,
++			       struct v4l2_fmtdesc *f)
++{
++	struct v4l2_loopback_device *dev;
++	const struct v4l2l_format *fmt;
++	__u32 format;
++	MARK();
++
++	dev = v4l2loopback_getdevice(file);
++
++	/* there is only one entry, and only once a format is fixed */
++	if (f->index || !V4L2LOOPBACK_IS_FIXED_FMT(dev))
++		return -EINVAL;
++
++	/* format has been fixed, so only one single format is supported */
++	format = dev->pix_format.pixelformat;
++	fmt = format_by_fourcc(format);
++	if (fmt) {
++		snprintf(f->description, sizeof(f->description), "%s",
++			 fmt->name);
++	} else {
++		/* not in our format table: show the raw FOURCC characters */
++		snprintf(f->description, sizeof(f->description),
++			 "[%c%c%c%c]", (format >> 0) & 0xFF,
++			 (format >> 8) & 0xFF, (format >> 16) & 0xFF,
++			 (format >> 24) & 0xFF);
++	}
++
++	f->pixelformat = dev->pix_format.pixelformat;
++	f->flags = 0;
++	MARK();
++	return 0;
++}
++
++/* VIDIOC_G_FMT for V4L2_BUF_TYPE_VIDEO_CAPTURE:
++ * hands out the device's current pixel format
++ */
++static int vidioc_g_fmt_cap(struct file *file, void *priv,
++			    struct v4l2_format *fmt)
++{
++	struct v4l2_loopback_device *dev;
++	MARK();
++
++	dev = v4l2loopback_getdevice(file);
++	/* -EINVAL unless the device is ready for capture or for output */
++	if (!(dev->ready_for_capture || dev->ready_for_output))
++		return -EINVAL;
++	fmt->fmt.pix = dev->pix_format;
++	MARK();
++	return 0;
++}
++
++/* VIDIOC_TRY_FMT for V4L2_BUF_TYPE_VIDEO_CAPTURE:
++ * lets inner_try_setfmt() check and adjust fmt without applying it;
++ * the app is expected to obey the adjusted values
++ * (-EBUSY from the check is reported as success)
++ */
++static int vidioc_try_fmt_cap(struct file *file, void *priv,
++			      struct v4l2_format *fmt)
++{
++	int ret;
++
++	if (!V4L2_TYPE_IS_CAPTURE(fmt->type))
++		return -EINVAL;
++
++	ret = inner_try_setfmt(file, fmt);
++	/* trying a format never fails just because the device is busy */
++	return (ret == -EBUSY) ? 0 : ret;
++}
++
++/* VIDIOC_S_FMT for V4L2_BUF_TYPE_VIDEO_CAPTURE:
++ * validates fmt through inner_try_setfmt() and, if that succeeds, stores
++ * the (possibly adjusted) result as the device's pixel format
++ */
++static int vidioc_s_fmt_cap(struct file *file, void *priv,
++			    struct v4l2_format *fmt)
++{
++	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
++	int ret;
++
++	if (!V4L2_TYPE_IS_CAPTURE(fmt->type))
++		return -EINVAL;
++	ret = inner_try_setfmt(file, fmt);
++	if (ret)
++		return ret;
++	dev->pix_format = fmt->fmt.pix;
++	return 0;
++}
++
++/* ------------------ OUTPUT ----------------------- */
++
++/* enumerates formats for the producer: the single fixed format, or
++ * (while nothing is fixed) the entries of the formats[] table by index
++ * called on VIDIOC_ENUM_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT
++ */
++static int vidioc_enum_fmt_out(struct file *file, void *fh,
++			       struct v4l2_fmtdesc *f)
++{
++	struct v4l2_loopback_device *dev;
++	const struct v4l2l_format *fmt;
++
++	dev = v4l2loopback_getdevice(file);
++
++	if (V4L2LOOPBACK_IS_FIXED_FMT(dev)) {
++		/* format has been fixed, so only one single format is supported */
++		const __u32 format = dev->pix_format.pixelformat;
++
++		if (f->index)
++			return -EINVAL;
++		/* use the table name if known, else the raw FOURCC characters */
++		if ((fmt = format_by_fourcc(format))) {
++			snprintf(f->description, sizeof(f->description), "%s",
++				 fmt->name);
++		} else {
++			snprintf(f->description, sizeof(f->description),
++				 "[%c%c%c%c]", (format >> 0) & 0xFF,
++				 (format >> 8) & 0xFF, (format >> 16) & 0xFF,
++				 (format >> 24) & 0xFF);
++		}
++
++		f->pixelformat = dev->pix_format.pixelformat;
++	} else {
++		/* fill in a dummy format */
++		/* coverity[unsigned_compare] */
++		if (f->index < 0 || f->index >= FORMATS)
++			return -EINVAL;
++
++		fmt = &formats[f->index];
++
++		f->pixelformat = fmt->fourcc;
++		snprintf(f->description, sizeof(f->description), "%s",
++			 fmt->name);
++	}
++	f->flags = 0;
++
++	return 0;
++}
++
++/* returns the current video format in fmt */
++/* NOTE: this is called from the producer
++ * so if format has not been negotiated yet,
++ * it should return ALL of available formats,
++ * called on VIDIOC_G_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT
++ */
++static int vidioc_g_fmt_out(struct file *file, void *priv,
++			    struct v4l2_format *fmt)
++{
++	struct v4l2_loopback_device *dev;
++	MARK();
++
++	dev = v4l2loopback_getdevice(file);
++
++	/*
++	 * LATER: this should return the currently valid format
++	 * gstreamer doesn't like it, if this returns -EINVAL, as it
++	 * then concludes that there is _no_ valid format
++	 * CHECK whether this assumption is wrong,
++	 * or whether we have to always provide a valid format
++	 */
++	/* for now: always succeed with whatever dev->pix_format holds */
++	fmt->fmt.pix = dev->pix_format;
++	return 0;
++}
++
++/* VIDIOC_TRY_FMT for V4L2_BUF_TYPE_VIDEO_OUTPUT:
++ * lets inner_try_setfmt() check and adjust fmt without applying it;
++ * -EBUSY from the check is reported as success
++ */
++static int vidioc_try_fmt_out(struct file *file, void *priv,
++			      struct v4l2_format *fmt)
++{
++	int ret;
++
++	if (!V4L2_TYPE_IS_OUTPUT(fmt->type))
++		return -EINVAL;
++	ret = inner_try_setfmt(file, fmt);
++	/* trying a format never fails just because the device is busy */
++	return (ret == -EBUSY) ? 0 : ret;
++}
++
++/* VIDIOC_S_FMT for V4L2_BUF_TYPE_VIDEO_OUTPUT:
++ * applies the validated format to the device and, unless the device is
++ * already ready for capture, sizes and allocates the buffers right here
++ * because we do not know if it will be streaming or read/write IO
++ */
++static int vidioc_s_fmt_out(struct file *file, void *priv,
++			    struct v4l2_format *fmt)
++{
++	struct v4l2_loopback_device *dev;
++	char buf[5];
++	int ret;
++
++	if (!V4L2_TYPE_IS_OUTPUT(fmt->type))
++		return -EINVAL;
++	dev = v4l2loopback_getdevice(file);
++	buf[4] = 0;
++
++	ret = inner_try_setfmt(file, fmt);
++	if (ret)
++		return ret;
++
++	dev->pix_format = fmt->fmt.pix;
++	dev->pix_format_has_valid_sizeimage =
++		v4l2l_pix_format_has_valid_sizeimage(fmt);
++	dprintk("s_fmt_out(%d) %d...%d\n", ret, dev->ready_for_capture,
++		dev->pix_format.sizeimage);
++	dprintk("outFOURCC=%s\n", fourcc2str(dev->pix_format.pixelformat, buf));
++
++	if (dev->ready_for_capture)
++		return 0;
++
++	dev->buffer_size = PAGE_ALIGN(dev->pix_format.sizeimage);
++	// JMZ: TODO get rid of the next line
++	fmt->fmt.pix.sizeimage = dev->buffer_size;
++	return allocate_buffers(dev);
++}
++
++// #define V4L2L_OVERLAY
++#ifdef V4L2L_OVERLAY
++/* ------------------ OVERLAY ----------------------- */
++/* currently unsupported */
++/* GSTreamer's v4l2sink is buggy, as it requires the overlay to work
++ * while it should only require it, if overlay is requested
++ * once the gstreamer element is fixed, remove the overlay dummies
++ */
++#warning OVERLAY dummies
++static int vidioc_g_fmt_overlay(struct file *file, void *priv,
++				struct v4l2_format *fmt)
++{
++	return 0; /* dummy: reports success and leaves fmt untouched */
++}
++
++static int vidioc_s_fmt_overlay(struct file *file, void *priv,
++				struct v4l2_format *fmt)
++{
++	return 0; /* dummy: reports success and ignores fmt */
++}
++#endif /* V4L2L_OVERLAY */
++
++/* ------------------ PARAMs ----------------------- */
++
++/* get the data flow parameters; copies the device's capture_param
++ * regardless of the requested buffer type
++ * called on VIDIOC_G_PARM
++ */
++static int vidioc_g_parm(struct file *file, void *priv,
++			 struct v4l2_streamparm *parm)
++{
++	/* do not care about type of opener, hope these enums would always be
++	 * compatible */
++	struct v4l2_loopback_device *dev;
++	MARK();
++
++	dev = v4l2loopback_getdevice(file);
++	parm->parm.capture = dev->capture_param;
++	return 0;
++}
++
++/* set data flow parameters; only the frame interval (timeperframe) is
++ * applied, identically for capture and output buffer types
++ * called on VIDIOC_S_PARM
++ */
++static int vidioc_s_parm(struct file *file, void *priv,
++			 struct v4l2_streamparm *parm)
++{
++	struct v4l2_loopback_device *dev;
++	int err = 0;
++	MARK();
++
++	dev = v4l2loopback_getdevice(file);
++	dprintk("vidioc_s_parm called frate=%d/%d\n",
++		parm->parm.capture.timeperframe.numerator,
++		parm->parm.capture.timeperframe.denominator);
++
++	switch (parm->type) {
++	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
++	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
++		/* both types are handled alike: the interval is always read
++		 * through parm.capture and stored device-wide */
++		err = set_timeperframe(dev, &parm->parm.capture.timeperframe);
++		if (err < 0)
++			return err;
++		break;
++	default:
++		/* a proper errno instead of a bare -1 (which reads as -EPERM) */
++		return -EINVAL;
++	}
++
++	/* report back what is now in effect */
++	parm->parm.capture = dev->capture_param;
++	return 0;
++}
++
++#ifdef V4L2LOOPBACK_WITH_STD
++/* accepts a tv standard without acting on it (added to support effecttv);
++ * succeeds when the request shares at least one bit with V4L2_STD_ALL
++ * called on VIDIOC_S_STD
++ */
++static int vidioc_s_std(struct file *file, void *fh, v4l2_std_id *_std)
++{
++	const v4l2_std_id supported = V4L2_STD_ALL;
++	v4l2_std_id requested = 0;
++
++	if (_std) {
++		requested = *_std;
++		/* the full set is always written back to the caller */
++		*_std = supported;
++	}
++
++	/* we support everything in V4L2_STD_ALL, but not more... */
++	if (requested & supported)
++		return 0;
++	/* NULL pointer, empty request, or only bits outside V4L2_STD_ALL */
++	return -EINVAL;
++}
++
++/* reports V4L2_STD_ALL as the (fake) current video standard
++ * called on VIDIOC_G_STD
++ */
++static int vidioc_g_std(struct file *file, void *fh, v4l2_std_id *norm)
++{
++	if (norm)
++		*norm = V4L2_STD_ALL;
++	return 0; /* succeeds even when norm is NULL */
++}
++/* reports V4L2_STD_ALL as the (fake) detected video standard
++ * called on VIDIOC_QUERYSTD
++ */
++static int vidioc_querystd(struct file *file, void *fh, v4l2_std_id *norm)
++{
++	if (norm)
++		*norm = V4L2_STD_ALL;
++	return 0; /* succeeds even when norm is NULL */
++}
++#endif /* V4L2LOOPBACK_WITH_STD */
++
++static int v4l2loopback_set_ctrl(struct v4l2_loopback_device *dev, u32 id,
++				 s64 val)
++{
++	switch (id) { /* unknown ids and out-of-range values give -EINVAL */
++	case CID_KEEP_FORMAT:
++		if (val < 0 || val > 1)
++			return -EINVAL;
++		dev->keep_format = val;
++		try_free_buffers(
++			dev); /* will only free buffers if !keep_format */
++		break;
++	case CID_SUSTAIN_FRAMERATE:
++		if (val < 0 || val > 1)
++			return -EINVAL;
++		spin_lock_bh(&dev->lock); /* flag change + check_timers as one */
++		dev->sustain_framerate = val;
++		check_timers(dev);
++		spin_unlock_bh(&dev->lock);
++		break;
++	case CID_TIMEOUT:
++		if (val < 0 || val > MAX_TIMEOUT)
++			return -EINVAL;
++		spin_lock_bh(&dev->lock);
++		dev->timeout_jiffies = msecs_to_jiffies(val); /* val is in ms */
++		check_timers(dev);
++		spin_unlock_bh(&dev->lock);
++		allocate_timeout_image(dev); /* NOTE(review): result ignored */
++		break;
++	case CID_TIMEOUT_IMAGE_IO:
++		dev->timeout_image_io = 1; /* val itself is not looked at */
++		break;
++	default:
++		return -EINVAL;
++	}
++	return 0;
++}
++
++static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl)
++{
++	struct v4l2_loopback_device *dev = container_of(
++		ctrl->handler, struct v4l2_loopback_device, ctrl_handler);
++	return v4l2loopback_set_ctrl(dev, ctrl->id, ctrl->val); /* forward */
++}
++
++/* describes the single output (index 0, named "loopback in")
++ * called on VIDIOC_ENUMOUTPUT
++ */
++static int vidioc_enum_output(struct file *file, void *fh,
++			      struct v4l2_output *outp)
++{
++	__u32 index = outp->index;
++	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
++	MARK();
++	/* -ENOTTY unless all caps are announced or the device is ready for output */
++	if (!dev->announce_all_caps && !dev->ready_for_output)
++		return -ENOTTY;
++
++	if (0 != index)
++		return -EINVAL;
++
++	/* clear all data (including the reserved fields) */
++	memset(outp, 0, sizeof(*outp));
++	/* index was saved above because the memset wipes it */
++	outp->index = index;
++	strscpy(outp->name, "loopback in", sizeof(outp->name));
++	outp->type = V4L2_OUTPUT_TYPE_ANALOG;
++	outp->audioset = 0;
++	outp->modulator = 0;
++#ifdef V4L2LOOPBACK_WITH_STD
++	outp->std = V4L2_STD_ALL;
++#ifdef V4L2_OUT_CAP_STD
++	outp->capabilities |= V4L2_OUT_CAP_STD;
++#endif /*  V4L2_OUT_CAP_STD */
++#endif /* V4L2LOOPBACK_WITH_STD */
++
++	return 0;
++}
++
++/* VIDIOC_G_OUTPUT: there is a single output, so the answer is always 0;
++ * -ENOTTY unless the device announces all caps or is ready for output
++ */
++static int vidioc_g_output(struct file *file, void *fh, unsigned int *i)
++{
++	const struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
++	if (!(dev->announce_all_caps || dev->ready_for_output))
++		return -ENOTTY;
++	if (i)
++		*i = 0;
++	return 0;
++}
++
++/* VIDIOC_S_OUTPUT: selecting an output only makes sense with more than
++ * one video sink; here only output 0 can be chosen;
++ * -ENOTTY unless the device announces all caps or is ready for output
++ */
++static int vidioc_s_output(struct file *file, void *fh, unsigned int i)
++{
++	const struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
++
++	if (!(dev->announce_all_caps || dev->ready_for_output))
++		return -ENOTTY;
++
++	/* any index other than 0 does not exist */
++	return i ? -EINVAL : 0;
++}
++
++/* describes the single input (index 0, named "loopback"),
++ * but later I may add more
++ * called on VIDIOC_ENUMINPUT
++ */
++static int vidioc_enum_input(struct file *file, void *fh,
++			     struct v4l2_input *inp)
++{
++	struct v4l2_loopback_device *dev;
++	__u32 index = inp->index;
++	MARK();
++
++	if (0 != index)
++		return -EINVAL;
++
++	/* clear all data (including the reserved fields) */
++	memset(inp, 0, sizeof(*inp));
++	/* index was saved above because the memset wipes it */
++	inp->index = index;
++	strscpy(inp->name, "loopback", sizeof(inp->name));
++	inp->type = V4L2_INPUT_TYPE_CAMERA;
++	inp->audioset = 0;
++	inp->tuner = 0;
++	inp->status = 0;
++
++#ifdef V4L2LOOPBACK_WITH_STD
++	inp->std = V4L2_STD_ALL;
++#ifdef V4L2_IN_CAP_STD
++	inp->capabilities |= V4L2_IN_CAP_STD;
++#endif
++#endif /* V4L2LOOPBACK_WITH_STD */
++	/* report "no signal" while the device is not ready for capture */
++	dev = v4l2loopback_getdevice(file);
++	if (!dev->ready_for_capture) {
++		inp->status |= V4L2_IN_ST_NO_SIGNAL;
++	}
++
++	return 0;
++}
++
++/* VIDIOC_G_INPUT: there is a single input, so the answer is always 0;
++ * -ENOTTY unless the device announces all caps or is ready for capture
++ */
++static int vidioc_g_input(struct file *file, void *fh, unsigned int *i)
++{
++	const struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
++	if (!(dev->announce_all_caps || dev->ready_for_capture))
++		return -ENOTTY;
++	if (i)
++		*i = 0;
++	return 0;
++}
++
++/* VIDIOC_S_INPUT: selecting an input only makes sense with more than
++ * one video src; here only input 0 can be chosen;
++ * -ENOTTY unless the device announces all caps or is ready for capture
++ */
++static int vidioc_s_input(struct file *file, void *fh, unsigned int i)
++{
++	const struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
++	if (!(dev->announce_all_caps || dev->ready_for_capture))
++		return -ENOTTY;
++	/* any index other than 0 does not exist */
++	return i ? -EINVAL : 0;
++}
++
++/* --------------- V4L2 ioctl buffer related calls ----------------- */
++
++/* negotiate buffer type
++ * only mmap streaming supported
++ * called on VIDIOC_REQBUFS
++ */
++static int vidioc_reqbufs(struct file *file, void *fh,
++			  struct v4l2_requestbuffers *b)
++{
++	struct v4l2_loopback_device *dev;
++	struct v4l2_loopback_opener *opener;
++	int i;
++	MARK();
++
++	dev = v4l2loopback_getdevice(file);
++	opener = fh_to_opener(fh);
++
++	dprintk("reqbufs: %d\t%d=%d\n", b->memory, b->count,
++		dev->buffers_number);
++	/* openers flagged for timeout-image I/O get a fixed count of 2 */
++	if (opener->timeout_image_io) {
++		dev->timeout_image_io = 0;
++		if (b->memory != V4L2_MEMORY_MMAP)
++			return -EINVAL;
++		b->count = 2;
++		return 0;
++	}
++	/* output buffers are refused while the device is not ready for output */
++	if (V4L2_TYPE_IS_OUTPUT(b->type) && (!dev->ready_for_output)) {
++		return -EBUSY;
++	}
++
++	init_buffers(dev);
++	switch (b->memory) {
++	case V4L2_MEMORY_MMAP:
++		/* do nothing here, buffers are always allocated */
++		if (b->count < 1 || dev->buffers_number < 1)
++			return 0;
++		/* never hand out more buffers than the device has */
++		if (b->count > dev->buffers_number)
++			b->count = dev->buffers_number;
++
++		/* make sure that outbufs_list contains buffers from 0 to used_buffers-1
++		 * actually, it will have been already populated via v4l2_loopback_init()
++		 * at this point */
++		if (list_empty(&dev->outbufs_list)) {
++			for (i = 0; i < dev->used_buffers; ++i)
++				list_add_tail(&dev->buffers[i].list_head,
++					      &dev->outbufs_list);
++		}
++
++		/* also, if dev->used_buffers is going to be decreased, we should remove
++		 * out-of-range buffers from outbufs_list, and fix bufpos2index mapping */
++		if (b->count < dev->used_buffers) {
++			struct v4l2l_buffer *pos, *n;
++
++			list_for_each_entry_safe(pos, n, &dev->outbufs_list,
++						 list_head) {
++				if (pos->buffer.index >= b->count)
++					list_del(&pos->list_head);
++			}
++
++			/* after we update dev->used_buffers, buffers in outbufs_list will
++			 * correspond to dev->write_position + [0;b->count-1] range */
++			i = v4l2l_mod64(dev->write_position, b->count);
++			list_for_each_entry(pos, &dev->outbufs_list,
++					    list_head) {
++				dev->bufpos2index[i % b->count] =
++					pos->buffer.index;
++				++i;
++			}
++		}
++		/* used_buffers can only shrink here, never grow */
++		opener->buffers_number = b->count;
++		if (opener->buffers_number < dev->used_buffers)
++			dev->used_buffers = opener->buffers_number;
++		return 0;
++	default:
++		return -EINVAL;
++	}
++}
++
++/* returns the buffer asked for;
++ * give the app as many buffers as it wants, if it is less than MAX,
++ * but map them onto our inner buffers (index modulo used_buffers)
++ * called on VIDIOC_QUERYBUF
++ */
++static int vidioc_querybuf(struct file *file, void *fh, struct v4l2_buffer *b)
++{
++	enum v4l2_buf_type type;
++	int index;
++	struct v4l2_loopback_device *dev;
++	struct v4l2_loopback_opener *opener;
++
++	MARK();
++	/* remember type and index: *b is overwritten wholesale below */
++	type = b->type;
++	index = b->index;
++	dev = v4l2loopback_getdevice(file);
++	opener = fh_to_opener(fh);
++
++	if ((b->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) &&
++	    (b->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)) {
++		return -EINVAL;
++	}
++	if (b->index > max_buffers)
++		return -EINVAL;
++	/* NOTE(review): index == max_buffers passes; was '>=' intended? */
++	if (opener->timeout_image_io)
++		*b = dev->timeout_image_buffer.buffer;
++	else
++		*b = dev->buffers[b->index % dev->used_buffers].buffer;
++	/* hand the caller's own type and index back */
++	b->type = type;
++	b->index = index;
++	dprintkrw("buffer type: %d (of %d with size=%ld)\n", b->memory,
++		  dev->buffers_number, dev->buffer_size);
++
++	/*  Hopefully fix 'DQBUF return bad index if queue bigger then 2 for capture'
++            https://github.com/umlaeute/v4l2loopback/issues/60 */
++	b->flags &= ~V4L2_BUF_FLAG_DONE;
++	b->flags |= V4L2_BUF_FLAG_QUEUED;
++
++	return 0;
++}
++
++static void buffer_written(struct v4l2_loopback_device *dev,
++			   struct v4l2l_buffer *buf)
++{
++	del_timer_sync(&dev->sustain_timer);
++	del_timer_sync(&dev->timeout_timer);
++	/* the freshly written buffer goes to the tail of outbufs_list */
++	spin_lock_bh(&dev->list_lock);
++	list_move_tail(&buf->list_head, &dev->outbufs_list);
++	spin_unlock_bh(&dev->list_lock);
++	/* record which buffer holds this write position, then advance it */
++	spin_lock_bh(&dev->lock);
++	dev->bufpos2index[v4l2l_mod64(dev->write_position, dev->used_buffers)] =
++		buf->buffer.index;
++	++dev->write_position;
++	dev->reread_count = 0;
++
++	check_timers(dev);
++	spin_unlock_bh(&dev->lock);
++}
++
++/* put buffer to queue; for OUTPUT buffers this publishes the frame
++ * called on VIDIOC_QBUF
++ */
++static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
++{
++	struct v4l2_loopback_device *dev;
++	struct v4l2_loopback_opener *opener;
++	struct v4l2l_buffer *b;
++	int index;
++
++	dev = v4l2loopback_getdevice(file);
++	opener = fh_to_opener(fh);
++	/* NOTE(review): index == max_buffers passes; was '>=' intended? */
++	if (buf->index > max_buffers)
++		return -EINVAL;
++	if (opener->timeout_image_io)
++		return 0;
++	/* the app's index is folded onto the buffers actually in use */
++	index = buf->index % dev->used_buffers;
++	b = &dev->buffers[index];
++
++	switch (buf->type) {
++	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
++		dprintkrw(
++			"qbuf(CAPTURE)#%d: buffer#%d @ %p type=%d bytesused=%d length=%d flags=%x field=%d timestamp=%lld.%06ld sequence=%d\n",
++			index, buf->index, buf, buf->type, buf->bytesused,
++			buf->length, buf->flags, buf->field,
++			(long long)buf->timestamp.tv_sec,
++			(long int)buf->timestamp.tv_usec, buf->sequence);
++		set_queued(b);
++		return 0;
++	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
++		dprintkrw(
++			"qbuf(OUTPUT)#%d: buffer#%d @ %p type=%d bytesused=%d length=%d flags=%x field=%d timestamp=%lld.%06ld sequence=%d\n",
++			index, buf->index, buf, buf->type, buf->bytesused,
++			buf->length, buf->flags, buf->field,
++			(long long)buf->timestamp.tv_sec,
++			(long int)buf->timestamp.tv_usec, buf->sequence);
++		if ((!(b->buffer.flags & V4L2_BUF_FLAG_TIMESTAMP_COPY)) &&
++		    (buf->timestamp.tv_sec == 0 && buf->timestamp.tv_usec == 0))
++			v4l2l_get_timestamp(&b->buffer);
++		else {
++			b->buffer.timestamp = buf->timestamp;
++			b->buffer.flags |= V4L2_BUF_FLAG_TIMESTAMP_COPY;
++		}
++		if (dev->pix_format_has_valid_sizeimage) {
++			if (buf->bytesused >= dev->pix_format.sizeimage) {
++				b->buffer.bytesused = dev->pix_format.sizeimage;
++			} else {
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
++				dev_warn_ratelimited(
++					&dev->vdev->dev,
++#else
++				dprintkrw(
++#endif
++					"warning queued output buffer bytesused too small %d < %d\n",
++					buf->bytesused,
++					dev->pix_format.sizeimage);
++				b->buffer.bytesused = buf->bytesused;
++			}
++		} else {
++			b->buffer.bytesused = buf->bytesused;
++		}
++		/* mark the frame done, publish it, then wake the readers */
++		set_done(b);
++		buffer_written(dev, b);
++
++		/*  Hopefully fix 'DQBUF return bad index if queue bigger then 2 for capture'
++                    https://github.com/umlaeute/v4l2loopback/issues/60 */
++		buf->flags &= ~V4L2_BUF_FLAG_DONE;
++		buf->flags |= V4L2_BUF_FLAG_QUEUED;
++
++		wake_up_all(&dev->read_event);
++		return 0;
++	default:
++		return -EINVAL;
++	}
++}
++
++static int can_read(struct v4l2_loopback_device *dev,
++		    struct v4l2_loopback_opener *opener)
++{
++	int ret;
++	/* wait condition for readers; evaluated under dev->lock */
++	spin_lock_bh(&dev->lock);
++	check_timers(dev);
++	ret = dev->write_position > opener->read_position ||
++	      dev->reread_count > opener->reread_count || dev->timeout_happened;
++	spin_unlock_bh(&dev->lock);
++	return ret;
++}
++
++/* index of the buffer to read next (may block), or a negative errno */
++static int get_capture_buffer(struct file *file)
++{
++	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
++	struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data);
++	int pos, ret, timeout_happened;
++
++	if ((file->f_flags & O_NONBLOCK) && !dev->timeout_happened &&
++	    dev->write_position <= opener->read_position &&
++	    dev->reread_count <= opener->reread_count)
++		return -EAGAIN;
++	/* a signal must abort the read instead of handing out a stale frame */
++	ret = wait_event_interruptible(dev->read_event, can_read(dev, opener));
++	if (ret)
++		return ret;
++	spin_lock_bh(&dev->lock);
++	if (dev->write_position == opener->read_position) {
++		if (dev->reread_count > opener->reread_count + 2)
++			opener->reread_count = dev->reread_count - 1;
++		++opener->reread_count;
++		pos = v4l2l_mod64(opener->read_position + dev->used_buffers - 1,
++				  dev->used_buffers);
++	} else {
++		opener->reread_count = 0;
++		if (dev->write_position >
++		    opener->read_position + dev->used_buffers)
++			opener->read_position = dev->write_position - 1;
++		pos = v4l2l_mod64(opener->read_position, dev->used_buffers);
++		++opener->read_position;
++	}
++	timeout_happened = dev->timeout_happened;
++	dev->timeout_happened = 0;
++	spin_unlock_bh(&dev->lock);
++
++	ret = dev->bufpos2index[pos];
++	if (timeout_happened) {
++		if (ret < 0) {
++			dprintk("trying to return not mapped buf[%d]\n", ret);
++			return -EFAULT;
++		}
++		/* timeout_image is allocated on demand but freed only in
++		 * free_buffers(), so it cannot vanish under us here */
++		memcpy(dev->image + dev->buffers[ret].buffer.m.offset,
++		       dev->timeout_image, dev->buffer_size);
++	}
++	return ret;
++}
++
++/* dequeue a buffer, called on VIDIOC_DQBUF:
++ * CAPTURE takes its index from get_capture_buffer(), OUTPUT the outbufs_list tail
++ */
++static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
++{
++	struct v4l2_loopback_device *dev;
++	struct v4l2_loopback_opener *opener;
++	int index; /* only assigned and used by the CAPTURE case */
++	struct v4l2l_buffer *b;
++
++	dev = v4l2loopback_getdevice(file);
++	opener = fh_to_opener(fh);
++	if (opener->timeout_image_io) { /* this opener works on the timeout image */
++		*buf = dev->timeout_image_buffer.buffer;
++		return 0;
++	}
++
++	switch (buf->type) {
++	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
++		index = get_capture_buffer(file);
++		if (index < 0)
++			return index; /* negative errno from get_capture_buffer() */
++		dprintkrw("capture DQBUF pos: %lld index: %d\n",
++			  (long long)(opener->read_position - 1), index);
++		if (!(dev->buffers[index].buffer.flags &
++		      V4L2_BUF_FLAG_MAPPED)) {
++			dprintk("trying to return not mapped buf[%d]\n", index);
++			return -EINVAL;
++		}
++		unset_flags(&dev->buffers[index]);
++		*buf = dev->buffers[index].buffer;
++		dprintkrw(
++			"dqbuf(CAPTURE)#%d: buffer#%d @ %p type=%d bytesused=%d length=%d flags=%x field=%d timestamp=%lld.%06ld sequence=%d\n",
++			index, buf->index, buf, buf->type, buf->bytesused,
++			buf->length, buf->flags, buf->field,
++			(long long)buf->timestamp.tv_sec,
++			(long int)buf->timestamp.tv_usec, buf->sequence);
++		return 0;
++	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
++		spin_lock_bh(&dev->list_lock);
++
++		b = list_entry(dev->outbufs_list.prev, struct v4l2l_buffer,
++			       list_head);
++		list_move_tail(&b->list_head, &dev->outbufs_list);
++
++		spin_unlock_bh(&dev->list_lock);
++		dprintkrw("output DQBUF index: %d\n", b->buffer.index);
++		unset_flags(b);
++		*buf = b->buffer;
++		buf->type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
++		dprintkrw(
++			"dqbuf(OUTPUT)#%d: buffer#%d @ %p type=%d bytesused=%d length=%d flags=%x field=%d timestamp=%lld.%06ld sequence=%d\n",
++			b->buffer.index, buf->index, buf, buf->type, buf->bytesused,
++			buf->length, buf->flags, buf->field,
++			(long long)buf->timestamp.tv_sec,
++			(long int)buf->timestamp.tv_usec, buf->sequence);
++		return 0;
++	default:
++		return -EINVAL;
++	}
++}
++
++/* ------------- STREAMING ------------------- */
++
++/* start streaming, called on VIDIOC_STREAMON:
++ * OUTPUT marks the opener as WRITER, CAPTURE marks it as READER
++ */
++static int vidioc_streamon(struct file *file, void *fh, enum v4l2_buf_type type)
++{
++	struct v4l2_loopback_device *dev;
++	struct v4l2_loopback_opener *opener;
++	MARK();
++
++	dev = v4l2loopback_getdevice(file);
++	opener = fh_to_opener(fh);
++
++	switch (type) {
++	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
++		if (!dev->ready_for_capture) { /* first producer: get the buffers */
++			int ret = allocate_buffers(dev);
++			if (ret < 0)
++				return ret;
++		}
++		opener->type = WRITER;
++		dev->ready_for_output = 0;
++		dev->ready_for_capture++;
++		return 0;
++	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
++		if (!dev->ready_for_capture)
++			return -EIO; /* ready_for_capture is still zero */
++		if (dev->active_readers > 0)
++			return -EBUSY; /* a reader is already counted as active */
++		opener->type = READER;
++		dev->active_readers++;
++		client_usage_queue_event(dev->vdev); /* publish the new reader count */
++		return 0;
++	default:
++		return -EINVAL;
++	}
++	return -EINVAL; /* not reached: every switch branch returns */
++}
++
++/* stop streaming, called on VIDIOC_STREAMOFF:
++ * undoes the counter changes made by vidioc_streamon() for @type
++ */
++static int vidioc_streamoff(struct file *file, void *fh,
++			    enum v4l2_buf_type type)
++{
++	struct v4l2_loopback_device *dev;
++	struct v4l2_loopback_opener *opener;
++
++	MARK();
++	dprintk("%d\n", type);
++
++	dev = v4l2loopback_getdevice(file);
++	opener = fh_to_opener(fh);
++	switch (type) {
++	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
++		if (dev->ready_for_capture > 0) /* never drop below zero */
++			dev->ready_for_capture--;
++		return 0;
++	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
++		if (opener->type == READER) {
++			opener->type = 0; /* presumably UNNEGOTIATED -- TODO confirm enum value */
++			dev->active_readers--;
++			client_usage_queue_event(dev->vdev); /* publish the new reader count */
++		}
++		return 0;
++	default:
++		return -EINVAL;
++	}
++	return -EINVAL; /* not reached: every switch branch returns */
++}
++
++#ifdef CONFIG_VIDEO_V4L1_COMPAT /* only built when V4L1 compatibility is configured */
++static int vidiocgmbuf(struct file *file, void *fh, struct video_mbuf *p)
++{
++	struct v4l2_loopback_device *dev;
++	MARK();
++
++	dev = v4l2loopback_getdevice(file);
++	p->frames = dev->buffers_number; /* report the buffer count as frames */
++	p->offsets[0] = 0; /* only the first two offsets are filled, both zero */
++	p->offsets[1] = 0;
++	p->size = dev->buffer_size;
++	return 0;
++}
++#endif
++
++static void client_usage_queue_event(struct video_device *vdev)
++{
++	struct v4l2_event ev;
++	struct v4l2_loopback_device *dev;
++
++	dev = container_of(vdev->v4l2_dev, struct v4l2_loopback_device,
++			   v4l2_dev); /* v4l2_dev is embedded in the loopback device */
++
++	memset(&ev, 0, sizeof(ev));
++	ev.type = V4L2_EVENT_PRI_CLIENT_USAGE;
++	((struct v4l2_event_client_usage *)&ev.u)->count = dev->active_readers;
++	/* queue a client-usage event carrying the current active reader count */
++	v4l2_event_queue(vdev, &ev);
++}
++
++static int client_usage_ops_add(struct v4l2_subscribed_event *sev,
++				unsigned elems)
++{
++	/* only subscribers that asked for an initial event get one right away */
++	if (sev->flags & V4L2_EVENT_SUB_FL_SEND_INITIAL)
++		client_usage_queue_event(sev->fh->vdev);
++
++	return 0;
++}
++
++static void client_usage_ops_replace(struct v4l2_event *old,
++				     const struct v4l2_event *new)
++{
++	*((struct v4l2_event_client_usage *)&old->u) =
++		*((struct v4l2_event_client_usage *)&new->u); /* old takes new's payload */
++}
++
++static void client_usage_ops_merge(const struct v4l2_event *old,
++				   struct v4l2_event *new)
++{
++	*((struct v4l2_event_client_usage *)&new->u) =
++		*((struct v4l2_event_client_usage *)&old->u); /* new takes old's payload */
++}
++
++const struct v4l2_subscribed_event_ops client_usage_ops = { /* passed to v4l2_event_subscribe() */
++	.add = client_usage_ops_add, /* may send an initial event */
++	.replace = client_usage_ops_replace, /* copies new payload into old */
++	.merge = client_usage_ops_merge, /* copies old payload into new */
++};
++
++static int vidioc_subscribe_event(struct v4l2_fh *fh,
++				  const struct v4l2_event_subscription *sub)
++{
++	/* control events go to the ctrl framework, client-usage to our ops */
++	if (sub->type == V4L2_EVENT_CTRL)
++		return v4l2_ctrl_subscribe_event(fh, sub);
++	if (sub->type == V4L2_EVENT_PRI_CLIENT_USAGE)
++		return v4l2_event_subscribe(fh, sub, 0, &client_usage_ops);
++
++	/* every other event type is rejected */
++	return -EINVAL;
++}
++
++/* file operations */
++static void vm_open(struct vm_area_struct *vma)
++{
++	struct v4l2l_buffer *mapped;
++	MARK();
++
++	/* one more mapping references this buffer: count it and flag it */
++	mapped = vma->vm_private_data;
++	mapped->use_count++;
++	mapped->buffer.flags |= V4L2_BUF_FLAG_MAPPED;
++}
++
++static void vm_close(struct vm_area_struct *vma)
++{
++	struct v4l2l_buffer *mapped;
++	MARK();
++
++	mapped = vma->vm_private_data;
++
++	/* drop one mapping; once none is left the MAPPED flag is cleared */
++	if (--mapped->use_count <= 0)
++		mapped->buffer.flags &= ~V4L2_BUF_FLAG_MAPPED;
++}
++
++static struct vm_operations_struct vm_ops = { /* installed on each vma by v4l2_loopback_mmap() */
++	.open = vm_open, /* counts a mapping of the buffer */
++	.close = vm_close, /* uncounts it */
++};
++
++static int v4l2_loopback_mmap(struct file *file, struct vm_area_struct *vma)
++{
++	u8 *addr; /* kernel address of the page currently being inserted */
++	unsigned long start; /* user address the next page is inserted at */
++	unsigned long size; /* bytes of the vma still to be populated */
++	struct v4l2_loopback_device *dev;
++	struct v4l2_loopback_opener *opener;
++	struct v4l2l_buffer *buffer = NULL;
++	MARK();
++
++	start = (unsigned long)vma->vm_start;
++	size = (unsigned long)(vma->vm_end - vma->vm_start);
++
++	dev = v4l2loopback_getdevice(file);
++	opener = fh_to_opener(file->private_data);
++	/* a single mapping may cover at most one buffer */
++	if (size > dev->buffer_size) {
++		dprintk("userspace tries to mmap too much, fail\n");
++		return -EINVAL;
++	}
++	if (opener->timeout_image_io) {
++		/* we are going to map the timeout_image_buffer */
++		if ((vma->vm_pgoff << PAGE_SHIFT) !=
++		    dev->buffer_size * MAX_BUFFERS) { /* same offset init_buffers() assigns */
++			dprintk("invalid mmap offset for timeout_image_io mode\n");
++			return -EINVAL;
++		}
++	} else if ((vma->vm_pgoff << PAGE_SHIFT) >
++		   dev->buffer_size * (dev->buffers_number - 1)) { /* beyond the last buffer's start */
++		dprintk("userspace tries to mmap too far, fail\n");
++		return -EINVAL;
++	}
++
++	/* FIXXXXXME: allocation should not happen here! */
++	if (NULL == dev->image)
++		if (allocate_buffers(dev) < 0)
++			return -EINVAL;
++
++	if (opener->timeout_image_io) {
++		buffer = &dev->timeout_image_buffer;
++		addr = dev->timeout_image;
++	} else {
++		int i;
++		for (i = 0; i < dev->buffers_number; ++i) { /* find the buffer starting at vm_pgoff */
++			buffer = &dev->buffers[i];
++			if ((buffer->buffer.m.offset >> PAGE_SHIFT) ==
++			    vma->vm_pgoff)
++				break;
++		}
++
++		if (i >= dev->buffers_number)
++			return -EINVAL; /* offset is not the start of any buffer */
++
++		addr = dev->image + (vma->vm_pgoff << PAGE_SHIFT);
++	}
++	/* insert the backing pages into the vma one page at a time */
++	while (size > 0) {
++		struct page *page;
++
++		page = vmalloc_to_page(addr);
++
++		if (vm_insert_page(vma, start, page) < 0)
++			return -EAGAIN;
++
++		start += PAGE_SIZE;
++		addr += PAGE_SIZE;
++		size -= PAGE_SIZE;
++	}
++
++	vma->vm_ops = &vm_ops;
++	vma->vm_private_data = buffer;
++	/* count this first mapping by calling the open handler directly */
++	vm_open(vma);
++
++	MARK();
++	return 0;
++}
++
++static unsigned int v4l2_loopback_poll(struct file *file,
++				       struct poll_table_struct *pts)
++{
++	struct v4l2_loopback_opener *opener;
++	struct v4l2_loopback_device *dev;
++	__poll_t req_events = poll_requested_events(pts);
++	int ret_mask = 0; /* poll bits reported back to the caller */
++	MARK();
++
++	opener = fh_to_opener(file->private_data);
++	dev = v4l2loopback_getdevice(file);
++	/* POLLPRI: report (or wait for) pending V4L2 events on this handle */
++	if (req_events & POLLPRI) {
++		if (!v4l2_event_pending(&opener->fh))
++			poll_wait(file, &opener->fh.wait, pts);
++		if (v4l2_event_pending(&opener->fh)) {
++			ret_mask |= POLLPRI;
++			if (!(req_events & DEFAULT_POLLMASK))
++				return ret_mask; /* caller asked for nothing beyond events */
++		}
++	}
++
++	switch (opener->type) {
++	case WRITER:
++		ret_mask |= POLLOUT | POLLWRNORM; /* a writer is always reported writable */
++		break;
++	case READER:
++		if (!can_read(dev, opener)) {
++			if (ret_mask)
++				return ret_mask; /* already have an event to report */
++			poll_wait(file, &dev->read_event, pts);
++		}
++		if (can_read(dev, opener))
++			ret_mask |= POLLIN | POLLRDNORM;
++		if (v4l2_event_pending(&opener->fh))
++			ret_mask |= POLLPRI;
++		break;
++	default:
++		break; /* other opener types get no read/write bits */
++	}
++
++	MARK();
++	return ret_mask;
++}
++
++/* open a device node: refuses with -EBUSY once open_count reaches max_openers,
++ * otherwise allocates the per-handle opener state and registers its v4l2_fh */
++static int v4l2_loopback_open(struct file *file)
++{
++	struct v4l2_loopback_device *dev;
++	struct v4l2_loopback_opener *opener;
++	MARK();
++	dev = v4l2loopback_getdevice(file);
++	if (dev->open_count.counter >= dev->max_openers)
++		return -EBUSY;
++	/* kfree on close */
++	opener = kzalloc(sizeof(*opener), GFP_KERNEL);
++	if (opener == NULL)
++		return -ENOMEM;
++
++	atomic_inc(&dev->open_count);
++	/* the opener inherits the device's current timeout_image_io mode */
++	opener->timeout_image_io = dev->timeout_image_io;
++	if (opener->timeout_image_io) {
++		int r = allocate_timeout_image(dev);
++
++		if (r < 0) {
++			dprintk("timeout image allocation failed\n");
++			/* undo the open_count increment made above */
++			atomic_dec(&dev->open_count);
++
++			kfree(opener);
++			return r;
++		}
++	}
++
++	v4l2_fh_init(&opener->fh, video_devdata(file));
++	file->private_data = &opener->fh; /* fh_to_opener() is applied to this later */
++
++	v4l2_fh_add(&opener->fh);
++	dprintk("opened dev:%p with image:%p\n", dev, dev ? dev->image : NULL);
++	MARK();
++	return 0;
++}
++
++static int v4l2_loopback_close(struct file *file)
++{
++	struct v4l2_loopback_opener *opener;
++	struct v4l2_loopback_device *dev;
++	int is_writer = 0, is_reader = 0; /* role is latched before opener is freed */
++	MARK();
++
++	opener = fh_to_opener(file->private_data);
++	dev = v4l2loopback_getdevice(file);
++
++	if (WRITER == opener->type)
++		is_writer = 1;
++	if (READER == opener->type)
++		is_reader = 1;
++
++	atomic_dec(&dev->open_count);
++	if (dev->open_count.counter == 0) { /* last handle closed: stop both timers */
++		del_timer_sync(&dev->sustain_timer);
++		del_timer_sync(&dev->timeout_timer);
++	}
++	try_free_buffers(dev); /* frees only if unused and keep_format is off */
++
++	v4l2_fh_del(&opener->fh);
++	v4l2_fh_exit(&opener->fh);
++
++	kfree(opener);
++	if (is_writer)
++		dev->ready_for_output = 1; /* a new writer may attach again */
++	if (is_reader) {
++		dev->active_readers--;
++		client_usage_queue_event(dev->vdev); /* publish the new reader count */
++	}
++	MARK();
++	return 0;
++}
++
++static ssize_t v4l2_loopback_read(struct file *file, char __user *buf,
++				  size_t count, loff_t *ppos)
++{
++	int read_index; /* index into dev->buffers, or a negative errno */
++	struct v4l2_loopback_device *dev;
++	struct v4l2_buffer *b;
++	MARK();
++
++	dev = v4l2loopback_getdevice(file);
++	/* pick the buffer this reader gets next (see get_capture_buffer()) */
++	read_index = get_capture_buffer(file);
++	if (read_index < 0)
++		return read_index;
++	if (count > dev->buffer_size)
++		count = dev->buffer_size;
++	b = &dev->buffers[read_index].buffer;
++	if (count > b->bytesused)
++		count = b->bytesused; /* never copy more than the frame holds */
++	if (copy_to_user((void *)buf, (void *)(dev->image + b->m.offset),
++			 count)) {
++		printk(KERN_ERR
++		       "v4l2-loopback: failed copy_to_user() in read buf\n");
++		return -EFAULT;
++	}
++	dprintkrw("leave v4l2_loopback_read()\n");
++	return count; /* number of bytes handed to userspace */
++}
++
++static ssize_t v4l2_loopback_write(struct file *file, const char __user *buf,
++				   size_t count, loff_t *ppos)
++{
++	struct v4l2_loopback_opener *opener;
++	struct v4l2_loopback_device *dev;
++	int write_index; /* slot in dev->buffers that receives this frame */
++	struct v4l2_buffer *b;
++	int err = 0;
++
++	MARK();
++
++	dev = v4l2loopback_getdevice(file);
++	opener = fh_to_opener(file->private_data);
++	/* a handle that never negotiated a role becomes the writer on first write */
++	if (UNNEGOTIATED == opener->type) {
++		spin_lock(&dev->lock);
++		/* NOTE(review): streamon can reach allocate_buffers()/vmalloc() under this lock -- confirm */
++		if (dev->ready_for_output) {
++			err = vidioc_streamon(file, file->private_data,
++					      V4L2_BUF_TYPE_VIDEO_OUTPUT);
++		}
++
++		spin_unlock(&dev->lock);
++
++		if (err < 0)
++			return err;
++	}
++
++	if (WRITER != opener->type)
++		return -EINVAL; /* readers (or a failed negotiation) cannot write */
++
++	if (!dev->ready_for_capture) {
++		int ret = allocate_buffers(dev);
++		if (ret < 0)
++			return ret;
++		dev->ready_for_capture = 1;
++	}
++	dprintkrw("v4l2_loopback_write() trying to write %zu bytes\n", count);
++	if (count > dev->buffer_size)
++		count = dev->buffer_size; /* excess bytes are silently dropped */
++
++	write_index = v4l2l_mod64(dev->write_position, dev->used_buffers);
++	b = &dev->buffers[write_index].buffer;
++
++	if (copy_from_user((void *)(dev->image + b->m.offset), (void *)buf,
++			   count)) {
++		printk(KERN_ERR
++		       "v4l2-loopback: failed copy_from_user() in write buf, could not write %zu\n",
++		       count);
++		return -EFAULT;
++	}
++	v4l2l_get_timestamp(b);
++	b->bytesused = count;
++	b->sequence = dev->write_position;
++	buffer_written(dev, &dev->buffers[write_index]);
++	wake_up_all(&dev->read_event); /* wake readers sleeping on read_event */
++	dprintkrw("leave v4l2_loopback_write()\n");
++	return count;
++}
++
++/* init functions */
++/* frees the frame memory and the timeout image, if allocated; NULL @dev is ignored */
++static void free_buffers(struct v4l2_loopback_device *dev)
++{
++	MARK();
++	dprintk("freeing image@%p for dev:%p\n", dev ? dev->image : NULL, dev);
++	if (!dev)
++		return;
++	if (dev->image) {
++		vfree(dev->image);
++		dev->image = NULL; /* so a later call does not free it twice */
++	}
++	if (dev->timeout_image) {
++		vfree(dev->timeout_image);
++		dev->timeout_image = NULL;
++	}
++	dev->imagesize = 0;
++}
++/* drops the buffers and resets the stream state once nobody has the device open */
++static void try_free_buffers(struct v4l2_loopback_device *dev)
++{
++	MARK();
++	if (dev->open_count.counter != 0 || dev->keep_format)
++		return; /* still in use, or the format must be kept */
++	free_buffers(dev);
++	dev->ready_for_capture = 0;
++	dev->buffer_size = 0;
++	dev->write_position = 0;
++}
++/* allocates zeroed frame memory for all buffers, if buffer_size is set; 0 or -errno */
++static int allocate_buffers(struct v4l2_loopback_device *dev)
++{
++	int err;
++
++	MARK();
++	/* vfree on close file operation in case no open handles left */
++
++	if (dev->buffer_size < 1 || dev->buffers_number < 1)
++		return -EINVAL;
++
++	if ((__LONG_MAX__ / dev->buffer_size) < dev->buffers_number)
++		return -ENOSPC; /* buffer_size * buffers_number would overflow */
++
++	if (dev->image) {
++		dprintk("allocating buffers again: %ld %ld\n",
++			dev->buffer_size * dev->buffers_number, dev->imagesize);
++		/* FIXME: prevent double allocation more intelligently! */
++		if (dev->buffer_size * dev->buffers_number == dev->imagesize)
++			return 0;
++
++		/* check whether the total number of readers/writers is <=1 */
++		if ((dev->ready_for_capture + dev->active_readers) <= 1)
++			free_buffers(dev);
++		else
++			return -EINVAL;
++	}
++
++	dev->imagesize = (unsigned long)dev->buffer_size *
++			 (unsigned long)dev->buffers_number;
++
++	dprintk("allocating %ld = %ldx%d\n", dev->imagesize, dev->buffer_size,
++		dev->buffers_number);
++	err = -ENOMEM;
++
++	if (dev->timeout_jiffies > 0) {
++		err = allocate_timeout_image(dev);
++		if (err < 0)
++			goto error;
++	}
++	/* zeroed like the timeout image: these pages are mmap()ed/read() by userspace */
++	dev->image = vzalloc(dev->imagesize);
++	if (dev->image == NULL)
++		goto error;
++
++	dprintk("vmallocated %ld bytes\n", dev->imagesize);
++	MARK();
++
++	init_buffers(dev);
++	return 0;
++
++error:
++	free_buffers(dev); /* also releases a timeout image allocated above */
++	return err;
++}
++
++/* init inner buffers: they are set up as capture-mode MMAP buffers and
++ * the timeout image buffer is derived from buffer 0 */
++static void init_buffers(struct v4l2_loopback_device *dev)
++{
++	int i;
++	int buffer_size;
++	int bytesused;
++	MARK();
++
++	buffer_size = dev->buffer_size;
++	bytesused = dev->pix_format.sizeimage;
++	for (i = 0; i < dev->buffers_number; ++i) {
++		struct v4l2_buffer *b = &dev->buffers[i].buffer;
++		b->index = i;
++		b->bytesused = bytesused;
++		b->length = buffer_size;
++		b->field = V4L2_FIELD_NONE;
++		b->flags = 0;
++		b->m.offset = i * buffer_size; /* buffers lie back to back in dev->image */
++		b->memory = V4L2_MEMORY_MMAP;
++		b->sequence = 0;
++		b->timestamp.tv_sec = 0;
++		b->timestamp.tv_usec = 0;
++		b->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
++
++		v4l2l_get_timestamp(b);
++	}
++	dev->timeout_image_buffer = dev->buffers[0];
++	dev->timeout_image_buffer.buffer.m.offset = MAX_BUFFERS * buffer_size; /* offset v4l2_loopback_mmap() expects */
++	MARK();
++}
++
++static int allocate_timeout_image(struct v4l2_loopback_device *dev)
++{
++	MARK();
++	if (dev->buffer_size <= 0) { /* no buffer size known yet: nothing to size it by */
++		dev->timeout_image_io = 0;
++		return -EINVAL;
++	}
++
++	if (dev->timeout_image == NULL) { /* allocate only once, keep an existing image */
++		dev->timeout_image = vzalloc(dev->buffer_size);
++		if (dev->timeout_image == NULL) {
++			dev->timeout_image_io = 0;
++			return -ENOMEM;
++		}
++	}
++	return 0;
++}
++
++/* fills in the video_device fields; it does not register the device, @nr is unused */
++static void init_vdev(struct video_device *vdev, int nr)
++{
++	MARK();
++
++#ifdef V4L2LOOPBACK_WITH_STD
++	vdev->tvnorms = V4L2_STD_ALL;
++#endif /* V4L2LOOPBACK_WITH_STD */
++
++	vdev->vfl_type = VFL_TYPE_VIDEO;
++	vdev->fops = &v4l2_loopback_fops;
++	vdev->ioctl_ops = &v4l2_loopback_ioctl_ops;
++	vdev->release = &video_device_release;
++	vdev->minor = -1;
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
++	vdev->device_caps = V4L2_CAP_DEVICE_CAPS | V4L2_CAP_VIDEO_CAPTURE |
++			    V4L2_CAP_VIDEO_OUTPUT | V4L2_CAP_READWRITE |
++			    V4L2_CAP_STREAMING;
++#endif
++
++	if (debug > 1) /* module debug level above 1 also enables ioctl debugging */
++		vdev->dev_debug = V4L2_DEV_DEBUG_IOCTL |
++				  V4L2_DEV_DEBUG_IOCTL_ARG;
++
++	vdev->vfl_dir = VFL_DIR_M2M;
++
++	MARK();
++}
++
++/* init default capture parameters (1/30 s per frame), only fps may be changed in future */
++static void init_capture_param(struct v4l2_captureparm *capture_param)
++{
++	MARK();
++	capture_param->capability = 0;
++	capture_param->capturemode = 0;
++	capture_param->extendedmode = 0;
++	capture_param->readbuffers = max_buffers; /* taken from the max_buffers variable in file scope */
++	capture_param->timeperframe.numerator = 1;
++	capture_param->timeperframe.denominator = 30;
++}
++
++static void check_timers(struct v4l2_loopback_device *dev)
++{
++	if (!dev->ready_for_capture)
++		return; /* timers are only armed while ready_for_capture is set */
++
++	if (dev->timeout_jiffies > 0 && !timer_pending(&dev->timeout_timer))
++		mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies);
++	if (dev->sustain_framerate && !timer_pending(&dev->sustain_timer))
++		mod_timer(&dev->sustain_timer,
++			  jiffies + dev->frame_jiffies * 3 / 2); /* 1.5 frame periods from now */
++}
++#ifdef HAVE_TIMER_SETUP /* timer_list based callback signature */
++static void sustain_timer_clb(struct timer_list *t)
++{
++	struct v4l2_loopback_device *dev = from_timer(dev, t, sustain_timer);
++#else
++static void sustain_timer_clb(unsigned long nr)
++{
++	struct v4l2_loopback_device *dev =
++		idr_find(&v4l2loopback_index_idr, nr); /* device looked up by its number */
++#endif
++	spin_lock(&dev->lock);
++	if (dev->sustain_framerate) {
++		dev->reread_count++; /* can_read() compares this with the opener's count */
++		dprintkrw("reread: %lld %d\n", (long long)dev->write_position,
++			  dev->reread_count);
++		if (dev->reread_count == 1)
++			mod_timer(&dev->sustain_timer,
++				  jiffies + max(1UL, dev->frame_jiffies / 2));
++		else
++			mod_timer(&dev->sustain_timer,
++				  jiffies + dev->frame_jiffies);
++		wake_up_all(&dev->read_event);
++	}
++	spin_unlock(&dev->lock);
++}
++#ifdef HAVE_TIMER_SETUP /* timer_list based callback signature */
++static void timeout_timer_clb(struct timer_list *t)
++{
++	struct v4l2_loopback_device *dev = from_timer(dev, t, timeout_timer);
++#else
++static void timeout_timer_clb(unsigned long nr)
++{
++	struct v4l2_loopback_device *dev =
++		idr_find(&v4l2loopback_index_idr, nr); /* device looked up by its number */
++#endif
++	spin_lock(&dev->lock);
++	if (dev->timeout_jiffies > 0) {
++		dev->timeout_happened = 1; /* get_capture_buffer() copies the timeout image when set */
++		mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies);
++		wake_up_all(&dev->read_event);
++	}
++	spin_unlock(&dev->lock);
++}
++
++/* init loopback main structure; helper: conf->confmember, or default_value if conf is NULL or the member matches default_condition */
++#define DEFAULT_FROM_CONF(confmember, default_condition, default_value)        \
++	((conf) ?                                                              \
++		 ((conf->confmember default_condition) ? (default_value) :     \
++							 (conf->confmember)) : \
++		 default_value)
++
++static int v4l2_loopback_add(struct v4l2_loopback_config *conf, int *ret_nr)
++{
++	struct v4l2_loopback_device *dev;
++	struct v4l2_ctrl_handler *hdl;
++	struct v4l2loopback_private *vdev_priv = NULL;
++
++	int err = -ENOMEM;
++
++	u32 _width = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH;
++	u32 _height = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT;
++
++	u32 _min_width = DEFAULT_FROM_CONF(min_width,
++					   < V4L2LOOPBACK_SIZE_MIN_WIDTH,
++					   V4L2LOOPBACK_SIZE_MIN_WIDTH);
++	u32 _min_height = DEFAULT_FROM_CONF(min_height,
++					    < V4L2LOOPBACK_SIZE_MIN_HEIGHT,
++					    V4L2LOOPBACK_SIZE_MIN_HEIGHT);
++	u32 _max_width = DEFAULT_FROM_CONF(max_width, < _min_width, max_width);
++	u32 _max_height =
++		DEFAULT_FROM_CONF(max_height, < _min_height, max_height);
++	bool _announce_all_caps = (conf && conf->announce_all_caps >= 0) ?
++					  (conf->announce_all_caps) :
++					  V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS;
++	int _max_buffers = DEFAULT_FROM_CONF(max_buffers, <= 0, max_buffers);
++	int _max_openers = DEFAULT_FROM_CONF(max_openers, <= 0, max_openers);
++
++	int nr = -1; /* requested device number, -1 lets the idr pick one */
++
++	_announce_all_caps = (!!_announce_all_caps);
++
++	if (conf) {
++		const int output_nr = conf->output_nr;
++#ifdef SPLIT_DEVICES
++		const int capture_nr = conf->capture_nr;
++#else
++		const int capture_nr = output_nr;
++#endif
++		if (capture_nr >= 0 && output_nr == capture_nr) {
++			nr = output_nr;
++		} else if (capture_nr < 0 && output_nr < 0) {
++			nr = -1;
++		} else if (capture_nr < 0) {
++			nr = output_nr;
++		} else if (output_nr < 0) {
++			nr = capture_nr;
++		} else {
++			printk(KERN_ERR
++			       "split OUTPUT and CAPTURE devices not yet supported.");
++			printk(KERN_INFO
++			       "both devices must have the same number (%d != %d).",
++			       output_nr, capture_nr);
++			return -EINVAL;
++		}
++	}
++
++	if (idr_find(&v4l2loopback_index_idr, nr))
++		return -EEXIST;
++
++	dprintk("creating v4l2loopback-device #%d\n", nr);
++	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
++	if (!dev)
++		return -ENOMEM;
++
++	/* allocate id, if @nr >= 0, we're requesting that specific id */
++	if (nr >= 0) {
++		err = idr_alloc(&v4l2loopback_index_idr, dev, nr, nr + 1,
++				GFP_KERNEL);
++		if (err == -ENOSPC)
++			err = -EEXIST;
++	} else {
++		err = idr_alloc(&v4l2loopback_index_idr, dev, 0, 0, GFP_KERNEL);
++	}
++	if (err < 0)
++		goto out_free_dev;
++	nr = err;
++	err = -ENOMEM;
++
++	if (conf && conf->card_label[0]) {
++		/* bounded read: the label comes from userspace and may lack a NUL */
++		snprintf(dev->card_label, sizeof(dev->card_label), "%.*s",
++			 (int)sizeof(conf->card_label), conf->card_label);
++	} else {
++		snprintf(dev->card_label, sizeof(dev->card_label),
++			 "Dummy video device (0x%04X)", nr);
++	}
++	snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name),
++		 "v4l2loopback-%03d", nr);
++
++	err = v4l2_device_register(NULL, &dev->v4l2_dev);
++	if (err)
++		goto out_free_idr;
++	MARK();
++
++	dev->vdev = video_device_alloc();
++	if (dev->vdev == NULL) {
++		err = -ENOMEM;
++		goto out_unregister;
++	}
++
++	vdev_priv = kzalloc(sizeof(struct v4l2loopback_private), GFP_KERNEL);
++	if (vdev_priv == NULL) {
++		err = -ENOMEM;
++		goto out_unregister;
++	}
++
++	video_set_drvdata(dev->vdev, vdev_priv);
++	if (video_get_drvdata(dev->vdev) == NULL) {
++		err = -ENOMEM;
++		goto out_unregister;
++	}
++
++	MARK();
++	snprintf(dev->vdev->name, sizeof(dev->vdev->name), "%s",
++		 dev->card_label);
++
++	vdev_priv->device_nr = nr;
++
++	init_vdev(dev->vdev, nr);
++	dev->vdev->v4l2_dev = &dev->v4l2_dev;
++	init_capture_param(&dev->capture_param);
++	err = set_timeperframe(dev, &dev->capture_param.timeperframe);
++	if (err)
++		goto out_unregister;
++	dev->keep_format = 0;
++	dev->sustain_framerate = 0;
++
++	dev->announce_all_caps = _announce_all_caps;
++	dev->min_width = _min_width;
++	dev->min_height = _min_height;
++	dev->max_width = _max_width;
++	dev->max_height = _max_height;
++	dev->max_openers = _max_openers;
++	dev->buffers_number = dev->used_buffers = _max_buffers;
++
++	dev->write_position = 0;
++
++	MARK();
++	spin_lock_init(&dev->lock);
++	spin_lock_init(&dev->list_lock);
++	INIT_LIST_HEAD(&dev->outbufs_list);
++	if (list_empty(&dev->outbufs_list)) {
++		int i;
++
++		for (i = 0; i < dev->used_buffers; ++i)
++			list_add_tail(&dev->buffers[i].list_head,
++				      &dev->outbufs_list);
++	}
++	memset(dev->bufpos2index, 0, sizeof(dev->bufpos2index));
++	atomic_set(&dev->open_count, 0);
++	dev->ready_for_capture = 0;
++	dev->ready_for_output = 1;
++
++	dev->buffer_size = 0;
++	dev->image = NULL;
++	dev->imagesize = 0;
++#ifdef HAVE_TIMER_SETUP
++	timer_setup(&dev->sustain_timer, sustain_timer_clb, 0);
++	timer_setup(&dev->timeout_timer, timeout_timer_clb, 0);
++#else
++	setup_timer(&dev->sustain_timer, sustain_timer_clb, nr);
++	setup_timer(&dev->timeout_timer, timeout_timer_clb, nr);
++#endif
++	dev->reread_count = 0;
++	dev->timeout_jiffies = 0;
++	dev->timeout_image = NULL;
++	dev->timeout_happened = 0;
++
++	hdl = &dev->ctrl_handler;
++	err = v4l2_ctrl_handler_init(hdl, 4);
++	if (err)
++		goto out_unregister;
++	v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_keepformat, NULL);
++	v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_sustainframerate, NULL);
++	v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeout, NULL);
++	v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeoutimageio, NULL);
++	if (hdl->error) {
++		err = hdl->error;
++		goto out_free_handler;
++	}
++	dev->v4l2_dev.ctrl_handler = hdl;
++
++	err = v4l2_ctrl_handler_setup(hdl);
++	if (err)
++		goto out_free_handler;
++
++	/* FIXME set buffers to 0 */
++
++	/* Set initial format */
++	if (_width < _min_width)
++		_width = _min_width;
++	if (_width > _max_width)
++		_width = _max_width;
++	if (_height < _min_height)
++		_height = _min_height;
++	if (_height > _max_height)
++		_height = _max_height;
++
++	dev->pix_format.width = _width;
++	dev->pix_format.height = _height;
++	dev->pix_format.pixelformat = formats[0].fourcc;
++	dev->pix_format.colorspace =
++		V4L2_COLORSPACE_DEFAULT; /* do we need to set this ? */
++	dev->pix_format.field = V4L2_FIELD_NONE;
++
++	dev->buffer_size = PAGE_ALIGN(dev->pix_format.sizeimage);
++	dprintk("buffer_size = %ld (=%d)\n", dev->buffer_size,
++		dev->pix_format.sizeimage);
++
++	if (dev->buffer_size && ((err = allocate_buffers(dev)) < 0))
++		goto out_free_handler;
++
++	init_waitqueue_head(&dev->read_event);
++
++	/* register the device -> it creates /dev/video* */
++	if (video_register_device(dev->vdev, VFL_TYPE_VIDEO, nr) < 0) {
++		printk(KERN_ERR
++		       "v4l2loopback: failed video_register_device()\n");
++		err = -EFAULT;
++		goto out_free_device;
++	}
++	v4l2loopback_create_sysfs(dev->vdev);
++
++	MARK();
++	if (ret_nr)
++		*ret_nr = dev->vdev->num;
++	return 0;
++
++out_free_device:
++	free_buffers(dev); /* drop the image allocated before registration failed */
++out_free_handler:
++	v4l2_ctrl_handler_free(&dev->ctrl_handler);
++out_unregister:
++	kfree(vdev_priv); /* kfree(NULL) is a no-op */
++	if (dev->vdev) /* NULL when video_device_alloc() itself failed */
++		video_device_release(dev->vdev);
++	v4l2_device_unregister(&dev->v4l2_dev);
++out_free_idr:
++	idr_remove(&v4l2loopback_index_idr, nr);
++out_free_dev:
++	kfree(dev);
++	return err;
++}
++
++static void v4l2_loopback_remove(struct v4l2_loopback_device *dev)
++{
++	free_buffers(dev); /* frame memory and timeout image */
++	v4l2loopback_remove_sysfs(dev->vdev);
++	kfree(video_get_drvdata(dev->vdev)); /* the v4l2loopback_private set in v4l2_loopback_add() */
++	video_unregister_device(dev->vdev);
++	v4l2_device_unregister(&dev->v4l2_dev);
++	v4l2_ctrl_handler_free(&dev->ctrl_handler);
++	kfree(dev); /* the caller removes the idr entry itself */
++}
++
++static long v4l2loopback_control_ioctl(struct file *file, unsigned int cmd,
++				       unsigned long parm)
++{
++	struct v4l2_loopback_device *dev;
++	struct v4l2_loopback_config conf;
++	struct v4l2_loopback_config *confptr = &conf;
++	int device_nr, capture_nr, output_nr;
++	int ret;
++
++	ret = mutex_lock_killable(&v4l2loopback_ctl_mutex); /* one control request at a time */
++	if (ret)
++		return ret;
++
++	ret = -EINVAL;
++	switch (cmd) {
++	default:
++		ret = -ENOSYS;
++		break;
++		/* add a v4l2loopback device (pair), based on the user-provided specs */
++	case V4L2LOOPBACK_CTL_ADD:
++		if (parm) {
++			ret = -EFAULT; /* copy_from_user() returns bytes left, never < 0 */
++			if (copy_from_user(&conf, (void *)parm, sizeof(conf)))
++				break;
++		} else
++			confptr = NULL;
++		ret = v4l2_loopback_add(confptr, &device_nr);
++		if (ret >= 0)
++			ret = device_nr;
++		break;
++		/* remove a v4l2loopback device (both capture and output) */
++	case V4L2LOOPBACK_CTL_REMOVE:
++		ret = v4l2loopback_lookup((int)parm, &dev);
++		if (ret >= 0 && dev) {
++			int nr = ret;
++			ret = -EBUSY;
++			if (dev->open_count.counter > 0)
++				break;
++			idr_remove(&v4l2loopback_index_idr, nr);
++			v4l2_loopback_remove(dev);
++			ret = 0;
++		}
++		break;
++		/* get information for a loopback device.
++		 * this is mostly about limits (which cannot be queried directly with VIDIOC_G_FMT and friends)
++		 */
++	case V4L2LOOPBACK_CTL_QUERY:
++		if (!parm)
++			break;
++		ret = -EFAULT; /* copy_from_user() returns bytes left, never < 0 */
++		if (copy_from_user(&conf, (void *)parm, sizeof(conf)))
++			break;
++		capture_nr = output_nr = conf.output_nr;
++#ifdef SPLIT_DEVICES
++		capture_nr = conf.capture_nr;
++#endif
++		device_nr = (output_nr < 0) ? capture_nr : output_nr;
++		MARK();
++		/* get the device from either capture_nr or output_nr (whatever is valid) */
++		if ((ret = v4l2loopback_lookup(device_nr, &dev)) < 0)
++			break;
++		MARK();
++		/* if we got the device from output_nr and there is a valid capture_nr,
++		 * make sure that both refer to the same device (or bail out)
++		 */
++		if ((device_nr != capture_nr) && (capture_nr >= 0) &&
++		    ((ret = v4l2loopback_lookup(capture_nr, 0)) < 0))
++			break;
++		MARK();
++		/* if otoh, we got the device from capture_nr and there is a valid output_nr,
++		 * make sure that both refer to the same device (or bail out)
++		 */
++		if ((device_nr != output_nr) && (output_nr >= 0) &&
++		    ((ret = v4l2loopback_lookup(output_nr, 0)) < 0))
++			break;
++		MARK();
++
++		/* v4l2_loopback_config identified a single device, so fetch the data */
++		snprintf(conf.card_label, sizeof(conf.card_label), "%s",
++			 dev->card_label);
++		MARK();
++		conf.output_nr = dev->vdev->num;
++#ifdef SPLIT_DEVICES
++		conf.capture_nr = dev->vdev->num;
++#endif
++		conf.min_width = dev->min_width;
++		conf.min_height = dev->min_height;
++		conf.max_width = dev->max_width;
++		conf.max_height = dev->max_height;
++		conf.announce_all_caps = dev->announce_all_caps;
++		conf.max_buffers = dev->buffers_number;
++		conf.max_openers = dev->max_openers;
++		conf.debug = debug;
++		MARK();
++		if (copy_to_user((void *)parm, &conf, sizeof(conf))) {
++			ret = -EFAULT;
++			break;
++		}
++		MARK();
++		/* the filled-in config reached userspace */
++		ret = 0;
++		break;
++	}
++
++	MARK();
++	mutex_unlock(&v4l2loopback_ctl_mutex);
++	MARK();
++	return ret;
++}
++
++/* LINUX KERNEL */
++
++static const struct file_operations v4l2loopback_ctl_fops = {
++	// clang-format off
++	.owner		= THIS_MODULE,
++	.open		= nonseekable_open,
++	.unlocked_ioctl	= v4l2loopback_control_ioctl,
++	.compat_ioctl	= v4l2loopback_control_ioctl,
++	.llseek		= noop_llseek,
++	// clang-format on
++};
++
++static struct miscdevice v4l2loopback_misc = {
++	// clang-format off
++	.minor		= MISC_DYNAMIC_MINOR,
++	.name		= "v4l2loopback",
++	.fops		= &v4l2loopback_ctl_fops,
++	// clang-format on
++};
++
++static const struct v4l2_file_operations v4l2_loopback_fops = {
++	// clang-format off
++	.owner		= THIS_MODULE,
++	.open		= v4l2_loopback_open,
++	.release	= v4l2_loopback_close,
++	.read		= v4l2_loopback_read,
++	.write		= v4l2_loopback_write,
++	.poll		= v4l2_loopback_poll,
++	.mmap		= v4l2_loopback_mmap,
++	.unlocked_ioctl	= video_ioctl2,
++	// clang-format on
++};
++
++static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops = {
++	// clang-format off
++	.vidioc_querycap		= &vidioc_querycap,
++	.vidioc_enum_framesizes		= &vidioc_enum_framesizes,
++	.vidioc_enum_frameintervals	= &vidioc_enum_frameintervals,
++
++	.vidioc_enum_output		= &vidioc_enum_output,
++	.vidioc_g_output		= &vidioc_g_output,
++	.vidioc_s_output		= &vidioc_s_output,
++
++	.vidioc_enum_input		= &vidioc_enum_input,
++	.vidioc_g_input			= &vidioc_g_input,
++	.vidioc_s_input			= &vidioc_s_input,
++
++	.vidioc_enum_fmt_vid_cap	= &vidioc_enum_fmt_cap,
++	.vidioc_g_fmt_vid_cap		= &vidioc_g_fmt_cap,
++	.vidioc_s_fmt_vid_cap		= &vidioc_s_fmt_cap,
++	.vidioc_try_fmt_vid_cap		= &vidioc_try_fmt_cap,
++
++	.vidioc_enum_fmt_vid_out	= &vidioc_enum_fmt_out,
++	.vidioc_s_fmt_vid_out		= &vidioc_s_fmt_out,
++	.vidioc_g_fmt_vid_out		= &vidioc_g_fmt_out,
++	.vidioc_try_fmt_vid_out		= &vidioc_try_fmt_out,
++
++#ifdef V4L2L_OVERLAY
++	.vidioc_s_fmt_vid_overlay	= &vidioc_s_fmt_overlay,
++	.vidioc_g_fmt_vid_overlay	= &vidioc_g_fmt_overlay,
++#endif
++
++#ifdef V4L2LOOPBACK_WITH_STD
++	.vidioc_s_std			= &vidioc_s_std,
++	.vidioc_g_std			= &vidioc_g_std,
++	.vidioc_querystd		= &vidioc_querystd,
++#endif /* V4L2LOOPBACK_WITH_STD */
++
++	.vidioc_g_parm			= &vidioc_g_parm,
++	.vidioc_s_parm			= &vidioc_s_parm,
++
++	.vidioc_reqbufs			= &vidioc_reqbufs,
++	.vidioc_querybuf		= &vidioc_querybuf,
++	.vidioc_qbuf			= &vidioc_qbuf,
++	.vidioc_dqbuf			= &vidioc_dqbuf,
++
++	.vidioc_streamon		= &vidioc_streamon,
++	.vidioc_streamoff		= &vidioc_streamoff,
++
++#ifdef CONFIG_VIDEO_V4L1_COMPAT
++	.vidiocgmbuf			= &vidiocgmbuf,
++#endif
++
++	.vidioc_subscribe_event		= &vidioc_subscribe_event,
++	.vidioc_unsubscribe_event	= &v4l2_event_unsubscribe,
++	// clang-format on
++};
++
++static int free_device_cb(int id, void *ptr, void *data)
++{
++	/* idr_for_each() callback: release the device stored under this id */
++	v4l2_loopback_remove((struct v4l2_loopback_device *)ptr);
++	return 0;
++}
++static void free_devices(void)
++{
++	idr_for_each(&v4l2loopback_index_idr, free_device_cb, NULL);
++	idr_destroy(&v4l2loopback_index_idr);
++}
++
++static int __init v4l2loopback_init_module(void)
++{
++	const u32 min_width = V4L2LOOPBACK_SIZE_MIN_WIDTH;
++	const u32 min_height = V4L2LOOPBACK_SIZE_MIN_HEIGHT;
++	int err;
++	int i;
++	MARK();
++
++	err = misc_register(&v4l2loopback_misc);
++	if (err < 0)
++		return err;
++
++	if (devices < 0) {
++		devices = 1;
++
++		/* try guessing the devices from the "video_nr" parameter */
++		for (i = MAX_DEVICES - 1; i >= 0; i--) {
++			if (video_nr[i] >= 0) {
++				devices = i + 1;
++				break;
++			}
++		}
++	}
++
++	if (devices > MAX_DEVICES) {
++		devices = MAX_DEVICES;
++		printk(KERN_INFO
++		       "v4l2loopback: number of initial devices is limited to: %d\n",
++		       MAX_DEVICES);
++	}
++
++	if (max_buffers > MAX_BUFFERS) {
++		max_buffers = MAX_BUFFERS;
++		printk(KERN_INFO
++		       "v4l2loopback: number of buffers is limited to: %d\n",
++		       MAX_BUFFERS);
++	}
++
++	if (max_openers < 0) {
++		printk(KERN_INFO
++		       "v4l2loopback: allowing %d openers rather than %d\n",
++		       2, max_openers);
++		max_openers = 2;
++	}
++
++	if (max_width < min_width) {
++		max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH;
++		printk(KERN_INFO "v4l2loopback: using max_width %d\n",
++		       max_width);
++	}
++	if (max_height < min_height) {
++		max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT;
++		printk(KERN_INFO "v4l2loopback: using max_height %d\n",
++		       max_height);
++	}
++
++	for (i = 0; i < devices; i++) {
++		struct v4l2_loopback_config cfg = {
++			// clang-format off
++			.output_nr		= video_nr[i],
++#ifdef SPLIT_DEVICES
++			.capture_nr		= video_nr[i],
++#endif
++			.min_width		= min_width,
++			.min_height		= min_height,
++			.max_width		= max_width,
++			.max_height		= max_height,
++			.announce_all_caps	= (!exclusive_caps[i]),
++			.max_buffers		= max_buffers,
++			.max_openers		= max_openers,
++			.debug			= debug,
++			// clang-format on
++		};
++		cfg.card_label[0] = 0;
++		if (card_label[i])
++			snprintf(cfg.card_label, sizeof(cfg.card_label), "%s",
++				 card_label[i]);
++		err = v4l2_loopback_add(&cfg, 0);
++		if (err) {
++			free_devices();
++			goto error;
++		}
++	}
++
++	dprintk("module installed\n");
++
++	printk(KERN_INFO "v4l2loopback driver version %d.%d.%d%s loaded\n",
++	       // clang-format off
++	       (V4L2LOOPBACK_VERSION_CODE >> 16) & 0xff,
++	       (V4L2LOOPBACK_VERSION_CODE >>  8) & 0xff,
++	       (V4L2LOOPBACK_VERSION_CODE      ) & 0xff,
++#ifdef SNAPSHOT_VERSION
++	       " (" __stringify(SNAPSHOT_VERSION) ")"
++#else
++	       ""
++#endif
++	       );
++	// clang-format on
++
++	return 0;
++error:
++	misc_deregister(&v4l2loopback_misc);
++	return err;
++}
++
++static void __exit v4l2loopback_cleanup_module(void)
++{
++	MARK();
++	/* remove all loopback devices -> this deletes /dev/video* */
++	free_devices();
++	/* and get rid of the /dev/v4l2loopback control device */
++	misc_deregister(&v4l2loopback_misc);
++	dprintk("module removed\n");
++}
++
++MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR);
++
++module_init(v4l2loopback_init_module);
++module_exit(v4l2loopback_cleanup_module);
+diff --git a/drivers/media/v4l2-core/v4l2loopback.h b/drivers/media/v4l2-core/v4l2loopback.h
+new file mode 100644
+index 000000000000..1bc7e6b747a4
+--- /dev/null
++++ b/drivers/media/v4l2-core/v4l2loopback.h
+@@ -0,0 +1,98 @@
++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
++/*
++ * v4l2loopback.h
++ *
++ * Written by IOhannes m zmölnig, 7/1/20.
++ *
++ * Copyright 2020 by IOhannes m zmölnig.  Redistribution of this file is
++ * permitted under the GNU General Public License.
++ */
++#ifndef _V4L2LOOPBACK_H
++#define _V4L2LOOPBACK_H
++
++#define V4L2LOOPBACK_VERSION_MAJOR 0
++#define V4L2LOOPBACK_VERSION_MINOR 13
++#define V4L2LOOPBACK_VERSION_BUGFIX 1
++
++/* /dev/v4l2loopback interface */
++
++struct v4l2_loopback_config {
++	/**
++         * the device-number (/dev/video<nr>)
++         * V4L2LOOPBACK_CTL_ADD:
++         * setting this to a value<0, will allocate an available one
++         * if nr>=0 and the device already exists, the ioctl will fail with EEXIST
++         * if output_nr and capture_nr are the same, only a single device will be created
++	 * NOTE: currently split-devices (where output_nr and capture_nr differ)
++	 *   are not implemented yet.
++	 *   until then, requesting different device-IDs will result in EINVAL.
++         *
++         * V4L2LOOPBACK_CTL_QUERY:
++         * either both output_nr and capture_nr must refer to the same loopback,
++         * or one (and only one) of them must be -1
++         *
++         */
++	int output_nr;
++	int unused; /* placeholder for capture_nr (split devices are not implemented) */
++
++	/**
++         * a nice name for your device
++         * if (*card_label)==0, an automatic name is assigned
++         */
++	char card_label[32];
++
++	/**
++         * allowed frame size
++         * if too low, default values are used
++         */
++	unsigned int min_width;
++	unsigned int max_width;
++	unsigned int min_height;
++	unsigned int max_height;
++
++	/**
++         * number of buffers to allocate for the queue
++         * if set to <=0, default values are used
++         */
++	int max_buffers;
++
++	/**
++         * how many consumers are allowed to open this device concurrently
++         * if set to <=0, default values are used
++         */
++	int max_openers;
++
++	/**
++         * set the debugging level for this device
++         */
++	int debug;
++
++	/**
++         * whether to announce OUTPUT/CAPTURE capabilities exclusively
++         * for this device or not
++         * (!exclusive_caps)
++	 * NOTE: this is going to be removed once separate output/capture
++	 *       devices are implemented
++         */
++	int announce_all_caps;
++};
++
++/* a pointer to a (struct v4l2_loopback_config) that has all values you wish to impose on the
++ * to-be-created device set.
++ * if the ptr is NULL, a new device is created with default values at the driver's discretion.
++ *
++ * returns the device_nr of the OUTPUT device (which can be used with V4L2LOOPBACK_CTL_QUERY,
++ * to get more information on the device)
++ */
++#define V4L2LOOPBACK_CTL_ADD 0x4C80
++
++/* a pointer to a (struct v4l2_loopback_config) that has output_nr and/or capture_nr set
++ * (the two values must either refer to video-devices associated with the same loopback device
++ *  or exactly one of them must be <0)
++ */
++#define V4L2LOOPBACK_CTL_QUERY 0x4C82
++
++/* the device-number (either CAPTURE or OUTPUT) associated with the loopback-device */
++#define V4L2LOOPBACK_CTL_REMOVE 0x4C81
++
++#endif /* _V4L2LOOPBACK_H */
+diff --git a/drivers/media/v4l2-core/v4l2loopback_formats.h b/drivers/media/v4l2-core/v4l2loopback_formats.h
+new file mode 100644
+index 000000000000..d855a3796554
+--- /dev/null
++++ b/drivers/media/v4l2-core/v4l2loopback_formats.h
+@@ -0,0 +1,445 @@
++static const struct v4l2l_format formats[] = {
++#ifndef V4L2_PIX_FMT_VP9
++#define V4L2_PIX_FMT_VP9 v4l2_fourcc('V', 'P', '9', '0')
++#endif
++#ifndef V4L2_PIX_FMT_HEVC
++#define V4L2_PIX_FMT_HEVC v4l2_fourcc('H', 'E', 'V', 'C')
++#endif
++
++	/* here come the packed formats */
++	{
++		.name = "32 bpp RGB, le",
++		.fourcc = V4L2_PIX_FMT_BGR32,
++		.depth = 32,
++		.flags = 0,
++	},
++	{
++		.name = "32 bpp RGB, be",
++		.fourcc = V4L2_PIX_FMT_RGB32,
++		.depth = 32,
++		.flags = 0,
++	},
++	{
++		.name = "24 bpp RGB, le",
++		.fourcc = V4L2_PIX_FMT_BGR24,
++		.depth = 24,
++		.flags = 0,
++	},
++	{
++		.name = "24 bpp RGB, be",
++		.fourcc = V4L2_PIX_FMT_RGB24,
++		.depth = 24,
++		.flags = 0,
++	},
++#ifdef V4L2_PIX_FMT_ABGR32
++	{
++		.name = "32 bpp RGBA, le",
++		.fourcc = V4L2_PIX_FMT_ABGR32,
++		.depth = 32,
++		.flags = 0,
++	},
++#endif
++#ifdef V4L2_PIX_FMT_RGBA32
++	{
++		.name = "32 bpp RGBA",
++		.fourcc = V4L2_PIX_FMT_RGBA32,
++		.depth = 32,
++		.flags = 0,
++	},
++#endif
++#ifdef V4L2_PIX_FMT_RGB332
++	{
++		.name = "8 bpp RGB-3-3-2",
++		.fourcc = V4L2_PIX_FMT_RGB332,
++		.depth = 8,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_RGB332 */
++#ifdef V4L2_PIX_FMT_RGB444
++	{
++		.name = "16 bpp RGB (xxxxrrrr ggggbbbb)",
++		.fourcc = V4L2_PIX_FMT_RGB444,
++		.depth = 16,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_RGB444 */
++#ifdef V4L2_PIX_FMT_RGB555
++	{
++		.name = "16 bpp RGB-5-5-5",
++		.fourcc = V4L2_PIX_FMT_RGB555,
++		.depth = 16,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_RGB555 */
++#ifdef V4L2_PIX_FMT_RGB565
++	{
++		.name = "16 bpp RGB-5-6-5",
++		.fourcc = V4L2_PIX_FMT_RGB565,
++		.depth = 16,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_RGB565 */
++#ifdef V4L2_PIX_FMT_RGB555X
++	{
++		.name = "16 bpp RGB-5-5-5 BE",
++		.fourcc = V4L2_PIX_FMT_RGB555X,
++		.depth = 16,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_RGB555X */
++#ifdef V4L2_PIX_FMT_RGB565X
++	{
++		.name = "16 bpp RGB-5-6-5 BE",
++		.fourcc = V4L2_PIX_FMT_RGB565X,
++		.depth = 16,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_RGB565X */
++#ifdef V4L2_PIX_FMT_BGR666
++	{
++		.name = "18 bpp BGR-6-6-6",
++		.fourcc = V4L2_PIX_FMT_BGR666,
++		.depth = 18,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_BGR666 */
++	{
++		.name = "4:2:2, packed, YUYV",
++		.fourcc = V4L2_PIX_FMT_YUYV,
++		.depth = 16,
++		.flags = 0,
++	},
++	{
++		.name = "4:2:2, packed, UYVY",
++		.fourcc = V4L2_PIX_FMT_UYVY,
++		.depth = 16,
++		.flags = 0,
++	},
++#ifdef V4L2_PIX_FMT_YVYU
++	{
++		.name = "4:2:2, packed YVYU",
++		.fourcc = V4L2_PIX_FMT_YVYU,
++		.depth = 16,
++		.flags = 0,
++	},
++#endif
++#ifdef V4L2_PIX_FMT_VYUY
++	{
++		.name = "4:2:2, packed VYUY",
++		.fourcc = V4L2_PIX_FMT_VYUY,
++		.depth = 16,
++		.flags = 0,
++	},
++#endif
++	{
++		.name = "4:2:2, packed YYUV",
++		.fourcc = V4L2_PIX_FMT_YYUV,
++		.depth = 16,
++		.flags = 0,
++	},
++	{
++		.name = "YUV-8-8-8-8",
++		.fourcc = V4L2_PIX_FMT_YUV32,
++		.depth = 32,
++		.flags = 0,
++	},
++	{
++		.name = "8 bpp, Greyscale",
++		.fourcc = V4L2_PIX_FMT_GREY,
++		.depth = 8,
++		.flags = 0,
++	},
++#ifdef V4L2_PIX_FMT_Y4
++	{
++		.name = "4 bpp Greyscale",
++		.fourcc = V4L2_PIX_FMT_Y4,
++		.depth = 4,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_Y4 */
++#ifdef V4L2_PIX_FMT_Y6
++	{
++		.name = "6 bpp Greyscale",
++		.fourcc = V4L2_PIX_FMT_Y6,
++		.depth = 6,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_Y6 */
++#ifdef V4L2_PIX_FMT_Y10
++	{
++		.name = "10 bpp Greyscale",
++		.fourcc = V4L2_PIX_FMT_Y10,
++		.depth = 10,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_Y10 */
++#ifdef V4L2_PIX_FMT_Y12
++	{
++		.name = "12 bpp Greyscale",
++		.fourcc = V4L2_PIX_FMT_Y12,
++		.depth = 12,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_Y12 */
++	{
++		.name = "16 bpp, Greyscale",
++		.fourcc = V4L2_PIX_FMT_Y16,
++		.depth = 16,
++		.flags = 0,
++	},
++#ifdef V4L2_PIX_FMT_YUV444
++	{
++		.name = "16 bpp xxxxyyyy uuuuvvvv",
++		.fourcc = V4L2_PIX_FMT_YUV444,
++		.depth = 16,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_YUV444 */
++#ifdef V4L2_PIX_FMT_YUV555
++	{
++		.name = "16 bpp YUV-5-5-5",
++		.fourcc = V4L2_PIX_FMT_YUV555,
++		.depth = 16,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_YUV555 */
++#ifdef V4L2_PIX_FMT_YUV565
++	{
++		.name = "16 bpp YUV-5-6-5",
++		.fourcc = V4L2_PIX_FMT_YUV565,
++		.depth = 16,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_YUV565 */
++
++/* bayer formats */
++#ifdef V4L2_PIX_FMT_SRGGB8
++	{
++		.name = "Bayer RGGB 8bit",
++		.fourcc = V4L2_PIX_FMT_SRGGB8,
++		.depth = 8,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_SRGGB8 */
++#ifdef V4L2_PIX_FMT_SGRBG8
++	{
++		.name = "Bayer GRBG 8bit",
++		.fourcc = V4L2_PIX_FMT_SGRBG8,
++		.depth = 8,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_SGRBG8 */
++#ifdef V4L2_PIX_FMT_SGBRG8
++	{
++		.name = "Bayer GBRG 8bit",
++		.fourcc = V4L2_PIX_FMT_SGBRG8,
++		.depth = 8,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_SGBRG8 */
++#ifdef V4L2_PIX_FMT_SBGGR8
++	{
++		.name = "Bayer BA81 8bit",
++		.fourcc = V4L2_PIX_FMT_SBGGR8,
++		.depth = 8,
++		.flags = 0,
++	},
++#endif /* V4L2_PIX_FMT_SBGGR8 */
++
++	/* here come the planar formats */
++	{
++		.name = "4:1:0, planar, Y-Cr-Cb",
++		.fourcc = V4L2_PIX_FMT_YVU410,
++		.depth = 9,
++		.flags = FORMAT_FLAGS_PLANAR,
++	},
++	{
++		.name = "4:2:0, planar, Y-Cr-Cb",
++		.fourcc = V4L2_PIX_FMT_YVU420,
++		.depth = 12,
++		.flags = FORMAT_FLAGS_PLANAR,
++	},
++	{
++		.name = "4:1:0, planar, Y-Cb-Cr",
++		.fourcc = V4L2_PIX_FMT_YUV410,
++		.depth = 9,
++		.flags = FORMAT_FLAGS_PLANAR,
++	},
++	{
++		.name = "4:2:0, planar, Y-Cb-Cr",
++		.fourcc = V4L2_PIX_FMT_YUV420,
++		.depth = 12,
++		.flags = FORMAT_FLAGS_PLANAR,
++	},
++#ifdef V4L2_PIX_FMT_YUV422P
++	{
++		.name = "16 bpp YVU422 planar",
++		.fourcc = V4L2_PIX_FMT_YUV422P,
++		.depth = 16,
++		.flags = FORMAT_FLAGS_PLANAR,
++	},
++#endif /* V4L2_PIX_FMT_YUV422P */
++#ifdef V4L2_PIX_FMT_YUV411P
++	{
++		.name = "16 bpp YVU411 planar",
++		.fourcc = V4L2_PIX_FMT_YUV411P,
++		.depth = 16,
++		.flags = FORMAT_FLAGS_PLANAR,
++	},
++#endif /* V4L2_PIX_FMT_YUV411P */
++#ifdef V4L2_PIX_FMT_Y41P
++	{
++		.name = "12 bpp YUV 4:1:1",
++		.fourcc = V4L2_PIX_FMT_Y41P,
++		.depth = 12,
++		.flags = FORMAT_FLAGS_PLANAR,
++	},
++#endif /* V4L2_PIX_FMT_Y41P */
++#ifdef V4L2_PIX_FMT_NV12
++	{
++		.name = "12 bpp Y/CbCr 4:2:0 ",
++		.fourcc = V4L2_PIX_FMT_NV12,
++		.depth = 12,
++		.flags = FORMAT_FLAGS_PLANAR,
++	},
++#endif /* V4L2_PIX_FMT_NV12 */
++
++/* here come the compressed formats */
++
++#ifdef V4L2_PIX_FMT_MJPEG
++	{
++		.name = "Motion-JPEG",
++		.fourcc = V4L2_PIX_FMT_MJPEG,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_MJPEG */
++#ifdef V4L2_PIX_FMT_JPEG
++	{
++		.name = "JFIF JPEG",
++		.fourcc = V4L2_PIX_FMT_JPEG,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_JPEG */
++#ifdef V4L2_PIX_FMT_DV
++	{
++		.name = "DV1394",
++		.fourcc = V4L2_PIX_FMT_DV,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_DV */
++#ifdef V4L2_PIX_FMT_MPEG
++	{
++		.name = "MPEG-1/2/4 Multiplexed",
++		.fourcc = V4L2_PIX_FMT_MPEG,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_MPEG */
++#ifdef V4L2_PIX_FMT_H264
++	{
++		.name = "H264 with start codes",
++		.fourcc = V4L2_PIX_FMT_H264,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_H264 */
++#ifdef V4L2_PIX_FMT_H264_NO_SC
++	{
++		.name = "H264 without start codes",
++		.fourcc = V4L2_PIX_FMT_H264_NO_SC,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_H264_NO_SC */
++#ifdef V4L2_PIX_FMT_H264_MVC
++	{
++		.name = "H264 MVC",
++		.fourcc = V4L2_PIX_FMT_H264_MVC,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_H264_MVC */
++#ifdef V4L2_PIX_FMT_H263
++	{
++		.name = "H263",
++		.fourcc = V4L2_PIX_FMT_H263,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_H263 */
++#ifdef V4L2_PIX_FMT_MPEG1
++	{
++		.name = "MPEG-1 ES",
++		.fourcc = V4L2_PIX_FMT_MPEG1,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_MPEG1 */
++#ifdef V4L2_PIX_FMT_MPEG2
++	{
++		.name = "MPEG-2 ES",
++		.fourcc = V4L2_PIX_FMT_MPEG2,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_MPEG2 */
++#ifdef V4L2_PIX_FMT_MPEG4
++	{
++		.name = "MPEG-4 part 2 ES",
++		.fourcc = V4L2_PIX_FMT_MPEG4,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_MPEG4 */
++#ifdef V4L2_PIX_FMT_XVID
++	{
++		.name = "Xvid",
++		.fourcc = V4L2_PIX_FMT_XVID,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_XVID */
++#ifdef V4L2_PIX_FMT_VC1_ANNEX_G
++	{
++		.name = "SMPTE 421M Annex G compliant stream",
++		.fourcc = V4L2_PIX_FMT_VC1_ANNEX_G,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_VC1_ANNEX_G */
++#ifdef V4L2_PIX_FMT_VC1_ANNEX_L
++	{
++		.name = "SMPTE 421M Annex L compliant stream",
++		.fourcc = V4L2_PIX_FMT_VC1_ANNEX_L,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_VC1_ANNEX_L */
++#ifdef V4L2_PIX_FMT_VP8
++	{
++		.name = "VP8",
++		.fourcc = V4L2_PIX_FMT_VP8,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_VP8 */
++#ifdef V4L2_PIX_FMT_VP9
++	{
++		.name = "VP9",
++		.fourcc = V4L2_PIX_FMT_VP9,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_VP9 */
++#ifdef V4L2_PIX_FMT_HEVC
++	{
++		.name = "HEVC",
++		.fourcc = V4L2_PIX_FMT_HEVC,
++		.depth = 32,
++		.flags = FORMAT_FLAGS_COMPRESSED,
++	},
++#endif /* V4L2_PIX_FMT_HEVC */
++};
+diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile
+index 038ccbd9e3ba..de5e4f5145af 100644
+--- a/drivers/pci/controller/Makefile
++++ b/drivers/pci/controller/Makefile
+@@ -1,4 +1,10 @@
+ # SPDX-License-Identifier: GPL-2.0
++ifdef CONFIG_X86_64
++ifdef CONFIG_SATA_AHCI
++obj-y += intel-nvme-remap.o
++endif
++endif
++
+ obj-$(CONFIG_PCIE_CADENCE) += cadence/
+ obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o
+ obj-$(CONFIG_PCI_IXP4XX) += pci-ixp4xx.o
+diff --git a/drivers/pci/controller/intel-nvme-remap.c b/drivers/pci/controller/intel-nvme-remap.c
+new file mode 100644
+index 000000000000..e105e6f5cc91
+--- /dev/null
++++ b/drivers/pci/controller/intel-nvme-remap.c
+@@ -0,0 +1,462 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Intel remapped NVMe device support.
++ *
++ * Copyright (c) 2019 Endless Mobile, Inc.
++ * Author: Daniel Drake <drake@endlessm.com>
++ *
++ * Some products ship by default with the SATA controller in "RAID" or
++ * "Intel RST Premium With Intel Optane System Acceleration" mode. Under this
++ * mode, which we refer to as "remapped NVMe" mode, any installed NVMe
++ * devices disappear from the PCI bus, and instead their I/O memory becomes
++ * available within the AHCI device BARs.
++ *
++ * This scheme is understood to be a way of avoiding usage of the standard
++ * Windows NVMe driver under that OS, mandating usage of Intel's driver
++ * instead, which has better power management, and presumably offers
++ * some RAID/disk-caching solutions too.
++ *
++ * Here in this driver, we support the remapped NVMe mode by claiming the
++ * AHCI device and creating a fake PCIe root port. On the new bus, the
++ * original AHCI device is exposed with only minor tweaks. Then, fake PCI
++ * devices corresponding to the remapped NVMe devices are created. The usual
++ * ahci and nvme drivers are then expected to bind to these devices and
++ * operate as normal.
++ *
++ * The PCI configuration space for the NVMe devices is completely
++ * unavailable, so we fake a minimal one and hope for the best.
++ *
++ * Interrupts are shared between the AHCI and NVMe devices. For simplicity,
++ * we only support the legacy interrupt here, although MSI support
++ * could potentially be added later.
++ */
++
++#define MODULE_NAME "intel-nvme-remap"
++
++#include <linux/ahci-remap.h>
++#include <linux/irq.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/pci.h>
++
++#define AHCI_PCI_BAR_STANDARD 5
++
++struct nvme_remap_dev {
++	struct pci_dev		*dev;		/* AHCI device */
++	struct pci_bus		*bus;		/* our fake PCI bus */
++	struct pci_sysdata	sysdata;
++	int			irq_base;	/* our fake interrupts */
++
++	/*
++	 * When we detect an all-ones write to a BAR register, this flag
++	 * is set, so that we return the BAR size on the next read (a
++	 * standard PCI behaviour).
++	 * This includes the assumption that an all-ones BAR write is
++	 * immediately followed by a read of the same register.
++	 */
++	bool			bar_sizing;
++
++	/*
++	 * Resources copied from the AHCI device, to be regarded as
++	 * resources on our fake bus.
++	 */
++	struct resource		ahci_resources[PCI_NUM_RESOURCES];
++
++	/* Resources corresponding to the NVMe devices. */
++	struct resource		remapped_dev_mem[AHCI_MAX_REMAP];
++
++	/* Number of remapped NVMe devices found. */
++	int			num_remapped_devices;
++};
++
++static inline struct nvme_remap_dev *nrdev_from_bus(struct pci_bus *bus)
++{
++	return container_of(bus->sysdata, struct nvme_remap_dev, sysdata);
++}
++
++
++/******** PCI configuration space **********/
++
++/*
++ * Helper macros for tweaking returned contents of PCI configuration space.
++ * They use the caller's locals: value holds len bytes read from reg. If
++ * fixup_reg lies in that range, its bytes are overwritten with fixed_value
++ * (stored low byte first). Arguments are parenthesized so that expressions
++ * such as "A | B" survive the ">> 8" shifts applied by the wider variants.
++ */
++#define NR_FIX8(fixup_reg, fixed_value) do { \
++		if (reg <= (fixup_reg) && (fixup_reg) < reg + len) \
++			((u8 *) value)[(fixup_reg) - reg] = (u8) (fixed_value); \
++	} while (0)
++
++#define NR_FIX16(fixup_reg, fixed_value) do { \
++		NR_FIX8(fixup_reg, fixed_value); \
++		NR_FIX8((fixup_reg) + 1, (fixed_value) >> 8); \
++	} while (0)
++
++#define NR_FIX24(fixup_reg, fixed_value) do { \
++		NR_FIX8(fixup_reg, fixed_value); \
++		NR_FIX8((fixup_reg) + 1, (fixed_value) >> 8); \
++		NR_FIX8((fixup_reg) + 2, (fixed_value) >> 16); \
++	} while (0)
++
++#define NR_FIX32(fixup_reg, fixed_value) do { \
++		NR_FIX16(fixup_reg, (u16) (fixed_value)); \
++		NR_FIX16((fixup_reg) + 2, (fixed_value) >> 16); \
++	} while (0)
++
++/*
++ * Read PCI config space of the slot 0 (AHCI) device.
++ * We pass through the read request to the underlying device, but
++ * tweak the results in some cases.
++ */
++static int nvme_remap_pci_read_slot0(struct pci_bus *bus, int reg,
++				     int len, u32 *value)
++{
++	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
++	struct pci_bus *ahci_dev_bus = nrdev->dev->bus;
++	int ret;
++
++	ret = ahci_dev_bus->ops->read(ahci_dev_bus, nrdev->dev->devfn,
++				      reg, len, value);
++	if (ret)
++		return ret;
++
++	/*
++	 * Adjust the device class, to prevent this driver from attempting to
++	 * additionally probe the device we're simulating here.
++	 */
++	NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_SATA_AHCI);
++
++	/*
++	 * Unset interrupt pin, otherwise ACPI tries to find routing
++	 * info for our virtual IRQ, fails, and complains.
++	 */
++	NR_FIX8(PCI_INTERRUPT_PIN, 0);
++
++	/*
++	 * Truncate the AHCI BAR to not include the region that covers the
++	 * hidden devices. This will cause the ahci driver to successfully
++	 * probe the new device (instead of handing it over to this driver).
++	 */
++	if (nrdev->bar_sizing) {
++		NR_FIX32(PCI_BASE_ADDRESS_5, ~(SZ_16K - 1));
++		nrdev->bar_sizing = false;
++	}
++
++	return PCIBIOS_SUCCESSFUL;
++}
++
++/*
++ * Read PCI config space of a remapped device.
++ * Since the original PCI config space is inaccessible, we provide a minimal,
++ * fake config space instead.
++ */
++static int nvme_remap_pci_read_remapped(struct pci_bus *bus, unsigned int port,
++					int reg, int len, u32 *value)
++{
++	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
++	struct resource *remapped_mem;
++
++	if (port > nrdev->num_remapped_devices)
++		return PCIBIOS_DEVICE_NOT_FOUND;
++
++	*value = 0;
++	remapped_mem = &nrdev->remapped_dev_mem[port - 1];
++
++	/* Set a Vendor ID, otherwise Linux assumes no device is present */
++	NR_FIX16(PCI_VENDOR_ID, PCI_VENDOR_ID_INTEL);
++
++	/* Always appear on & bus mastering */
++	NR_FIX16(PCI_COMMAND, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
++
++	/* Set class so that nvme driver probes us */
++	NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_EXPRESS);
++
++	if (nrdev->bar_sizing) {
++		NR_FIX32(PCI_BASE_ADDRESS_0,
++			 ~(resource_size(remapped_mem) - 1));
++		nrdev->bar_sizing = false;
++	} else {
++		resource_size_t mem_start = remapped_mem->start;
++
++		mem_start |= PCI_BASE_ADDRESS_MEM_TYPE_64;
++		NR_FIX32(PCI_BASE_ADDRESS_0, mem_start);
++		mem_start >>= 32;
++		NR_FIX32(PCI_BASE_ADDRESS_1, mem_start);
++	}
++
++	return PCIBIOS_SUCCESSFUL;
++}
++
++/* Read PCI configuration space. */
++static int nvme_remap_pci_read(struct pci_bus *bus, unsigned int devfn,
++			       int reg, int len, u32 *value)
++{
++	unsigned int slot = PCI_SLOT(devfn);
++
++	/* Slot 0 is the AHCI device; every other slot is a remapped one. */
++	return slot ? nvme_remap_pci_read_remapped(bus, slot, reg, len, value)
++		    : nvme_remap_pci_read_slot0(bus, reg, len, value);
++}
++
++/*
++ * Write PCI config space of the slot 0 (AHCI) device.
++ * Apart from the special case of BAR sizing, we disable all writes.
++ * Otherwise, the ahci driver could make changes (e.g. unset PCI bus master)
++ * that would affect the operation of the NVMe devices.
++ */
++static int nvme_remap_pci_write_slot0(struct pci_bus *bus, int reg,
++				      int len, u32 value)
++{
++	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
++	struct pci_bus *ahci_dev_bus = nrdev->dev->bus;
++
++	if (reg >= PCI_BASE_ADDRESS_0 && reg <= PCI_BASE_ADDRESS_5) {
++		/*
++		 * Writing all-ones to a BAR means that the size of the
++		 * memory region is being checked. Flag this so that we can
++		 * reply with an appropriate size on the next read.
++		 */
++		if (value == ~0)
++			nrdev->bar_sizing = true;
++
++		return ahci_dev_bus->ops->write(ahci_dev_bus,
++						nrdev->dev->devfn,
++						reg, len, value);
++	}
++
++	return PCIBIOS_SET_FAILED;
++}
++
++/*
++ * Write PCI config space of a remapped device.
++ * Since the original PCI config space is inaccessible, we reject all
++ * writes, except for the special case of BAR probing.
++ */
++static int nvme_remap_pci_write_remapped(struct pci_bus *bus,
++					 unsigned int port,
++					 int reg, int len, u32 value)
++{
++	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
++
++	if (port > nrdev->num_remapped_devices)
++		return PCIBIOS_DEVICE_NOT_FOUND;
++
++	/*
++	 * Writing all-ones to a BAR means that the size of the memory
++	 * region is being checked. Flag this so that we can reply with
++	 * an appropriate size on the next read.
++	 */
++	if (value == ~0 && reg >= PCI_BASE_ADDRESS_0
++			&& reg <= PCI_BASE_ADDRESS_5) {
++		nrdev->bar_sizing = true;
++		return PCIBIOS_SUCCESSFUL;
++	}
++
++	return PCIBIOS_SET_FAILED;
++}
++
++/* Write PCI configuration space. */
++static int nvme_remap_pci_write(struct pci_bus *bus, unsigned int devfn,
++				int reg, int len, u32 value)
++{
++	unsigned int slot = PCI_SLOT(devfn);
++
++	/* Slot 0 is the AHCI device; every other slot is a remapped one. */
++	return slot ? nvme_remap_pci_write_remapped(bus, slot, reg, len, value)
++		    : nvme_remap_pci_write_slot0(bus, reg, len, value);
++}
++
++static struct pci_ops nvme_remap_pci_ops = {
++	.read	= nvme_remap_pci_read,
++	.write	= nvme_remap_pci_write,
++};
++
++
++/******** Initialization & exit **********/
++
++/*
++ * Find a PCI domain ID to use for our fake bus.
++ * Start at 0x10000 to not clash with ACPI _SEG domains (16 bits).
++ */
++static int find_free_domain(void)
++{
++	struct pci_bus *cur;
++	int highest = 0xffff;
++
++	for (cur = pci_find_next_bus(NULL); cur; cur = pci_find_next_bus(cur))
++		highest = max_t(int, highest, pci_domain_nr(cur));
++
++	return highest + 1;
++}
++
++static int find_remapped_devices(struct nvme_remap_dev *nrdev,
++				 struct list_head *resources)
++{
++	void __iomem *mmio;
++	int i, count = 0;
++	u32 cap;
++
++	mmio = pcim_iomap(nrdev->dev, AHCI_PCI_BAR_STANDARD,
++			  pci_resource_len(nrdev->dev,
++					   AHCI_PCI_BAR_STANDARD));
++	if (!mmio)
++		return -ENODEV;
++
++	/* Check if this device might have remapped nvme devices. */
++	if (pci_resource_len(nrdev->dev, AHCI_PCI_BAR_STANDARD) < SZ_512K ||
++	    !(readl(mmio + AHCI_VSCAP) & 1))
++		return -ENODEV;
++
++	cap = readq(mmio + AHCI_REMAP_CAP);
++	for (i = AHCI_MAX_REMAP-1; i >= 0; i--) {
++		struct resource *remapped_mem;
++
++		if ((cap & (1 << i)) == 0)
++			continue;
++		if (readl(mmio + ahci_remap_dcc(i))
++				!= PCI_CLASS_STORAGE_EXPRESS)
++			continue;
++
++		/* We've found a remapped device */
++		remapped_mem = &nrdev->remapped_dev_mem[count++];
++		remapped_mem->start =
++			pci_resource_start(nrdev->dev, AHCI_PCI_BAR_STANDARD)
++			+ ahci_remap_base(i);
++		remapped_mem->end = remapped_mem->start
++			+ AHCI_REMAP_N_SIZE - 1;
++		remapped_mem->flags = IORESOURCE_MEM | IORESOURCE_PCI_FIXED;
++		pci_add_resource(resources, remapped_mem);
++	}
++
++	pcim_iounmap(nrdev->dev, mmio);
++
++	if (count == 0)
++		return -ENODEV;
++
++	nrdev->num_remapped_devices = count;
++	dev_info(&nrdev->dev->dev, "Found %d remapped NVMe devices\n",
++		 nrdev->num_remapped_devices);
++	return 0;
++}
++
++static void nvme_remap_remove_root_bus(void *data)
++{
++	struct pci_bus *fake_root = data;	/* bus passed in by probe */
++
++	pci_stop_root_bus(fake_root);
++	pci_remove_root_bus(fake_root);
++}
++
++static int nvme_remap_probe(struct pci_dev *dev,
++			    const struct pci_device_id *id)
++{
++	struct nvme_remap_dev *nrdev;
++	LIST_HEAD(resources);
++	struct pci_dev *child;
++	int i, ret;
++
++	nrdev = devm_kzalloc(&dev->dev, sizeof(*nrdev), GFP_KERNEL);
++	if (!nrdev)	/* allocation can fail; never dereference NULL below */
++		return -ENOMEM;
++	nrdev->sysdata.domain = find_free_domain();
++	nrdev->sysdata.nvme_remap_dev = dev;
++	nrdev->dev = dev;
++	pci_set_drvdata(dev, nrdev);
++
++	ret = pcim_enable_device(dev);
++	if (ret < 0)
++		return ret;
++
++	pci_set_master(dev);
++
++	ret = find_remapped_devices(nrdev, &resources);
++	if (ret)
++		return ret;
++
++	/* Add resources from the original AHCI device */
++	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
++		struct resource *res = &dev->resource[i];
++
++		if (res->start) {
++			struct resource *nr_res = &nrdev->ahci_resources[i];
++
++			nr_res->start = res->start;
++			nr_res->end = res->end;
++			nr_res->flags = res->flags;
++			pci_add_resource(&resources, nr_res);
++		}
++	}
++
++	/* Create virtual interrupts: one per remapped device, plus one */
++	nrdev->irq_base = devm_irq_alloc_descs(&dev->dev, -1, 0,
++				nrdev->num_remapped_devices + 1, 0);
++	if (nrdev->irq_base < 0)
++		return nrdev->irq_base;
++
++	/* Create and populate PCI bus */
++	nrdev->bus = pci_create_root_bus(&dev->dev, 0, &nvme_remap_pci_ops,
++					 &nrdev->sysdata, &resources);
++	if (!nrdev->bus)
++		return -ENODEV;
++
++	if (devm_add_action_or_reset(&dev->dev, nvme_remap_remove_root_bus,
++				     nrdev->bus))
++		return -ENOMEM;
++
++	/* We don't support sharing MSI interrupts between these devices */
++	nrdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
++
++	pci_scan_child_bus(nrdev->bus);
++
++	list_for_each_entry(child, &nrdev->bus->devices, bus_list) {
++		/*
++		 * Prevent PCI core from trying to move memory BARs around.
++		 * The hidden NVMe devices are at fixed locations.
++		 */
++		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
++			struct resource *res = &child->resource[i];
++
++			if (res->flags & IORESOURCE_MEM)
++				res->flags |= IORESOURCE_PCI_FIXED;
++		}
++
++		/* Share the legacy IRQ between all devices */
++		child->irq = dev->irq;
++	}
++
++	pci_assign_unassigned_bus_resources(nrdev->bus);
++	pci_bus_add_devices(nrdev->bus);
++
++	return 0;
++}
++
++static const struct pci_device_id nvme_remap_ids[] = {
++	/*
++	 * Match all Intel RAID controllers.
++	 *
++	 * There's overlap here with the set of devices detected by the ahci
++	 * driver, but ahci will only successfully probe when there
++	 * *aren't* any remapped NVMe devices, and this driver will only
++	 * successfully probe when there *are* remapped NVMe devices that
++	 * need handling.
++	 */
++	{
++		PCI_VDEVICE(INTEL, PCI_ANY_ID),
++		.class = PCI_CLASS_STORAGE_RAID << 8,
++		.class_mask = 0xffffff00,
++	},
++	{0,}
++};
++MODULE_DEVICE_TABLE(pci, nvme_remap_ids);
++
++static struct pci_driver nvme_remap_drv = {
++	.name		= MODULE_NAME,
++	.id_table	= nvme_remap_ids,
++	.probe		= nvme_remap_probe,
++};
++module_pci_driver(nvme_remap_drv);
++
++MODULE_AUTHOR("Daniel Drake <drake@endlessm.com>");
++MODULE_LICENSE("GPL v2");
+diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
+index dccb60c1d9cc..d9a8af789de8 100644
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -3747,6 +3747,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
+ 	dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
+ }
+ 
++static bool acs_on_downstream;
++static bool acs_on_multifunction;
++
++#define NUM_ACS_IDS 16
++struct acs_on_id {
++	unsigned short vendor;
++	unsigned short device;
++};
++static struct acs_on_id acs_on_ids[NUM_ACS_IDS];
++static u8 max_acs_id;
++
++static __init int pcie_acs_override_setup(char *p)
++{
++	if (!p)
++		return -EINVAL;
++
++	while (*p) {	/* walk the comma-separated option list */
++		if (!strncmp(p, "downstream", 10))
++			acs_on_downstream = true;
++		if (!strncmp(p, "multifunction", 13))
++			acs_on_multifunction = true;
++		if (!strncmp(p, "id:", 3)) {
++			char opt[5];
++			int ret;
++			long val;
++
++			if (max_acs_id >= NUM_ACS_IDS) {	/* table full */
++				pr_warn("Out of PCIe ACS override slots (%d)\n",
++						NUM_ACS_IDS);
++				goto next;
++			}
++
++			p += 3;	/* skip "id:" */
++			snprintf(opt, 5, "%s", p);	/* at most 4 hex digits */
++			ret = kstrtol(opt, 16, &val);
++			if (ret) {
++				pr_warn("PCIe ACS ID parse error %d\n", ret);
++				goto next;
++			}
++			acs_on_ids[max_acs_id].vendor = val;
++
++			p += strcspn(p, ":");
++			if (*p != ':') {
++				pr_warn("PCIe ACS invalid ID\n");
++				goto next;
++			}
++
++			p++;
++			snprintf(opt, 5, "%s", p);
++			ret = kstrtol(opt, 16, &val);
++			if (ret) {
++				pr_warn("PCIe ACS ID parse error %d\n", ret);
++				goto next;
++			}
++			acs_on_ids[max_acs_id].device = val;
++			max_acs_id++;	/* slot is committed only once both halves parsed */
++		}
++next:
++		p += strcspn(p, ",");
++		if (*p == ',')
++			p++;
++	}
++
++	if (acs_on_downstream || acs_on_multifunction || max_acs_id)
++		pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n");
++
++	return 0;
++}
++early_param("pcie_acs_override", pcie_acs_override_setup);
++
++static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags)
++{
++	int i;
++
++	/* Never override ACS for legacy devices or devices with ACS caps */
++	if (!pci_is_pcie(dev) ||
++		pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS))
++			return -ENOTTY;
++
++	for (i = 0; i < max_acs_id; i++)	/* IDs parsed from "id:" options */
++		if (acs_on_ids[i].vendor == dev->vendor &&
++			acs_on_ids[i].device == dev->device)
++				return 1;
++
++	switch (pci_pcie_type(dev)) {
++	case PCI_EXP_TYPE_DOWNSTREAM:
++	case PCI_EXP_TYPE_ROOT_PORT:
++		if (acs_on_downstream)	/* "downstream" option */
++			return 1;
++		break;
++	case PCI_EXP_TYPE_ENDPOINT:
++	case PCI_EXP_TYPE_UPSTREAM:
++	case PCI_EXP_TYPE_LEG_END:
++	case PCI_EXP_TYPE_RC_END:
++		if (acs_on_multifunction && dev->multifunction)	/* "multifunction" option */
++			return 1;
++	}
++
++	return -ENOTTY;	/* no override applies to this device */
++}
+ /*
+  * Some NVIDIA GPU devices do not work with bus reset, SBR needs to be
+  * prevented for those affected devices.
+@@ -5168,6 +5268,7 @@ static const struct pci_dev_acs_enabled {
+ 	{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
+ 	/* Wangxun nics */
+ 	{ PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs },
++	{ PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides },
+ 	{ 0 }
+ };
+ 
+diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
+index aabec598f79a..7fe0981a7e46 100644
+--- a/include/linux/cpufreq.h
++++ b/include/linux/cpufreq.h
+@@ -577,12 +577,6 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div,
+ #define CPUFREQ_POLICY_POWERSAVE	(1)
+ #define CPUFREQ_POLICY_PERFORMANCE	(2)
+ 
+-/*
+- * The polling frequency depends on the capability of the processor. Default
+- * polling frequency is 1000 times the transition latency of the processor.
+- */
+-#define LATENCY_MULTIPLIER		(1000)
+-
+ struct cpufreq_governor {
+ 	char	name[CPUFREQ_NAME_LEN];
+ 	int	(*init)(struct cpufreq_policy *policy);
+diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
+index d9c7edb6422b..b57c72793580 100644
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -1264,7 +1264,7 @@ struct readahead_control {
+ 		._index = i,						\
+ 	}
+ 
+-#define VM_READAHEAD_PAGES	(SZ_128K / PAGE_SIZE)
++#define VM_READAHEAD_PAGES	(SZ_8M / PAGE_SIZE)
+ 
+ void page_cache_ra_unbounded(struct readahead_control *,
+ 		unsigned long nr_to_read, unsigned long lookahead_count);
+diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
+index 6030a8235617..60b7fe5fa74a 100644
+--- a/include/linux/user_namespace.h
++++ b/include/linux/user_namespace.h
+@@ -156,6 +156,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns,
+ 
+ #ifdef CONFIG_USER_NS
+ 
++extern int unprivileged_userns_clone;
++
+ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
+ {
+ 	if (ns)
+@@ -189,6 +191,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns);
+ struct ns_common *ns_get_owner(struct ns_common *ns);
+ #else
+ 
++#define unprivileged_userns_clone 0
++
+ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
+ {
+ 	return &init_user_ns;
+diff --git a/include/linux/wait.h b/include/linux/wait.h
+index 8aa3372f21a0..924778a426ce 100644
+--- a/include/linux/wait.h
++++ b/include/linux/wait.h
+@@ -163,6 +163,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head)
+ 
+ extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+ extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
++extern void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+ extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+ extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+ 
+@@ -1191,6 +1192,7 @@ do {										\
+  */
+ void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+ bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
++void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+ long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+ void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+ long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout);
+diff --git a/init/Kconfig b/init/Kconfig
+index 5783a0b87517..08a0d51afaae 100644
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -134,6 +134,10 @@ config THREAD_INFO_IN_TASK
+ 
+ menu "General setup"
+ 
++config CACHY
++	bool "Some kernel tweaks by CachyOS"
++	default y
++
+ config BROKEN
+ 	bool
+ 
+@@ -1265,6 +1269,22 @@ config USER_NS
+ 
+ 	  If unsure, say N.
+ 
++config USER_NS_UNPRIVILEGED
++	bool "Allow unprivileged users to create namespaces"
++	default y
++	depends on USER_NS
++	help
++	  When disabled, unprivileged users will not be able to create
++	  new namespaces. Allowing users to create their own namespaces
++	  has been part of several recent local privilege escalation
++	  exploits, so if you need user namespaces but are
++	  paranoid^Wsecurity-conscious you want to disable this.
++
++	  This setting can be overridden at runtime via the
++	  kernel.unprivileged_userns_clone sysctl.
++
++	  If unsure, say Y.
++
+ config PID_NS
+ 	bool "PID Namespaces"
+ 	default y
+@@ -1407,6 +1427,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE
+ 	  with the "-O2" compiler flag for best performance and most
+ 	  helpful compile-time warnings.
+ 
++config CC_OPTIMIZE_FOR_PERFORMANCE_O3
++	bool "Optimize more for performance (-O3)"
++	help
++	  Choosing this option will pass "-O3" to your compiler to optimize
++	  the kernel yet more for performance.
++
+ config CC_OPTIMIZE_FOR_SIZE
+ 	bool "Optimize for size (-Os)"
+ 	help
+diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
+index 38ef6d06888e..0f78364efd4f 100644
+--- a/kernel/Kconfig.hz
++++ b/kernel/Kconfig.hz
+@@ -40,6 +40,27 @@ choice
+ 	 on SMP and NUMA systems and exactly dividing by both PAL and
+ 	 NTSC frame rates for video and multimedia work.
+ 
++	config HZ_500
++		bool "500 HZ"
++	help
++	 500 Hz is a balanced timer frequency. Provides fast interactivity
++	 on desktops with good smoothness without increasing CPU power
++	 consumption and sacrificing the battery life on laptops.
++
++	config HZ_600
++		bool "600 HZ"
++	help
++	 600 Hz is a balanced timer frequency. Provides fast interactivity
++	 on desktops with good smoothness without increasing CPU power
++	 consumption and sacrificing the battery life on laptops.
++
++	config HZ_750
++		bool "750 HZ"
++	help
++	 750 Hz is a balanced timer frequency. Provides fast interactivity
++	 on desktops with good smoothness without increasing CPU power
++	 consumption and sacrificing the battery life on laptops.
++
+ 	config HZ_1000
+ 		bool "1000 HZ"
+ 	help
+@@ -53,6 +74,9 @@ config HZ
+ 	default 100 if HZ_100
+ 	default 250 if HZ_250
+ 	default 300 if HZ_300
++	default 500 if HZ_500
++	default 600 if HZ_600
++	default 750 if HZ_750
+ 	default 1000 if HZ_1000
+ 
+ config SCHED_HRTICK
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 6b97fb2ac4af..003de4829c15 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -105,6 +105,10 @@
+ #include <uapi/linux/pidfd.h>
+ #include <linux/pidfs.h>
+ 
++#ifdef CONFIG_USER_NS
++#include <linux/user_namespace.h>
++#endif
++
+ #include <asm/pgalloc.h>
+ #include <linux/uaccess.h>
+ #include <asm/mmu_context.h>
+@@ -2135,6 +2139,10 @@ __latent_entropy struct task_struct *copy_process(
+ 	if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
+ 		return ERR_PTR(-EINVAL);
+ 
++	if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
++		if (!capable(CAP_SYS_ADMIN))
++			return ERR_PTR(-EPERM);
++
+ 	/*
+ 	 * Thread groups must share signals as well, and detached threads
+ 	 * can only be started up within the thread group.
+@@ -3283,6 +3291,12 @@ int ksys_unshare(unsigned long unshare_flags)
+ 	if (unshare_flags & CLONE_NEWNS)
+ 		unshare_flags |= CLONE_FS;
+ 
++	if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
++		err = -EPERM;
++		if (!capable(CAP_SYS_ADMIN))
++			goto bad_unshare_out;
++	}
++
+ 	err = check_unshare_flags(unshare_flags);
+ 	if (err)
+ 		goto bad_unshare_out;
+diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
+index 33cac79e3994..3277df47ab3c 100644
+--- a/kernel/locking/rwsem.c
++++ b/kernel/locking/rwsem.c
+@@ -749,6 +749,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
+ 	struct task_struct *new, *owner;
+ 	unsigned long flags, new_flags;
+ 	enum owner_state state;
++	int i = 0;
+ 
+ 	lockdep_assert_preemption_disabled();
+ 
+@@ -785,7 +786,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
+ 			break;
+ 		}
+ 
+-		cpu_relax();
++		if (i++ > 1000)
++			cpu_relax();
+ 	}
+ 
+ 	return state;
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 1d2cbdb162a6..91b242e47db7 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -73,10 +73,19 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
+  *
+  * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
+  */
++#ifdef CONFIG_CACHY
++unsigned int sysctl_sched_base_slice			= 350000ULL;
++static unsigned int normalized_sysctl_sched_base_slice	= 350000ULL;
++#else
+ unsigned int sysctl_sched_base_slice			= 750000ULL;
+ static unsigned int normalized_sysctl_sched_base_slice	= 750000ULL;
++#endif
+ 
++#ifdef CONFIG_CACHY
++const_debug unsigned int sysctl_sched_migration_cost	= 300000UL;
++#else
+ const_debug unsigned int sysctl_sched_migration_cost	= 500000UL;
++#endif
+ 
+ static int __init setup_sched_thermal_decay_shift(char *str)
+ {
+@@ -121,8 +130,12 @@ int __weak arch_asym_cpu_priority(int cpu)
+  *
+  * (default: 5 msec, units: microseconds)
+  */
++#ifdef CONFIG_CACHY
++static unsigned int sysctl_sched_cfs_bandwidth_slice		= 3000UL;
++#else
+ static unsigned int sysctl_sched_cfs_bandwidth_slice		= 5000UL;
+ #endif
++#endif
+ 
+ #ifdef CONFIG_NUMA_BALANCING
+ /* Restrict the NUMA promotion throughput (MB/s) for each target node. */
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 4c36cc680361..432b43aa091c 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -2591,7 +2591,7 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
+ 
+ extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags);
+ 
+-#ifdef CONFIG_PREEMPT_RT
++#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_CACHY)
+ # define SCHED_NR_MIGRATE_BREAK 8
+ #else
+ # define SCHED_NR_MIGRATE_BREAK 32
+diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
+index 51e38f5f4701..c5cc616484ba 100644
+--- a/kernel/sched/wait.c
++++ b/kernel/sched/wait.c
+@@ -47,6 +47,17 @@ void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_
+ }
+ EXPORT_SYMBOL_GPL(add_wait_queue_priority);
+ 
++void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
++{
++	unsigned long flags;
++
++	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
++	spin_lock_irqsave(&wq_head->lock, flags);
++	__add_wait_queue(wq_head, wq_entry);	/* NOTE(review): presumably head insertion, hence "lifo" - confirm */
++	spin_unlock_irqrestore(&wq_head->lock, flags);
++}
++EXPORT_SYMBOL(add_wait_queue_exclusive_lifo);
++
+ void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
+ {
+ 	unsigned long flags;
+@@ -258,6 +269,19 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent
+ }
+ EXPORT_SYMBOL(prepare_to_wait_exclusive);
+ 
++void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
++{
++	unsigned long flags;
++
++	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
++	spin_lock_irqsave(&wq_head->lock, flags);
++	if (list_empty(&wq_entry->entry))	/* only queue the entry if it is not already linked */
++		__add_wait_queue(wq_head, wq_entry);
++	set_current_state(state);	/* task state is set while wq_head->lock is held */
++	spin_unlock_irqrestore(&wq_head->lock, flags);
++}
++EXPORT_SYMBOL(prepare_to_wait_exclusive_lifo);
++
+ void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
+ {
+ 	wq_entry->flags = flags;
+diff --git a/kernel/sysctl.c b/kernel/sysctl.c
+index 79e6cb1d5c48..676e89dc38c3 100644
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -80,6 +80,9 @@
+ #ifdef CONFIG_RT_MUTEXES
+ #include <linux/rtmutex.h>
+ #endif
++#ifdef CONFIG_USER_NS
++#include <linux/user_namespace.h>
++#endif
+ 
+ /* shared constants to be used in various sysctls */
+ const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
+@@ -1618,6 +1621,15 @@ static struct ctl_table kern_table[] = {
+ 		.mode		= 0644,
+ 		.proc_handler	= proc_dointvec,
+ 	},
++#ifdef CONFIG_USER_NS
++	{
++		.procname	= "unprivileged_userns_clone",
++		.data		= &unprivileged_userns_clone,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= proc_dointvec,
++	},
++#endif
+ #ifdef CONFIG_PROC_SYSCTL
+ 	{
+ 		.procname	= "tainted",
+diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
+index 0b0b95418b16..c4b835b91fc0 100644
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -22,6 +22,13 @@
+ #include <linux/bsearch.h>
+ #include <linux/sort.h>
+ 
++/* sysctl */
++#ifdef CONFIG_USER_NS_UNPRIVILEGED
++int unprivileged_userns_clone = 1;
++#else
++int unprivileged_userns_clone;
++#endif
++
+ static struct kmem_cache *user_ns_cachep __ro_after_init;
+ static DEFINE_MUTEX(userns_state_mutex);
+ 
+diff --git a/mm/Kconfig b/mm/Kconfig
+index 03395624bc70..676ff8d1266b 100644
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -649,7 +649,7 @@ config COMPACTION
+ config COMPACT_UNEVICTABLE_DEFAULT
+ 	int
+ 	depends on COMPACTION
+-	default 0 if PREEMPT_RT
++	default 0 if PREEMPT_RT || CACHY
+ 	default 1
+ 
+ #
+diff --git a/mm/compaction.c b/mm/compaction.c
+index eb95e9b435d0..ae03cdc3e76e 100644
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -1950,7 +1950,11 @@ static int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNE
+  * aggressively the kernel should compact memory in the
+  * background. It takes values in the range [0, 100].
+  */
++#ifdef CONFIG_CACHY
++static unsigned int __read_mostly sysctl_compaction_proactiveness;
++#else
+ static unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
++#endif
+ static int sysctl_extfrag_threshold = 500;
+ static int __read_mostly sysctl_compact_memory;
+ 
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 99b146d16a18..4d2839fcf688 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -64,7 +64,11 @@ unsigned long transparent_hugepage_flags __read_mostly =
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
+ 	(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
+ #endif
++#ifdef CONFIG_CACHY
++	(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG)|
++#else
+ 	(1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG)|
++#endif
+ 	(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
+ 	(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
+ 
+diff --git a/mm/page-writeback.c b/mm/page-writeback.c
+index 4430ac68e4c4..3bd08b60a9b3 100644
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -71,7 +71,11 @@ static long ratelimit_pages = 32;
+ /*
+  * Start background writeback (via writeback threads) at this percentage
+  */
++#ifdef CONFIG_CACHY
++static int dirty_background_ratio = 5;
++#else
+ static int dirty_background_ratio = 10;
++#endif
+ 
+ /*
+  * dirty_background_bytes starts at 0 (disabled) so that it is a function of
+@@ -99,7 +103,11 @@ static unsigned long vm_dirty_bytes;
+ /*
+  * The interval between `kupdate'-style writebacks
+  */
++#ifdef CONFIG_CACHY
++unsigned int dirty_writeback_interval = 10 * 100; /* centiseconds */
++#else
+ unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */
++#endif
+ 
+ EXPORT_SYMBOL_GPL(dirty_writeback_interval);
+ 
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 91ace8ca97e2..f8b4dae35fc3 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -271,7 +271,11 @@ const char * const migratetype_names[MIGRATE_TYPES] = {
+ 
+ int min_free_kbytes = 1024;
+ int user_min_free_kbytes = -1;
++#ifdef CONFIG_CACHY
++static int watermark_boost_factor __read_mostly;
++#else
+ static int watermark_boost_factor __read_mostly = 15000;
++#endif
+ static int watermark_scale_factor = 10;
+ 
+ /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
+diff --git a/mm/swap.c b/mm/swap.c
+index 9caf6b017cf0..00c1307f3874 100644
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -1126,6 +1126,10 @@ void folio_batch_remove_exceptionals(struct folio_batch *fbatch)
+  */
+ void __init swap_setup(void)
+ {
++#ifdef CONFIG_CACHY
++	/* Only swap-in pages requested, avoid readahead */
++	page_cluster = 0;
++#else
+ 	unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT);
+ 
+ 	/* Use a smaller cluster for small-memory machines */
+@@ -1137,4 +1141,5 @@ void __init swap_setup(void)
+ 	 * Right now other parts of the system means that we
+ 	 * _really_ don't want to cluster much more
+ 	 */
++#endif
+ }
+diff --git a/mm/vmpressure.c b/mm/vmpressure.c
+index bd5183dfd879..3a410f53a07c 100644
+--- a/mm/vmpressure.c
++++ b/mm/vmpressure.c
+@@ -43,7 +43,11 @@ static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16;
+  * essence, they are percents: the higher the value, the more number
+  * unsuccessful reclaims there were.
+  */
++#ifdef CONFIG_CACHY
++static const unsigned int vmpressure_level_med = 65;
++#else
+ static const unsigned int vmpressure_level_med = 60;
++#endif
+ static const unsigned int vmpressure_level_critical = 95;
+ 
+ /*
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 128f307da6ee..35b67785907b 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -199,7 +199,11 @@ struct scan_control {
+ /*
+  * From 0 .. MAX_SWAPPINESS.  Higher means more swappy.
+  */
++#ifdef CONFIG_CACHY
++int vm_swappiness = 20;
++#else
+ int vm_swappiness = 60;
++#endif
+ 
+ #ifdef CONFIG_MEMCG
+ 
+@@ -3968,7 +3972,11 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc
+ }
+ 
+ /* to protect the working set of the last N jiffies */
++#ifdef CONFIG_CACHY
++static unsigned long lru_gen_min_ttl __read_mostly = 1000;
++#else
+ static unsigned long lru_gen_min_ttl __read_mostly;
++#endif
+ 
+ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
+ {
+diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
+index 64d07b842e73..a0ac138e7bf8 100644
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -634,7 +634,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
+ 	 * having to remove and re-insert us on the wait queue.
+ 	 */
+ 	for (;;) {
+-		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
++		prepare_to_wait_exclusive_lifo(sk_sleep(sk), &wait,
+ 					  TASK_INTERRUPTIBLE);
+ 		release_sock(sk);
+ 		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
+diff --git a/scripts/Makefile.package b/scripts/Makefile.package
+index 4a80584ec771..11d53f240a2b 100644
+--- a/scripts/Makefile.package
++++ b/scripts/Makefile.package
+@@ -147,8 +147,7 @@ snap-pkg:
+ PHONY += pacman-pkg
+ pacman-pkg:
+ 	@ln -srf $(srctree)/scripts/package/PKGBUILD $(objtree)/PKGBUILD
+-	+objtree="$(realpath $(objtree))" \
+-		BUILDDIR="$(realpath $(objtree))/pacman" \
++	+BUILDDIR="$(realpath $(objtree))/pacman" \
+ 		CARCH="$(UTS_MACHINE)" \
+ 		KBUILD_MAKEFLAGS="$(MAKEFLAGS)" \
+ 		KBUILD_REVISION="$(shell $(srctree)/scripts/build-version)" \
+diff --git a/scripts/package/PKGBUILD b/scripts/package/PKGBUILD
+index 663ce300dd06..f83493838cf9 100644
+--- a/scripts/package/PKGBUILD
++++ b/scripts/package/PKGBUILD
+@@ -3,10 +3,13 @@
+ # Contributor: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
+ 
+ pkgbase=${PACMAN_PKGBASE:-linux-upstream}
+-pkgname=("${pkgbase}" "${pkgbase}-api-headers")
+-if grep -q CONFIG_MODULES=y include/config/auto.conf; then
+-	pkgname+=("${pkgbase}-headers")
+-fi
++pkgname=("${pkgbase}")
++
++_extrapackages=${PACMAN_EXTRAPACKAGES-headers api-headers debug}
++for pkg in $_extrapackages; do
++	pkgname+=("${pkgbase}-${pkg}")
++done
++
+ pkgver="${KERNELRELEASE//-/_}"
+ # The PKGBUILD is evaluated multiple times.
+ # Running scripts/build-version from here would introduce inconsistencies.
+@@ -33,11 +36,17 @@ makedepends=(
+ )
+ options=(!debug !strip !buildflags !makeflags)
+ 
+-build() {
++_prologue() {
+ 	# MAKEFLAGS from makepkg.conf override the ones inherited from kbuild.
+ 	# Bypass this override with a custom variable.
+ 	export MAKEFLAGS="${KBUILD_MAKEFLAGS}"
+-	cd "${objtree}"
++
++	# Kbuild works in the output directory, where this PKGBUILD is located.
++	cd "$(dirname "${BASH_SOURCE[0]}")"
++}
++
++build() {
++	_prologue
+ 
+ 	${MAKE} KERNELRELEASE="${KERNELRELEASE}" KBUILD_BUILD_VERSION="${pkgrel}"
+ }
+@@ -45,10 +54,10 @@ build() {
+ _package() {
+ 	pkgdesc="The ${pkgdesc} kernel and modules"
+ 
+-	export MAKEFLAGS="${KBUILD_MAKEFLAGS}"
+-	cd "${objtree}"
+ 	local modulesdir="${pkgdir}/usr/${MODLIB}"
+ 
++	_prologue
++
+ 	echo "Installing boot image..."
+ 	# systemd expects to find the kernel here to allow hibernation
+ 	# https://github.com/systemd/systemd/commit/edda44605f06a41fb86b7ab8128dcf99161d2344
+@@ -73,14 +82,17 @@ _package() {
+ _package-headers() {
+ 	pkgdesc="Headers and scripts for building modules for the ${pkgdesc} kernel"
+ 
+-	export MAKEFLAGS="${KBUILD_MAKEFLAGS}"
+-	cd "${objtree}"
+ 	local builddir="${pkgdir}/usr/${MODLIB}/build"
+ 
+-	echo "Installing build files..."
+-	"${srctree}/scripts/package/install-extmod-build" "${builddir}"
++	_prologue
++
++	if grep -q CONFIG_MODULES=y include/config/auto.conf; then
++		echo "Installing build files..."
++		"${srctree}/scripts/package/install-extmod-build" "${builddir}"
++	fi
+ 
+ 	echo "Installing System.map and config..."
++	mkdir -p "${builddir}"
+ 	cp System.map "${builddir}/System.map"
+ 	cp .config "${builddir}/.config"
+ 
+@@ -94,12 +106,24 @@ _package-api-headers() {
+ 	provides=(linux-api-headers)
+ 	conflicts=(linux-api-headers)
+ 
+-	export MAKEFLAGS="${KBUILD_MAKEFLAGS}"
+-	cd "${objtree}"
++	_prologue
+ 
+ 	${MAKE} headers_install INSTALL_HDR_PATH="${pkgdir}/usr"
+ }
+ 
++_package-debug(){
++	pkgdesc="Non-stripped vmlinux file for the ${pkgdesc} kernel"
++
++	local debugdir="${pkgdir}/usr/src/debug/${pkgbase}"
++	local builddir="${pkgdir}/usr/${MODLIB}/build"
++
++	_prologue  # export kbuild MAKEFLAGS and cd to the directory holding this PKGBUILD
++
++	install -Dt "${debugdir}" -m644 vmlinux  # copy vmlinux into the debug directory
++	mkdir -p "${builddir}"
++	ln -sr "${debugdir}/vmlinux" "${builddir}/vmlinux"  # relative symlink from the build dir
++}
++
+ for _p in "${pkgname[@]}"; do
+ 	eval "package_$_p() {
+ 		$(declare -f "_package${_p#$pkgbase}")
+-- 
+2.47.0.rc0
+
+From 6c98e17d041435fde0a3c49fce29a562935c8cb6 Mon Sep 17 00:00:00 2001
+From: Eric Naim <dnaim@cachyos.org>
+Date: Tue, 22 Oct 2024 22:52:25 +0800
+Subject: [PATCH 06/13] fixes
+
+Signed-off-by: Eric Naim <dnaim@cachyos.org>
+---
+ arch/Kconfig                                  |  4 +-
+ drivers/bluetooth/btusb.c                     |  4 ++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |  5 ++
+ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     | 10 +++-
+ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  3 +-
+ drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h  |  2 +-
+ drivers/gpu/drm/amd/pm/swsmu/inc/smu_v12_0.h  |  2 +-
+ drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h  |  2 +-
+ drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h  |  2 +-
+ .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   |  2 +-
+ .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   |  2 +-
+ .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c    | 25 +++++++---
+ .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 19 ++++----
+ .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c   | 14 +++---
+ .../gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c    |  2 +-
+ .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c    | 15 +++---
+ .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c    | 34 +++++++++----
+ .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  |  3 +-
+ .../drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c  | 22 +++++----
+ .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 15 +++---
+ .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  |  3 +-
+ .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c  | 36 ++++++++------
+ .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c    | 33 +++++++++----
+ .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c  |  3 +-
+ drivers/gpu/drm/drm_edid.c                    | 47 ++++++++++++++++--
+ drivers/net/wireless/realtek/rtw89/pci.c      | 48 ++++++++++++++++---
+ drivers/platform/x86/dell/dell-wmi-base.c     |  9 ++++
+ mm/mmap.c                                     |  4 --
+ mm/shrinker.c                                 |  4 +-
+ net/netfilter/xt_NFLOG.c                      |  2 +-
+ net/netfilter/xt_TRACE.c                      |  1 +
+ net/netfilter/xt_mark.c                       |  2 +-
+ 32 files changed, 269 insertions(+), 110 deletions(-)
+
+diff --git a/arch/Kconfig b/arch/Kconfig
+index 975dd22a2dbd..de69b8f5b5be 100644
+--- a/arch/Kconfig
++++ b/arch/Kconfig
+@@ -1050,7 +1050,7 @@ config ARCH_MMAP_RND_BITS
+ 	int "Number of bits to use for ASLR of mmap base address" if EXPERT
+ 	range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX
+ 	default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT
+-	default ARCH_MMAP_RND_BITS_MIN
++	default ARCH_MMAP_RND_BITS_MAX
+ 	depends on HAVE_ARCH_MMAP_RND_BITS
+ 	help
+ 	  This value can be used to select the number of bits to use to
+@@ -1084,7 +1084,7 @@ config ARCH_MMAP_RND_COMPAT_BITS
+ 	int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT
+ 	range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX
+ 	default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT
+-	default ARCH_MMAP_RND_COMPAT_BITS_MIN
++	default ARCH_MMAP_RND_COMPAT_BITS_MAX
+ 	depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS
+ 	help
+ 	  This value can be used to select the number of bits to use to
+diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
+index 2408e50743ca..73c54e92afa9 100644
+--- a/drivers/bluetooth/btusb.c
++++ b/drivers/bluetooth/btusb.c
+@@ -692,6 +692,10 @@ static const struct usb_device_id quirks_table[] = {
+ 	{ USB_DEVICE(0x0489, 0xe113), .driver_info = BTUSB_MEDIATEK |
+ 						     BTUSB_WIDEBAND_SPEECH |
+ 						     BTUSB_VALID_LE_STATES },
++	{ USB_DEVICE(0x0489, 0xe118), .driver_info = BTUSB_MEDIATEK |
++						     BTUSB_WIDEBAND_SPEECH },
++	{ USB_DEVICE(0x0489, 0xe11e), .driver_info = BTUSB_MEDIATEK |
++						     BTUSB_WIDEBAND_SPEECH },
+ 	{ USB_DEVICE(0x13d3, 0x3602), .driver_info = BTUSB_MEDIATEK |
+ 						     BTUSB_WIDEBAND_SPEECH |
+ 						     BTUSB_VALID_LE_STATES },
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+index 9c3b7b027485..ad5c05ee92f3 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+@@ -3056,6 +3056,11 @@ static int __init amdgpu_init(void)
+ 	/* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
+ 	amdgpu_amdkfd_init();
+ 
++	if (amdgpu_pp_feature_mask & PP_OVERDRIVE_MASK) {
++		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
++		pr_crit("Overdrive is enabled, please disable it before reporting any bugs.\n");
++	}
++
+ 	/* let modprobe override vga console setting */
+ 	return pci_register_driver(&amdgpu_kms_pci_driver);
+ 
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+index 21442469791c..18eaab929540 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+@@ -140,7 +140,8 @@ int smu_set_soft_freq_range(struct smu_context *smu,
+ 		ret = smu->ppt_funcs->set_soft_freq_limited_range(smu,
+ 								  clk_type,
+ 								  min,
+-								  max);
++								  max,
++								  false);
+ 
+ 	return ret;
+ }
+@@ -1257,7 +1258,6 @@ static int smu_sw_init(void *handle)
+ 	atomic_set(&smu->smu_power.power_gate.vpe_gated, 1);
+ 	atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1);
+ 
+-	smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT];
+ 	smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0;
+ 	smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1;
+ 	smu->workload_prority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2;
+@@ -1266,6 +1266,12 @@ static int smu_sw_init(void *handle)
+ 	smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5;
+ 	smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6;
+ 
++	if (smu->is_apu ||
++	    (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)))
++		smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT];
++	else
++		smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D];
++
+ 	smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+ 	smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+ 	smu->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING;
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+index b44a185d07e8..5eb4e5c75981 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
++++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+@@ -1260,7 +1260,8 @@ struct pptable_funcs {
+ 	 * @set_soft_freq_limited_range: Set the soft frequency range of a clock
+ 	 *                               domain in MHz.
+ 	 */
+-	int (*set_soft_freq_limited_range)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max);
++	int (*set_soft_freq_limited_range)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max,
++					   bool automatic);
+ 
+ 	/**
+ 	 * @set_power_source: Notify the SMU of the current power source.
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h
+index c2ab336bb530..ed8304d82831 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h
++++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h
+@@ -255,7 +255,7 @@ int smu_v11_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type c
+ 						 uint32_t *min, uint32_t *max);
+ 
+ int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type,
+-			    uint32_t min, uint32_t max);
++					  uint32_t min, uint32_t max, bool automatic);
+ 
+ int smu_v11_0_set_hard_freq_limited_range(struct smu_context *smu,
+ 					  enum smu_clk_type clk_type,
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v12_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v12_0.h
+index 1ad2dff71090..0886d8cffbd0 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v12_0.h
++++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v12_0.h
+@@ -56,7 +56,7 @@ int smu_v12_0_set_default_dpm_tables(struct smu_context *smu);
+ int smu_v12_0_mode2_reset(struct smu_context *smu);
+ 
+ int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type,
+-			    uint32_t min, uint32_t max);
++					  uint32_t min, uint32_t max, bool automatic);
+ 
+ int smu_v12_0_set_driver_table_location(struct smu_context *smu);
+ 
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+index e58220a7ee2f..044d6893b43e 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
++++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+@@ -219,7 +219,7 @@ int smu_v13_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type c
+ 				    uint32_t *min, uint32_t *max);
+ 
+ int smu_v13_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type,
+-					  uint32_t min, uint32_t max);
++					  uint32_t min, uint32_t max, bool automatic);
+ 
+ int smu_v13_0_set_hard_freq_limited_range(struct smu_context *smu,
+ 					  enum smu_clk_type clk_type,
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
+index 46b456590a08..6cada19a8482 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
++++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
+@@ -186,7 +186,7 @@ int smu_v14_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type c
+ 				    uint32_t *min, uint32_t *max);
+ 
+ int smu_v14_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type,
+-					  uint32_t min, uint32_t max);
++					  uint32_t min, uint32_t max, bool automatic);
+ 
+ int smu_v14_0_set_hard_freq_limited_range(struct smu_context *smu,
+ 					  enum smu_clk_type clk_type,
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+index 076620fa3ef5..306a07b366a8 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+@@ -1689,7 +1689,7 @@ static int navi10_force_clk_levels(struct smu_context *smu,
+ 		if (ret)
+ 			return 0;
+ 
+-		ret = smu_v11_0_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq);
++		ret = smu_v11_0_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq, false);
+ 		if (ret)
+ 			return 0;
+ 		break;
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+index 0d3e1a121b67..cbd5fcbb5547 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+@@ -1469,7 +1469,7 @@ static int sienna_cichlid_force_clk_levels(struct smu_context *smu,
+ 		if (ret)
+ 			goto forec_level_out;
+ 
+-		ret = smu_v11_0_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq);
++		ret = smu_v11_0_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq, false);
+ 		if (ret)
+ 			goto forec_level_out;
+ 		break;
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+index 16fcd9dcd202..16e7959879d4 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+@@ -1763,7 +1763,8 @@ int smu_v11_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type c
+ int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu,
+ 					  enum smu_clk_type clk_type,
+ 					  uint32_t min,
+-					  uint32_t max)
++					  uint32_t max,
++					  bool automatic)
+ {
+ 	int ret = 0, clk_id = 0;
+ 	uint32_t param;
+@@ -1778,7 +1779,10 @@ int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu,
+ 		return clk_id;
+ 
+ 	if (max > 0) {
+-		param = (uint32_t)((clk_id << 16) | (max & 0xffff));
++		if (automatic)
++			param = (uint32_t)((clk_id << 16) | 0xffff);
++		else
++			param = (uint32_t)((clk_id << 16) | (max & 0xffff));
+ 		ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxByFreq,
+ 						  param, NULL);
+ 		if (ret)
+@@ -1786,7 +1790,10 @@ int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu,
+ 	}
+ 
+ 	if (min > 0) {
+-		param = (uint32_t)((clk_id << 16) | (min & 0xffff));
++		if (automatic)
++			param = (uint32_t)((clk_id << 16) | 0);
++		else
++			param = (uint32_t)((clk_id << 16) | (min & 0xffff));
+ 		ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMinByFreq,
+ 						  param, NULL);
+ 		if (ret)
+@@ -1854,6 +1861,7 @@ int smu_v11_0_set_performance_level(struct smu_context *smu,
+ 	uint32_t mclk_min = 0, mclk_max = 0;
+ 	uint32_t socclk_min = 0, socclk_max = 0;
+ 	int ret = 0;
++	bool auto_level = false;
+ 
+ 	switch (level) {
+ 	case AMD_DPM_FORCED_LEVEL_HIGH:
+@@ -1873,6 +1881,7 @@ int smu_v11_0_set_performance_level(struct smu_context *smu,
+ 		mclk_max = mem_table->max;
+ 		socclk_min = soc_table->min;
+ 		socclk_max = soc_table->max;
++		auto_level = true;
+ 		break;
+ 	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
+ 		sclk_min = sclk_max = pstate_table->gfxclk_pstate.standard;
+@@ -1905,13 +1914,15 @@ int smu_v11_0_set_performance_level(struct smu_context *smu,
+ 	if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(11, 0, 2)) {
+ 		mclk_min = mclk_max = 0;
+ 		socclk_min = socclk_max = 0;
++		auto_level = false;
+ 	}
+ 
+ 	if (sclk_min && sclk_max) {
+ 		ret = smu_v11_0_set_soft_freq_limited_range(smu,
+ 							    SMU_GFXCLK,
+ 							    sclk_min,
+-							    sclk_max);
++							    sclk_max,
++							    auto_level);
+ 		if (ret)
+ 			return ret;
+ 	}
+@@ -1920,7 +1931,8 @@ int smu_v11_0_set_performance_level(struct smu_context *smu,
+ 		ret = smu_v11_0_set_soft_freq_limited_range(smu,
+ 							    SMU_MCLK,
+ 							    mclk_min,
+-							    mclk_max);
++							    mclk_max,
++							    auto_level);
+ 		if (ret)
+ 			return ret;
+ 	}
+@@ -1929,7 +1941,8 @@ int smu_v11_0_set_performance_level(struct smu_context *smu,
+ 		ret = smu_v11_0_set_soft_freq_limited_range(smu,
+ 							    SMU_SOCCLK,
+ 							    socclk_min,
+-							    socclk_max);
++							    socclk_max,
++							    auto_level);
+ 		if (ret)
+ 			return ret;
+ 	}
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+index 22737b11b1bf..a333ab827f48 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+@@ -1091,9 +1091,10 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input,
+ }
+ 
+ static int vangogh_set_soft_freq_limited_range(struct smu_context *smu,
+-					  enum smu_clk_type clk_type,
+-					  uint32_t min,
+-					  uint32_t max)
++					       enum smu_clk_type clk_type,
++					       uint32_t min,
++					       uint32_t max,
++					       bool automatic)
+ {
+ 	int ret = 0;
+ 
+@@ -1299,7 +1300,7 @@ static int vangogh_force_dpm_limit_value(struct smu_context *smu, bool highest)
+ 			return ret;
+ 
+ 		force_freq = highest ? max_freq : min_freq;
+-		ret = vangogh_set_soft_freq_limited_range(smu, clk_type, force_freq, force_freq);
++		ret = vangogh_set_soft_freq_limited_range(smu, clk_type, force_freq, force_freq, false);
+ 		if (ret)
+ 			return ret;
+ 	}
+@@ -1335,7 +1336,7 @@ static int vangogh_unforce_dpm_levels(struct smu_context *smu)
+ 		if (ret)
+ 			return ret;
+ 
+-		ret = vangogh_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq);
++		ret = vangogh_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq, false);
+ 
+ 		if (ret)
+ 			return ret;
+@@ -1354,7 +1355,7 @@ static int vangogh_set_peak_clock_by_device(struct smu_context *smu)
+ 	if (ret)
+ 		return ret;
+ 
+-	ret = vangogh_set_soft_freq_limited_range(smu, SMU_FCLK, fclk_freq, fclk_freq);
++	ret = vangogh_set_soft_freq_limited_range(smu, SMU_FCLK, fclk_freq, fclk_freq, false);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -1362,7 +1363,7 @@ static int vangogh_set_peak_clock_by_device(struct smu_context *smu)
+ 	if (ret)
+ 		return ret;
+ 
+-	ret = vangogh_set_soft_freq_limited_range(smu, SMU_SOCCLK, socclk_freq, socclk_freq);
++	ret = vangogh_set_soft_freq_limited_range(smu, SMU_SOCCLK, socclk_freq, socclk_freq, false);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -1370,7 +1371,7 @@ static int vangogh_set_peak_clock_by_device(struct smu_context *smu)
+ 	if (ret)
+ 		return ret;
+ 
+-	ret = vangogh_set_soft_freq_limited_range(smu, SMU_VCLK, vclk_freq, vclk_freq);
++	ret = vangogh_set_soft_freq_limited_range(smu, SMU_VCLK, vclk_freq, vclk_freq, false);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -1378,7 +1379,7 @@ static int vangogh_set_peak_clock_by_device(struct smu_context *smu)
+ 	if (ret)
+ 		return ret;
+ 
+-	ret = vangogh_set_soft_freq_limited_range(smu, SMU_DCLK, dclk_freq, dclk_freq);
++	ret = vangogh_set_soft_freq_limited_range(smu, SMU_DCLK, dclk_freq, dclk_freq, false);
+ 	if (ret)
+ 		return ret;
+ 
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+index cc0504b063fa..0b210b1f2628 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+@@ -707,7 +707,7 @@ static int renoir_force_dpm_limit_value(struct smu_context *smu, bool highest)
+ 			return ret;
+ 
+ 		force_freq = highest ? max_freq : min_freq;
+-		ret = smu_v12_0_set_soft_freq_limited_range(smu, clk_type, force_freq, force_freq);
++		ret = smu_v12_0_set_soft_freq_limited_range(smu, clk_type, force_freq, force_freq, false);
+ 		if (ret)
+ 			return ret;
+ 	}
+@@ -740,7 +740,7 @@ static int renoir_unforce_dpm_levels(struct smu_context *smu) {
+ 		if (ret)
+ 			return ret;
+ 
+-		ret = smu_v12_0_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq);
++		ret = smu_v12_0_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq, false);
+ 		if (ret)
+ 			return ret;
+ 	}
+@@ -911,7 +911,7 @@ static int renoir_set_peak_clock_by_device(struct smu_context *smu)
+ 	if (ret)
+ 		return ret;
+ 
+-	ret = smu_v12_0_set_soft_freq_limited_range(smu, SMU_SCLK, sclk_freq, sclk_freq);
++	ret = smu_v12_0_set_soft_freq_limited_range(smu, SMU_SCLK, sclk_freq, sclk_freq, false);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -919,7 +919,7 @@ static int renoir_set_peak_clock_by_device(struct smu_context *smu)
+ 	if (ret)
+ 		return ret;
+ 
+-	ret = smu_v12_0_set_soft_freq_limited_range(smu, SMU_UCLK, uclk_freq, uclk_freq);
++	ret = smu_v12_0_set_soft_freq_limited_range(smu, SMU_UCLK, uclk_freq, uclk_freq, false);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -961,13 +961,13 @@ static int renior_set_dpm_profile_freq(struct smu_context *smu,
+ 	}
+ 
+ 	if (sclk)
+-		ret = smu_v12_0_set_soft_freq_limited_range(smu, SMU_SCLK, sclk, sclk);
++		ret = smu_v12_0_set_soft_freq_limited_range(smu, SMU_SCLK, sclk, sclk, false);
+ 
+ 	if (socclk)
+-		ret = smu_v12_0_set_soft_freq_limited_range(smu, SMU_SOCCLK, socclk, socclk);
++		ret = smu_v12_0_set_soft_freq_limited_range(smu, SMU_SOCCLK, socclk, socclk, false);
+ 
+ 	if (fclk)
+-		ret = smu_v12_0_set_soft_freq_limited_range(smu, SMU_FCLK, fclk, fclk);
++		ret = smu_v12_0_set_soft_freq_limited_range(smu, SMU_FCLK, fclk, fclk, false);
+ 
+ 	return ret;
+ }
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c
+index ed15f5a0fd11..3d3cd546f0ad 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c
+@@ -211,7 +211,7 @@ int smu_v12_0_mode2_reset(struct smu_context *smu)
+ }
+ 
+ int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type,
+-			    uint32_t min, uint32_t max)
++					  uint32_t min, uint32_t max, bool automatic)
+ {
+ 	int ret = 0;
+ 
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+index 2c35eb31475a..f6b029354327 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+@@ -1297,9 +1297,10 @@ static int aldebaran_set_performance_level(struct smu_context *smu,
+ }
+ 
+ static int aldebaran_set_soft_freq_limited_range(struct smu_context *smu,
+-					  enum smu_clk_type clk_type,
+-					  uint32_t min,
+-					  uint32_t max)
++						 enum smu_clk_type clk_type,
++						 uint32_t min,
++						 uint32_t max,
++						 bool automatic)
+ {
+ 	struct smu_dpm_context *smu_dpm = &(smu->smu_dpm);
+ 	struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context;
+@@ -1328,7 +1329,7 @@ static int aldebaran_set_soft_freq_limited_range(struct smu_context *smu,
+ 			return 0;
+ 
+ 		ret = smu_v13_0_set_soft_freq_limited_range(smu, SMU_GFXCLK,
+-							    min, max);
++							    min, max, false);
+ 		if (!ret) {
+ 			pstate_table->gfxclk_pstate.curr.min = min;
+ 			pstate_table->gfxclk_pstate.curr.max = max;
+@@ -1348,7 +1349,7 @@ static int aldebaran_set_soft_freq_limited_range(struct smu_context *smu,
+ 		/* Restore default min/max clocks and enable determinism */
+ 		min_clk = dpm_context->dpm_tables.gfx_table.min;
+ 		max_clk = dpm_context->dpm_tables.gfx_table.max;
+-		ret = smu_v13_0_set_soft_freq_limited_range(smu, SMU_GFXCLK, min_clk, max_clk);
++		ret = smu_v13_0_set_soft_freq_limited_range(smu, SMU_GFXCLK, min_clk, max_clk, false);
+ 		if (!ret) {
+ 			usleep_range(500, 1000);
+ 			ret = smu_cmn_send_smc_msg_with_param(smu,
+@@ -1422,7 +1423,7 @@ static int aldebaran_usr_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_
+ 			min_clk = dpm_context->dpm_tables.gfx_table.min;
+ 			max_clk = dpm_context->dpm_tables.gfx_table.max;
+ 
+-			return aldebaran_set_soft_freq_limited_range(smu, SMU_GFXCLK, min_clk, max_clk);
++			return aldebaran_set_soft_freq_limited_range(smu, SMU_GFXCLK, min_clk, max_clk, false);
+ 		}
+ 		break;
+ 	case PP_OD_COMMIT_DPM_TABLE:
+@@ -1441,7 +1442,7 @@ static int aldebaran_usr_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_
+ 			min_clk = pstate_table->gfxclk_pstate.custom.min;
+ 			max_clk = pstate_table->gfxclk_pstate.custom.max;
+ 
+-			return aldebaran_set_soft_freq_limited_range(smu, SMU_GFXCLK, min_clk, max_clk);
++			return aldebaran_set_soft_freq_limited_range(smu, SMU_GFXCLK, min_clk, max_clk, false);
+ 		}
+ 		break;
+ 	default:
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+index e17466cc1952..6cfd66363915 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+@@ -1608,7 +1608,8 @@ int smu_v13_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type c
+ int smu_v13_0_set_soft_freq_limited_range(struct smu_context *smu,
+ 					  enum smu_clk_type clk_type,
+ 					  uint32_t min,
+-					  uint32_t max)
++					  uint32_t max,
++					  bool automatic)
+ {
+ 	int ret = 0, clk_id = 0;
+ 	uint32_t param;
+@@ -1623,7 +1624,10 @@ int smu_v13_0_set_soft_freq_limited_range(struct smu_context *smu,
+ 		return clk_id;
+ 
+ 	if (max > 0) {
+-		param = (uint32_t)((clk_id << 16) | (max & 0xffff));
++		if (automatic)
++			param = (uint32_t)((clk_id << 16) | 0xffff);
++		else
++			param = (uint32_t)((clk_id << 16) | (max & 0xffff));
+ 		ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxByFreq,
+ 						      param, NULL);
+ 		if (ret)
+@@ -1631,7 +1635,10 @@ int smu_v13_0_set_soft_freq_limited_range(struct smu_context *smu,
+ 	}
+ 
+ 	if (min > 0) {
+-		param = (uint32_t)((clk_id << 16) | (min & 0xffff));
++		if (automatic)
++			param = (uint32_t)((clk_id << 16) | 0);
++		else
++			param = (uint32_t)((clk_id << 16) | (min & 0xffff));
+ 		ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMinByFreq,
+ 						      param, NULL);
+ 		if (ret)
+@@ -1708,6 +1715,7 @@ int smu_v13_0_set_performance_level(struct smu_context *smu,
+ 	uint32_t dclk_min = 0, dclk_max = 0;
+ 	uint32_t fclk_min = 0, fclk_max = 0;
+ 	int ret = 0, i;
++	bool auto_level = false;
+ 
+ 	switch (level) {
+ 	case AMD_DPM_FORCED_LEVEL_HIGH:
+@@ -1739,6 +1747,7 @@ int smu_v13_0_set_performance_level(struct smu_context *smu,
+ 		dclk_max = dclk_table->max;
+ 		fclk_min = fclk_table->min;
+ 		fclk_max = fclk_table->max;
++		auto_level = true;
+ 		break;
+ 	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
+ 		sclk_min = sclk_max = pstate_table->gfxclk_pstate.standard;
+@@ -1780,13 +1789,15 @@ int smu_v13_0_set_performance_level(struct smu_context *smu,
+ 		vclk_min = vclk_max = 0;
+ 		dclk_min = dclk_max = 0;
+ 		fclk_min = fclk_max = 0;
++		auto_level = false;
+ 	}
+ 
+ 	if (sclk_min && sclk_max) {
+ 		ret = smu_v13_0_set_soft_freq_limited_range(smu,
+ 							    SMU_GFXCLK,
+ 							    sclk_min,
+-							    sclk_max);
++							    sclk_max,
++							    auto_level);
+ 		if (ret)
+ 			return ret;
+ 
+@@ -1798,7 +1809,8 @@ int smu_v13_0_set_performance_level(struct smu_context *smu,
+ 		ret = smu_v13_0_set_soft_freq_limited_range(smu,
+ 							    SMU_MCLK,
+ 							    mclk_min,
+-							    mclk_max);
++							    mclk_max,
++							    auto_level);
+ 		if (ret)
+ 			return ret;
+ 
+@@ -1810,7 +1822,8 @@ int smu_v13_0_set_performance_level(struct smu_context *smu,
+ 		ret = smu_v13_0_set_soft_freq_limited_range(smu,
+ 							    SMU_SOCCLK,
+ 							    socclk_min,
+-							    socclk_max);
++							    socclk_max,
++							    auto_level);
+ 		if (ret)
+ 			return ret;
+ 
+@@ -1825,7 +1838,8 @@ int smu_v13_0_set_performance_level(struct smu_context *smu,
+ 			ret = smu_v13_0_set_soft_freq_limited_range(smu,
+ 								    i ? SMU_VCLK1 : SMU_VCLK,
+ 								    vclk_min,
+-								    vclk_max);
++								    vclk_max,
++								    auto_level);
+ 			if (ret)
+ 				return ret;
+ 		}
+@@ -1840,7 +1854,8 @@ int smu_v13_0_set_performance_level(struct smu_context *smu,
+ 			ret = smu_v13_0_set_soft_freq_limited_range(smu,
+ 								    i ? SMU_DCLK1 : SMU_DCLK,
+ 								    dclk_min,
+-								    dclk_max);
++								    dclk_max,
++								    auto_level);
+ 			if (ret)
+ 				return ret;
+ 		}
+@@ -1852,7 +1867,8 @@ int smu_v13_0_set_performance_level(struct smu_context *smu,
+ 		ret = smu_v13_0_set_soft_freq_limited_range(smu,
+ 							    SMU_FCLK,
+ 							    fclk_min,
+-							    fclk_max);
++							    fclk_max,
++							    auto_level);
+ 		if (ret)
+ 			return ret;
+ 
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+index cb923e33fd6f..f69fe75352de 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+@@ -1975,7 +1975,8 @@ static int smu_v13_0_0_force_clk_levels(struct smu_context *smu,
+ 		ret = smu_v13_0_set_soft_freq_limited_range(smu,
+ 							    clk_type,
+ 							    min_freq,
+-							    max_freq);
++							    max_freq,
++							    false);
+ 		break;
+ 	case SMU_DCEFCLK:
+ 	case SMU_PCIE:
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
+index 9c2c43bfed0b..a71b7c0803f1 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
+@@ -811,9 +811,10 @@ static int smu_v13_0_5_get_dpm_ultimate_freq(struct smu_context *smu,
+ }
+ 
+ static int smu_v13_0_5_set_soft_freq_limited_range(struct smu_context *smu,
+-							enum smu_clk_type clk_type,
+-							uint32_t min,
+-							uint32_t max)
++						   enum smu_clk_type clk_type,
++						   uint32_t min,
++						   uint32_t max,
++						   bool automatic)
+ {
+ 	enum smu_message_type msg_set_min, msg_set_max;
+ 	uint32_t min_clk = min;
+@@ -950,7 +951,7 @@ static int smu_v13_0_5_force_clk_levels(struct smu_context *smu,
+ 		if (ret)
+ 			goto force_level_out;
+ 
+-		ret = smu_v13_0_5_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq);
++		ret = smu_v13_0_5_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq, false);
+ 		if (ret)
+ 			goto force_level_out;
+ 		break;
+@@ -1046,9 +1047,10 @@ static int smu_v13_0_5_set_performance_level(struct smu_context *smu,
+ 
+ 	if (sclk_min && sclk_max) {
+ 		ret = smu_v13_0_5_set_soft_freq_limited_range(smu,
+-							    SMU_SCLK,
+-							    sclk_min,
+-							    sclk_max);
++							      SMU_SCLK,
++							      sclk_min,
++							      sclk_max,
++							      false);
+ 		if (ret)
+ 			return ret;
+ 
+@@ -1060,7 +1062,8 @@ static int smu_v13_0_5_set_performance_level(struct smu_context *smu,
+ 		ret = smu_v13_0_5_set_soft_freq_limited_range(smu,
+ 							      SMU_VCLK,
+ 							      vclk_min,
+-							      vclk_max);
++							      vclk_max,
++							      false);
+ 		if (ret)
+ 			return ret;
+ 	}
+@@ -1069,7 +1072,8 @@ static int smu_v13_0_5_set_performance_level(struct smu_context *smu,
+ 		ret = smu_v13_0_5_set_soft_freq_limited_range(smu,
+ 							      SMU_DCLK,
+ 							      dclk_min,
+-							      dclk_max);
++							      dclk_max,
++							      false);
+ 		if (ret)
+ 			return ret;
+ 	}
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+index 9974c9f8135e..8d2ccd8a8b0c 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+@@ -1739,7 +1739,7 @@ static int smu_v13_0_6_set_performance_level(struct smu_context *smu,
+ 		if (uclk_table->max != pstate_table->uclk_pstate.curr.max) {
+ 			/* Min UCLK is not expected to be changed */
+ 			ret = smu_v13_0_set_soft_freq_limited_range(
+-				smu, SMU_UCLK, 0, uclk_table->max);
++				smu, SMU_UCLK, 0, uclk_table->max, false);
+ 			if (ret)
+ 				return ret;
+ 			pstate_table->uclk_pstate.curr.max = uclk_table->max;
+@@ -1758,7 +1758,8 @@ static int smu_v13_0_6_set_performance_level(struct smu_context *smu,
+ 
+ static int smu_v13_0_6_set_soft_freq_limited_range(struct smu_context *smu,
+ 						   enum smu_clk_type clk_type,
+-						   uint32_t min, uint32_t max)
++						   uint32_t min, uint32_t max,
++						   bool automatic)
+ {
+ 	struct smu_dpm_context *smu_dpm = &(smu->smu_dpm);
+ 	struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context;
+@@ -1806,7 +1807,7 @@ static int smu_v13_0_6_set_soft_freq_limited_range(struct smu_context *smu,
+ 				return -EOPNOTSUPP;
+ 			/* Only max clock limiting is allowed for UCLK */
+ 			ret = smu_v13_0_set_soft_freq_limited_range(
+-				smu, SMU_UCLK, 0, max);
++				smu, SMU_UCLK, 0, max, false);
+ 			if (!ret)
+ 				pstate_table->uclk_pstate.curr.max = max;
+ 		}
+@@ -1946,7 +1947,7 @@ static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu,
+ 			max_clk = dpm_context->dpm_tables.gfx_table.max;
+ 
+ 			ret = smu_v13_0_6_set_soft_freq_limited_range(
+-				smu, SMU_GFXCLK, min_clk, max_clk);
++				smu, SMU_GFXCLK, min_clk, max_clk, false);
+ 
+ 			if (ret)
+ 				return ret;
+@@ -1954,7 +1955,7 @@ static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu,
+ 			min_clk = dpm_context->dpm_tables.uclk_table.min;
+ 			max_clk = dpm_context->dpm_tables.uclk_table.max;
+ 			ret = smu_v13_0_6_set_soft_freq_limited_range(
+-				smu, SMU_UCLK, min_clk, max_clk);
++				smu, SMU_UCLK, min_clk, max_clk, false);
+ 			if (ret)
+ 				return ret;
+ 			pstate_table->uclk_pstate.custom.max = 0;
+@@ -1978,7 +1979,7 @@ static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu,
+ 			max_clk = pstate_table->gfxclk_pstate.custom.max;
+ 
+ 			ret = smu_v13_0_6_set_soft_freq_limited_range(
+-				smu, SMU_GFXCLK, min_clk, max_clk);
++				smu, SMU_GFXCLK, min_clk, max_clk, false);
+ 
+ 			if (ret)
+ 				return ret;
+@@ -1989,7 +1990,7 @@ static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu,
+ 			min_clk = pstate_table->uclk_pstate.curr.min;
+ 			max_clk = pstate_table->uclk_pstate.custom.max;
+ 			return smu_v13_0_6_set_soft_freq_limited_range(
+-				smu, SMU_UCLK, min_clk, max_clk);
++				smu, SMU_UCLK, min_clk, max_clk, false);
+ 		}
+ 		break;
+ 	default:
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+index b891a5e0a396..2077506ef336 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+@@ -1964,7 +1964,8 @@ static int smu_v13_0_7_force_clk_levels(struct smu_context *smu,
+ 		ret = smu_v13_0_set_soft_freq_limited_range(smu,
+ 							    clk_type,
+ 							    min_freq,
+-							    max_freq);
++							    max_freq,
++							    false);
+ 		break;
+ 	case SMU_DCEFCLK:
+ 	case SMU_PCIE:
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+index 260c339f89c5..71d58c8c8cc0 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+@@ -945,9 +945,10 @@ static int yellow_carp_get_dpm_ultimate_freq(struct smu_context *smu,
+ }
+ 
+ static int yellow_carp_set_soft_freq_limited_range(struct smu_context *smu,
+-							enum smu_clk_type clk_type,
+-							uint32_t min,
+-							uint32_t max)
++						   enum smu_clk_type clk_type,
++						   uint32_t min,
++						   uint32_t max,
++						   bool automatic)
+ {
+ 	enum smu_message_type msg_set_min, msg_set_max;
+ 	uint32_t min_clk = min;
+@@ -1134,7 +1135,7 @@ static int yellow_carp_force_clk_levels(struct smu_context *smu,
+ 		if (ret)
+ 			goto force_level_out;
+ 
+-		ret = yellow_carp_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq);
++		ret = yellow_carp_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq, false);
+ 		if (ret)
+ 			goto force_level_out;
+ 		break;
+@@ -1254,9 +1255,10 @@ static int yellow_carp_set_performance_level(struct smu_context *smu,
+ 
+ 	if (sclk_min && sclk_max) {
+ 		ret = yellow_carp_set_soft_freq_limited_range(smu,
+-							    SMU_SCLK,
+-							    sclk_min,
+-							    sclk_max);
++							      SMU_SCLK,
++							      sclk_min,
++							      sclk_max,
++							      false);
+ 		if (ret)
+ 			return ret;
+ 
+@@ -1266,18 +1268,20 @@ static int yellow_carp_set_performance_level(struct smu_context *smu,
+ 
+ 	if (fclk_min && fclk_max) {
+ 		ret = yellow_carp_set_soft_freq_limited_range(smu,
+-							    SMU_FCLK,
+-							    fclk_min,
+-							    fclk_max);
++							      SMU_FCLK,
++							      fclk_min,
++							      fclk_max,
++							      false);
+ 		if (ret)
+ 			return ret;
+ 	}
+ 
+ 	if (socclk_min && socclk_max) {
+ 		ret = yellow_carp_set_soft_freq_limited_range(smu,
+-							    SMU_SOCCLK,
+-							    socclk_min,
+-							    socclk_max);
++							      SMU_SOCCLK,
++							      socclk_min,
++							      socclk_max,
++							      false);
+ 		if (ret)
+ 			return ret;
+ 	}
+@@ -1286,7 +1290,8 @@ static int yellow_carp_set_performance_level(struct smu_context *smu,
+ 		ret = yellow_carp_set_soft_freq_limited_range(smu,
+ 							      SMU_VCLK,
+ 							      vclk_min,
+-							      vclk_max);
++							      vclk_max,
++							      false);
+ 		if (ret)
+ 			return ret;
+ 	}
+@@ -1295,7 +1300,8 @@ static int yellow_carp_set_performance_level(struct smu_context *smu,
+ 		ret = yellow_carp_set_soft_freq_limited_range(smu,
+ 							      SMU_DCLK,
+ 							      dclk_min,
+-							      dclk_max);
++							      dclk_max,
++							      false);
+ 		if (ret)
+ 			return ret;
+ 	}
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
+index 09973615f210..a7a6c4eea153 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
+@@ -1093,7 +1093,8 @@ int smu_v14_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type c
+ int smu_v14_0_set_soft_freq_limited_range(struct smu_context *smu,
+ 					  enum smu_clk_type clk_type,
+ 					  uint32_t min,
+-					  uint32_t max)
++					  uint32_t max,
++					  bool automatic)
+ {
+ 	int ret = 0, clk_id = 0;
+ 	uint32_t param;
+@@ -1108,7 +1109,10 @@ int smu_v14_0_set_soft_freq_limited_range(struct smu_context *smu,
+ 		return clk_id;
+ 
+ 	if (max > 0) {
+-		param = (uint32_t)((clk_id << 16) | (max & 0xffff));
++		if (automatic)
++			param = (uint32_t)((clk_id << 16) | 0xffff);
++		else
++			param = (uint32_t)((clk_id << 16) | (max & 0xffff));
+ 		ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxByFreq,
+ 						      param, NULL);
+ 		if (ret)
+@@ -1116,7 +1120,10 @@ int smu_v14_0_set_soft_freq_limited_range(struct smu_context *smu,
+ 	}
+ 
+ 	if (min > 0) {
+-		param = (uint32_t)((clk_id << 16) | (min & 0xffff));
++		if (automatic)
++			param = (uint32_t)((clk_id << 16) | 0);
++		else
++			param = (uint32_t)((clk_id << 16) | (min & 0xffff));
+ 		ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMinByFreq,
+ 						      param, NULL);
+ 		if (ret)
+@@ -1193,6 +1200,7 @@ int smu_v14_0_set_performance_level(struct smu_context *smu,
+ 	uint32_t dclk_min = 0, dclk_max = 0;
+ 	uint32_t fclk_min = 0, fclk_max = 0;
+ 	int ret = 0, i;
++	bool auto_level = false;
+ 
+ 	switch (level) {
+ 	case AMD_DPM_FORCED_LEVEL_HIGH:
+@@ -1224,6 +1232,7 @@ int smu_v14_0_set_performance_level(struct smu_context *smu,
+ 		dclk_max = dclk_table->max;
+ 		fclk_min = fclk_table->min;
+ 		fclk_max = fclk_table->max;
++		auto_level = true;
+ 		break;
+ 	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
+ 		sclk_min = sclk_max = pstate_table->gfxclk_pstate.standard;
+@@ -1259,7 +1268,8 @@ int smu_v14_0_set_performance_level(struct smu_context *smu,
+ 		ret = smu_v14_0_set_soft_freq_limited_range(smu,
+ 							    SMU_GFXCLK,
+ 							    sclk_min,
+-							    sclk_max);
++							    sclk_max,
++							    auto_level);
+ 		if (ret)
+ 			return ret;
+ 
+@@ -1271,7 +1281,8 @@ int smu_v14_0_set_performance_level(struct smu_context *smu,
+ 		ret = smu_v14_0_set_soft_freq_limited_range(smu,
+ 							    SMU_MCLK,
+ 							    mclk_min,
+-							    mclk_max);
++							    mclk_max,
++							    auto_level);
+ 		if (ret)
+ 			return ret;
+ 
+@@ -1283,7 +1294,8 @@ int smu_v14_0_set_performance_level(struct smu_context *smu,
+ 		ret = smu_v14_0_set_soft_freq_limited_range(smu,
+ 							    SMU_SOCCLK,
+ 							    socclk_min,
+-							    socclk_max);
++							    socclk_max,
++							    auto_level);
+ 		if (ret)
+ 			return ret;
+ 
+@@ -1298,7 +1310,8 @@ int smu_v14_0_set_performance_level(struct smu_context *smu,
+ 			ret = smu_v14_0_set_soft_freq_limited_range(smu,
+ 								    i ? SMU_VCLK1 : SMU_VCLK,
+ 								    vclk_min,
+-								    vclk_max);
++								    vclk_max,
++								    auto_level);
+ 			if (ret)
+ 				return ret;
+ 		}
+@@ -1313,7 +1326,8 @@ int smu_v14_0_set_performance_level(struct smu_context *smu,
+ 			ret = smu_v14_0_set_soft_freq_limited_range(smu,
+ 								    i ? SMU_DCLK1 : SMU_DCLK,
+ 								    dclk_min,
+-								    dclk_max);
++								    dclk_max,
++								    auto_level);
+ 			if (ret)
+ 				return ret;
+ 		}
+@@ -1325,7 +1339,8 @@ int smu_v14_0_set_performance_level(struct smu_context *smu,
+ 		ret = smu_v14_0_set_soft_freq_limited_range(smu,
+ 							    SMU_FCLK,
+ 							    fclk_min,
+-							    fclk_max);
++							    fclk_max,
++							    auto_level);
+ 		if (ret)
+ 			return ret;
+ 
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
+index ba17d01e6439..6c0f3505bb55 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
+@@ -1245,7 +1245,8 @@ static int smu_v14_0_2_force_clk_levels(struct smu_context *smu,
+ 		ret = smu_v14_0_set_soft_freq_limited_range(smu,
+ 							    clk_type,
+ 							    min_freq,
+-							    max_freq);
++							    max_freq,
++							    false);
+ 		break;
+ 	case SMU_DCEFCLK:
+ 	case SMU_PCIE:
+diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
+index f68a41eeb1fa..6cd386d0fccb 100644
+--- a/drivers/gpu/drm/drm_edid.c
++++ b/drivers/gpu/drm/drm_edid.c
+@@ -94,6 +94,8 @@ static int oui(u8 first, u8 second, u8 third)
+ #define EDID_QUIRK_NON_DESKTOP			(1 << 12)
+ /* Cap the DSC target bitrate to 15bpp */
+ #define EDID_QUIRK_CAP_DSC_15BPP		(1 << 13)
++/* Fix up a particular 5120x1440@240Hz timing */
++#define EDID_QUIRK_FIXUP_5120_1440_240		(1 << 14)
+ 
+ #define MICROSOFT_IEEE_OUI	0xca125c
+ 
+@@ -182,6 +184,12 @@ static const struct edid_quirk {
+ 	EDID_QUIRK('S', 'A', 'M', 596, EDID_QUIRK_PREFER_LARGE_60),
+ 	EDID_QUIRK('S', 'A', 'M', 638, EDID_QUIRK_PREFER_LARGE_60),
+ 
++	/* Samsung C49G95T */
++	EDID_QUIRK('S', 'A', 'M', 0x7053, EDID_QUIRK_FIXUP_5120_1440_240),
++
++	/* Samsung S49AG95 */
++	EDID_QUIRK('S', 'A', 'M', 0x71ac, EDID_QUIRK_FIXUP_5120_1440_240),
++
+ 	/* Sony PVM-2541A does up to 12 bpc, but only reports max 8 bpc */
+ 	EDID_QUIRK('S', 'N', 'Y', 0x2541, EDID_QUIRK_FORCE_12BPC),
+ 
+@@ -6757,7 +6765,37 @@ static void update_display_info(struct drm_connector *connector,
+ 	drm_edid_to_eld(connector, drm_edid);
+ }
+ 
+-static struct drm_display_mode *drm_mode_displayid_detailed(struct drm_device *dev,
++static void drm_mode_displayid_detailed_edid_quirks(struct drm_connector *connector,
++						    struct drm_display_mode *mode)
++{
++	unsigned int hsync_width;
++	unsigned int vsync_width;
++
++	if (connector->display_info.quirks & EDID_QUIRK_FIXUP_5120_1440_240) {
++		if (mode->hdisplay == 5120 && mode->vdisplay == 1440 &&
++		    mode->clock == 1939490) {
++			hsync_width = mode->hsync_end - mode->hsync_start;
++			vsync_width = mode->vsync_end - mode->vsync_start;
++
++			mode->clock = 2018490;
++			mode->hdisplay = 5120;
++			mode->hsync_start = 5120 + 8;
++			mode->hsync_end = 5120 + 8 + hsync_width;
++			mode->htotal = 5200;
++
++			mode->vdisplay = 1440;
++			mode->vsync_start = 1440 + 165;
++			mode->vsync_end = 1440 + 165 + vsync_width;
++			mode->vtotal = 1619;
++
++			drm_dbg_kms(connector->dev,
++				    "[CONNECTOR:%d:%s] Samsung 240Hz mode quirk applied\n",
++				    connector->base.id, connector->name);
++		}
++	}
++}
++
++static struct drm_display_mode *drm_mode_displayid_detailed(struct drm_connector *connector,
+ 							    struct displayid_detailed_timings_1 *timings,
+ 							    bool type_7)
+ {
+@@ -6776,7 +6814,7 @@ static struct drm_display_mode *drm_mode_displayid_detailed(struct drm_device *d
+ 	bool hsync_positive = (timings->hsync[1] >> 7) & 0x1;
+ 	bool vsync_positive = (timings->vsync[1] >> 7) & 0x1;
+ 
+-	mode = drm_mode_create(dev);
++	mode = drm_mode_create(connector->dev);
+ 	if (!mode)
+ 		return NULL;
+ 
+@@ -6799,6 +6837,9 @@ static struct drm_display_mode *drm_mode_displayid_detailed(struct drm_device *d
+ 
+ 	if (timings->flags & 0x80)
+ 		mode->type |= DRM_MODE_TYPE_PREFERRED;
++
++	drm_mode_displayid_detailed_edid_quirks(connector, mode);
++
+ 	drm_mode_set_name(mode);
+ 
+ 	return mode;
+@@ -6821,7 +6862,7 @@ static int add_displayid_detailed_1_modes(struct drm_connector *connector,
+ 	for (i = 0; i < num_timings; i++) {
+ 		struct displayid_detailed_timings_1 *timings = &det->timings[i];
+ 
+-		newmode = drm_mode_displayid_detailed(connector->dev, timings, type_7);
++		newmode = drm_mode_displayid_detailed(connector, timings, type_7);
+ 		if (!newmode)
+ 			continue;
+ 
+diff --git a/drivers/net/wireless/realtek/rtw89/pci.c b/drivers/net/wireless/realtek/rtw89/pci.c
+index 02afeb3acce4..5aef7fa37878 100644
+--- a/drivers/net/wireless/realtek/rtw89/pci.c
++++ b/drivers/net/wireless/realtek/rtw89/pci.c
+@@ -3026,24 +3026,54 @@ static void rtw89_pci_declaim_device(struct rtw89_dev *rtwdev,
+ 	pci_disable_device(pdev);
+ }
+ 
+-static void rtw89_pci_cfg_dac(struct rtw89_dev *rtwdev)
++static bool rtw89_pci_chip_is_manual_dac(struct rtw89_dev *rtwdev)
+ {
+-	struct rtw89_pci *rtwpci = (struct rtw89_pci *)rtwdev->priv;
+ 	const struct rtw89_chip_info *chip = rtwdev->chip;
+ 
+-	if (!rtwpci->enable_dac)
+-		return;
+-
+ 	switch (chip->chip_id) {
+ 	case RTL8852A:
+ 	case RTL8852B:
+ 	case RTL8851B:
+ 	case RTL8852BT:
+-		break;
++		return true;
+ 	default:
+-		return;
++		return false;
++	}
++}
++
++static bool rtw89_pci_is_dac_compatible_bridge(struct rtw89_dev *rtwdev)
++{
++	struct rtw89_pci *rtwpci = (struct rtw89_pci *)rtwdev->priv;
++	struct pci_dev *bridge = pci_upstream_bridge(rtwpci->pdev);
++
++	if (!rtw89_pci_chip_is_manual_dac(rtwdev))
++		return true;
++
++	if (!bridge)
++		return false;
++
++	switch (bridge->vendor) {
++	case PCI_VENDOR_ID_INTEL:
++		return true;
++	case PCI_VENDOR_ID_ASMEDIA:
++		if (bridge->device == 0x2806)
++			return true;
++		break;
+ 	}
+ 
++	return false;
++}
++
++static void rtw89_pci_cfg_dac(struct rtw89_dev *rtwdev)
++{
++	struct rtw89_pci *rtwpci = (struct rtw89_pci *)rtwdev->priv;
++
++	if (!rtwpci->enable_dac)
++		return;
++
++	if (!rtw89_pci_chip_is_manual_dac(rtwdev))
++		return;
++
+ 	rtw89_pci_config_byte_set(rtwdev, RTW89_PCIE_L1_CTRL, RTW89_PCIE_BIT_EN_64BITS);
+ }
+ 
+@@ -3061,6 +3091,9 @@ static int rtw89_pci_setup_mapping(struct rtw89_dev *rtwdev,
+ 		goto err;
+ 	}
+ 
++	if (!rtw89_pci_is_dac_compatible_bridge(rtwdev))
++		goto no_dac;
++
+ 	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(36));
+ 	if (!ret) {
+ 		rtwpci->enable_dac = true;
+@@ -3073,6 +3106,7 @@ static int rtw89_pci_setup_mapping(struct rtw89_dev *rtwdev,
+ 			goto err_release_regions;
+ 		}
+ 	}
++no_dac:
+ 
+ 	resource_len = pci_resource_len(pdev, bar_id);
+ 	rtwpci->mmap = pci_iomap(pdev, bar_id, resource_len);
+diff --git a/drivers/platform/x86/dell/dell-wmi-base.c b/drivers/platform/x86/dell/dell-wmi-base.c
+index 502783a7adb1..24fd7ffadda9 100644
+--- a/drivers/platform/x86/dell/dell-wmi-base.c
++++ b/drivers/platform/x86/dell/dell-wmi-base.c
+@@ -264,6 +264,15 @@ static const struct key_entry dell_wmi_keymap_type_0010[] = {
+ 	/*Speaker Mute*/
+ 	{ KE_KEY, 0x109, { KEY_MUTE} },
+ 
++	/* S2Idle screen off */
++	{ KE_IGNORE, 0x120, { KEY_RESERVED }},
++
++	/* Leaving S4 or S2Idle suspend */
++	{ KE_IGNORE, 0x130, { KEY_RESERVED }},
++
++	/* Entering S2Idle suspend */
++	{ KE_IGNORE, 0x140, { KEY_RESERVED }},
++
+ 	/* Mic mute */
+ 	{ KE_KEY, 0x150, { KEY_MICMUTE } },
+ 
+diff --git a/mm/mmap.c b/mm/mmap.c
+index 18fddcce03b8..d84d6dd8771c 100644
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -1952,10 +1952,6 @@ __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+ 
+ 	if (get_area) {
+ 		addr = get_area(file, addr, len, pgoff, flags);
+-	} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+-		/* Ensures that larger anonymous mappings are THP aligned. */
+-		addr = thp_get_unmapped_area_vmflags(file, addr, len,
+-						     pgoff, flags, vm_flags);
+ 	} else {
+ 		addr = mm_get_unmapped_area_vmflags(current->mm, file, addr, len,
+ 						    pgoff, flags, vm_flags);
+diff --git a/mm/shrinker.c b/mm/shrinker.c
+index dc5d2a6fcfc4..e4b795ee6d2e 100644
+--- a/mm/shrinker.c
++++ b/mm/shrinker.c
+@@ -87,8 +87,10 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
+ 		if (!info)
+ 			goto err;
+ 		info->map_nr_max = shrinker_nr_max;
+-		if (shrinker_unit_alloc(info, NULL, nid))
++		if (shrinker_unit_alloc(info, NULL, nid)) {
++			kvfree(info);
+ 			goto err;
++		}
+ 		rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
+ 	}
+ 	mutex_unlock(&shrinker_mutex);
+diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
+index d80abd6ccaf8..6dcf4bc7e30b 100644
+--- a/net/netfilter/xt_NFLOG.c
++++ b/net/netfilter/xt_NFLOG.c
+@@ -79,7 +79,7 @@ static struct xt_target nflog_tg_reg[] __read_mostly = {
+ 	{
+ 		.name       = "NFLOG",
+ 		.revision   = 0,
+-		.family     = NFPROTO_IPV4,
++		.family     = NFPROTO_IPV6,
+ 		.checkentry = nflog_tg_check,
+ 		.destroy    = nflog_tg_destroy,
+ 		.target     = nflog_tg,
+diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
+index f3fa4f11348c..a642ff09fc8e 100644
+--- a/net/netfilter/xt_TRACE.c
++++ b/net/netfilter/xt_TRACE.c
+@@ -49,6 +49,7 @@ static struct xt_target trace_tg_reg[] __read_mostly = {
+ 		.target		= trace_tg,
+ 		.checkentry	= trace_tg_check,
+ 		.destroy	= trace_tg_destroy,
++		.me		= THIS_MODULE,
+ 	},
+ #endif
+ };
+diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
+index f76fe04fc9a4..65b965ca40ea 100644
+--- a/net/netfilter/xt_mark.c
++++ b/net/netfilter/xt_mark.c
+@@ -62,7 +62,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
+ 	{
+ 		.name           = "MARK",
+ 		.revision       = 2,
+-		.family         = NFPROTO_IPV4,
++		.family         = NFPROTO_IPV6,
+ 		.target         = mark_tg,
+ 		.targetsize     = sizeof(struct xt_mark_tginfo2),
+ 		.me             = THIS_MODULE,
+-- 
+2.47.0.rc0
+
+From 13dcfcc62c4c4467d7f8c9d1436097cdd70c0cec Mon Sep 17 00:00:00 2001
+From: Eric Naim <dnaim@cachyos.org>
+Date: Tue, 22 Oct 2024 22:52:43 +0800
+Subject: [PATCH 07/13] intel-pstate
+
+Signed-off-by: Eric Naim <dnaim@cachyos.org>
+---
+ arch/x86/include/asm/topology.h  |  13 ++
+ arch/x86/kernel/cpu/aperfmperf.c |  89 +++++++++++-
+ drivers/cpufreq/intel_pstate.c   | 232 ++++++++++++++++++++++++++++++-
+ 3 files changed, 328 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
+index 94d9832a5bc8..9f9376db64e3 100644
+--- a/arch/x86/include/asm/topology.h
++++ b/arch/x86/include/asm/topology.h
+@@ -291,9 +291,22 @@ static inline long arch_scale_freq_capacity(int cpu)
+ }
+ #define arch_scale_freq_capacity arch_scale_freq_capacity
+ 
++bool arch_enable_hybrid_capacity_scale(void);
++void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
++			   unsigned long cap_freq, unsigned long base_freq);
++
++unsigned long arch_scale_cpu_capacity(int cpu);
++#define arch_scale_cpu_capacity arch_scale_cpu_capacity
++
+ extern void arch_set_max_freq_ratio(bool turbo_disabled);
+ extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled);
+ #else
++static inline bool arch_enable_hybrid_capacity_scale(void) { return false; }
++static inline void arch_set_cpu_capacity(int cpu, unsigned long cap,
++					 unsigned long max_cap,
++					 unsigned long cap_freq,
++					 unsigned long base_freq) { }
++
+ static inline void arch_set_max_freq_ratio(bool turbo_disabled) { }
+ static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { }
+ #endif
+diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
+index 0b69bfbf345d..ec07678c641b 100644
+--- a/arch/x86/kernel/cpu/aperfmperf.c
++++ b/arch/x86/kernel/cpu/aperfmperf.c
+@@ -349,9 +349,89 @@ static DECLARE_WORK(disable_freq_invariance_work,
+ DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
+ EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale);
+ 
++static DEFINE_STATIC_KEY_FALSE(arch_hybrid_cap_scale_key);
++
++struct arch_hybrid_cpu_scale {
++	unsigned long capacity;
++	unsigned long freq_ratio;
++};
++
++static struct arch_hybrid_cpu_scale __percpu *arch_cpu_scale;
++
++/**
++ * arch_enable_hybrid_capacity_scale - Enable hybrid CPU capacity scaling
++ *
++ * Allocate memory for per-CPU data used by hybrid CPU capacity scaling,
++ * initialize it and set the static key controlling its code paths.
++ *
++ * Must be called before arch_set_cpu_capacity().
++ */
++bool arch_enable_hybrid_capacity_scale(void)
++{
++	int cpu;
++
++	if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) {
++		WARN_ONCE(1, "Hybrid CPU capacity scaling already enabled");
++		return true;
++	}
++
++	arch_cpu_scale = alloc_percpu(struct arch_hybrid_cpu_scale);
++	if (!arch_cpu_scale)
++		return false;
++
++	for_each_possible_cpu(cpu) {
++		per_cpu_ptr(arch_cpu_scale, cpu)->capacity = SCHED_CAPACITY_SCALE;
++		per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio = arch_max_freq_ratio;
++	}
++
++	static_branch_enable(&arch_hybrid_cap_scale_key);
++
++	pr_info("Hybrid CPU capacity scaling enabled\n");
++
++	return true;
++}
++
++/**
++ * arch_set_cpu_capacity - Set scale-invariance parameters for a CPU
++ * @cpu: Target CPU.
++ * @cap: Capacity of @cpu at its maximum frequency, relative to @max_cap.
++ * @max_cap: System-wide maximum CPU capacity.
++ * @cap_freq: Frequency of @cpu corresponding to @cap.
++ * @base_freq: Frequency of @cpu at which MPERF counts.
++ *
++ * The units in which @cap and @max_cap are expressed do not matter, so long
++ * as they are consistent, because the former is effectively divided by the
++ * latter.  Analogously for @cap_freq and @base_freq.
++ *
++ * After calling this function for all CPUs, call arch_rebuild_sched_domains()
++ * to let the scheduler know that capacity-aware scheduling can be used going
++ * forward.
++ */
++void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
++			   unsigned long cap_freq, unsigned long base_freq)
++{
++	if (static_branch_likely(&arch_hybrid_cap_scale_key)) {
++		WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity,
++			   div_u64(cap << SCHED_CAPACITY_SHIFT, max_cap));
++		WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio,
++			   div_u64(cap_freq << SCHED_CAPACITY_SHIFT, base_freq));
++	} else {
++		WARN_ONCE(1, "Hybrid CPU capacity scaling not enabled");
++	}
++}
++
++unsigned long arch_scale_cpu_capacity(int cpu)
++{
++	if (static_branch_unlikely(&arch_hybrid_cap_scale_key))
++		return READ_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity);
++
++	return SCHED_CAPACITY_SCALE;
++}
++EXPORT_SYMBOL_GPL(arch_scale_cpu_capacity);
++
+ static void scale_freq_tick(u64 acnt, u64 mcnt)
+ {
+-	u64 freq_scale;
++	u64 freq_scale, freq_ratio;
+ 
+ 	if (!arch_scale_freq_invariant())
+ 		return;
+@@ -359,7 +439,12 @@ static void scale_freq_tick(u64 acnt, u64 mcnt)
+ 	if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
+ 		goto error;
+ 
+-	if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
++	if (static_branch_unlikely(&arch_hybrid_cap_scale_key))
++		freq_ratio = READ_ONCE(this_cpu_ptr(arch_cpu_scale)->freq_ratio);
++	else
++		freq_ratio = arch_max_freq_ratio;
++
++	if (check_mul_overflow(mcnt, freq_ratio, &mcnt) || !mcnt)
+ 		goto error;
+ 
+ 	freq_scale = div64_u64(acnt, mcnt);
+diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
+index 348a330678bd..c11be253bfa3 100644
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -16,6 +16,7 @@
+ #include <linux/tick.h>
+ #include <linux/slab.h>
+ #include <linux/sched/cpufreq.h>
++#include <linux/sched/smt.h>
+ #include <linux/list.h>
+ #include <linux/cpu.h>
+ #include <linux/cpufreq.h>
+@@ -215,6 +216,7 @@ struct global_params {
+  * @hwp_req_cached:	Cached value of the last HWP Request MSR
+  * @hwp_cap_cached:	Cached value of the last HWP Capabilities MSR
+  * @last_io_update:	Last time when IO wake flag was set
++ * @capacity_perf:	Highest perf used for scale invariance
+  * @sched_flags:	Store scheduler flags for possible cross CPU update
+  * @hwp_boost_min:	Last HWP boosted min performance
+  * @suspended:		Whether or not the driver has been suspended.
+@@ -253,6 +255,7 @@ struct cpudata {
+ 	u64 hwp_req_cached;
+ 	u64 hwp_cap_cached;
+ 	u64 last_io_update;
++	unsigned int capacity_perf;
+ 	unsigned int sched_flags;
+ 	u32 hwp_boost_min;
+ 	bool suspended;
+@@ -295,6 +298,7 @@ static int hwp_mode_bdw __ro_after_init;
+ static bool per_cpu_limits __ro_after_init;
+ static bool hwp_forced __ro_after_init;
+ static bool hwp_boost __read_mostly;
++static bool hwp_is_hybrid;
+ 
+ static struct cpufreq_driver *intel_pstate_driver __read_mostly;
+ 
+@@ -934,6 +938,135 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
+ 	NULL,
+ };
+ 
++static struct cpudata *hybrid_max_perf_cpu __read_mostly;
++/*
++ * Protects hybrid_max_perf_cpu, the capacity_perf fields in struct cpudata,
++ * and the x86 arch scale-invariance information from concurrent updates.
++ */
++static DEFINE_MUTEX(hybrid_capacity_lock);
++
++static void hybrid_set_cpu_capacity(struct cpudata *cpu)
++{
++	arch_set_cpu_capacity(cpu->cpu, cpu->capacity_perf,
++			      hybrid_max_perf_cpu->capacity_perf,
++			      cpu->capacity_perf,
++			      cpu->pstate.max_pstate_physical);
++
++	pr_debug("CPU%d: perf = %u, max. perf = %u, base perf = %d\n", cpu->cpu,
++		 cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf,
++		 cpu->pstate.max_pstate_physical);
++}
++
++static void hybrid_clear_cpu_capacity(unsigned int cpunum)
++{
++	arch_set_cpu_capacity(cpunum, 1, 1, 1, 1);
++}
++
++static void hybrid_get_capacity_perf(struct cpudata *cpu)
++{
++	if (READ_ONCE(global.no_turbo)) {
++		cpu->capacity_perf = cpu->pstate.max_pstate_physical;
++		return;
++	}
++
++	cpu->capacity_perf = HWP_HIGHEST_PERF(READ_ONCE(cpu->hwp_cap_cached));
++}
++
++static void hybrid_set_capacity_of_cpus(void)
++{
++	int cpunum;
++
++	for_each_online_cpu(cpunum) {
++		struct cpudata *cpu = all_cpu_data[cpunum];
++
++		if (cpu)
++			hybrid_set_cpu_capacity(cpu);
++	}
++}
++
++static void hybrid_update_cpu_scaling(void)
++{
++	struct cpudata *max_perf_cpu = NULL;
++	unsigned int max_cap_perf = 0;
++	int cpunum;
++
++	for_each_online_cpu(cpunum) {
++		struct cpudata *cpu = all_cpu_data[cpunum];
++
++		if (!cpu)
++			continue;
++
++		/*
++		 * During initialization, CPU performance at full capacity needs
++		 * to be determined.
++		 */
++		if (!hybrid_max_perf_cpu)
++			hybrid_get_capacity_perf(cpu);
++
++		/*
++		 * If hybrid_max_perf_cpu is not NULL at this point, it is
++		 * being replaced, so don't take it into account when looking
++		 * for the new one.
++		 */
++		if (cpu == hybrid_max_perf_cpu)
++			continue;
++
++		if (cpu->capacity_perf > max_cap_perf) {
++			max_cap_perf = cpu->capacity_perf;
++			max_perf_cpu = cpu;
++		}
++	}
++
++	if (max_perf_cpu) {
++		hybrid_max_perf_cpu = max_perf_cpu;
++		hybrid_set_capacity_of_cpus();
++	} else {
++		pr_info("Found no CPUs with nonzero maximum performance\n");
++		/* Revert to the flat CPU capacity structure. */
++		for_each_online_cpu(cpunum)
++			hybrid_clear_cpu_capacity(cpunum);
++	}
++}
++
++static void __hybrid_init_cpu_scaling(void)
++{
++	hybrid_max_perf_cpu = NULL;
++	hybrid_update_cpu_scaling();
++}
++
++static void hybrid_init_cpu_scaling(void)
++{
++	bool disable_itmt = false;
++
++	mutex_lock(&hybrid_capacity_lock);
++
++	/*
++	 * If hybrid_max_perf_cpu is set at this point, the hybrid CPU capacity
++	 * scaling has been enabled already and the driver is just changing the
++	 * operation mode.
++	 */
++	if (hybrid_max_perf_cpu) {
++		__hybrid_init_cpu_scaling();
++		goto unlock;
++	}
++
++	/*
++	 * On hybrid systems, use asym capacity instead of ITMT, but because
++	 * the capacity of SMT threads is not deterministic even approximately,
++	 * do not do that when SMT is in use.
++	 */
++	if (hwp_is_hybrid && !sched_smt_active() && arch_enable_hybrid_capacity_scale()) {
++		__hybrid_init_cpu_scaling();
++		disable_itmt = true;
++	}
++
++unlock:
++	mutex_unlock(&hybrid_capacity_lock);
++
++	if (disable_itmt)
++		sched_clear_itmt_support();
++}
++
+ static void __intel_pstate_get_hwp_cap(struct cpudata *cpu)
+ {
+ 	u64 cap;
+@@ -962,6 +1095,43 @@ static void intel_pstate_get_hwp_cap(struct cpudata *cpu)
+ 	}
+ }
+ 
++static void hybrid_update_capacity(struct cpudata *cpu)
++{
++	unsigned int max_cap_perf;
++
++	mutex_lock(&hybrid_capacity_lock);
++
++	if (!hybrid_max_perf_cpu)
++		goto unlock;
++
++	/*
++	 * The maximum performance of the CPU may have changed, but assume
++	 * that the performance of the other CPUs has not changed.
++	 */
++	max_cap_perf = hybrid_max_perf_cpu->capacity_perf;
++
++	intel_pstate_get_hwp_cap(cpu);
++
++	hybrid_get_capacity_perf(cpu);
++	/* Should hybrid_max_perf_cpu be replaced by this CPU? */
++	if (cpu->capacity_perf > max_cap_perf) {
++		hybrid_max_perf_cpu = cpu;
++		hybrid_set_capacity_of_cpus();
++		goto unlock;
++	}
++
++	/* If this CPU is hybrid_max_perf_cpu, should it be replaced? */
++	if (cpu == hybrid_max_perf_cpu && cpu->capacity_perf < max_cap_perf) {
++		hybrid_update_cpu_scaling();
++		goto unlock;
++	}
++
++	hybrid_set_cpu_capacity(cpu);
++
++unlock:
++	mutex_unlock(&hybrid_capacity_lock);
++}
++
+ static void intel_pstate_hwp_set(unsigned int cpu)
+ {
+ 	struct cpudata *cpu_data = all_cpu_data[cpu];
+@@ -1070,6 +1240,22 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu)
+ 		value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);
+ 
+ 	wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
++
++	mutex_lock(&hybrid_capacity_lock);
++
++	if (!hybrid_max_perf_cpu) {
++		mutex_unlock(&hybrid_capacity_lock);
++
++		return;
++	}
++
++	if (hybrid_max_perf_cpu == cpu)
++		hybrid_update_cpu_scaling();
++
++	mutex_unlock(&hybrid_capacity_lock);
++
++	/* Reset the capacity of the CPU going offline to the initial value. */
++	hybrid_clear_cpu_capacity(cpu->cpu);
+ }
+ 
+ #define POWER_CTL_EE_ENABLE	1
+@@ -1165,21 +1351,46 @@ static void __intel_pstate_update_max_freq(struct cpudata *cpudata,
+ static void intel_pstate_update_limits(unsigned int cpu)
+ {
+ 	struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
++	struct cpudata *cpudata;
+ 
+ 	if (!policy)
+ 		return;
+ 
+-	__intel_pstate_update_max_freq(all_cpu_data[cpu], policy);
++	cpudata = all_cpu_data[cpu];
++
++	__intel_pstate_update_max_freq(cpudata, policy);
++
++	/* Prevent the driver from being unregistered now. */
++	mutex_lock(&intel_pstate_driver_lock);
+ 
+ 	cpufreq_cpu_release(policy);
++
++	hybrid_update_capacity(cpudata);
++
++	mutex_unlock(&intel_pstate_driver_lock);
+ }
+ 
+ static void intel_pstate_update_limits_for_all(void)
+ {
+ 	int cpu;
+ 
+-	for_each_possible_cpu(cpu)
+-		intel_pstate_update_limits(cpu);
++	for_each_possible_cpu(cpu) {
++		struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
++
++		if (!policy)
++			continue;
++
++		__intel_pstate_update_max_freq(all_cpu_data[cpu], policy);
++
++		cpufreq_cpu_release(policy);
++	}
++
++	mutex_lock(&hybrid_capacity_lock);
++
++	if (hybrid_max_perf_cpu)
++		__hybrid_init_cpu_scaling();
++
++	mutex_unlock(&hybrid_capacity_lock);
+ }
+ 
+ /************************** sysfs begin ************************/
+@@ -1618,6 +1829,13 @@ static void intel_pstate_notify_work(struct work_struct *work)
+ 		__intel_pstate_update_max_freq(cpudata, policy);
+ 
+ 		cpufreq_cpu_release(policy);
++
++		/*
++		 * The driver will not be unregistered while this function is
++		 * running, so update the capacity without acquiring the driver
++		 * lock.
++		 */
++		hybrid_update_capacity(cpudata);
+ 	}
+ 
+ 	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
+@@ -2034,8 +2252,10 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
+ 
+ 		if (pstate_funcs.get_cpu_scaling) {
+ 			cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu);
+-			if (cpu->pstate.scaling != perf_ctl_scaling)
++			if (cpu->pstate.scaling != perf_ctl_scaling) {
+ 				intel_pstate_hybrid_hwp_adjust(cpu);
++				hwp_is_hybrid = true;
++			}
+ 		} else {
+ 			cpu->pstate.scaling = perf_ctl_scaling;
+ 		}
+@@ -2703,6 +2923,8 @@ static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
+ 		 */
+ 		intel_pstate_hwp_reenable(cpu);
+ 		cpu->suspended = false;
++
++		hybrid_update_capacity(cpu);
+ 	}
+ 
+ 	return 0;
+@@ -3143,6 +3365,8 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver)
+ 
+ 	global.min_perf_pct = min_perf_pct_min();
+ 
++	hybrid_init_cpu_scaling();
++
+ 	return 0;
+ }
+ 
+-- 
+2.47.0.rc0
+
+From ac05835cead1a0a20b6297fdb0f7c47326d0ff71 Mon Sep 17 00:00:00 2001
+From: Eric Naim <dnaim@cachyos.org>
+Date: Tue, 22 Oct 2024 22:52:52 +0800
+Subject: [PATCH 08/13] ksm
+
+Signed-off-by: Eric Naim <dnaim@cachyos.org>
+---
+ arch/alpha/kernel/syscalls/syscall.tbl        |   3 +
+ arch/arm/tools/syscall.tbl                    |   3 +
+ arch/m68k/kernel/syscalls/syscall.tbl         |   3 +
+ arch/microblaze/kernel/syscalls/syscall.tbl   |   3 +
+ arch/mips/kernel/syscalls/syscall_n32.tbl     |   3 +
+ arch/mips/kernel/syscalls/syscall_n64.tbl     |   3 +
+ arch/mips/kernel/syscalls/syscall_o32.tbl     |   3 +
+ arch/parisc/kernel/syscalls/syscall.tbl       |   3 +
+ arch/powerpc/kernel/syscalls/syscall.tbl      |   3 +
+ arch/s390/kernel/syscalls/syscall.tbl         |   3 +
+ arch/sh/kernel/syscalls/syscall.tbl           |   3 +
+ arch/sparc/kernel/syscalls/syscall.tbl        |   3 +
+ arch/x86/entry/syscalls/syscall_32.tbl        |   3 +
+ arch/x86/entry/syscalls/syscall_64.tbl        |   3 +
+ arch/xtensa/kernel/syscalls/syscall.tbl       |   3 +
+ include/linux/syscalls.h                      |   3 +
+ include/uapi/asm-generic/unistd.h             |   9 +-
+ kernel/sys.c                                  | 138 ++++++++++++++++++
+ kernel/sys_ni.c                               |   3 +
+ scripts/syscall.tbl                           |   3 +
+ .../arch/powerpc/entry/syscalls/syscall.tbl   |   3 +
+ .../perf/arch/s390/entry/syscalls/syscall.tbl |   3 +
+ 22 files changed, 206 insertions(+), 1 deletion(-)
+
+diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
+index 74720667fe09..e6a11f3c0a2e 100644
+--- a/arch/alpha/kernel/syscalls/syscall.tbl
++++ b/arch/alpha/kernel/syscalls/syscall.tbl
+@@ -502,3 +502,6 @@
+ 570	common	lsm_set_self_attr		sys_lsm_set_self_attr
+ 571	common	lsm_list_modules		sys_lsm_list_modules
+ 572	common  mseal				sys_mseal
++573	common	process_ksm_enable		sys_process_ksm_enable
++574	common	process_ksm_disable		sys_process_ksm_disable
++575	common	process_ksm_status		sys_process_ksm_status
+diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
+index 23c98203c40f..10a3099decbe 100644
+--- a/arch/arm/tools/syscall.tbl
++++ b/arch/arm/tools/syscall.tbl
+@@ -477,3 +477,6 @@
+ 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+ 461	common	lsm_list_modules		sys_lsm_list_modules
+ 462	common	mseal				sys_mseal
++463	common	process_ksm_enable		sys_process_ksm_enable
++464	common	process_ksm_disable		sys_process_ksm_disable
++465	common	process_ksm_status		sys_process_ksm_status
+diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
+index 22a3cbd4c602..12d2c7594bf0 100644
+--- a/arch/m68k/kernel/syscalls/syscall.tbl
++++ b/arch/m68k/kernel/syscalls/syscall.tbl
+@@ -462,3 +462,6 @@
+ 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+ 461	common	lsm_list_modules		sys_lsm_list_modules
+ 462	common	mseal				sys_mseal
++463	common	process_ksm_enable		sys_process_ksm_enable
++464	common	process_ksm_disable		sys_process_ksm_disable
++465	common	process_ksm_status		sys_process_ksm_status
+diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
+index 2b81a6bd78b2..e2a93c856eed 100644
+--- a/arch/microblaze/kernel/syscalls/syscall.tbl
++++ b/arch/microblaze/kernel/syscalls/syscall.tbl
+@@ -468,3 +468,6 @@
+ 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+ 461	common	lsm_list_modules		sys_lsm_list_modules
+ 462	common	mseal				sys_mseal
++463	common	process_ksm_enable		sys_process_ksm_enable
++464	common	process_ksm_disable		sys_process_ksm_disable
++465	common	process_ksm_status		sys_process_ksm_status
+diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
+index 953f5b7dc723..b921fbf56fa6 100644
+--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
++++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
+@@ -401,3 +401,6 @@
+ 460	n32	lsm_set_self_attr		sys_lsm_set_self_attr
+ 461	n32	lsm_list_modules		sys_lsm_list_modules
+ 462	n32	mseal				sys_mseal
++463	n32	process_ksm_enable		sys_process_ksm_enable
++464	n32	process_ksm_disable		sys_process_ksm_disable
++465	n32	process_ksm_status		sys_process_ksm_status
+diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
+index 1464c6be6eb3..8d7f9ddd66f4 100644
+--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
++++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
+@@ -377,3 +377,6 @@
+ 460	n64	lsm_set_self_attr		sys_lsm_set_self_attr
+ 461	n64	lsm_list_modules		sys_lsm_list_modules
+ 462	n64	mseal				sys_mseal
++463	n64	process_ksm_enable		sys_process_ksm_enable
++464	n64	process_ksm_disable		sys_process_ksm_disable
++465	n64	process_ksm_status		sys_process_ksm_status
+diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
+index 2439a2491cff..9d6142739954 100644
+--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
++++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
+@@ -450,3 +450,6 @@
+ 460	o32	lsm_set_self_attr		sys_lsm_set_self_attr
+ 461	o32	lsm_list_modules		sys_lsm_list_modules
+ 462	o32	mseal				sys_mseal
++463	o32	process_ksm_enable		sys_process_ksm_enable
++464	o32	process_ksm_disable		sys_process_ksm_disable
++465	o32	process_ksm_status		sys_process_ksm_status
+diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
+index 66dc406b12e4..9d46476fd908 100644
+--- a/arch/parisc/kernel/syscalls/syscall.tbl
++++ b/arch/parisc/kernel/syscalls/syscall.tbl
+@@ -461,3 +461,6 @@
+ 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+ 461	common	lsm_list_modules		sys_lsm_list_modules
+ 462	common	mseal				sys_mseal
++463	common	process_ksm_enable		sys_process_ksm_enable
++464	common	process_ksm_disable		sys_process_ksm_disable
++465	common	process_ksm_status		sys_process_ksm_status
+diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
+index ebae8415dfbb..16f71bc2f6f0 100644
+--- a/arch/powerpc/kernel/syscalls/syscall.tbl
++++ b/arch/powerpc/kernel/syscalls/syscall.tbl
+@@ -553,3 +553,6 @@
+ 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+ 461	common	lsm_list_modules		sys_lsm_list_modules
+ 462	common	mseal				sys_mseal
++463	common	process_ksm_enable		sys_process_ksm_enable
++464	common	process_ksm_disable		sys_process_ksm_disable
++465	common	process_ksm_status		sys_process_ksm_status
+diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
+index 01071182763e..7394bad8178e 100644
+--- a/arch/s390/kernel/syscalls/syscall.tbl
++++ b/arch/s390/kernel/syscalls/syscall.tbl
+@@ -465,3 +465,6 @@
+ 460  common	lsm_set_self_attr	sys_lsm_set_self_attr		sys_lsm_set_self_attr
+ 461  common	lsm_list_modules	sys_lsm_list_modules		sys_lsm_list_modules
+ 462  common	mseal			sys_mseal			sys_mseal
++463  common	process_ksm_enable	sys_process_ksm_enable		sys_process_ksm_enable
++464  common	process_ksm_disable	sys_process_ksm_disable		sys_process_ksm_disable
++465  common	process_ksm_status	sys_process_ksm_status		sys_process_ksm_status
+diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
+index c55fd7696d40..b9fc31221b87 100644
+--- a/arch/sh/kernel/syscalls/syscall.tbl
++++ b/arch/sh/kernel/syscalls/syscall.tbl
+@@ -466,3 +466,6 @@
+ 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+ 461	common	lsm_list_modules		sys_lsm_list_modules
+ 462	common	mseal				sys_mseal
++463	common	process_ksm_enable		sys_process_ksm_enable
++464	common	process_ksm_disable		sys_process_ksm_disable
++465	common	process_ksm_status		sys_process_ksm_status
+diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
+index cfdfb3707c16..0d79fd772854 100644
+--- a/arch/sparc/kernel/syscalls/syscall.tbl
++++ b/arch/sparc/kernel/syscalls/syscall.tbl
+@@ -508,3 +508,6 @@
+ 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+ 461	common	lsm_list_modules		sys_lsm_list_modules
+ 462	common	mseal 				sys_mseal
++463	common	process_ksm_enable		sys_process_ksm_enable
++464	common	process_ksm_disable		sys_process_ksm_disable
++465	common	process_ksm_status		sys_process_ksm_status
+diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
+index 534c74b14fab..c546a30575f1 100644
+--- a/arch/x86/entry/syscalls/syscall_32.tbl
++++ b/arch/x86/entry/syscalls/syscall_32.tbl
+@@ -468,3 +468,6 @@
+ 460	i386	lsm_set_self_attr	sys_lsm_set_self_attr
+ 461	i386	lsm_list_modules	sys_lsm_list_modules
+ 462	i386	mseal 			sys_mseal
++463	i386	process_ksm_enable		sys_process_ksm_enable
++464	i386	process_ksm_disable		sys_process_ksm_disable
++465	i386	process_ksm_status		sys_process_ksm_status
+diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
+index 7093ee21c0d1..0fcd10ba8dfe 100644
+--- a/arch/x86/entry/syscalls/syscall_64.tbl
++++ b/arch/x86/entry/syscalls/syscall_64.tbl
+@@ -386,6 +386,9 @@
+ 460	common	lsm_set_self_attr	sys_lsm_set_self_attr
+ 461	common	lsm_list_modules	sys_lsm_list_modules
+ 462 	common  mseal			sys_mseal
++463	common	process_ksm_enable	sys_process_ksm_enable
++464	common	process_ksm_disable	sys_process_ksm_disable
++465	common	process_ksm_status	sys_process_ksm_status
+ 
+ #
+ # Due to a historical design error, certain syscalls are numbered differently
+diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
+index 67083fc1b2f5..c1aecee4ad9b 100644
+--- a/arch/xtensa/kernel/syscalls/syscall.tbl
++++ b/arch/xtensa/kernel/syscalls/syscall.tbl
+@@ -433,3 +433,6 @@
+ 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+ 461	common	lsm_list_modules		sys_lsm_list_modules
+ 462	common	mseal 				sys_mseal
++463	common	process_ksm_enable		sys_process_ksm_enable
++464	common	process_ksm_disable		sys_process_ksm_disable
++465	common	process_ksm_status		sys_process_ksm_status
+diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
+index 4bcf6754738d..b3ea08e920f7 100644
+--- a/include/linux/syscalls.h
++++ b/include/linux/syscalls.h
+@@ -818,6 +818,9 @@ asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior);
+ asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec,
+ 			size_t vlen, int behavior, unsigned int flags);
+ asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags);
++asmlinkage long sys_process_ksm_enable(int pidfd, unsigned int flags);
++asmlinkage long sys_process_ksm_disable(int pidfd, unsigned int flags);
++asmlinkage long sys_process_ksm_status(int pidfd, unsigned int flags);
+ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
+ 			unsigned long prot, unsigned long pgoff,
+ 			unsigned long flags);
+diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
+index 5bf6148cac2b..613e559ad6e0 100644
+--- a/include/uapi/asm-generic/unistd.h
++++ b/include/uapi/asm-generic/unistd.h
+@@ -841,8 +841,15 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
+ #define __NR_mseal 462
+ __SYSCALL(__NR_mseal, sys_mseal)
+ 
++#define __NR_process_ksm_enable 463
++__SYSCALL(__NR_process_ksm_enable, sys_process_ksm_enable)
++#define __NR_process_ksm_disable 464
++__SYSCALL(__NR_process_ksm_disable, sys_process_ksm_disable)
++#define __NR_process_ksm_status 465
++__SYSCALL(__NR_process_ksm_status, sys_process_ksm_status)
++
+ #undef __NR_syscalls
+-#define __NR_syscalls 463
++#define __NR_syscalls 466
+ 
+ /*
+  * 32 bit systems traditionally used different
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 3a2df1bd9f64..bc77dc784527 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -2789,6 +2789,144 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
+ 	return error;
+ }
+ 
++#ifdef CONFIG_KSM
++enum pkc_action {
++	PKSM_ENABLE = 0,
++	PKSM_DISABLE,
++	PKSM_STATUS,
++};
++
++static long do_process_ksm_control(int pidfd, enum pkc_action action)
++{
++	long ret;
++	struct task_struct *task;
++	struct mm_struct *mm;
++	unsigned int f_flags;
++
++	task = pidfd_get_task(pidfd, &f_flags);
++	if (IS_ERR(task)) {
++		ret = PTR_ERR(task);
++		goto out;
++	}
++
++	/* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
++	mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
++	if (IS_ERR_OR_NULL(mm)) {
++		ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
++		goto release_task;
++	}
++
++	/* Require CAP_SYS_NICE for influencing process performance. */
++	if (!capable(CAP_SYS_NICE)) {
++		ret = -EPERM;
++		goto release_mm;
++	}
++
++	if (mmap_write_lock_killable(mm)) {
++		ret = -EINTR;
++		goto release_mm;
++	}
++
++	switch (action) {
++		case PKSM_ENABLE:
++			ret = ksm_enable_merge_any(mm);
++			break;
++		case PKSM_DISABLE:
++			ret = ksm_disable_merge_any(mm);
++			break;
++		case PKSM_STATUS:
++			ret = !!test_bit(MMF_VM_MERGE_ANY, &mm->flags);
++			break;
++	}
++
++	mmap_write_unlock(mm);
++
++release_mm:
++	mmput(mm);
++release_task:
++	put_task_struct(task);
++out:
++	return ret;
++}
++#endif /* CONFIG_KSM */
++
++SYSCALL_DEFINE2(process_ksm_enable, int, pidfd, unsigned int, flags)
++{
++#ifdef CONFIG_KSM
++	if (flags != 0)
++		return -EINVAL;
++
++	return do_process_ksm_control(pidfd, PKSM_ENABLE);
++#else /* CONFIG_KSM */
++	return -ENOSYS;
++#endif /* CONFIG_KSM */
++}
++
++SYSCALL_DEFINE2(process_ksm_disable, int, pidfd, unsigned int, flags)
++{
++#ifdef CONFIG_KSM
++	if (flags != 0)
++		return -EINVAL;
++
++	return do_process_ksm_control(pidfd, PKSM_DISABLE);
++#else /* CONFIG_KSM */
++	return -ENOSYS;
++#endif /* CONFIG_KSM */
++}
++
++SYSCALL_DEFINE2(process_ksm_status, int, pidfd, unsigned int, flags)
++{
++#ifdef CONFIG_KSM
++	if (flags != 0)
++		return -EINVAL;
++
++	return do_process_ksm_control(pidfd, PKSM_STATUS);
++#else /* CONFIG_KSM */
++	return -ENOSYS;
++#endif /* CONFIG_KSM */
++}
++
++#ifdef CONFIG_KSM
++static ssize_t process_ksm_enable_show(struct kobject *kobj,
++		struct kobj_attribute *attr, char *buf)
++{
++	return sysfs_emit(buf, "%u\n", __NR_process_ksm_enable);
++}
++static struct kobj_attribute process_ksm_enable_attr = __ATTR_RO(process_ksm_enable);
++
++static ssize_t process_ksm_disable_show(struct kobject *kobj,
++		struct kobj_attribute *attr, char *buf)
++{
++	return sysfs_emit(buf, "%u\n", __NR_process_ksm_disable);
++}
++static struct kobj_attribute process_ksm_disable_attr = __ATTR_RO(process_ksm_disable);
++
++static ssize_t process_ksm_status_show(struct kobject *kobj,
++		struct kobj_attribute *attr, char *buf)
++{
++	return sysfs_emit(buf, "%u\n", __NR_process_ksm_status);
++}
++static struct kobj_attribute process_ksm_status_attr = __ATTR_RO(process_ksm_status);
++
++static struct attribute *process_ksm_sysfs_attrs[] = {
++	&process_ksm_enable_attr.attr,
++	&process_ksm_disable_attr.attr,
++	&process_ksm_status_attr.attr,
++	NULL,
++};
++
++static const struct attribute_group process_ksm_sysfs_attr_group = {
++	.attrs = process_ksm_sysfs_attrs,
++	.name = "process_ksm",
++};
++
++static int __init process_ksm_sysfs_init(void)
++{
++	return sysfs_create_group(kernel_kobj, &process_ksm_sysfs_attr_group);
++}
++subsys_initcall(process_ksm_sysfs_init);
++#endif /* CONFIG_KSM */
++
+ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
+ 		struct getcpu_cache __user *, unused)
+ {
+diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
+index c00a86931f8c..d82213d68522 100644
+--- a/kernel/sys_ni.c
++++ b/kernel/sys_ni.c
+@@ -186,6 +186,9 @@ COND_SYSCALL(mincore);
+ COND_SYSCALL(madvise);
+ COND_SYSCALL(process_madvise);
+ COND_SYSCALL(process_mrelease);
++COND_SYSCALL(process_ksm_enable);
++COND_SYSCALL(process_ksm_disable);
++COND_SYSCALL(process_ksm_status);
+ COND_SYSCALL(remap_file_pages);
+ COND_SYSCALL(mbind);
+ COND_SYSCALL(get_mempolicy);
+diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl
+index 845e24eb372e..227d9cc12365 100644
+--- a/scripts/syscall.tbl
++++ b/scripts/syscall.tbl
+@@ -403,3 +403,6 @@
+ 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+ 461	common	lsm_list_modules		sys_lsm_list_modules
+ 462	common	mseal				sys_mseal
++463	common	process_ksm_enable			sys_process_ksm_enable
++464	common	process_ksm_disable			sys_process_ksm_disable
++465	common	process_ksm_status			sys_process_ksm_status
+diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+index ebae8415dfbb..16f71bc2f6f0 100644
+--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
++++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+@@ -553,3 +553,6 @@
+ 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+ 461	common	lsm_list_modules		sys_lsm_list_modules
+ 462	common	mseal				sys_mseal
++463	common	process_ksm_enable		sys_process_ksm_enable
++464	common	process_ksm_disable		sys_process_ksm_disable
++465	common	process_ksm_status		sys_process_ksm_status
+diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+index 01071182763e..7394bad8178e 100644
+--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
++++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+@@ -465,3 +465,6 @@
+ 460  common	lsm_set_self_attr	sys_lsm_set_self_attr		sys_lsm_set_self_attr
+ 461  common	lsm_list_modules	sys_lsm_list_modules		sys_lsm_list_modules
+ 462  common	mseal			sys_mseal			sys_mseal
++463  common	process_ksm_enable	sys_process_ksm_enable		sys_process_ksm_enable
++464  common	process_ksm_disable	sys_process_ksm_disable		sys_process_ksm_disable
++465  common	process_ksm_status	sys_process_ksm_status		sys_process_ksm_status
+-- 
+2.47.0.rc0
+
+From 727728c9e456e4484ef8e1a05a66f78a90e1c24a Mon Sep 17 00:00:00 2001
+From: Eric Naim <dnaim@cachyos.org>
+Date: Tue, 22 Oct 2024 22:53:03 +0800
+Subject: [PATCH 09/13] ntsync
+
+Signed-off-by: Eric Naim <dnaim@cachyos.org>
+---
+ Documentation/userspace-api/index.rst         |    1 +
+ Documentation/userspace-api/ntsync.rst        |  398 +++++
+ MAINTAINERS                                   |    9 +
+ drivers/misc/Kconfig                          |    1 -
+ drivers/misc/ntsync.c                         |  989 +++++++++++-
+ include/uapi/linux/ntsync.h                   |   39 +
+ tools/testing/selftests/Makefile              |    1 +
+ .../selftests/drivers/ntsync/.gitignore       |    1 +
+ .../testing/selftests/drivers/ntsync/Makefile |    7 +
+ tools/testing/selftests/drivers/ntsync/config |    1 +
+ .../testing/selftests/drivers/ntsync/ntsync.c | 1407 +++++++++++++++++
+ 11 files changed, 2850 insertions(+), 4 deletions(-)
+ create mode 100644 Documentation/userspace-api/ntsync.rst
+ create mode 100644 tools/testing/selftests/drivers/ntsync/.gitignore
+ create mode 100644 tools/testing/selftests/drivers/ntsync/Makefile
+ create mode 100644 tools/testing/selftests/drivers/ntsync/config
+ create mode 100644 tools/testing/selftests/drivers/ntsync/ntsync.c
+
+diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
+index 274cc7546efc..9c1b15cd89ab 100644
+--- a/Documentation/userspace-api/index.rst
++++ b/Documentation/userspace-api/index.rst
+@@ -63,6 +63,7 @@ Everything else
+    vduse
+    futex2
+    perf_ring_buffer
++   ntsync
+ 
+ .. only::  subproject and html
+ 
+diff --git a/Documentation/userspace-api/ntsync.rst b/Documentation/userspace-api/ntsync.rst
+new file mode 100644
+index 000000000000..767844637a7d
+--- /dev/null
++++ b/Documentation/userspace-api/ntsync.rst
+@@ -0,0 +1,398 @@
++===================================
++NT synchronization primitive driver
++===================================
++
++This page documents the user-space API for the ntsync driver.
++
++ntsync is a support driver for emulation of NT synchronization
++primitives by user-space NT emulators. It exists because implementation
++in user-space, using existing tools, cannot match Windows performance
++while offering accurate semantics. It is implemented entirely in
++software, and does not drive any hardware device.
++
++This interface is meant as a compatibility tool only, and should not
++be used for general synchronization. Instead use generic, versatile
++interfaces such as futex(2) and poll(2).
++
++Synchronization primitives
++==========================
++
++The ntsync driver exposes three types of synchronization primitives:
++semaphores, mutexes, and events.
++
++A semaphore holds a single volatile 32-bit counter, and a static 32-bit
++integer denoting the maximum value. It is considered signaled (that is,
++can be acquired without contention, or will wake up a waiting thread)
++when the counter is nonzero. The counter is decremented by one when a
++wait is satisfied. Both the initial and maximum count are established
++when the semaphore is created.
++
++A mutex holds a volatile 32-bit recursion count, and a volatile 32-bit
++identifier denoting its owner. A mutex is considered signaled when its
++owner is zero (indicating that it is not owned). The recursion count is
++incremented when a wait is satisfied, and ownership is set to the given
++identifier.
++
++A mutex also holds an internal flag denoting whether its previous owner
++has died; such a mutex is said to be abandoned. Owner death is not
++tracked automatically based on thread death, but rather must be
++communicated using ``NTSYNC_IOC_MUTEX_KILL``. An abandoned mutex is
++inherently considered unowned.
++
++Except for the "unowned" semantics of zero, the actual value of the
++owner identifier is not interpreted by the ntsync driver at all. The
++intended use is to store a thread identifier; however, the ntsync
++driver does not actually validate that a calling thread provides
++consistent or unique identifiers.
++
++An event is similar to a semaphore with a maximum count of one. It holds
++a volatile boolean state denoting whether it is signaled or not. There
++are two types of events, auto-reset and manual-reset. An auto-reset
++event is designaled when a wait is satisfied; a manual-reset event is
++not. The event type is specified when the event is created.
++
++Unless specified otherwise, all operations on an object are atomic and
++totally ordered with respect to other operations on the same object.
++
++Objects are represented by files. When all file descriptors to an
++object are closed, that object is deleted.
++
++Char device
++===========
++
++The ntsync driver creates a single char device /dev/ntsync. Each file
++description opened on the device represents a unique instance intended
++to back an individual NT virtual machine. Objects created by one ntsync
++instance may only be used with other objects created by the same
++instance.
++
++ioctl reference
++===============
++
++All operations on the device are done through ioctls. There are four
++structures used in ioctl calls::
++
++   struct ntsync_sem_args {
++   	__u32 sem;
++   	__u32 count;
++   	__u32 max;
++   };
++
++   struct ntsync_mutex_args {
++   	__u32 mutex;
++   	__u32 owner;
++   	__u32 count;
++   };
++
++   struct ntsync_event_args {
++   	__u32 event;
++   	__u32 signaled;
++   	__u32 manual;
++   };
++
++   struct ntsync_wait_args {
++   	__u64 timeout;
++   	__u64 objs;
++   	__u32 count;
++   	__u32 owner;
++   	__u32 index;
++   	__u32 alert;
++   	__u32 flags;
++   	__u32 pad;
++   };
++
++Depending on the ioctl, members of the structure may be used as input,
++output, or not at all. All ioctls return 0 on success.
++
++The ioctls on the device file are as follows:
++
++.. c:macro:: NTSYNC_IOC_CREATE_SEM
++
++  Create a semaphore object. Takes a pointer to struct
++  :c:type:`ntsync_sem_args`, which is used as follows:
++
++  .. list-table::
++
++     * - ``sem``
++       - On output, contains a file descriptor to the created semaphore.
++     * - ``count``
++       - Initial count of the semaphore.
++     * - ``max``
++       - Maximum count of the semaphore.
++
++  Fails with ``EINVAL`` if ``count`` is greater than ``max``.
++
++.. c:macro:: NTSYNC_IOC_CREATE_MUTEX
++
++  Create a mutex object. Takes a pointer to struct
++  :c:type:`ntsync_mutex_args`, which is used as follows:
++
++  .. list-table::
++
++     * - ``mutex``
++       - On output, contains a file descriptor to the created mutex.
++     * - ``count``
++       - Initial recursion count of the mutex.
++     * - ``owner``
++       - Initial owner of the mutex.
++
++  If ``owner`` is nonzero and ``count`` is zero, or if ``owner`` is
++  zero and ``count`` is nonzero, the function fails with ``EINVAL``.
++
++.. c:macro:: NTSYNC_IOC_CREATE_EVENT
++
++  Create an event object. Takes a pointer to struct
++  :c:type:`ntsync_event_args`, which is used as follows:
++
++  .. list-table::
++
++     * - ``event``
++       - On output, contains a file descriptor to the created event.
++     * - ``signaled``
++       - If nonzero, the event is initially signaled, otherwise
++         nonsignaled.
++     * - ``manual``
++       - If nonzero, the event is a manual-reset event, otherwise
++         auto-reset.
++
++The ioctls on the individual objects are as follows:
++
++.. c:macro:: NTSYNC_IOC_SEM_POST
++
++  Post to a semaphore object. Takes a pointer to a 32-bit integer,
++  which on input holds the count to be added to the semaphore, and on
++  output contains its previous count.
++
++  If adding to the semaphore's current count would raise the latter
++  past the semaphore's maximum count, the ioctl fails with
++  ``EOVERFLOW`` and the semaphore is not affected. If raising the
++  semaphore's count causes it to become signaled, eligible threads
++  waiting on this semaphore will be woken and the semaphore's count
++  decremented appropriately.
++
++.. c:macro:: NTSYNC_IOC_MUTEX_UNLOCK
++
++  Release a mutex object. Takes a pointer to struct
++  :c:type:`ntsync_mutex_args`, which is used as follows:
++
++  .. list-table::
++
++     * - ``mutex``
++       - Ignored.
++     * - ``owner``
++       - Specifies the owner trying to release this mutex.
++     * - ``count``
++       - On output, contains the previous recursion count.
++
++  If ``owner`` is zero, the ioctl fails with ``EINVAL``. If ``owner``
++  is not the current owner of the mutex, the ioctl fails with
++  ``EPERM``.
++
++  The mutex's count will be decremented by one. If decrementing the
++  mutex's count causes it to become zero, the mutex is marked as
++  unowned and signaled, and eligible threads waiting on it will be
++  woken as appropriate.
++
++.. c:macro:: NTSYNC_IOC_SET_EVENT
++
++  Signal an event object. Takes a pointer to a 32-bit integer, which on
++  output contains the previous state of the event.
++
++  Eligible threads will be woken, and auto-reset events will be
++  designaled appropriately.
++
++.. c:macro:: NTSYNC_IOC_RESET_EVENT
++
++  Designal an event object. Takes a pointer to a 32-bit integer, which
++  on output contains the previous state of the event.
++
++.. c:macro:: NTSYNC_IOC_PULSE_EVENT
++
++  Wake threads waiting on an event object while leaving it in an
++  unsignaled state. Takes a pointer to a 32-bit integer, which on
++  output contains the previous state of the event.
++
++  A pulse operation can be thought of as a set followed by a reset,
++  performed as a single atomic operation. If two threads are waiting on
++  an auto-reset event which is pulsed, only one will be woken. If two
++  threads are waiting on a manual-reset event which is pulsed, both will
++  be woken. However, in both cases, the event will be unsignaled
++  afterwards, and a simultaneous read operation will always report the
++  event as unsignaled.
++
++.. c:macro:: NTSYNC_IOC_READ_SEM
++
++  Read the current state of a semaphore object. Takes a pointer to
++  struct :c:type:`ntsync_sem_args`, which is used as follows:
++
++  .. list-table::
++
++     * - ``sem``
++       - Ignored.
++     * - ``count``
++       - On output, contains the current count of the semaphore.
++     * - ``max``
++       - On output, contains the maximum count of the semaphore.
++
++.. c:macro:: NTSYNC_IOC_READ_MUTEX
++
++  Read the current state of a mutex object. Takes a pointer to struct
++  :c:type:`ntsync_mutex_args`, which is used as follows:
++
++  .. list-table::
++
++     * - ``mutex``
++       - Ignored.
++     * - ``owner``
++       - On output, contains the current owner of the mutex, or zero
++         if the mutex is not currently owned.
++     * - ``count``
++       - On output, contains the current recursion count of the mutex.
++
++  If the mutex is marked as abandoned, the function fails with
++  ``EOWNERDEAD``. In this case, ``count`` and ``owner`` are set to
++  zero.
++
++.. c:macro:: NTSYNC_IOC_READ_EVENT
++
++  Read the current state of an event object. Takes a pointer to struct
++  :c:type:`ntsync_event_args`, which is used as follows:
++
++  .. list-table::
++
++     * - ``event``
++       - Ignored.
++     * - ``signaled``
++       - On output, contains the current state of the event.
++     * - ``manual``
++       - On output, contains 1 if the event is a manual-reset event,
++         and 0 otherwise.
++
++.. c:macro:: NTSYNC_IOC_MUTEX_KILL
++
++  Mark a mutex as unowned and abandoned if it is owned by the given
++  owner. Takes an input-only pointer to a 32-bit integer denoting the
++  owner. If the owner is zero, the ioctl fails with ``EINVAL``. If the
++  owner does not own the mutex, the function fails with ``EPERM``.
++
++  Eligible threads waiting on the mutex will be woken as appropriate
++  (and such waits will fail with ``EOWNERDEAD``, as described below).
++
++.. c:macro:: NTSYNC_IOC_WAIT_ANY
++
++  Poll on any of a list of objects, atomically acquiring at most one.
++  Takes a pointer to struct :c:type:`ntsync_wait_args`, which is
++  used as follows:
++
++  .. list-table::
++
++     * - ``timeout``
++       - Absolute timeout in nanoseconds. If ``NTSYNC_WAIT_REALTIME``
++         is set, the timeout is measured against the REALTIME clock;
++         otherwise it is measured against the MONOTONIC clock. If the
++         timeout is equal to or earlier than the current time, the
++         function returns immediately without sleeping. If ``timeout``
++         is U64_MAX, the function will sleep until an object is
++         signaled, and will not fail with ``ETIMEDOUT``.
++     * - ``objs``
++       - Pointer to an array of ``count`` file descriptors
++         (specified as an integer so that the structure has the same
++         size regardless of architecture). If any object is
++         invalid, the function fails with ``EINVAL``.
++     * - ``count``
++       - Number of objects specified in the ``objs`` array.
++         If greater than ``NTSYNC_MAX_WAIT_COUNT``, the function fails
++         with ``EINVAL``.
++     * - ``owner``
++       - Mutex owner identifier. If any object in ``objs`` is a mutex,
++         the ioctl will attempt to acquire that mutex on behalf of
++         ``owner``. If ``owner`` is zero, the ioctl fails with
++         ``EINVAL``.
++     * - ``index``
++       - On success, contains the index (into ``objs``) of the object
++         which was signaled. If ``alert`` was signaled instead,
++         this contains ``count``.
++     * - ``alert``
++       - Optional event object file descriptor. If nonzero, this
++         specifies an "alert" event object which, if signaled, will
++         terminate the wait. If nonzero, the identifier must point to a
++         valid event.
++     * - ``flags``
++       - Zero or more flags. Currently the only flag is
++         ``NTSYNC_WAIT_REALTIME``, which causes the timeout to be
++         measured against the REALTIME clock instead of MONOTONIC.
++     * - ``pad``
++       - Unused, must be set to zero.
++
++  This function attempts to acquire one of the given objects. If unable
++  to do so, it sleeps until an object becomes signaled, subsequently
++  acquiring it, or the timeout expires. In the latter case the ioctl
++  fails with ``ETIMEDOUT``. The function only acquires one object, even
++  if multiple objects are signaled.
++
++  A semaphore is considered to be signaled if its count is nonzero, and
++  is acquired by decrementing its count by one. A mutex is considered
++  to be signaled if it is unowned or if its owner matches the ``owner``
++  argument, and is acquired by incrementing its recursion count by one
++  and setting its owner to the ``owner`` argument. An auto-reset event
++  is acquired by designaling it; a manual-reset event is not affected
++  by acquisition.
++
++  Acquisition is atomic and totally ordered with respect to other
++  operations on the same object. If two wait operations (with different
++  ``owner`` identifiers) are queued on the same mutex, only one is
++  signaled. If two wait operations are queued on the same semaphore,
++  and a value of one is posted to it, only one is signaled.
++
++  If an abandoned mutex is acquired, the ioctl fails with
++  ``EOWNERDEAD``. Although this is a failure return, the function may
++  otherwise be considered successful. The mutex is marked as owned by
++  the given owner (with a recursion count of 1) and as no longer
++  abandoned, and ``index`` is still set to the index of the mutex.
++
++  The ``alert`` argument is an "extra" event which can terminate the
++  wait, independently of all other objects.
++
++  It is valid to pass the same object more than once, including by
++  passing the same event in the ``objs`` array and in ``alert``. If a
++  wakeup occurs due to that object being signaled, ``index`` is set to
++  the lowest index corresponding to that object.
++
++  The function may fail with ``EINTR`` if a signal is received.
++
++.. c:macro:: NTSYNC_IOC_WAIT_ALL
++
++  Poll on a list of objects, atomically acquiring all of them. Takes a
++  pointer to struct :c:type:`ntsync_wait_args`, which is used
++  identically to ``NTSYNC_IOC_WAIT_ANY``, except that ``index`` is
++  always filled with zero on success if not woken via alert.
++
++  This function attempts to simultaneously acquire all of the given
++  objects. If unable to do so, it sleeps until all objects become
++  simultaneously signaled, subsequently acquiring them, or the timeout
++  expires. In the latter case the ioctl fails with ``ETIMEDOUT`` and no
++  objects are modified.
++
++  Objects may become signaled and subsequently designaled (through
++  acquisition by other threads) while this thread is sleeping. Only
++  once all objects are simultaneously signaled does the ioctl acquire
++  them and return. The entire acquisition is atomic and totally ordered
++  with respect to other operations on any of the given objects.
++
++  If an abandoned mutex is acquired, the ioctl fails with
++  ``EOWNERDEAD``. Similarly to ``NTSYNC_IOC_WAIT_ANY``, all objects are
++  nevertheless marked as acquired. Note that if multiple mutex objects
++  are specified, there is no way to know which were marked as
++  abandoned.
++
++  As with "any" waits, the ``alert`` argument is an "extra" event which
++  can terminate the wait. Critically, however, an "all" wait will
++  succeed if all members in ``objs`` are signaled, *or* if ``alert`` is
++  signaled. In the latter case ``index`` will be set to ``count``. As
++  with "any" waits, if both conditions are fulfilled, the former takes
++  priority, and objects in ``objs`` will be acquired.
++
++  Unlike ``NTSYNC_IOC_WAIT_ANY``, it is not valid to pass the same
++  object more than once, nor is it valid to pass the same object in
++  ``objs`` and in ``alert``. If this is attempted, the function fails
++  with ``EINVAL``.
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 2ba00c0cd701..0bcfbc58a9ab 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -16327,6 +16327,15 @@ T:	git https://github.com/Paragon-Software-Group/linux-ntfs3.git
+ F:	Documentation/filesystems/ntfs3.rst
+ F:	fs/ntfs3/
+ 
++NTSYNC SYNCHRONIZATION PRIMITIVE DRIVER
++M:	Elizabeth Figura <zfigura@codeweavers.com>
++L:	wine-devel@winehq.org
++S:	Supported
++F:	Documentation/userspace-api/ntsync.rst
++F:	drivers/misc/ntsync.c
++F:	include/uapi/linux/ntsync.h
++F:	tools/testing/selftests/drivers/ntsync/
++
+ NUBUS SUBSYSTEM
+ M:	Finn Thain <fthain@linux-m68k.org>
+ L:	linux-m68k@lists.linux-m68k.org
+diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
+index 41c54051347a..bde398e12696 100644
+--- a/drivers/misc/Kconfig
++++ b/drivers/misc/Kconfig
+@@ -507,7 +507,6 @@ config OPEN_DICE
+ 
+ config NTSYNC
+ 	tristate "NT synchronization primitive emulation"
+-	depends on BROKEN
+ 	help
+ 	  This module provides kernel support for emulation of Windows NT
+ 	  synchronization primitives. It is not a hardware driver.
+diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
+index 3c2f743c58b0..87a24798a5c7 100644
+--- a/drivers/misc/ntsync.c
++++ b/drivers/misc/ntsync.c
+@@ -6,11 +6,17 @@
+  */
+ 
+ #include <linux/anon_inodes.h>
++#include <linux/atomic.h>
+ #include <linux/file.h>
+ #include <linux/fs.h>
++#include <linux/hrtimer.h>
++#include <linux/ktime.h>
+ #include <linux/miscdevice.h>
+ #include <linux/module.h>
++#include <linux/mutex.h>
+ #include <linux/overflow.h>
++#include <linux/sched.h>
++#include <linux/sched/signal.h>
+ #include <linux/slab.h>
+ #include <linux/spinlock.h>
+ #include <uapi/linux/ntsync.h>
+@@ -19,6 +25,8 @@
+ 
+ enum ntsync_type {
+ 	NTSYNC_TYPE_SEM,
++	NTSYNC_TYPE_MUTEX,
++	NTSYNC_TYPE_EVENT,
+ };
+ 
+ /*
+@@ -30,10 +38,13 @@ enum ntsync_type {
+  *
+  * Both rely on struct file for reference counting. Individual
+  * ntsync_obj objects take a reference to the device when created.
++ * Wait operations take a reference to each object being waited on for
++ * the duration of the wait.
+  */
+ 
+ struct ntsync_obj {
+ 	spinlock_t lock;
++	int dev_locked;
+ 
+ 	enum ntsync_type type;
+ 
+@@ -46,13 +57,335 @@ struct ntsync_obj {
+ 			__u32 count;
+ 			__u32 max;
+ 		} sem;
++		struct {
++			__u32 count;
++			pid_t owner;
++			bool ownerdead;
++		} mutex;
++		struct {
++			bool manual;
++			bool signaled;
++		} event;
+ 	} u;
++
++	/*
++	 * any_waiters is protected by the object lock, but all_waiters is
++	 * protected by the device wait_all_lock.
++	 */
++	struct list_head any_waiters;
++	struct list_head all_waiters;
++
++	/*
++	 * Hint describing how many tasks are queued on this object in a
++	 * wait-all operation.
++	 *
++	 * Any time we do a wake, we may need to wake "all" waiters as well as
++	 * "any" waiters. In order to atomically wake "all" waiters, we must
++	 * lock all of the objects, and that means grabbing the wait_all_lock
++	 * below (and, due to lock ordering rules, before locking this object).
++	 * However, wait-all is a rare operation, and grabbing the wait-all
++	 * lock for every wake would create unnecessary contention.
++	 * Therefore we first check whether all_hint is zero, and, if it is,
++	 * we skip trying to wake "all" waiters.
++	 *
++	 * Since wait requests must originate from user-space threads, we're
++	 * limited here by PID_MAX_LIMIT, so there's no risk of overflow.
++	 */
++	atomic_t all_hint;
++};
++
++struct ntsync_q_entry {
++	struct list_head node;
++	struct ntsync_q *q;
++	struct ntsync_obj *obj;
++	__u32 index;
++};
++
++struct ntsync_q {
++	struct task_struct *task;
++	__u32 owner;
++
++	/*
++	 * Protected via atomic_try_cmpxchg(). Only the thread that wins the
++	 * compare-and-swap may actually change object states and wake this
++	 * task.
++	 */
++	atomic_t signaled;
++
++	bool all;
++	bool ownerdead;
++	__u32 count;
++	struct ntsync_q_entry entries[];
+ };
+ 
+ struct ntsync_device {
++	/*
++	 * Wait-all operations must atomically grab all objects, and be totally
++	 * ordered with respect to each other and wait-any operations.
++	 * If one thread is trying to acquire several objects, another thread
++	 * cannot touch the object at the same time.
++	 *
++	 * This device-wide lock is used to serialize wait-for-all
++	 * operations, and operations on an object that is involved in a
++	 * wait-for-all.
++	 */
++	struct mutex wait_all_lock;
++
+ 	struct file *file;
+ };
+ 
++/*
++ * Single objects are locked using obj->lock.
++ *
++ * Multiple objects are 'locked' while holding dev->wait_all_lock.
++ * In this case however, individual objects are not locked by holding
++ * obj->lock, but by setting obj->dev_locked.
++ *
++ * This means that in order to lock a single object, the sequence is slightly
++ * more complicated than usual. Specifically it needs to check obj->dev_locked
++ * after acquiring obj->lock, if set, it needs to drop the lock and acquire
++ * dev->wait_all_lock in order to serialize against the multi-object operation.
++ */
++
++static void dev_lock_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
++{
++	lockdep_assert_held(&dev->wait_all_lock);
++	lockdep_assert(obj->dev == dev);
++	spin_lock(&obj->lock);
++	/*
++	 * By setting obj->dev_locked inside obj->lock, it is ensured that
++	 * anyone holding obj->lock must see the value.
++	 */
++	obj->dev_locked = 1;
++	spin_unlock(&obj->lock);
++}
++
++static void dev_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
++{
++	lockdep_assert_held(&dev->wait_all_lock);
++	lockdep_assert(obj->dev == dev);
++	spin_lock(&obj->lock);
++	obj->dev_locked = 0;
++	spin_unlock(&obj->lock);
++}
++
++static void obj_lock(struct ntsync_obj *obj)
++{
++	struct ntsync_device *dev = obj->dev;
++
++	for (;;) {
++		spin_lock(&obj->lock);
++		if (likely(!obj->dev_locked))
++			break;
++
++		spin_unlock(&obj->lock);
++		mutex_lock(&dev->wait_all_lock);
++		spin_lock(&obj->lock);
++		/*
++		 * obj->dev_locked should be set and released under the same
++		 * wait_all_lock section, since we now own this lock, it should
++		 * be clear.
++		 */
++		lockdep_assert(!obj->dev_locked);
++		spin_unlock(&obj->lock);
++		mutex_unlock(&dev->wait_all_lock);
++	}
++}
++
++static void obj_unlock(struct ntsync_obj *obj)
++{
++	spin_unlock(&obj->lock);
++}
++
++static bool ntsync_lock_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
++{
++	bool all;
++
++	obj_lock(obj);
++	all = atomic_read(&obj->all_hint);
++	if (unlikely(all)) {
++		obj_unlock(obj);
++		mutex_lock(&dev->wait_all_lock);
++		dev_lock_obj(dev, obj);
++	}
++
++	return all;
++}
++
++static void ntsync_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj, bool all)
++{
++	if (all) {
++		dev_unlock_obj(dev, obj);
++		mutex_unlock(&dev->wait_all_lock);
++	} else {
++		obj_unlock(obj);
++	}
++}
++
++#define ntsync_assert_held(obj) \
++	lockdep_assert((lockdep_is_held(&(obj)->lock) != LOCK_STATE_NOT_HELD) || \
++		       ((lockdep_is_held(&(obj)->dev->wait_all_lock) != LOCK_STATE_NOT_HELD) && \
++			(obj)->dev_locked))
++
++static bool is_signaled(struct ntsync_obj *obj, __u32 owner)
++{
++	ntsync_assert_held(obj);
++
++	switch (obj->type) {
++	case NTSYNC_TYPE_SEM:
++		return !!obj->u.sem.count;
++	case NTSYNC_TYPE_MUTEX:
++		if (obj->u.mutex.owner && obj->u.mutex.owner != owner)
++			return false;
++		return obj->u.mutex.count < UINT_MAX;
++	case NTSYNC_TYPE_EVENT:
++		return obj->u.event.signaled;
++	}
++
++	WARN(1, "bad object type %#x\n", obj->type);
++	return false;
++}
++
++/*
++ * "locked_obj" is an optional pointer to an object which is already locked and
++ * should not be locked again. This is necessary so that changing an object's
++ * state and waking it can be a single atomic operation.
++ */
++static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q,
++			 struct ntsync_obj *locked_obj)
++{
++	__u32 count = q->count;
++	bool can_wake = true;
++	int signaled = -1;
++	__u32 i;
++
++	lockdep_assert_held(&dev->wait_all_lock);
++	if (locked_obj)
++		lockdep_assert(locked_obj->dev_locked);
++
++	for (i = 0; i < count; i++) {
++		if (q->entries[i].obj != locked_obj)
++			dev_lock_obj(dev, q->entries[i].obj);
++	}
++
++	for (i = 0; i < count; i++) {
++		if (!is_signaled(q->entries[i].obj, q->owner)) {
++			can_wake = false;
++			break;
++		}
++	}
++
++	if (can_wake && atomic_try_cmpxchg(&q->signaled, &signaled, 0)) {
++		for (i = 0; i < count; i++) {
++			struct ntsync_obj *obj = q->entries[i].obj;
++
++			switch (obj->type) {
++			case NTSYNC_TYPE_SEM:
++				obj->u.sem.count--;
++				break;
++			case NTSYNC_TYPE_MUTEX:
++				if (obj->u.mutex.ownerdead)
++					q->ownerdead = true;
++				obj->u.mutex.ownerdead = false;
++				obj->u.mutex.count++;
++				obj->u.mutex.owner = q->owner;
++				break;
++			case NTSYNC_TYPE_EVENT:
++				if (!obj->u.event.manual)
++					obj->u.event.signaled = false;
++				break;
++			}
++		}
++		wake_up_process(q->task);
++	}
++
++	for (i = 0; i < count; i++) {
++		if (q->entries[i].obj != locked_obj)
++			dev_unlock_obj(dev, q->entries[i].obj);
++	}
++}
++
++static void try_wake_all_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
++{
++	struct ntsync_q_entry *entry;
++
++	lockdep_assert_held(&dev->wait_all_lock);
++	lockdep_assert(obj->dev_locked);
++
++	list_for_each_entry(entry, &obj->all_waiters, node)
++		try_wake_all(dev, entry->q, obj);
++}
++
++static void try_wake_any_sem(struct ntsync_obj *sem)
++{
++	struct ntsync_q_entry *entry;
++
++	ntsync_assert_held(sem);
++	lockdep_assert(sem->type == NTSYNC_TYPE_SEM);
++
++	list_for_each_entry(entry, &sem->any_waiters, node) {
++		struct ntsync_q *q = entry->q;
++		int signaled = -1;
++
++		if (!sem->u.sem.count)
++			break;
++
++		if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) {
++			sem->u.sem.count--;
++			wake_up_process(q->task);
++		}
++	}
++}
++
++static void try_wake_any_mutex(struct ntsync_obj *mutex)
++{
++	struct ntsync_q_entry *entry;
++
++	ntsync_assert_held(mutex);
++	lockdep_assert(mutex->type == NTSYNC_TYPE_MUTEX);
++
++	list_for_each_entry(entry, &mutex->any_waiters, node) {
++		struct ntsync_q *q = entry->q;
++		int signaled = -1;
++
++		if (mutex->u.mutex.count == UINT_MAX)
++			break;
++		if (mutex->u.mutex.owner && mutex->u.mutex.owner != q->owner)
++			continue;
++
++		if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) {
++			if (mutex->u.mutex.ownerdead)
++				q->ownerdead = true;
++			mutex->u.mutex.ownerdead = false;
++			mutex->u.mutex.count++;
++			mutex->u.mutex.owner = q->owner;
++			wake_up_process(q->task);
++		}
++	}
++}
++
++static void try_wake_any_event(struct ntsync_obj *event)
++{
++	struct ntsync_q_entry *entry;
++
++	ntsync_assert_held(event);
++	lockdep_assert(event->type == NTSYNC_TYPE_EVENT);
++
++	list_for_each_entry(entry, &event->any_waiters, node) {
++		struct ntsync_q *q = entry->q;
++		int signaled = -1;
++
++		if (!event->u.event.signaled)
++			break;
++
++		if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) {
++			if (!event->u.event.manual)
++				event->u.event.signaled = false;
++			wake_up_process(q->task);
++		}
++	}
++}
++
+ /*
+  * Actually change the semaphore state, returning -EOVERFLOW if it is made
+  * invalid.
+@@ -61,7 +394,7 @@ static int post_sem_state(struct ntsync_obj *sem, __u32 count)
+ {
+ 	__u32 sum;
+ 
+-	lockdep_assert_held(&sem->lock);
++	ntsync_assert_held(sem);
+ 
+ 	if (check_add_overflow(sem->u.sem.count, count, &sum) ||
+ 	    sum > sem->u.sem.max)
+@@ -73,9 +406,11 @@ static int post_sem_state(struct ntsync_obj *sem, __u32 count)
+ 
+ static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp)
+ {
++	struct ntsync_device *dev = sem->dev;
+ 	__u32 __user *user_args = argp;
+ 	__u32 prev_count;
+ 	__u32 args;
++	bool all;
+ 	int ret;
+ 
+ 	if (copy_from_user(&args, argp, sizeof(args)))
+@@ -84,12 +419,17 @@ static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp)
+ 	if (sem->type != NTSYNC_TYPE_SEM)
+ 		return -EINVAL;
+ 
+-	spin_lock(&sem->lock);
++	all = ntsync_lock_obj(dev, sem);
+ 
+ 	prev_count = sem->u.sem.count;
+ 	ret = post_sem_state(sem, args);
++	if (!ret) {
++		if (all)
++			try_wake_all_obj(dev, sem);
++		try_wake_any_sem(sem);
++	}
+ 
+-	spin_unlock(&sem->lock);
++	ntsync_unlock_obj(dev, sem, all);
+ 
+ 	if (!ret && put_user(prev_count, user_args))
+ 		ret = -EFAULT;
+@@ -97,6 +437,226 @@ static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp)
+ 	return ret;
+ }
+ 
++/*
++ * Actually change the mutex state, returning -EPERM if not the owner.
++ */
++static int unlock_mutex_state(struct ntsync_obj *mutex,
++			      const struct ntsync_mutex_args *args)
++{
++	ntsync_assert_held(mutex);
++
++	if (mutex->u.mutex.owner != args->owner)
++		return -EPERM;
++
++	if (!--mutex->u.mutex.count)
++		mutex->u.mutex.owner = 0;
++	return 0;
++}
++
++static int ntsync_mutex_unlock(struct ntsync_obj *mutex, void __user *argp)
++{
++	struct ntsync_mutex_args __user *user_args = argp;
++	struct ntsync_device *dev = mutex->dev;
++	struct ntsync_mutex_args args;
++	__u32 prev_count;
++	bool all;
++	int ret;
++
++	if (copy_from_user(&args, argp, sizeof(args)))
++		return -EFAULT;
++	if (!args.owner)
++		return -EINVAL;
++
++	if (mutex->type != NTSYNC_TYPE_MUTEX)
++		return -EINVAL;
++
++	all = ntsync_lock_obj(dev, mutex);
++
++	prev_count = mutex->u.mutex.count;
++	ret = unlock_mutex_state(mutex, &args);
++	if (!ret) {
++		if (all)
++			try_wake_all_obj(dev, mutex);
++		try_wake_any_mutex(mutex);
++	}
++
++	ntsync_unlock_obj(dev, mutex, all);
++
++	if (!ret && put_user(prev_count, &user_args->count))
++		ret = -EFAULT;
++
++	return ret;
++}
++
++/*
++ * Actually change the mutex state to mark its owner as dead,
++ * returning -EPERM if not the owner.
++ */
++static int kill_mutex_state(struct ntsync_obj *mutex, __u32 owner)
++{
++	ntsync_assert_held(mutex);
++
++	if (mutex->u.mutex.owner != owner)
++		return -EPERM;
++
++	mutex->u.mutex.ownerdead = true;
++	mutex->u.mutex.owner = 0;
++	mutex->u.mutex.count = 0;
++	return 0;
++}
++
++static int ntsync_mutex_kill(struct ntsync_obj *mutex, void __user *argp)
++{
++	struct ntsync_device *dev = mutex->dev;
++	__u32 owner;
++	bool all;
++	int ret;
++
++	if (get_user(owner, (__u32 __user *)argp))
++		return -EFAULT;
++	if (!owner)
++		return -EINVAL;
++
++	if (mutex->type != NTSYNC_TYPE_MUTEX)
++		return -EINVAL;
++
++	all = ntsync_lock_obj(dev, mutex);
++
++	ret = kill_mutex_state(mutex, owner);
++	if (!ret) {
++		if (all)
++			try_wake_all_obj(dev, mutex);
++		try_wake_any_mutex(mutex);
++	}
++
++	ntsync_unlock_obj(dev, mutex, all);
++
++	return ret;
++}
++
++static int ntsync_event_set(struct ntsync_obj *event, void __user *argp, bool pulse)
++{
++	struct ntsync_device *dev = event->dev;
++	__u32 prev_state;
++	bool all;
++
++	if (event->type != NTSYNC_TYPE_EVENT)
++		return -EINVAL;
++
++	all = ntsync_lock_obj(dev, event);
++
++	prev_state = event->u.event.signaled;
++	event->u.event.signaled = true;
++	if (all)
++		try_wake_all_obj(dev, event);
++	try_wake_any_event(event);
++	if (pulse)
++		event->u.event.signaled = false;
++
++	ntsync_unlock_obj(dev, event, all);
++
++	if (put_user(prev_state, (__u32 __user *)argp))
++		return -EFAULT;
++
++	return 0;
++}
++
++static int ntsync_event_reset(struct ntsync_obj *event, void __user *argp)
++{
++	struct ntsync_device *dev = event->dev;
++	__u32 prev_state;
++	bool all;
++
++	if (event->type != NTSYNC_TYPE_EVENT)
++		return -EINVAL;
++
++	all = ntsync_lock_obj(dev, event);
++
++	prev_state = event->u.event.signaled;
++	event->u.event.signaled = false;
++
++	ntsync_unlock_obj(dev, event, all);
++
++	if (put_user(prev_state, (__u32 __user *)argp))
++		return -EFAULT;
++
++	return 0;
++}
++
++static int ntsync_sem_read(struct ntsync_obj *sem, void __user *argp)
++{
++	struct ntsync_sem_args __user *user_args = argp;
++	struct ntsync_device *dev = sem->dev;
++	struct ntsync_sem_args args;
++	bool all;
++
++	if (sem->type != NTSYNC_TYPE_SEM)
++		return -EINVAL;
++
++	args.sem = 0;
++
++	all = ntsync_lock_obj(dev, sem);
++
++	args.count = sem->u.sem.count;
++	args.max = sem->u.sem.max;
++
++	ntsync_unlock_obj(dev, sem, all);
++
++	if (copy_to_user(user_args, &args, sizeof(args)))
++		return -EFAULT;
++	return 0;
++}
++
++static int ntsync_mutex_read(struct ntsync_obj *mutex, void __user *argp)
++{
++	struct ntsync_mutex_args __user *user_args = argp;
++	struct ntsync_device *dev = mutex->dev;
++	struct ntsync_mutex_args args;
++	bool all;
++	int ret;
++
++	if (mutex->type != NTSYNC_TYPE_MUTEX)
++		return -EINVAL;
++
++	args.mutex = 0;
++
++	all = ntsync_lock_obj(dev, mutex);
++
++	args.count = mutex->u.mutex.count;
++	args.owner = mutex->u.mutex.owner;
++	ret = mutex->u.mutex.ownerdead ? -EOWNERDEAD : 0;
++
++	ntsync_unlock_obj(dev, mutex, all);
++
++	if (copy_to_user(user_args, &args, sizeof(args)))
++		return -EFAULT;
++	return ret;
++}
++
++static int ntsync_event_read(struct ntsync_obj *event, void __user *argp)
++{
++	struct ntsync_event_args __user *user_args = argp;
++	struct ntsync_device *dev = event->dev;
++	struct ntsync_event_args args;
++	bool all;
++
++	if (event->type != NTSYNC_TYPE_EVENT)
++		return -EINVAL;
++
++	args.event = 0;
++
++	all = ntsync_lock_obj(dev, event);
++
++	args.manual = event->u.event.manual;
++	args.signaled = event->u.event.signaled;
++
++	ntsync_unlock_obj(dev, event, all);
++
++	if (copy_to_user(user_args, &args, sizeof(args)))
++		return -EFAULT;
++	return 0;
++}
++
+ static int ntsync_obj_release(struct inode *inode, struct file *file)
+ {
+ 	struct ntsync_obj *obj = file->private_data;
+@@ -116,6 +676,22 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd,
+ 	switch (cmd) {
+ 	case NTSYNC_IOC_SEM_POST:
+ 		return ntsync_sem_post(obj, argp);
++	case NTSYNC_IOC_SEM_READ:
++		return ntsync_sem_read(obj, argp);
++	case NTSYNC_IOC_MUTEX_UNLOCK:
++		return ntsync_mutex_unlock(obj, argp);
++	case NTSYNC_IOC_MUTEX_KILL:
++		return ntsync_mutex_kill(obj, argp);
++	case NTSYNC_IOC_MUTEX_READ:
++		return ntsync_mutex_read(obj, argp);
++	case NTSYNC_IOC_EVENT_SET:
++		return ntsync_event_set(obj, argp, false);
++	case NTSYNC_IOC_EVENT_RESET:
++		return ntsync_event_reset(obj, argp);
++	case NTSYNC_IOC_EVENT_PULSE:
++		return ntsync_event_set(obj, argp, true);
++	case NTSYNC_IOC_EVENT_READ:
++		return ntsync_event_read(obj, argp);
+ 	default:
+ 		return -ENOIOCTLCMD;
+ 	}
+@@ -141,6 +717,9 @@ static struct ntsync_obj *ntsync_alloc_obj(struct ntsync_device *dev,
+ 	obj->dev = dev;
+ 	get_file(dev->file);
+ 	spin_lock_init(&obj->lock);
++	INIT_LIST_HEAD(&obj->any_waiters);
++	INIT_LIST_HEAD(&obj->all_waiters);
++	atomic_set(&obj->all_hint, 0);
+ 
+ 	return obj;
+ }
+@@ -191,6 +770,400 @@ static int ntsync_create_sem(struct ntsync_device *dev, void __user *argp)
+ 	return put_user(fd, &user_args->sem);
+ }
+ 
++static int ntsync_create_mutex(struct ntsync_device *dev, void __user *argp)
++{
++	struct ntsync_mutex_args __user *user_args = argp;
++	struct ntsync_mutex_args args;
++	struct ntsync_obj *mutex;
++	int fd;
++
++	if (copy_from_user(&args, argp, sizeof(args)))
++		return -EFAULT;
++
++	if (!args.owner != !args.count)
++		return -EINVAL;
++
++	mutex = ntsync_alloc_obj(dev, NTSYNC_TYPE_MUTEX);
++	if (!mutex)
++		return -ENOMEM;
++	mutex->u.mutex.count = args.count;
++	mutex->u.mutex.owner = args.owner;
++	fd = ntsync_obj_get_fd(mutex);
++	if (fd < 0) {
++		fput(dev->file); /* drop the ref taken in ntsync_alloc_obj() */
++		kfree(mutex);
++		return fd;
++	}
++	return put_user(fd, &user_args->mutex);
++}
++
++static int ntsync_create_event(struct ntsync_device *dev, void __user *argp)
++{
++	struct ntsync_event_args __user *user_args = argp;
++	struct ntsync_event_args args;
++	struct ntsync_obj *event;
++	int fd;
++
++	if (copy_from_user(&args, argp, sizeof(args)))
++		return -EFAULT;
++
++	event = ntsync_alloc_obj(dev, NTSYNC_TYPE_EVENT);
++	if (!event)
++		return -ENOMEM;
++	event->u.event.manual = args.manual;
++	event->u.event.signaled = args.signaled;
++	fd = ntsync_obj_get_fd(event);
++	if (fd < 0) {
++		fput(dev->file); /* drop the ref taken in ntsync_alloc_obj() */
++		kfree(event);
++		return fd;
++	}
++	return put_user(fd, &user_args->event);
++}
++
++static struct ntsync_obj *get_obj(struct ntsync_device *dev, int fd)
++{
++	struct file *file = fget(fd);
++	struct ntsync_obj *obj;
++
++	if (!file)
++		return NULL;
++
++	if (file->f_op != &ntsync_obj_fops) {
++		fput(file);
++		return NULL;
++	}
++
++	obj = file->private_data;
++	if (obj->dev != dev) {
++		fput(file);
++		return NULL;
++	}
++
++	return obj;
++}
++
++static void put_obj(struct ntsync_obj *obj)
++{
++	fput(obj->file);
++}
++
++static int ntsync_schedule(const struct ntsync_q *q, const struct ntsync_wait_args *args)
++{
++	ktime_t timeout = ns_to_ktime(args->timeout);
++	clockid_t clock = CLOCK_MONOTONIC;
++	ktime_t *timeout_ptr;
++	int ret = 0;
++
++	timeout_ptr = (args->timeout == U64_MAX ? NULL : &timeout);
++
++	if (args->flags & NTSYNC_WAIT_REALTIME)
++		clock = CLOCK_REALTIME;
++
++	do {
++		if (signal_pending(current)) {
++			ret = -ERESTARTSYS;
++			break;
++		}
++
++		set_current_state(TASK_INTERRUPTIBLE);
++		if (atomic_read(&q->signaled) != -1) {
++			ret = 0;
++			break;
++		}
++		ret = schedule_hrtimeout_range_clock(timeout_ptr, 0, HRTIMER_MODE_ABS, clock);
++	} while (ret < 0);
++	__set_current_state(TASK_RUNNING);
++
++	return ret;
++}
++
++/*
++ * Allocate and initialize the ntsync_q structure, but do not queue us yet.
++ */
++static int setup_wait(struct ntsync_device *dev,
++		      const struct ntsync_wait_args *args, bool all,
++		      struct ntsync_q **ret_q)
++{
++	int fds[NTSYNC_MAX_WAIT_COUNT + 1];
++	const __u32 count = args->count;
++	struct ntsync_q *q;
++	__u32 total_count;
++	__u32 i, j;
++
++	if (args->pad || (args->flags & ~NTSYNC_WAIT_REALTIME))
++		return -EINVAL;
++
++	if (args->count > NTSYNC_MAX_WAIT_COUNT)
++		return -EINVAL;
++
++	total_count = count;
++	if (args->alert)
++		total_count++;
++
++	if (copy_from_user(fds, u64_to_user_ptr(args->objs),
++			   array_size(count, sizeof(*fds))))
++		return -EFAULT;
++	if (args->alert)
++		fds[count] = args->alert;
++
++	q = kmalloc(struct_size(q, entries, total_count), GFP_KERNEL);
++	if (!q)
++		return -ENOMEM;
++	q->task = current;
++	q->owner = args->owner;
++	atomic_set(&q->signaled, -1);
++	q->all = all;
++	q->ownerdead = false;
++	q->count = count;
++
++	for (i = 0; i < total_count; i++) {
++		struct ntsync_q_entry *entry = &q->entries[i];
++		struct ntsync_obj *obj = get_obj(dev, fds[i]);
++
++		if (!obj)
++			goto err;
++
++		if (all) {
++			/* Check that the objects are all distinct. */
++			for (j = 0; j < i; j++) {
++				if (obj == q->entries[j].obj) {
++					put_obj(obj);
++					goto err;
++				}
++			}
++		}
++
++		entry->obj = obj;
++		entry->q = q;
++		entry->index = i;
++	}
++
++	*ret_q = q;
++	return 0;
++
++err:
++	for (j = 0; j < i; j++)
++		put_obj(q->entries[j].obj);
++	kfree(q);
++	return -EINVAL;
++}
++
++static void try_wake_any_obj(struct ntsync_obj *obj)
++{
++	switch (obj->type) {
++	case NTSYNC_TYPE_SEM:
++		try_wake_any_sem(obj);
++		break;
++	case NTSYNC_TYPE_MUTEX:
++		try_wake_any_mutex(obj);
++		break;
++	case NTSYNC_TYPE_EVENT:
++		try_wake_any_event(obj);
++		break;
++	}
++}
++
++static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
++{
++	struct ntsync_wait_args args;
++	__u32 i, total_count;
++	struct ntsync_q *q;
++	int signaled;
++	bool all;
++	int ret;
++
++	if (copy_from_user(&args, argp, sizeof(args)))
++		return -EFAULT;
++
++	ret = setup_wait(dev, &args, false, &q);
++	if (ret < 0)
++		return ret;
++
++	total_count = args.count;
++	if (args.alert)
++		total_count++;
++
++	/* queue ourselves */
++
++	for (i = 0; i < total_count; i++) {
++		struct ntsync_q_entry *entry = &q->entries[i];
++		struct ntsync_obj *obj = entry->obj;
++
++		all = ntsync_lock_obj(dev, obj);
++		list_add_tail(&entry->node, &obj->any_waiters);
++		ntsync_unlock_obj(dev, obj, all);
++	}
++
++	/*
++	 * Check if we are already signaled.
++	 *
++	 * Note that the API requires that normal objects are checked before
++	 * the alert event. Hence we queue the alert event last, and check
++	 * objects in order.
++	 */
++
++	for (i = 0; i < total_count; i++) {
++		struct ntsync_obj *obj = q->entries[i].obj;
++
++		if (atomic_read(&q->signaled) != -1)
++			break;
++
++		all = ntsync_lock_obj(dev, obj);
++		try_wake_any_obj(obj);
++		ntsync_unlock_obj(dev, obj, all);
++	}
++
++	/* sleep */
++
++	ret = ntsync_schedule(q, &args);
++
++	/* and finally, unqueue */
++
++	for (i = 0; i < total_count; i++) {
++		struct ntsync_q_entry *entry = &q->entries[i];
++		struct ntsync_obj *obj = entry->obj;
++
++		all = ntsync_lock_obj(dev, obj);
++		list_del(&entry->node);
++		ntsync_unlock_obj(dev, obj, all);
++
++		put_obj(obj);
++	}
++
++	signaled = atomic_read(&q->signaled);
++	if (signaled != -1) {
++		struct ntsync_wait_args __user *user_args = argp;
++
++		/* even if we caught a signal, we need to communicate success */
++		ret = q->ownerdead ? -EOWNERDEAD : 0;
++
++		if (put_user(signaled, &user_args->index))
++			ret = -EFAULT;
++	} else if (!ret) {
++		ret = -ETIMEDOUT;
++	}
++
++	kfree(q);
++	return ret;
++}
++
++static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp)
++{
++	struct ntsync_wait_args args;
++	struct ntsync_q *q;
++	int signaled;
++	__u32 i;
++	int ret;
++
++	if (copy_from_user(&args, argp, sizeof(args)))
++		return -EFAULT;
++
++	ret = setup_wait(dev, &args, true, &q);
++	if (ret < 0)
++		return ret;
++
++	/* queue ourselves */
++
++	mutex_lock(&dev->wait_all_lock);
++
++	for (i = 0; i < args.count; i++) {
++		struct ntsync_q_entry *entry = &q->entries[i];
++		struct ntsync_obj *obj = entry->obj;
++
++		atomic_inc(&obj->all_hint);
++
++		/*
++		 * obj->all_waiters is protected by dev->wait_all_lock rather
++		 * than obj->lock, so there is no need to acquire obj->lock
++		 * here.
++		 */
++		list_add_tail(&entry->node, &obj->all_waiters);
++	}
++	if (args.alert) {
++		struct ntsync_q_entry *entry = &q->entries[args.count];
++		struct ntsync_obj *obj = entry->obj;
++
++		dev_lock_obj(dev, obj);
++		list_add_tail(&entry->node, &obj->any_waiters);
++		dev_unlock_obj(dev, obj);
++	}
++
++	/* check if we are already signaled */
++
++	try_wake_all(dev, q, NULL);
++
++	mutex_unlock(&dev->wait_all_lock);
++
++	/*
++	 * Check if the alert event is signaled, making sure to do so only
++	 * after checking if the other objects are signaled.
++	 */
++
++	if (args.alert) {
++		struct ntsync_obj *obj = q->entries[args.count].obj;
++
++		if (atomic_read(&q->signaled) == -1) {
++			bool all = ntsync_lock_obj(dev, obj);
++			try_wake_any_obj(obj);
++			ntsync_unlock_obj(dev, obj, all);
++		}
++	}
++
++	/* sleep */
++
++	ret = ntsync_schedule(q, &args);
++
++	/* and finally, unqueue */
++
++	mutex_lock(&dev->wait_all_lock);
++
++	for (i = 0; i < args.count; i++) {
++		struct ntsync_q_entry *entry = &q->entries[i];
++		struct ntsync_obj *obj = entry->obj;
++
++		/*
++		 * obj->all_waiters is protected by dev->wait_all_lock rather
++		 * than obj->lock, so there is no need to acquire it here.
++		 */
++		list_del(&entry->node);
++
++		atomic_dec(&obj->all_hint);
++
++		put_obj(obj);
++	}
++
++	mutex_unlock(&dev->wait_all_lock);
++
++	if (args.alert) {
++		struct ntsync_q_entry *entry = &q->entries[args.count];
++		struct ntsync_obj *obj = entry->obj;
++		bool all;
++
++		all = ntsync_lock_obj(dev, obj);
++		list_del(&entry->node);
++		ntsync_unlock_obj(dev, obj, all);
++
++		put_obj(obj);
++	}
++
++	signaled = atomic_read(&q->signaled);
++	if (signaled != -1) {
++		struct ntsync_wait_args __user *user_args = argp;
++
++		/* even if we caught a signal, we need to communicate success */
++		ret = q->ownerdead ? -EOWNERDEAD : 0;
++
++		if (put_user(signaled, &user_args->index))
++			ret = -EFAULT;
++	} else if (!ret) {
++		ret = -ETIMEDOUT;
++	}
++
++	kfree(q);
++	return ret;
++}
++
+ static int ntsync_char_open(struct inode *inode, struct file *file)
+ {
+ 	struct ntsync_device *dev;
+@@ -199,6 +1172,8 @@ static int ntsync_char_open(struct inode *inode, struct file *file)
+ 	if (!dev)
+ 		return -ENOMEM;
+ 
++	mutex_init(&dev->wait_all_lock);
++
+ 	file->private_data = dev;
+ 	dev->file = file;
+ 	return nonseekable_open(inode, file);
+@@ -220,8 +1195,16 @@ static long ntsync_char_ioctl(struct file *file, unsigned int cmd,
+ 	void __user *argp = (void __user *)parm;
+ 
+ 	switch (cmd) {
++	case NTSYNC_IOC_CREATE_EVENT:
++		return ntsync_create_event(dev, argp);
++	case NTSYNC_IOC_CREATE_MUTEX:
++		return ntsync_create_mutex(dev, argp);
+ 	case NTSYNC_IOC_CREATE_SEM:
+ 		return ntsync_create_sem(dev, argp);
++	case NTSYNC_IOC_WAIT_ALL:
++		return ntsync_wait_all(dev, argp);
++	case NTSYNC_IOC_WAIT_ANY:
++		return ntsync_wait_any(dev, argp);
+ 	default:
+ 		return -ENOIOCTLCMD;
+ 	}
+diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
+index dcfa38fdc93c..4a8095a3fc34 100644
+--- a/include/uapi/linux/ntsync.h
++++ b/include/uapi/linux/ntsync.h
+@@ -16,8 +16,47 @@ struct ntsync_sem_args {
+ 	__u32 max;
+ };
+ 
++struct ntsync_mutex_args {
++	__u32 mutex;
++	__u32 owner;
++	__u32 count;
++};
++
++struct ntsync_event_args {
++	__u32 event;
++	__u32 manual;
++	__u32 signaled;
++};
++
++#define NTSYNC_WAIT_REALTIME	0x1
++
++struct ntsync_wait_args {
++	__u64 timeout;
++	__u64 objs;
++	__u32 count;
++	__u32 index;
++	__u32 flags;
++	__u32 owner;
++	__u32 alert;
++	__u32 pad;
++};
++
++#define NTSYNC_MAX_WAIT_COUNT 64
++
+ #define NTSYNC_IOC_CREATE_SEM		_IOWR('N', 0x80, struct ntsync_sem_args)
++#define NTSYNC_IOC_WAIT_ANY		_IOWR('N', 0x82, struct ntsync_wait_args)
++#define NTSYNC_IOC_WAIT_ALL		_IOWR('N', 0x83, struct ntsync_wait_args)
++#define NTSYNC_IOC_CREATE_MUTEX		_IOWR('N', 0x84, struct ntsync_mutex_args)
++#define NTSYNC_IOC_CREATE_EVENT		_IOWR('N', 0x87, struct ntsync_event_args)
+ 
+ #define NTSYNC_IOC_SEM_POST		_IOWR('N', 0x81, __u32)
++#define NTSYNC_IOC_MUTEX_UNLOCK		_IOWR('N', 0x85, struct ntsync_mutex_args)
++#define NTSYNC_IOC_MUTEX_KILL		_IOW ('N', 0x86, __u32)
++#define NTSYNC_IOC_EVENT_SET		_IOR ('N', 0x88, __u32)
++#define NTSYNC_IOC_EVENT_RESET		_IOR ('N', 0x89, __u32)
++#define NTSYNC_IOC_EVENT_PULSE		_IOR ('N', 0x8a, __u32)
++#define NTSYNC_IOC_SEM_READ		_IOR ('N', 0x8b, struct ntsync_sem_args)
++#define NTSYNC_IOC_MUTEX_READ		_IOR ('N', 0x8c, struct ntsync_mutex_args)
++#define NTSYNC_IOC_EVENT_READ		_IOR ('N', 0x8d, struct ntsync_event_args)
+ 
+ #endif
+diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
+index bc8fe9e8f7f2..b1296bd8eb3f 100644
+--- a/tools/testing/selftests/Makefile
++++ b/tools/testing/selftests/Makefile
+@@ -17,6 +17,7 @@ TARGETS += devices/error_logs
+ TARGETS += devices/probe
+ TARGETS += dmabuf-heaps
+ TARGETS += drivers/dma-buf
++TARGETS += drivers/ntsync
+ TARGETS += drivers/s390x/uvdevice
+ TARGETS += drivers/net
+ TARGETS += drivers/net/bonding
+diff --git a/tools/testing/selftests/drivers/ntsync/.gitignore b/tools/testing/selftests/drivers/ntsync/.gitignore
+new file mode 100644
+index 000000000000..848573a3d3ea
+--- /dev/null
++++ b/tools/testing/selftests/drivers/ntsync/.gitignore
+@@ -0,0 +1 @@
++ntsync
+diff --git a/tools/testing/selftests/drivers/ntsync/Makefile b/tools/testing/selftests/drivers/ntsync/Makefile
+new file mode 100644
+index 000000000000..dbf2b055c0b2
+--- /dev/null
++++ b/tools/testing/selftests/drivers/ntsync/Makefile
+@@ -0,0 +1,7 @@
++# SPDX-License-Identifier: GPL-2.0-only
++TEST_GEN_PROGS := ntsync
++
++CFLAGS += $(KHDR_INCLUDES)
++LDLIBS += -lpthread
++
++include ../../lib.mk
+diff --git a/tools/testing/selftests/drivers/ntsync/config b/tools/testing/selftests/drivers/ntsync/config
+new file mode 100644
+index 000000000000..60539c826d06
+--- /dev/null
++++ b/tools/testing/selftests/drivers/ntsync/config
+@@ -0,0 +1 @@
++CONFIG_NTSYNC=y
+diff --git a/tools/testing/selftests/drivers/ntsync/ntsync.c b/tools/testing/selftests/drivers/ntsync/ntsync.c
+new file mode 100644
+index 000000000000..5fa2c9a0768c
+--- /dev/null
++++ b/tools/testing/selftests/drivers/ntsync/ntsync.c
+@@ -0,0 +1,1407 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * Various unit tests for the "ntsync" synchronization primitive driver.
++ *
++ * Copyright (C) 2021-2022 Elizabeth Figura <zfigura@codeweavers.com>
++ */
++
++#define _GNU_SOURCE
++#include <sys/ioctl.h>
++#include <sys/stat.h>
++#include <fcntl.h>
++#include <time.h>
++#include <pthread.h>
++#include <linux/ntsync.h>
++#include "../../kselftest_harness.h"
++
++static int read_sem_state(int sem, __u32 *count, __u32 *max)
++{
++	struct ntsync_sem_args args;
++	int ret;
++
++	memset(&args, 0xcc, sizeof(args));
++	ret = ioctl(sem, NTSYNC_IOC_SEM_READ, &args);
++	*count = args.count;
++	*max = args.max;
++	return ret;
++}
++
++#define check_sem_state(sem, count, max) \
++	({ \
++		__u32 __count, __max; \
++		int ret = read_sem_state((sem), &__count, &__max); \
++		EXPECT_EQ(0, ret); \
++		EXPECT_EQ((count), __count); \
++		EXPECT_EQ((max), __max); \
++	})
++
++static int post_sem(int sem, __u32 *count)
++{
++	return ioctl(sem, NTSYNC_IOC_SEM_POST, count);
++}
++
++static int read_mutex_state(int mutex, __u32 *count, __u32 *owner)
++{
++	struct ntsync_mutex_args args;
++	int ret;
++
++	memset(&args, 0xcc, sizeof(args));
++	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &args);
++	*count = args.count;
++	*owner = args.owner;
++	return ret;
++}
++
++#define check_mutex_state(mutex, count, owner) \
++	({ \
++		__u32 __count, __owner; \
++		int ret = read_mutex_state((mutex), &__count, &__owner); \
++		EXPECT_EQ(0, ret); \
++		EXPECT_EQ((count), __count); \
++		EXPECT_EQ((owner), __owner); \
++	})
++
++static int unlock_mutex(int mutex, __u32 owner, __u32 *count)
++{
++	struct ntsync_mutex_args args = {0};	/* zeroed so no indeterminate field reaches the ioctl */
++	int ret;
++	/* count is seeded with 0xdeadbeef; whatever the ioctl leaves there is returned via *count. */
++	args.owner = owner;
++	args.count = 0xdeadbeef;
++	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_UNLOCK, &args);
++	*count = args.count;
++	return ret;
++}
++
++static int read_event_state(int event, __u32 *signaled, __u32 *manual)
++{
++	struct ntsync_event_args state;
++	int rc;
++	/* Poison the buffer with 0xcc, query it, then copy out both fields (even on failure). */
++	memset(&state, 0xcc, sizeof(state));
++	rc = ioctl(event, NTSYNC_IOC_EVENT_READ, &state);
++	*manual = state.manual;
++	*signaled = state.signaled;
++	return rc;
++}
++
++#define check_event_state(event, signaled, manual) \
++	({ /* expects the read to succeed and both fields to match */ \
++		__u32 __signaled, __manual; \
++		int __ret = read_event_state((event), &__signaled, &__manual); \
++		EXPECT_EQ(0, __ret); /* __ret: does not shadow a caller's ret */ \
++		EXPECT_EQ((signaled), __signaled); \
++		EXPECT_EQ((manual), __manual); \
++	})
++
++static int wait_objs(int fd, unsigned long request, __u32 count,
++		     const int *objs, __u32 owner, int alert, __u32 *index)
++{
++	struct ntsync_wait_args args = {0};
++	struct timespec timeout;
++	int ret;
++
++	clock_gettime(CLOCK_MONOTONIC, &timeout);
++	/* Timeout is the current time in ns; widen to 64 bits first so a 32-bit time_t cannot overflow. */
++	args.timeout = (__u64)timeout.tv_sec * 1000000000ULL + timeout.tv_nsec;
++	args.count = count;
++	args.objs = (uintptr_t)objs;
++	args.owner = owner;
++	args.index = 0xdeadbeef;	/* sentinel; whatever the ioctl leaves here is returned via *index */
++	args.alert = alert;
++	ret = ioctl(fd, request, &args);
++	*index = args.index;
++	return ret;
++}
++
++static int wait_any(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index)
++{	/* NTSYNC_IOC_WAIT_ANY through wait_objs(), passing 0 as the alert argument */
++	return wait_objs(fd, NTSYNC_IOC_WAIT_ANY, count, objs, owner, 0, index);
++}
++
++static int wait_all(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index)
++{	/* NTSYNC_IOC_WAIT_ALL through wait_objs(), passing 0 as the alert argument */
++	return wait_objs(fd, NTSYNC_IOC_WAIT_ALL, count, objs, owner, 0, index);
++}
++
++static int wait_any_alert(int fd, __u32 count, const int *objs,
++			  __u32 owner, int alert, __u32 *index)
++{	/* NTSYNC_IOC_WAIT_ANY through wait_objs(), forwarding the caller's alert fd */
++	return wait_objs(fd, NTSYNC_IOC_WAIT_ANY,
++			 count, objs, owner, alert, index);
++}
++
++static int wait_all_alert(int fd, __u32 count, const int *objs,
++			  __u32 owner, int alert, __u32 *index)
++{	/* NTSYNC_IOC_WAIT_ALL through wait_objs(), forwarding the caller's alert fd */
++	return wait_objs(fd, NTSYNC_IOC_WAIT_ALL,
++			 count, objs, owner, alert, index);
++}
++
++TEST(semaphore_state)
++{
++	struct ntsync_sem_args sem_args;
++	struct timespec timeout;
++	__u32 count, index;
++	int fd, ret, sem;
++	/* Exercises semaphore creation, posting and acquisition through wait_any(). */
++	clock_gettime(CLOCK_MONOTONIC, &timeout);	/* NOTE(review): timeout is not read again in this test */
++
++	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
++	ASSERT_LE(0, fd);
++	/* creating with count (3) above max (2) is expected to fail with EINVAL */
++	sem_args.count = 3;
++	sem_args.max = 2;
++	sem_args.sem = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EINVAL, errno);
++
++	sem_args.count = 2;
++	sem_args.max = 2;
++	sem_args.sem = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, sem_args.sem);
++	sem = sem_args.sem;
++	check_sem_state(sem, 2, 2);
++	/* posting 0 is expected to succeed and report the current count (2) */
++	count = 0;
++	ret = post_sem(sem, &count);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(2, count);
++	check_sem_state(sem, 2, 2);
++	/* posting past max is expected to fail with EOVERFLOW and leave the state unchanged */
++	count = 1;
++	ret = post_sem(sem, &count);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EOVERFLOW, errno);
++	check_sem_state(sem, 2, 2);
++	/* each successful wait is expected to take one count */
++	ret = wait_any(fd, 1, &sem, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++	check_sem_state(sem, 1, 2);
++
++	ret = wait_any(fd, 1, &sem, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++	check_sem_state(sem, 0, 2);
++
++	ret = wait_any(fd, 1, &sem, 123, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(ETIMEDOUT, errno);
++
++	count = 3;
++	ret = post_sem(sem, &count);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EOVERFLOW, errno);
++	check_sem_state(sem, 0, 2);
++
++	count = 2;
++	ret = post_sem(sem, &count);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, count);
++	check_sem_state(sem, 2, 2);
++
++	ret = wait_any(fd, 1, &sem, 123, &index);
++	EXPECT_EQ(0, ret);
++	ret = wait_any(fd, 1, &sem, 123, &index);
++	EXPECT_EQ(0, ret);
++
++	count = 1;
++	ret = post_sem(sem, &count);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, count);
++	check_sem_state(sem, 1, 2);
++	/* adding ~0u to a count of 1 is expected to fail with EOVERFLOW */
++	count = ~0u;
++	ret = post_sem(sem, &count);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EOVERFLOW, errno);
++	check_sem_state(sem, 1, 2);
++
++	close(sem);
++
++	close(fd);
++}
++
++TEST(mutex_state)
++{
++	struct ntsync_mutex_args mutex_args;
++	__u32 owner, count, index;
++	struct timespec timeout;
++	int fd, ret, mutex;
++	/* Exercises mutex creation, unlock, recursive acquisition and MUTEX_KILL handling. */
++	clock_gettime(CLOCK_MONOTONIC, &timeout);	/* NOTE(review): timeout is not read again in this test */
++
++	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
++	ASSERT_LE(0, fd);
++	/* an owner with zero count, or a count with zero owner, is expected to fail with EINVAL */
++	mutex_args.owner = 123;
++	mutex_args.count = 0;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EINVAL, errno);
++
++	mutex_args.owner = 0;
++	mutex_args.count = 2;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EINVAL, errno);
++
++	mutex_args.owner = 123;
++	mutex_args.count = 2;
++	mutex_args.mutex = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, mutex_args.mutex);
++	mutex = mutex_args.mutex;
++	check_mutex_state(mutex, 2, 123);
++	/* unlock with owner 0 expects EINVAL; unlock by a non-owner (456) expects EPERM */
++	ret = unlock_mutex(mutex, 0, &count);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EINVAL, errno);
++
++	ret = unlock_mutex(mutex, 456, &count);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EPERM, errno);
++	check_mutex_state(mutex, 2, 123);
++	/* each unlock by the owner is expected to report the count from before that unlock */
++	ret = unlock_mutex(mutex, 123, &count);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(2, count);
++	check_mutex_state(mutex, 1, 123);
++
++	ret = unlock_mutex(mutex, 123, &count);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(1, count);
++	check_mutex_state(mutex, 0, 0);
++
++	ret = unlock_mutex(mutex, 123, &count);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EPERM, errno);
++	/* an unowned mutex is acquired by wait_any(); a repeated wait by the same owner raises the count */
++	ret = wait_any(fd, 1, &mutex, 456, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++	check_mutex_state(mutex, 1, 456);
++
++	ret = wait_any(fd, 1, &mutex, 456, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++	check_mutex_state(mutex, 2, 456);
++
++	ret = unlock_mutex(mutex, 456, &count);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(2, count);
++	check_mutex_state(mutex, 1, 456);
++
++	ret = wait_any(fd, 1, &mutex, 123, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(ETIMEDOUT, errno);
++	/* MUTEX_KILL: owner 0 expects EINVAL, a non-owner (123) expects EPERM */
++	owner = 0;
++	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EINVAL, errno);
++
++	owner = 123;
++	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EPERM, errno);
++	check_mutex_state(mutex, 1, 456);
++
++	owner = 456;
++	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
++	EXPECT_EQ(0, ret);
++	/* after a kill by the owner, two consecutive reads both expect EOWNERDEAD with zeroed state */
++	memset(&mutex_args, 0xcc, sizeof(mutex_args));
++	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EOWNERDEAD, errno);
++	EXPECT_EQ(0, mutex_args.count);
++	EXPECT_EQ(0, mutex_args.owner);
++
++	memset(&mutex_args, 0xcc, sizeof(mutex_args));
++	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EOWNERDEAD, errno);
++	EXPECT_EQ(0, mutex_args.count);
++	EXPECT_EQ(0, mutex_args.owner);
++	/* the next wait also expects EOWNERDEAD, yet the mutex ends up held by the waiter (123) */
++	ret = wait_any(fd, 1, &mutex, 123, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EOWNERDEAD, errno);
++	EXPECT_EQ(0, index);
++	check_mutex_state(mutex, 1, 123);
++
++	owner = 123;
++	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
++	EXPECT_EQ(0, ret);
++
++	memset(&mutex_args, 0xcc, sizeof(mutex_args));
++	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EOWNERDEAD, errno);
++	EXPECT_EQ(0, mutex_args.count);
++	EXPECT_EQ(0, mutex_args.owner);
++
++	ret = wait_any(fd, 1, &mutex, 123, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EOWNERDEAD, errno);
++	EXPECT_EQ(0, index);
++	check_mutex_state(mutex, 1, 123);
++
++	close(mutex);
++
++	mutex_args.owner = 0;
++	mutex_args.count = 0;
++	mutex_args.mutex = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, mutex_args.mutex);
++	mutex = mutex_args.mutex;
++	check_mutex_state(mutex, 0, 0);
++
++	ret = wait_any(fd, 1, &mutex, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++	check_mutex_state(mutex, 1, 123);
++
++	close(mutex);
++	/* a mutex created at count ~0u: a further wait by its owner is expected to time out */
++	mutex_args.owner = 123;
++	mutex_args.count = ~0u;
++	mutex_args.mutex = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, mutex_args.mutex);
++	mutex = mutex_args.mutex;
++	check_mutex_state(mutex, ~0u, 123);
++
++	ret = wait_any(fd, 1, &mutex, 123, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(ETIMEDOUT, errno);
++
++	close(mutex);
++
++	close(fd);
++}
++
++TEST(manual_event_state)
++{
++	struct ntsync_event_args event_args;
++	__u32 index, signaled;
++	int fd, event, ret;
++	/* Exercises set/reset/pulse and waiting on an event created with manual = 1. */
++	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
++	ASSERT_LE(0, fd);
++
++	event_args.manual = 1;
++	event_args.signaled = 0;
++	event_args.event = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, event_args.event);
++	event = event_args.event;
++	check_event_state(event, 0, 1);
++	/* EVENT_SET is expected to report the previous signaled state through its argument */
++	signaled = 0xdeadbeef;
++	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, signaled);
++	check_event_state(event, 1, 1);
++
++	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(1, signaled);
++	check_event_state(event, 1, 1);
++	/* a successful wait is expected to leave the manual event signaled */
++	ret = wait_any(fd, 1, &event, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++	check_event_state(event, 1, 1);
++
++	signaled = 0xdeadbeef;
++	ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(1, signaled);
++	check_event_state(event, 0, 1);
++
++	ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, signaled);
++	check_event_state(event, 0, 1);
++
++	ret = wait_any(fd, 1, &event, 123, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(ETIMEDOUT, errno);
++
++	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, signaled);
++	/* EVENT_PULSE is expected to report the previous state and leave the event unsignaled */
++	ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(1, signaled);
++	check_event_state(event, 0, 1);
++
++	ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, signaled);
++	check_event_state(event, 0, 1);
++
++	close(event);
++
++	close(fd);
++}
++
++TEST(auto_event_state)
++{
++	struct ntsync_event_args event_args;
++	__u32 index, signaled;
++	int fd, event, ret;
++	/* Exercises set/reset/pulse and waiting on an event created with manual = 0. */
++	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
++	ASSERT_LE(0, fd);
++
++	event_args.manual = 0;
++	event_args.signaled = 1;
++	event_args.event = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, event_args.event);
++	event = event_args.event;
++
++	check_event_state(event, 1, 0);
++
++	signaled = 0xdeadbeef;
++	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(1, signaled);
++	check_event_state(event, 1, 0);
++	/* a successful wait is expected to leave the auto event unsignaled */
++	ret = wait_any(fd, 1, &event, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++	check_event_state(event, 0, 0);
++
++	signaled = 0xdeadbeef;
++	ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, signaled);
++	check_event_state(event, 0, 0);
++
++	ret = wait_any(fd, 1, &event, 123, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(ETIMEDOUT, errno);
++
++	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, signaled);
++	/* EVENT_PULSE is expected to report the previous state and leave the event unsignaled */
++	ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(1, signaled);
++	check_event_state(event, 0, 0);
++
++	ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, signaled);
++	check_event_state(event, 0, 0);
++
++	close(event);
++
++	close(fd);
++}
++
++TEST(test_wait_any)
++{
++	int objs[NTSYNC_MAX_WAIT_COUNT + 1], fd, ret;
++	struct ntsync_mutex_args mutex_args = {0};
++	struct ntsync_sem_args sem_args = {0};
++	__u32 owner, index, count, i;
++	struct timespec timeout;
++	/* Exercises NTSYNC_IOC_WAIT_ANY over a semaphore and a mutex, plus object-count limits. */
++	clock_gettime(CLOCK_MONOTONIC, &timeout);	/* NOTE(review): timeout is not read again in this test */
++
++	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
++	ASSERT_LE(0, fd);
++
++	sem_args.count = 2;
++	sem_args.max = 3;
++	sem_args.sem = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, sem_args.sem);
++
++	mutex_args.owner = 0;
++	mutex_args.count = 0;
++	mutex_args.mutex = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, mutex_args.mutex);
++
++	objs[0] = sem_args.sem;
++	objs[1] = mutex_args.mutex;
++	/* while the semaphore has counts it is reported (index 0); the mutex (index 1) only afterwards */
++	ret = wait_any(fd, 2, objs, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++	check_sem_state(sem_args.sem, 1, 3);
++	check_mutex_state(mutex_args.mutex, 0, 0);
++
++	ret = wait_any(fd, 2, objs, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++	check_sem_state(sem_args.sem, 0, 3);
++	check_mutex_state(mutex_args.mutex, 0, 0);
++
++	ret = wait_any(fd, 2, objs, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(1, index);
++	check_sem_state(sem_args.sem, 0, 3);
++	check_mutex_state(mutex_args.mutex, 1, 123);
++
++	count = 1;
++	ret = post_sem(sem_args.sem, &count);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, count);
++
++	ret = wait_any(fd, 2, objs, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++	check_sem_state(sem_args.sem, 0, 3);
++	check_mutex_state(mutex_args.mutex, 1, 123);
++
++	ret = wait_any(fd, 2, objs, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(1, index);
++	check_sem_state(sem_args.sem, 0, 3);
++	check_mutex_state(mutex_args.mutex, 2, 123);
++
++	ret = wait_any(fd, 2, objs, 456, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(ETIMEDOUT, errno);
++	/* kill the mutex as its owner: the next wait expects EOWNERDEAD with the mutex's index */
++	owner = 123;
++	ret = ioctl(mutex_args.mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_any(fd, 2, objs, 456, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EOWNERDEAD, errno);
++	EXPECT_EQ(1, index);
++
++	ret = wait_any(fd, 2, objs, 456, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(1, index);
++
++	/* test waiting on the same object twice */
++	count = 2;
++	ret = post_sem(sem_args.sem, &count);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, count);
++
++	objs[0] = objs[1] = sem_args.sem;
++	ret = wait_any(fd, 2, objs, 456, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++	check_sem_state(sem_args.sem, 1, 3);
++
++	ret = wait_any(fd, 0, NULL, 456, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(ETIMEDOUT, errno);
++	/* NTSYNC_MAX_WAIT_COUNT objects are accepted; one more, or a count of -1, expects EINVAL */
++	for (i = 0; i < NTSYNC_MAX_WAIT_COUNT + 1; ++i)
++		objs[i] = sem_args.sem;
++
++	ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT, objs, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++
++	ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT + 1, objs, 123, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EINVAL, errno);
++
++	ret = wait_any(fd, -1, objs, 123, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EINVAL, errno);
++
++	close(sem_args.sem);
++	close(mutex_args.mutex);
++
++	close(fd);
++}
++
++TEST(test_wait_all)
++{
++	struct ntsync_event_args event_args = {0};
++	struct ntsync_mutex_args mutex_args = {0};
++	struct ntsync_sem_args sem_args = {0};
++	__u32 owner, index, count;
++	int objs[2], fd, ret;
++	/* Exercises NTSYNC_IOC_WAIT_ALL on a semaphore paired with a mutex or a manual event. */
++	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
++	ASSERT_LE(0, fd);
++
++	sem_args.count = 2;
++	sem_args.max = 3;
++	sem_args.sem = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, sem_args.sem);
++
++	mutex_args.owner = 0;
++	mutex_args.count = 0;
++	mutex_args.mutex = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, mutex_args.mutex);
++
++	event_args.manual = true;
++	event_args.signaled = true;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
++	EXPECT_EQ(0, ret);
++
++	objs[0] = sem_args.sem;
++	objs[1] = mutex_args.mutex;
++
++	ret = wait_all(fd, 2, objs, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++	check_sem_state(sem_args.sem, 1, 3);
++	check_mutex_state(mutex_args.mutex, 1, 123);
++	/* owner 456 cannot take the mutex held by 123: expect ETIMEDOUT and no state change */
++	ret = wait_all(fd, 2, objs, 456, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(ETIMEDOUT, errno);
++	check_sem_state(sem_args.sem, 1, 3);
++	check_mutex_state(mutex_args.mutex, 1, 123);
++
++	ret = wait_all(fd, 2, objs, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++	check_sem_state(sem_args.sem, 0, 3);
++	check_mutex_state(mutex_args.mutex, 2, 123);
++
++	ret = wait_all(fd, 2, objs, 123, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(ETIMEDOUT, errno);
++	check_sem_state(sem_args.sem, 0, 3);
++	check_mutex_state(mutex_args.mutex, 2, 123);
++
++	count = 3;
++	ret = post_sem(sem_args.sem, &count);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, count);
++
++	ret = wait_all(fd, 2, objs, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++	check_sem_state(sem_args.sem, 2, 3);
++	check_mutex_state(mutex_args.mutex, 3, 123);
++	/* kill the mutex as its owner: the next wait expects EOWNERDEAD yet still acquires both objects */
++	owner = 123;
++	ret = ioctl(mutex_args.mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_all(fd, 2, objs, 123, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EOWNERDEAD, errno);
++	check_sem_state(sem_args.sem, 1, 3);
++	check_mutex_state(mutex_args.mutex, 1, 123);
++
++	objs[0] = sem_args.sem;
++	objs[1] = event_args.event;
++	ret = wait_all(fd, 2, objs, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++	check_sem_state(sem_args.sem, 0, 3);
++	check_event_state(event_args.event, 1, 1);
++
++	/* test waiting on the same object twice */
++	objs[0] = objs[1] = sem_args.sem;
++	ret = wait_all(fd, 2, objs, 123, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(EINVAL, errno);
++
++	close(sem_args.sem);
++	close(mutex_args.mutex);
++	close(event_args.event);
++
++	close(fd);
++}
++
++struct wake_args {	/* NOTE(review): no user of this struct is visible nearby - confirm it is still needed */
++	int fd;
++	int obj;
++};
++
++struct wait_args {	/* argument/result block handed to wait_thread() */
++	int fd;	/* descriptor the ioctl is issued on */
++	unsigned long request;	/* ioctl request code */
++	struct ntsync_wait_args *args;	/* ioctl argument */
++	int ret;	/* out: ioctl return value */
++	int err;	/* out: errno sampled right after the ioctl */
++};
++
++static void *wait_thread(void *arg)
++{
++	struct wait_args *ctx = arg;	/* thread argument is a struct wait_args */
++	/* Issue the requested ioctl, then record its return value and errno for the joiner. */
++	ctx->ret = ioctl(ctx->fd, ctx->request, ctx->args);
++	ctx->err = errno;
++	return NULL;
++}
++
++static __u64 get_abs_timeout(unsigned int ms)
++{
++	struct timespec timeout;	/* current CLOCK_MONOTONIC time */
++	clock_gettime(CLOCK_MONOTONIC, &timeout);	/* result: now + ms, in ns; 64-bit math so a 32-bit time_t or ms > 4294 cannot wrap */
++	return ((__u64)timeout.tv_sec * 1000000000ULL) + timeout.tv_nsec + ((__u64)ms * 1000000ULL);
++}
++
++static int wait_for_thread(pthread_t thread, unsigned int ms)
++{
++	struct timespec timeout;
++	/* Join with a CLOCK_REALTIME deadline ms from now; ms is split into s + ms so no product can overflow. */
++	clock_gettime(CLOCK_REALTIME, &timeout);
++	timeout.tv_nsec += (ms % 1000) * 1000000L;
++	timeout.tv_sec += ms / 1000 + timeout.tv_nsec / 1000000000L;
++	timeout.tv_nsec %= 1000000000L;
++	return pthread_timedjoin_np(thread, NULL, &timeout);	/* result is returned unchanged */
++}
++
++TEST(wake_any)
++{
++	struct ntsync_event_args event_args = {0};
++	struct ntsync_mutex_args mutex_args = {0};
++	struct ntsync_wait_args wait_args = {0};
++	struct ntsync_sem_args sem_args = {0};
++	struct wait_args thread_args;
++	__u32 count, index, signaled;
++	int objs[2], fd, ret;
++	pthread_t thread;
++	/* Checks that a thread blocked in NTSYNC_IOC_WAIT_ANY is woken by each kind of object. */
++	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
++	ASSERT_LE(0, fd);
++
++	sem_args.count = 0;
++	sem_args.max = 3;
++	sem_args.sem = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, sem_args.sem);
++
++	mutex_args.owner = 123;
++	mutex_args.count = 1;
++	mutex_args.mutex = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, mutex_args.mutex);
++
++	objs[0] = sem_args.sem;
++	objs[1] = mutex_args.mutex;
++
++	/* test waking the semaphore */
++
++	wait_args.timeout = get_abs_timeout(1000);
++	wait_args.objs = (uintptr_t)objs;
++	wait_args.count = 2;
++	wait_args.owner = 456;
++	wait_args.index = 0xdeadbeef;
++	thread_args.fd = fd;
++	thread_args.args = &wait_args;
++	thread_args.request = NTSYNC_IOC_WAIT_ANY;
++	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(ETIMEDOUT, ret);
++
++	count = 1;
++	ret = post_sem(sem_args.sem, &count);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, count);
++	check_sem_state(sem_args.sem, 0, 3);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, thread_args.ret);
++	EXPECT_EQ(0, wait_args.index);
++
++	/* test waking the mutex */
++
++	/* first grab it again for owner 123 */
++	ret = wait_any(fd, 1, &mutex_args.mutex, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++
++	wait_args.timeout = get_abs_timeout(1000);
++	wait_args.owner = 456;
++	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(ETIMEDOUT, ret);
++
++	ret = unlock_mutex(mutex_args.mutex, 123, &count);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(2, count);
++	/* one unlock of the count-2 mutex must not wake the waiter yet */
++	ret = pthread_tryjoin_np(thread, NULL);
++	EXPECT_EQ(EBUSY, ret);
++
++	ret = unlock_mutex(mutex_args.mutex, 123, &count);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(1, count);	/* count reported by this unlock, not the stale creation argument */
++	check_mutex_state(mutex_args.mutex, 1, 456);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, thread_args.ret);
++	EXPECT_EQ(1, wait_args.index);
++
++	/* test waking events */
++
++	event_args.manual = false;
++	event_args.signaled = false;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
++	EXPECT_EQ(0, ret);
++
++	objs[1] = event_args.event;
++	wait_args.timeout = get_abs_timeout(1000);
++	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(ETIMEDOUT, ret);
++
++	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_SET, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, signaled);
++	check_event_state(event_args.event, 0, 0);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, thread_args.ret);
++	EXPECT_EQ(1, wait_args.index);
++
++	wait_args.timeout = get_abs_timeout(1000);
++	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(ETIMEDOUT, ret);
++
++	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_PULSE, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, signaled);
++	check_event_state(event_args.event, 0, 0);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, thread_args.ret);
++	EXPECT_EQ(1, wait_args.index);
++
++	close(event_args.event);
++
++	event_args.manual = true;
++	event_args.signaled = false;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
++	EXPECT_EQ(0, ret);
++
++	objs[1] = event_args.event;
++	wait_args.timeout = get_abs_timeout(1000);
++	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(ETIMEDOUT, ret);
++
++	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_SET, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, signaled);
++	check_event_state(event_args.event, 1, 1);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, thread_args.ret);
++	EXPECT_EQ(1, wait_args.index);
++
++	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(1, signaled);
++
++	wait_args.timeout = get_abs_timeout(1000);
++	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(ETIMEDOUT, ret);
++
++	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_PULSE, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, signaled);
++	check_event_state(event_args.event, 0, 1);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, thread_args.ret);
++	EXPECT_EQ(1, wait_args.index);
++
++	close(event_args.event);
++
++	/* delete an object while it's being waited on */
++
++	wait_args.timeout = get_abs_timeout(200);
++	wait_args.owner = 123;
++	objs[1] = mutex_args.mutex;
++	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(ETIMEDOUT, ret);
++
++	close(sem_args.sem);
++	close(mutex_args.mutex);
++
++	ret = wait_for_thread(thread, 200);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(-1, thread_args.ret);
++	EXPECT_EQ(ETIMEDOUT, thread_args.err);
++
++	close(fd);
++}
++
++TEST(wake_all)
++{
++	struct ntsync_event_args manual_event_args = {0};
++	struct ntsync_event_args auto_event_args = {0};
++	struct ntsync_mutex_args mutex_args = {0};
++	struct ntsync_wait_args wait_args = {0};
++	struct ntsync_sem_args sem_args = {0};
++	struct wait_args thread_args;
++	__u32 count, index, signaled;
++	int objs[4], fd, ret;
++	pthread_t thread;
++	/* Checks that a thread blocked in NTSYNC_IOC_WAIT_ALL on four objects wakes only once all are available. */
++	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
++	ASSERT_LE(0, fd);
++
++	sem_args.count = 0;
++	sem_args.max = 3;
++	sem_args.sem = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, sem_args.sem);
++
++	mutex_args.owner = 123;
++	mutex_args.count = 1;
++	mutex_args.mutex = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, mutex_args.mutex);
++
++	manual_event_args.manual = true;
++	manual_event_args.signaled = true;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &manual_event_args);
++	EXPECT_EQ(0, ret);
++
++	auto_event_args.manual = false;
++	auto_event_args.signaled = true;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &auto_event_args);
++	EXPECT_EQ(0, ret);
++
++	objs[0] = sem_args.sem;
++	objs[1] = mutex_args.mutex;
++	objs[2] = manual_event_args.event;
++	objs[3] = auto_event_args.event;
++
++	wait_args.timeout = get_abs_timeout(1000);
++	wait_args.objs = (uintptr_t)objs;
++	wait_args.count = 4;
++	wait_args.owner = 456;
++	thread_args.fd = fd;
++	thread_args.args = &wait_args;
++	thread_args.request = NTSYNC_IOC_WAIT_ALL;
++	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(ETIMEDOUT, ret);
++	/* post the semaphore alone: the wait-all thread is expected to stay blocked */
++	count = 1;
++	ret = post_sem(sem_args.sem, &count);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, count);
++
++	ret = pthread_tryjoin_np(thread, NULL);
++	EXPECT_EQ(EBUSY, ret);
++
++	check_sem_state(sem_args.sem, 1, 3);
++
++	ret = wait_any(fd, 1, &sem_args.sem, 123, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++
++	ret = unlock_mutex(mutex_args.mutex, 123, &count);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(1, count);
++
++	ret = pthread_tryjoin_np(thread, NULL);
++	EXPECT_EQ(EBUSY, ret);
++
++	check_mutex_state(mutex_args.mutex, 0, 0);
++
++	ret = ioctl(manual_event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(1, signaled);
++
++	count = 2;
++	ret = post_sem(sem_args.sem, &count);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, count);
++	check_sem_state(sem_args.sem, 2, 3);
++
++	ret = ioctl(auto_event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(1, signaled);
++
++	ret = ioctl(manual_event_args.event, NTSYNC_IOC_EVENT_SET, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, signaled);
++
++	ret = ioctl(auto_event_args.event, NTSYNC_IOC_EVENT_SET, &signaled);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, signaled);
++	/* with every object signaled, the checks below expect the waiter (owner 456) to have acquired them */
++	check_sem_state(sem_args.sem, 1, 3);
++	check_mutex_state(mutex_args.mutex, 1, 456);
++	check_event_state(manual_event_args.event, 1, 1);
++	check_event_state(auto_event_args.event, 0, 0);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, thread_args.ret);
++
++	/* delete an object while it's being waited on */
++
++	wait_args.timeout = get_abs_timeout(200);
++	wait_args.owner = 123;
++	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(ETIMEDOUT, ret);
++
++	close(sem_args.sem);
++	close(mutex_args.mutex);
++	close(manual_event_args.event);
++	close(auto_event_args.event);
++
++	ret = wait_for_thread(thread, 200);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(-1, thread_args.ret);
++	EXPECT_EQ(ETIMEDOUT, thread_args.err);
++
++	close(fd);
++}
++
++TEST(alert_any)
++{
++	struct ntsync_event_args event_args = {0};
++	struct ntsync_wait_args wait_args = {0};
++	struct ntsync_sem_args sem_args = {0};
++	__u32 index, count, signaled;
++	struct wait_args thread_args;
++	int objs[2], fd, ret;
++	pthread_t thread;
++	/* Exercises the alert argument of NTSYNC_IOC_WAIT_ANY with manual and auto-reset events. */
++	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
++	ASSERT_LE(0, fd);
++
++	sem_args.count = 0;
++	sem_args.max = 2;
++	sem_args.sem = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, sem_args.sem);
++	objs[0] = sem_args.sem;
++
++	sem_args.count = 1;
++	sem_args.max = 2;
++	sem_args.sem = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, sem_args.sem);
++	objs[1] = sem_args.sem;
++
++	event_args.manual = true;
++	event_args.signaled = true;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
++	EXPECT_EQ(0, ret);
++	/* with no objects, a signaled alert is expected at index 0, i.e. the object count */
++	ret = wait_any_alert(fd, 0, NULL, 123, event_args.event, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++
++	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_any_alert(fd, 0, NULL, 123, event_args.event, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(ETIMEDOUT, errno);
++
++	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_SET, &signaled);
++	EXPECT_EQ(0, ret);
++	/* objs[1] has a count, so it is expected first (index 1), then the alert (index 2 == count) */
++	ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(1, index);
++
++	ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(2, index);
++
++	/* test wakeup via alert */
++
++	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled);
++	EXPECT_EQ(0, ret);
++
++	wait_args.timeout = get_abs_timeout(1000);
++	wait_args.objs = (uintptr_t)objs;
++	wait_args.count = 2;
++	wait_args.owner = 123;
++	wait_args.index = 0xdeadbeef;
++	wait_args.alert = event_args.event;
++	thread_args.fd = fd;
++	thread_args.args = &wait_args;
++	thread_args.request = NTSYNC_IOC_WAIT_ANY;
++	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(ETIMEDOUT, ret);
++
++	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_SET, &signaled);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, thread_args.ret);
++	EXPECT_EQ(2, wait_args.index);
++
++	close(event_args.event);
++
++	/* test with an auto-reset event */
++
++	event_args.manual = false;
++	event_args.signaled = true;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
++	EXPECT_EQ(0, ret);
++
++	count = 1;
++	ret = post_sem(objs[0], &count);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++
++	ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(2, index);
++
++	ret = wait_any_alert(fd, 2, objs, 123, event_args.event, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(ETIMEDOUT, errno);
++
++	close(event_args.event);
++
++	close(objs[0]);
++	close(objs[1]);
++
++	close(fd);
++}
++
++TEST(alert_all)
++{
++	struct ntsync_event_args event_args = {0};
++	struct ntsync_wait_args wait_args = {0};
++	struct ntsync_sem_args sem_args = {0};
++	struct wait_args thread_args;
++	__u32 index, count, signaled;
++	int objs[2], fd, ret;
++	pthread_t thread;
++	/* Exercises the alert argument of NTSYNC_IOC_WAIT_ALL with manual and auto-reset events. */
++	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
++	ASSERT_LE(0, fd);
++
++	sem_args.count = 2;
++	sem_args.max = 2;
++	sem_args.sem = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, sem_args.sem);
++	objs[0] = sem_args.sem;
++
++	sem_args.count = 1;
++	sem_args.max = 2;
++	sem_args.sem = 0xdeadbeef;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
++	EXPECT_EQ(0, ret);
++	EXPECT_NE(0xdeadbeef, sem_args.sem);
++	objs[1] = sem_args.sem;
++
++	event_args.manual = true;
++	event_args.signaled = true;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
++	EXPECT_EQ(0, ret);
++	/* first call: both semaphores have a count (index 0); second call: only the alert is left (index 2 == count) */
++	ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++
++	ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(2, index);
++
++	/* test wakeup via alert */
++
++	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_RESET, &signaled);
++	EXPECT_EQ(0, ret);
++
++	wait_args.timeout = get_abs_timeout(1000);
++	wait_args.objs = (uintptr_t)objs;
++	wait_args.count = 2;
++	wait_args.owner = 123;
++	wait_args.index = 0xdeadbeef;
++	wait_args.alert = event_args.event;
++	thread_args.fd = fd;
++	thread_args.args = &wait_args;
++	thread_args.request = NTSYNC_IOC_WAIT_ALL;
++	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(ETIMEDOUT, ret);
++
++	ret = ioctl(event_args.event, NTSYNC_IOC_EVENT_SET, &signaled);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_for_thread(thread, 100);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, thread_args.ret);
++	EXPECT_EQ(2, wait_args.index);
++
++	close(event_args.event);
++
++	/* test with an auto-reset event */
++
++	event_args.manual = false;
++	event_args.signaled = true;
++	ret = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
++	EXPECT_EQ(0, ret);
++
++	count = 2;
++	ret = post_sem(objs[1], &count);
++	EXPECT_EQ(0, ret);
++
++	ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(0, index);
++
++	ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index);
++	EXPECT_EQ(0, ret);
++	EXPECT_EQ(2, index);
++
++	ret = wait_all_alert(fd, 2, objs, 123, event_args.event, &index);
++	EXPECT_EQ(-1, ret);
++	EXPECT_EQ(ETIMEDOUT, errno);
++
++	close(event_args.event);
++
++	close(objs[0]);
++	close(objs[1]);
++
++	close(fd);
++}
++
++#define STRESS_LOOPS 10000
++#define STRESS_THREADS 4
++
++static unsigned int stress_counter;
++static int stress_device, stress_start_event, stress_mutex;
++
++static void *stress_thread(void *arg)
++{
++	struct ntsync_wait_args wait_args = {0};
++	__u32 count, i;
++
++	wait_args.timeout = UINT64_MAX;
++	wait_args.count = 1;
++	wait_args.objs = (uintptr_t)&stress_start_event;
++	wait_args.owner = gettid();
++	wait_args.index = 0xdeadbeef;
++
++	/* Wait on the start event before entering the contended loop. */
++	ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args);
++
++	wait_args.objs = (uintptr_t)&stress_mutex;
++
++	for (i = 0; i < STRESS_LOOPS; ++i) {
++		ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args);
++
++		++stress_counter;	/* plain increment between mutex wait and unlock */
++
++		unlock_mutex(stress_mutex, wait_args.owner, &count);
++	}
++
++	return NULL;
++}
++
++TEST(stress_wait)
++{
++	struct ntsync_event_args event_args = {0};
++	struct ntsync_mutex_args mutex_args = {0};
++	pthread_t threads[STRESS_THREADS];
++	__u32 signaled, i;
++	int ret;
++
++	stress_device = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
++	ASSERT_LE(0, stress_device);
++
++	/* Zero-initialized args request an unowned mutex (owner 0, count 0). */
++	ret = ioctl(stress_device, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
++	ASSERT_EQ(0, ret);
++	stress_mutex = mutex_args.mutex;
++
++	event_args.manual = 1;	/* signaled stays 0: workers wait until it is set */
++	ret = ioctl(stress_device, NTSYNC_IOC_CREATE_EVENT, &event_args);
++	ASSERT_EQ(0, ret);
++	stress_start_event = event_args.event;
++
++	for (i = 0; i < STRESS_THREADS; ++i) {
++		ret = pthread_create(&threads[i], NULL, stress_thread, NULL);
++		ASSERT_EQ(0, ret);	/* never join a thread that was not created */
++	}
++
++	ret = ioctl(stress_start_event, NTSYNC_IOC_EVENT_SET, &signaled);
++	EXPECT_EQ(0, ret);
++
++	for (i = 0; i < STRESS_THREADS; ++i) {
++		ret = pthread_join(threads[i], NULL);
++		EXPECT_EQ(0, ret);
++	}
++
++	EXPECT_EQ(STRESS_LOOPS * STRESS_THREADS, stress_counter);
++
++	close(stress_start_event);
++	close(stress_mutex);
++	close(stress_device);
++}
++
++TEST_HARNESS_MAIN
+-- 
+2.47.0.rc0
+
+From 94b78b402e239cd15095fa2d3e07f99a060a4b45 Mon Sep 17 00:00:00 2001
+From: Eric Naim <dnaim@cachyos.org>
+Date: Tue, 22 Oct 2024 22:53:18 +0800
+Subject: [PATCH 10/13] perf-per-core
+
+Signed-off-by: Eric Naim <dnaim@cachyos.org>
+---
+ Documentation/arch/x86/topology.rst   |   4 +
+ arch/x86/events/rapl.c                | 418 ++++++++++++++++++--------
+ arch/x86/include/asm/processor.h      |   1 +
+ arch/x86/include/asm/topology.h       |   1 +
+ arch/x86/kernel/cpu/debugfs.c         |   1 +
+ arch/x86/kernel/cpu/topology_common.c |   1 +
+ 6 files changed, 305 insertions(+), 121 deletions(-)
+
+diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst
+index 7352ab89a55a..c12837e61bda 100644
+--- a/Documentation/arch/x86/topology.rst
++++ b/Documentation/arch/x86/topology.rst
+@@ -135,6 +135,10 @@ Thread-related topology information in the kernel:
+     The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
+     "core_id."
+ 
++  - topology_logical_core_id();
++
++    The logical core ID to which a thread belongs.
++
+ 
+ 
+ System topology examples
+diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
+index b985ca79cf97..8206038a01ac 100644
+--- a/arch/x86/events/rapl.c
++++ b/arch/x86/events/rapl.c
+@@ -39,6 +39,10 @@
+  *	  event: rapl_energy_psys
+  *    perf code: 0x5
+  *
++ *  per_core counter: consumption of a single physical core
++ *	  event: rapl_energy_per_core (power_per_core PMU)
++ *    perf code: 0x1
++ *
+  * We manage those counters as free running (read-only). They may be
+  * use simultaneously by other tools, such as turbostat.
+  *
+@@ -70,18 +74,25 @@ MODULE_LICENSE("GPL");
+ /*
+  * RAPL energy status counters
+  */
+-enum perf_rapl_events {
++enum perf_rapl_pkg_events {
+ 	PERF_RAPL_PP0 = 0,		/* all cores */
+ 	PERF_RAPL_PKG,			/* entire package */
+ 	PERF_RAPL_RAM,			/* DRAM */
+ 	PERF_RAPL_PP1,			/* gpu */
+ 	PERF_RAPL_PSYS,			/* psys */
+ 
+-	PERF_RAPL_MAX,
+-	NR_RAPL_DOMAINS = PERF_RAPL_MAX,
++	PERF_RAPL_PKG_EVENTS_MAX,
++	NR_RAPL_PKG_DOMAINS = PERF_RAPL_PKG_EVENTS_MAX,
++};
++
++enum perf_rapl_core_events {
++	PERF_RAPL_PER_CORE = 0,		/* per-core */
++
++	PERF_RAPL_CORE_EVENTS_MAX,
++	NR_RAPL_CORE_DOMAINS = PERF_RAPL_CORE_EVENTS_MAX,
+ };
+ 
+-static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
++static const char *const rapl_pkg_domain_names[NR_RAPL_PKG_DOMAINS] __initconst = {
+ 	"pp0-core",
+ 	"package",
+ 	"dram",
+@@ -89,6 +100,10 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
+ 	"psys",
+ };
+ 
++static const char *const rapl_core_domain_names[NR_RAPL_CORE_DOMAINS] __initconst = {
++	"per-core",
++};
++
+ /*
+  * event code: LSB 8 bits, passed in attr->config
+  * any other bit is reserved
+@@ -103,6 +118,10 @@ static struct perf_pmu_events_attr event_attr_##v = {				\
+ 	.event_str	= str,							\
+ };
+ 
++#define rapl_pmu_is_pkg_scope()				\
++	(boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||	\
++	 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
++
+ struct rapl_pmu {
+ 	raw_spinlock_t		lock;
+ 	int			n_active;
+@@ -115,8 +134,9 @@ struct rapl_pmu {
+ 
+ struct rapl_pmus {
+ 	struct pmu		pmu;
++	cpumask_t		cpumask;
+ 	unsigned int		nr_rapl_pmu;
+-	struct rapl_pmu		*pmus[] __counted_by(nr_rapl_pmu);
++	struct rapl_pmu		*rapl_pmu[] __counted_by(nr_rapl_pmu);
+ };
+ 
+ enum rapl_unit_quirk {
+@@ -126,29 +146,45 @@ enum rapl_unit_quirk {
+ };
+ 
+ struct rapl_model {
+-	struct perf_msr *rapl_msrs;
+-	unsigned long	events;
++	struct perf_msr *rapl_pkg_msrs;
++	struct perf_msr *rapl_core_msrs;
++	unsigned long	pkg_events;
++	unsigned long	core_events;
+ 	unsigned int	msr_power_unit;
+ 	enum rapl_unit_quirk	unit_quirk;
+ };
+ 
+  /* 1/2^hw_unit Joule */
+-static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
+-static struct rapl_pmus *rapl_pmus;
+-static cpumask_t rapl_cpu_mask;
+-static unsigned int rapl_cntr_mask;
++static int rapl_hw_unit[NR_RAPL_PKG_DOMAINS] __read_mostly;
++static struct rapl_pmus *rapl_pmus_pkg;
++static struct rapl_pmus *rapl_pmus_core;
++static unsigned int rapl_pkg_cntr_mask;
++static unsigned int rapl_core_cntr_mask;
+ static u64 rapl_timer_ms;
+-static struct perf_msr *rapl_msrs;
++static struct rapl_model *rapl_model;
++
++static inline unsigned int get_rapl_pmu_idx(int cpu)
++{
++	return rapl_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) :
++					 topology_logical_die_id(cpu);
++}
++
++static inline const struct cpumask *get_rapl_pmu_cpumask(int cpu)
++{
++	return rapl_pmu_is_pkg_scope() ? topology_core_cpumask(cpu) :
++					 topology_die_cpumask(cpu);
++}
+ 
+ static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
+ {
+-	unsigned int rapl_pmu_idx = topology_logical_die_id(cpu);
++	unsigned int rapl_pmu_idx = get_rapl_pmu_idx(cpu);
+ 
+ 	/*
+ 	 * The unsigned check also catches the '-1' return value for non
+ 	 * existent mappings in the topology map.
+ 	 */
+-	return rapl_pmu_idx < rapl_pmus->nr_rapl_pmu ? rapl_pmus->pmus[rapl_pmu_idx] : NULL;
++	return rapl_pmu_idx < rapl_pmus_pkg->nr_rapl_pmu ?
++	       rapl_pmus_pkg->rapl_pmu[rapl_pmu_idx] : NULL;
+ }
+ 
+ static inline u64 rapl_read_counter(struct perf_event *event)
+@@ -160,7 +196,7 @@ static inline u64 rapl_read_counter(struct perf_event *event)
+ 
+ static inline u64 rapl_scale(u64 v, int cfg)
+ {
+-	if (cfg > NR_RAPL_DOMAINS) {
++	if (cfg > NR_RAPL_PKG_DOMAINS) {
+ 		pr_warn("Invalid domain %d, failed to scale data\n", cfg);
+ 		return v;
+ 	}
+@@ -212,34 +248,34 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu)
+ 
+ static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
+ {
+-	struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
++	struct rapl_pmu *rapl_pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
+ 	struct perf_event *event;
+ 	unsigned long flags;
+ 
+-	if (!pmu->n_active)
++	if (!rapl_pmu->n_active)
+ 		return HRTIMER_NORESTART;
+ 
+-	raw_spin_lock_irqsave(&pmu->lock, flags);
++	raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
+ 
+-	list_for_each_entry(event, &pmu->active_list, active_entry)
++	list_for_each_entry(event, &rapl_pmu->active_list, active_entry)
+ 		rapl_event_update(event);
+ 
+-	raw_spin_unlock_irqrestore(&pmu->lock, flags);
++	raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
+ 
+-	hrtimer_forward_now(hrtimer, pmu->timer_interval);
++	hrtimer_forward_now(hrtimer, rapl_pmu->timer_interval);
+ 
+ 	return HRTIMER_RESTART;
+ }
+ 
+-static void rapl_hrtimer_init(struct rapl_pmu *pmu)
++static void rapl_hrtimer_init(struct rapl_pmu *rapl_pmu)
+ {
+-	struct hrtimer *hr = &pmu->hrtimer;
++	struct hrtimer *hr = &rapl_pmu->hrtimer;
+ 
+ 	hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ 	hr->function = rapl_hrtimer_handle;
+ }
+ 
+-static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
++static void __rapl_pmu_event_start(struct rapl_pmu *rapl_pmu,
+ 				   struct perf_event *event)
+ {
+ 	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+@@ -247,39 +283,39 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
+ 
+ 	event->hw.state = 0;
+ 
+-	list_add_tail(&event->active_entry, &pmu->active_list);
++	list_add_tail(&event->active_entry, &rapl_pmu->active_list);
+ 
+ 	local64_set(&event->hw.prev_count, rapl_read_counter(event));
+ 
+-	pmu->n_active++;
+-	if (pmu->n_active == 1)
+-		rapl_start_hrtimer(pmu);
++	rapl_pmu->n_active++;
++	if (rapl_pmu->n_active == 1)
++		rapl_start_hrtimer(rapl_pmu);
+ }
+ 
+ static void rapl_pmu_event_start(struct perf_event *event, int mode)
+ {
+-	struct rapl_pmu *pmu = event->pmu_private;
++	struct rapl_pmu *rapl_pmu = event->pmu_private;
+ 	unsigned long flags;
+ 
+-	raw_spin_lock_irqsave(&pmu->lock, flags);
+-	__rapl_pmu_event_start(pmu, event);
+-	raw_spin_unlock_irqrestore(&pmu->lock, flags);
++	raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
++	__rapl_pmu_event_start(rapl_pmu, event);
++	raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
+ }
+ 
+ static void rapl_pmu_event_stop(struct perf_event *event, int mode)
+ {
+-	struct rapl_pmu *pmu = event->pmu_private;
++	struct rapl_pmu *rapl_pmu = event->pmu_private;
+ 	struct hw_perf_event *hwc = &event->hw;
+ 	unsigned long flags;
+ 
+-	raw_spin_lock_irqsave(&pmu->lock, flags);
++	raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
+ 
+ 	/* mark event as deactivated and stopped */
+ 	if (!(hwc->state & PERF_HES_STOPPED)) {
+-		WARN_ON_ONCE(pmu->n_active <= 0);
+-		pmu->n_active--;
+-		if (pmu->n_active == 0)
+-			hrtimer_cancel(&pmu->hrtimer);
++		WARN_ON_ONCE(rapl_pmu->n_active <= 0);
++		rapl_pmu->n_active--;
++		if (rapl_pmu->n_active == 0)
++			hrtimer_cancel(&rapl_pmu->hrtimer);
+ 
+ 		list_del(&event->active_entry);
+ 
+@@ -297,23 +333,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode)
+ 		hwc->state |= PERF_HES_UPTODATE;
+ 	}
+ 
+-	raw_spin_unlock_irqrestore(&pmu->lock, flags);
++	raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
+ }
+ 
+ static int rapl_pmu_event_add(struct perf_event *event, int mode)
+ {
+-	struct rapl_pmu *pmu = event->pmu_private;
++	struct rapl_pmu *rapl_pmu = event->pmu_private;
+ 	struct hw_perf_event *hwc = &event->hw;
+ 	unsigned long flags;
+ 
+-	raw_spin_lock_irqsave(&pmu->lock, flags);
++	raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
+ 
+ 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+ 
+ 	if (mode & PERF_EF_START)
+-		__rapl_pmu_event_start(pmu, event);
++		__rapl_pmu_event_start(rapl_pmu, event);
+ 
+-	raw_spin_unlock_irqrestore(&pmu->lock, flags);
++	raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
+ 
+ 	return 0;
+ }
+@@ -327,10 +363,14 @@ static int rapl_pmu_event_init(struct perf_event *event)
+ {
+ 	u64 cfg = event->attr.config & RAPL_EVENT_MASK;
+ 	int bit, ret = 0;
+-	struct rapl_pmu *pmu;
++	struct rapl_pmu *rapl_pmu;
++	struct rapl_pmus *curr_rapl_pmus;
+ 
+ 	/* only look at RAPL events */
+-	if (event->attr.type != rapl_pmus->pmu.type)
++	if (event->attr.type == rapl_pmus_pkg->pmu.type ||
++		(rapl_pmus_core && event->attr.type == rapl_pmus_core->pmu.type))
++		curr_rapl_pmus = container_of(event->pmu, struct rapl_pmus, pmu);
++	else
+ 		return -ENOENT;
+ 
+ 	/* check only supported bits are set */
+@@ -340,16 +380,18 @@ static int rapl_pmu_event_init(struct perf_event *event)
+ 	if (event->cpu < 0)
+ 		return -EINVAL;
+ 
+-	event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
++	if (curr_rapl_pmus == rapl_pmus_pkg)
++		event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
+ 
+-	if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
++	if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1)
+ 		return -EINVAL;
+ 
+-	cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1);
++	cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1);
+ 	bit = cfg - 1;
+ 
+ 	/* check event supported */
+-	if (!(rapl_cntr_mask & (1 << bit)))
++	if (!((curr_rapl_pmus == rapl_pmus_core ? rapl_core_cntr_mask :
++	       rapl_pkg_cntr_mask) & (1 << bit)))
+ 		return -EINVAL;
+ 
+ 	/* unsupported modes and filters */
+@@ -357,12 +399,18 @@ static int rapl_pmu_event_init(struct perf_event *event)
+ 		return -EINVAL;
+ 
+ 	/* must be done before validate_group */
+-	pmu = cpu_to_rapl_pmu(event->cpu);
+-	if (!pmu)
++	if (curr_rapl_pmus == rapl_pmus_core) {
++		rapl_pmu = topology_logical_core_id(event->cpu) < curr_rapl_pmus->nr_rapl_pmu ?
++			   curr_rapl_pmus->rapl_pmu[topology_logical_core_id(event->cpu)] : NULL;
++		event->hw.event_base = rapl_model->rapl_core_msrs[bit].msr;
++	} else {	/* cpu_to_rapl_pmu() range-checks the index */
++		rapl_pmu = cpu_to_rapl_pmu(event->cpu);
++		event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr;
++	}
++	if (!rapl_pmu)
+ 		return -EINVAL;
+-	event->cpu = pmu->cpu;
+-	event->pmu_private = pmu;
+-	event->hw.event_base = rapl_msrs[bit].msr;
++	event->cpu = rapl_pmu->cpu;
++	event->pmu_private = rapl_pmu;
+ 	event->hw.config = cfg;
+ 	event->hw.idx = bit;
+ 
+@@ -377,7 +425,7 @@ static void rapl_pmu_event_read(struct perf_event *event)
+ static ssize_t rapl_get_attr_cpumask(struct device *dev,
+ 				struct device_attribute *attr, char *buf)
+ {
+-	return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
++	return cpumap_print_to_pagebuf(true, buf, &rapl_pmus_pkg->cpumask);
+ }
+ 
+ static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
+@@ -391,17 +439,38 @@ static struct attribute_group rapl_pmu_attr_group = {
+ 	.attrs = rapl_pmu_attrs,
+ };
+ 
++static ssize_t rapl_get_attr_per_core_cpumask(struct device *dev,
++					     struct device_attribute *attr, char *buf)
++{
++	return cpumap_print_to_pagebuf(true, buf, &rapl_pmus_core->cpumask);
++}
++
++static struct device_attribute dev_attr_per_core_cpumask = __ATTR(cpumask, 0444,
++								 rapl_get_attr_per_core_cpumask,
++								 NULL);
++
++static struct attribute *rapl_pmu_per_core_attrs[] = {
++	&dev_attr_per_core_cpumask.attr,
++	NULL,
++};
++
++static struct attribute_group rapl_pmu_per_core_attr_group = {
++	.attrs = rapl_pmu_per_core_attrs,
++};
++
+ RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
+ RAPL_EVENT_ATTR_STR(energy-pkg  ,   rapl_pkg, "event=0x02");
+ RAPL_EVENT_ATTR_STR(energy-ram  ,   rapl_ram, "event=0x03");
+ RAPL_EVENT_ATTR_STR(energy-gpu  ,   rapl_gpu, "event=0x04");
+ RAPL_EVENT_ATTR_STR(energy-psys,   rapl_psys, "event=0x05");
++RAPL_EVENT_ATTR_STR(energy-per-core,   rapl_per_core, "event=0x01");
+ 
+ RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
+ RAPL_EVENT_ATTR_STR(energy-pkg.unit  ,   rapl_pkg_unit, "Joules");
+ RAPL_EVENT_ATTR_STR(energy-ram.unit  ,   rapl_ram_unit, "Joules");
+ RAPL_EVENT_ATTR_STR(energy-gpu.unit  ,   rapl_gpu_unit, "Joules");
+ RAPL_EVENT_ATTR_STR(energy-psys.unit,   rapl_psys_unit, "Joules");
++RAPL_EVENT_ATTR_STR(energy-per-core.unit,   rapl_per_core_unit, "Joules");
+ 
+ /*
+  * we compute in 0.23 nJ increments regardless of MSR
+@@ -411,6 +480,7 @@ RAPL_EVENT_ATTR_STR(energy-pkg.scale,     rapl_pkg_scale, "2.3283064365386962890
+ RAPL_EVENT_ATTR_STR(energy-ram.scale,     rapl_ram_scale, "2.3283064365386962890625e-10");
+ RAPL_EVENT_ATTR_STR(energy-gpu.scale,     rapl_gpu_scale, "2.3283064365386962890625e-10");
+ RAPL_EVENT_ATTR_STR(energy-psys.scale,   rapl_psys_scale, "2.3283064365386962890625e-10");
++RAPL_EVENT_ATTR_STR(energy-per-core.scale,   rapl_per_core_scale, "2.3283064365386962890625e-10");
+ 
+ /*
+  * There are no default events, but we need to create
+@@ -444,6 +514,13 @@ static const struct attribute_group *rapl_attr_groups[] = {
+ 	NULL,
+ };
+ 
++static const struct attribute_group *rapl_per_core_attr_groups[] = {
++	&rapl_pmu_per_core_attr_group,
++	&rapl_pmu_format_group,
++	&rapl_pmu_events_group,
++	NULL,
++};
++
+ static struct attribute *rapl_events_cores[] = {
+ 	EVENT_PTR(rapl_cores),
+ 	EVENT_PTR(rapl_cores_unit),
+@@ -504,6 +581,18 @@ static struct attribute_group rapl_events_psys_group = {
+ 	.attrs = rapl_events_psys,
+ };
+ 
++static struct attribute *rapl_events_per_core[] = {
++	EVENT_PTR(rapl_per_core),
++	EVENT_PTR(rapl_per_core_unit),
++	EVENT_PTR(rapl_per_core_scale),
++	NULL,
++};
++
++static struct attribute_group rapl_events_per_core_group = {
++	.name  = "events",
++	.attrs = rapl_events_per_core,
++};
++
+ static bool test_msr(int idx, void *data)
+ {
+ 	return test_bit(idx, (unsigned long *) data);
+@@ -529,11 +618,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = {
+ };
+ 
+ /*
+- * Force to PERF_RAPL_MAX size due to:
+- * - perf_msr_probe(PERF_RAPL_MAX)
++ * Force to PERF_RAPL_PKG_EVENTS_MAX size due to:
++ * - perf_msr_probe(PERF_RAPL_PKG_EVENTS_MAX)
+  * - want to use same event codes across both architectures
+  */
+-static struct perf_msr amd_rapl_msrs[] = {
++static struct perf_msr amd_rapl_pkg_msrs[] = {
+ 	[PERF_RAPL_PP0]  = { 0, &rapl_events_cores_group, NULL, false, 0 },
+ 	[PERF_RAPL_PKG]  = { MSR_AMD_PKG_ENERGY_STATUS,  &rapl_events_pkg_group,   test_msr, false, RAPL_MSR_MASK },
+ 	[PERF_RAPL_RAM]  = { 0, &rapl_events_ram_group,   NULL, false, 0 },
+@@ -541,72 +630,104 @@ static struct perf_msr amd_rapl_msrs[] = {
+ 	[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group,  NULL, false, 0 },
+ };
+ 
+-static int rapl_cpu_offline(unsigned int cpu)
++static struct perf_msr amd_rapl_core_msrs[] = {
++	[PERF_RAPL_PER_CORE] = { MSR_AMD_CORE_ENERGY_STATUS, &rapl_events_per_core_group,
++				 test_msr, false, RAPL_MSR_MASK },
++};
++
++static int __rapl_cpu_offline(struct rapl_pmus *rapl_pmus, unsigned int rapl_pmu_idx,
++			      const struct cpumask *event_cpumask, unsigned int cpu)
+ {
+-	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
++	struct rapl_pmu *rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx];
+ 	int target;
+ 
+ 	/* Check if exiting cpu is used for collecting rapl events */
+-	if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
++	if (!cpumask_test_and_clear_cpu(cpu, &rapl_pmus->cpumask))
+ 		return 0;
+ 
+-	pmu->cpu = -1;
++	rapl_pmu->cpu = -1;
+ 	/* Find a new cpu to collect rapl events */
+-	target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
++	target = cpumask_any_but(event_cpumask, cpu);
+ 
+ 	/* Migrate rapl events to the new target */
+ 	if (target < nr_cpu_ids) {
+-		cpumask_set_cpu(target, &rapl_cpu_mask);
+-		pmu->cpu = target;
+-		perf_pmu_migrate_context(pmu->pmu, cpu, target);
++		cpumask_set_cpu(target, &rapl_pmus->cpumask);
++		rapl_pmu->cpu = target;
++		perf_pmu_migrate_context(rapl_pmu->pmu, cpu, target);
+ 	}
+ 	return 0;
+ }
+ 
+-static int rapl_cpu_online(unsigned int cpu)
++static int rapl_cpu_offline(unsigned int cpu)
++{
++	int ret =  __rapl_cpu_offline(rapl_pmus_pkg, get_rapl_pmu_idx(cpu),
++				  get_rapl_pmu_cpumask(cpu), cpu);
++
++	if (ret == 0 && rapl_model->core_events)
++		ret = __rapl_cpu_offline(rapl_pmus_core, topology_logical_core_id(cpu),
++				   topology_sibling_cpumask(cpu), cpu);
++
++	return ret;
++}
++
++static int __rapl_cpu_online(struct rapl_pmus *rapl_pmus, unsigned int rapl_pmu_idx,
++			     const struct cpumask *event_cpumask, unsigned int cpu)
+ {
+-	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
++	struct rapl_pmu *rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx];
+ 	int target;
+ 
+-	if (!pmu) {
+-		pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
+-		if (!pmu)
++	if (!rapl_pmu) {
++		rapl_pmu = kzalloc_node(sizeof(*rapl_pmu), GFP_KERNEL, cpu_to_node(cpu));
++		if (!rapl_pmu)
+ 			return -ENOMEM;
+ 
+-		raw_spin_lock_init(&pmu->lock);
+-		INIT_LIST_HEAD(&pmu->active_list);
+-		pmu->pmu = &rapl_pmus->pmu;
+-		pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+-		rapl_hrtimer_init(pmu);
++		raw_spin_lock_init(&rapl_pmu->lock);
++		INIT_LIST_HEAD(&rapl_pmu->active_list);
++		rapl_pmu->pmu = &rapl_pmus->pmu;
++		rapl_pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
++		rapl_hrtimer_init(rapl_pmu);
+ 
+-		rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
++		rapl_pmus->rapl_pmu[rapl_pmu_idx] = rapl_pmu;
+ 	}
+ 
+ 	/*
+ 	 * Check if there is an online cpu in the package which collects rapl
+ 	 * events already.
+ 	 */
+-	target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu));
++	target = cpumask_any_and(&rapl_pmus->cpumask, event_cpumask);
+ 	if (target < nr_cpu_ids)
+ 		return 0;
+ 
+-	cpumask_set_cpu(cpu, &rapl_cpu_mask);
+-	pmu->cpu = cpu;
++	cpumask_set_cpu(cpu, &rapl_pmus->cpumask);
++	rapl_pmu->cpu = cpu;
+ 	return 0;
+ }
+ 
+-static int rapl_check_hw_unit(struct rapl_model *rm)
++static int rapl_cpu_online(unsigned int cpu)
++{
++	int ret =  __rapl_cpu_online(rapl_pmus_pkg, get_rapl_pmu_idx(cpu),
++				 get_rapl_pmu_cpumask(cpu), cpu);
++
++	if (ret == 0 && rapl_model->core_events)
++		ret = __rapl_cpu_online(rapl_pmus_core, topology_logical_core_id(cpu),
++				   topology_sibling_cpumask(cpu), cpu);
++
++	return ret;
++}
++
++
++static int rapl_check_hw_unit(void)
+ {
+ 	u64 msr_rapl_power_unit_bits;
+ 	int i;
+ 
+ 	/* protect rdmsrl() to handle virtualization */
+-	if (rdmsrl_safe(rm->msr_power_unit, &msr_rapl_power_unit_bits))
++	if (rdmsrl_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits))
+ 		return -1;
+-	for (i = 0; i < NR_RAPL_DOMAINS; i++)
++	for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++)
+ 		rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+ 
+-	switch (rm->unit_quirk) {
++	switch (rapl_model->unit_quirk) {
+ 	/*
+ 	 * DRAM domain on HSW server and KNL has fixed energy unit which can be
+ 	 * different than the unit from power unit MSR. See
+@@ -645,22 +766,29 @@ static void __init rapl_advertise(void)
+ 	int i;
+ 
+ 	pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
+-		hweight32(rapl_cntr_mask), rapl_timer_ms);
++		hweight32(rapl_pkg_cntr_mask) + hweight32(rapl_core_cntr_mask), rapl_timer_ms);
+ 
+-	for (i = 0; i < NR_RAPL_DOMAINS; i++) {
+-		if (rapl_cntr_mask & (1 << i)) {
++	for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) {
++		if (rapl_pkg_cntr_mask & (1 << i)) {
+ 			pr_info("hw unit of domain %s 2^-%d Joules\n",
+-				rapl_domain_names[i], rapl_hw_unit[i]);
++				rapl_pkg_domain_names[i], rapl_hw_unit[i]);
++		}
++	}
++
++	for (i = 0; i < NR_RAPL_CORE_DOMAINS; i++) {
++		if (rapl_core_cntr_mask & (1 << i)) {
++			pr_info("hw unit of domain %s 2^-%d Joules\n",
++				rapl_core_domain_names[i], rapl_hw_unit[i]);
+ 		}
+ 	}
+ }
+ 
+-static void cleanup_rapl_pmus(void)
++static void cleanup_rapl_pmus(struct rapl_pmus *rapl_pmus)
+ {
+ 	int i;
+ 
+ 	for (i = 0; i < rapl_pmus->nr_rapl_pmu; i++)
+-		kfree(rapl_pmus->pmus[i]);
++		kfree(rapl_pmus->rapl_pmu[i]);
+ 	kfree(rapl_pmus);
+ }
+ 
+@@ -673,11 +801,17 @@ static const struct attribute_group *rapl_attr_update[] = {
+ 	NULL,
+ };
+ 
+-static int __init init_rapl_pmus(void)
++static const struct attribute_group *rapl_per_core_attr_update[] = {
++	&rapl_events_per_core_group, NULL,	/* NULL-terminated like rapl_attr_update[] */
++};
++
++static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int nr_rapl_pmu,
++				 const struct attribute_group **rapl_attr_groups,
++				 const struct attribute_group **rapl_attr_update)
+ {
+-	int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
++	struct rapl_pmus *rapl_pmus;
+ 
+-	rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL);
++	rapl_pmus = kzalloc(struct_size(rapl_pmus, rapl_pmu, nr_rapl_pmu), GFP_KERNEL);
+ 	if (!rapl_pmus)
+ 		return -ENOMEM;
+ 
+@@ -693,75 +827,80 @@ static int __init init_rapl_pmus(void)
+ 	rapl_pmus->pmu.read		= rapl_pmu_event_read;
+ 	rapl_pmus->pmu.module		= THIS_MODULE;
+ 	rapl_pmus->pmu.capabilities	= PERF_PMU_CAP_NO_EXCLUDE;
++
++	*rapl_pmus_ptr = rapl_pmus;
++
+ 	return 0;
+ }
+ 
+ static struct rapl_model model_snb = {
+-	.events		= BIT(PERF_RAPL_PP0) |
++	.pkg_events	= BIT(PERF_RAPL_PP0) |
+ 			  BIT(PERF_RAPL_PKG) |
+ 			  BIT(PERF_RAPL_PP1),
+ 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
+-	.rapl_msrs      = intel_rapl_msrs,
++	.rapl_pkg_msrs	= intel_rapl_msrs,
+ };
+ 
+ static struct rapl_model model_snbep = {
+-	.events		= BIT(PERF_RAPL_PP0) |
++	.pkg_events	= BIT(PERF_RAPL_PP0) |
+ 			  BIT(PERF_RAPL_PKG) |
+ 			  BIT(PERF_RAPL_RAM),
+ 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
+-	.rapl_msrs      = intel_rapl_msrs,
++	.rapl_pkg_msrs	= intel_rapl_msrs,
+ };
+ 
+ static struct rapl_model model_hsw = {
+-	.events		= BIT(PERF_RAPL_PP0) |
++	.pkg_events	= BIT(PERF_RAPL_PP0) |
+ 			  BIT(PERF_RAPL_PKG) |
+ 			  BIT(PERF_RAPL_RAM) |
+ 			  BIT(PERF_RAPL_PP1),
+ 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
+-	.rapl_msrs      = intel_rapl_msrs,
++	.rapl_pkg_msrs	= intel_rapl_msrs,
+ };
+ 
+ static struct rapl_model model_hsx = {
+-	.events		= BIT(PERF_RAPL_PP0) |
++	.pkg_events	= BIT(PERF_RAPL_PP0) |
+ 			  BIT(PERF_RAPL_PKG) |
+ 			  BIT(PERF_RAPL_RAM),
+ 	.unit_quirk	= RAPL_UNIT_QUIRK_INTEL_HSW,
+ 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
+-	.rapl_msrs      = intel_rapl_msrs,
++	.rapl_pkg_msrs	= intel_rapl_msrs,
+ };
+ 
+ static struct rapl_model model_knl = {
+-	.events		= BIT(PERF_RAPL_PKG) |
++	.pkg_events	= BIT(PERF_RAPL_PKG) |
+ 			  BIT(PERF_RAPL_RAM),
+ 	.unit_quirk	= RAPL_UNIT_QUIRK_INTEL_HSW,
+ 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
+-	.rapl_msrs      = intel_rapl_msrs,
++	.rapl_pkg_msrs	= intel_rapl_msrs,
+ };
+ 
+ static struct rapl_model model_skl = {
+-	.events		= BIT(PERF_RAPL_PP0) |
++	.pkg_events	= BIT(PERF_RAPL_PP0) |
+ 			  BIT(PERF_RAPL_PKG) |
+ 			  BIT(PERF_RAPL_RAM) |
+ 			  BIT(PERF_RAPL_PP1) |
+ 			  BIT(PERF_RAPL_PSYS),
+ 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
+-	.rapl_msrs      = intel_rapl_msrs,
++	.rapl_pkg_msrs	= intel_rapl_msrs,
+ };
+ 
+ static struct rapl_model model_spr = {
+-	.events		= BIT(PERF_RAPL_PP0) |
++	.pkg_events	= BIT(PERF_RAPL_PP0) |
+ 			  BIT(PERF_RAPL_PKG) |
+ 			  BIT(PERF_RAPL_RAM) |
+ 			  BIT(PERF_RAPL_PSYS),
+ 	.unit_quirk	= RAPL_UNIT_QUIRK_INTEL_SPR,
+ 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
+-	.rapl_msrs      = intel_rapl_spr_msrs,
++	.rapl_pkg_msrs	= intel_rapl_spr_msrs,
+ };
+ 
+ static struct rapl_model model_amd_hygon = {
+-	.events		= BIT(PERF_RAPL_PKG),
++	.pkg_events	= BIT(PERF_RAPL_PKG),
++	.core_events	= BIT(PERF_RAPL_PER_CORE),
+ 	.msr_power_unit = MSR_AMD_RAPL_POWER_UNIT,
+-	.rapl_msrs      = amd_rapl_msrs,
++	.rapl_pkg_msrs	= amd_rapl_pkg_msrs,
++	.rapl_core_msrs	= amd_rapl_core_msrs,
+ };
+ 
+ static const struct x86_cpu_id rapl_model_match[] __initconst = {
+@@ -817,28 +956,47 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
+ static int __init rapl_pmu_init(void)
+ {
+ 	const struct x86_cpu_id *id;
+-	struct rapl_model *rm;
+ 	int ret;
++	int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
++	int nr_cores = topology_max_packages() * topology_num_cores_per_package();
++
++	if (rapl_pmu_is_pkg_scope())
++		nr_rapl_pmu = topology_max_packages();
+ 
+ 	id = x86_match_cpu(rapl_model_match);
+ 	if (!id)
+ 		return -ENODEV;
+ 
+-	rm = (struct rapl_model *) id->driver_data;
+-
+-	rapl_msrs = rm->rapl_msrs;
++	rapl_model = (struct rapl_model *) id->driver_data;
+ 
+-	rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX,
+-					false, (void *) &rm->events);
++	rapl_pkg_cntr_mask = perf_msr_probe(rapl_model->rapl_pkg_msrs, PERF_RAPL_PKG_EVENTS_MAX,
++					false, (void *) &rapl_model->pkg_events);
+ 
+-	ret = rapl_check_hw_unit(rm);
++	ret = rapl_check_hw_unit();
+ 	if (ret)
+ 		return ret;
+ 
+-	ret = init_rapl_pmus();
++	ret = init_rapl_pmus(&rapl_pmus_pkg, nr_rapl_pmu, rapl_attr_groups, rapl_attr_update);
+ 	if (ret)
+ 		return ret;
+ 
++	if (rapl_model->core_events) {
++		rapl_core_cntr_mask = perf_msr_probe(rapl_model->rapl_core_msrs,
++						     PERF_RAPL_CORE_EVENTS_MAX, false,
++						     (void *) &rapl_model->core_events);
++
++		ret = init_rapl_pmus(&rapl_pmus_core, nr_cores,
++				     rapl_per_core_attr_groups, rapl_per_core_attr_update);
++		if (ret) {
++			/*
++			 * If initialization of per_core PMU fails, reset per_core
++			 * flag, and continue with power PMU initialization.
++			 */
++			pr_warn("Per-core PMU initialization failed (%d)\n", ret);
++			rapl_model->core_events = 0UL;
++		}
++	}
++
+ 	/*
+ 	 * Install callbacks. Core will call them for each online cpu.
+ 	 */
+@@ -848,10 +1006,24 @@ static int __init rapl_pmu_init(void)
+ 	if (ret)
+ 		goto out;
+ 
+-	ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
++	ret = perf_pmu_register(&rapl_pmus_pkg->pmu, "power", -1);
+ 	if (ret)
+ 		goto out1;
+ 
++	if (rapl_model->core_events) {
++		ret = perf_pmu_register(&rapl_pmus_core->pmu, "power_per_core", -1);
++		if (ret) {
++			/*
++			 * Free the per_core PMU and clear the state event_init still reads.
++			 */
++			pr_warn("Per-core PMU registration failed (%d)\n", ret);
++			cleanup_rapl_pmus(rapl_pmus_core);
++			rapl_pmus_core = NULL;
++			rapl_core_cntr_mask = 0;
++			rapl_model->core_events = 0UL;
++		}
++	}
++
+ 	rapl_advertise();
+ 	return 0;
+ 
+@@ -859,7 +1031,7 @@ static int __init rapl_pmu_init(void)
+ 	cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
+ out:
+ 	pr_warn("Initialization failed (%d), disabled\n", ret);
+-	cleanup_rapl_pmus();
++	cleanup_rapl_pmus(rapl_pmus_pkg);
+ 	return ret;
+ }
+ module_init(rapl_pmu_init);
+@@ -867,7 +1039,11 @@ module_init(rapl_pmu_init);
+ static void __exit intel_rapl_exit(void)
+ {
+ 	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
+-	perf_pmu_unregister(&rapl_pmus->pmu);
+-	cleanup_rapl_pmus();
++	perf_pmu_unregister(&rapl_pmus_pkg->pmu);
++	cleanup_rapl_pmus(rapl_pmus_pkg);
++	if (rapl_model->core_events) {
++		perf_pmu_unregister(&rapl_pmus_core->pmu);
++		cleanup_rapl_pmus(rapl_pmus_core);
++	}
+ }
+ module_exit(intel_rapl_exit);
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index e17f4d733e44..7e53b701bc27 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -98,6 +98,7 @@ struct cpuinfo_topology {
+ 	// Logical ID mappings
+ 	u32			logical_pkg_id;
+ 	u32			logical_die_id;
++	u32			logical_core_id;
+ 
+ 	// AMD Node ID and Nodes per Package info
+ 	u32			amd_node_id;
+diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
+index 9f9376db64e3..1c55229efb1e 100644
+--- a/arch/x86/include/asm/topology.h
++++ b/arch/x86/include/asm/topology.h
+@@ -143,6 +143,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu);
+ #define topology_logical_package_id(cpu)	(cpu_data(cpu).topo.logical_pkg_id)
+ #define topology_physical_package_id(cpu)	(cpu_data(cpu).topo.pkg_id)
+ #define topology_logical_die_id(cpu)		(cpu_data(cpu).topo.logical_die_id)
++#define topology_logical_core_id(cpu)		(cpu_data(cpu).topo.logical_core_id)
+ #define topology_die_id(cpu)			(cpu_data(cpu).topo.die_id)
+ #define topology_core_id(cpu)			(cpu_data(cpu).topo.core_id)
+ #define topology_ppin(cpu)			(cpu_data(cpu).ppin)
+diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c
+index 10719aba6276..cacfd3f6abef 100644
+--- a/arch/x86/kernel/cpu/debugfs.c
++++ b/arch/x86/kernel/cpu/debugfs.c
+@@ -25,6 +25,7 @@ static int cpu_debug_show(struct seq_file *m, void *p)
+ 	seq_printf(m, "cpu_type:            %s\n", get_topology_cpu_type_name(c));
+ 	seq_printf(m, "logical_pkg_id:      %u\n", c->topo.logical_pkg_id);
+ 	seq_printf(m, "logical_die_id:      %u\n", c->topo.logical_die_id);
++	seq_printf(m, "logical_core_id:     %u\n", c->topo.logical_core_id);
+ 	seq_printf(m, "llc_id:              %u\n", c->topo.llc_id);
+ 	seq_printf(m, "l2c_id:              %u\n", c->topo.l2c_id);
+ 	seq_printf(m, "amd_node_id:         %u\n", c->topo.amd_node_id);
+diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c
+index 8277c64f88db..b5a5e1411469 100644
+--- a/arch/x86/kernel/cpu/topology_common.c
++++ b/arch/x86/kernel/cpu/topology_common.c
+@@ -185,6 +185,7 @@ static void topo_set_ids(struct topo_scan *tscan, bool early)
+ 	if (!early) {
+ 		c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN);
+ 		c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN);
++		c->topo.logical_core_id = topology_get_logical_id(apicid, TOPO_CORE_DOMAIN);
+ 	}
+ 
+ 	/* Package relative core ID */
+-- 
+2.47.0.rc0
+
+From 38ca6249d4a3205988323759c2e0986d93e737aa Mon Sep 17 00:00:00 2001
+From: Eric Naim <dnaim@cachyos.org>
+Date: Tue, 22 Oct 2024 22:53:29 +0800
+Subject: [PATCH 11/13] t2
+
+Signed-off-by: Eric Naim <dnaim@cachyos.org>
+---
+ .../ABI/testing/sysfs-driver-hid-appletb-kbd  |   13 +
+ Documentation/core-api/printk-formats.rst     |   32 +
+ MAINTAINERS                                   |    6 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |    3 +
+ drivers/gpu/drm/drm_format_helper.c           |   54 +
+ drivers/gpu/drm/i915/display/intel_ddi.c      |    4 +
+ drivers/gpu/drm/i915/display/intel_fbdev.c    |    6 +-
+ drivers/gpu/drm/i915/display/intel_quirks.c   |   15 +
+ drivers/gpu/drm/i915/display/intel_quirks.h   |    1 +
+ .../gpu/drm/tests/drm_format_helper_test.c    |   81 ++
+ drivers/gpu/drm/tiny/Kconfig                  |   12 +
+ drivers/gpu/drm/tiny/Makefile                 |    1 +
+ drivers/gpu/drm/tiny/appletbdrm.c             |  624 +++++++++
+ drivers/gpu/vga/vga_switcheroo.c              |    7 +-
+ drivers/hid/Kconfig                           |   22 +
+ drivers/hid/Makefile                          |    2 +
+ drivers/hid/hid-appletb-bl.c                  |  207 +++
+ drivers/hid/hid-appletb-kbd.c                 |  501 ++++++++
+ drivers/hid/hid-core.c                        |   25 +
+ drivers/hid/hid-google-hammer.c               |   27 +-
+ drivers/hid/hid-multitouch.c                  |   60 +-
+ drivers/hid/hid-quirks.c                      |    8 +-
+ drivers/hwmon/applesmc.c                      | 1138 ++++++++++++-----
+ drivers/input/mouse/bcm5974.c                 |  138 ++
+ drivers/pci/vgaarb.c                          |    1 +
+ drivers/platform/x86/apple-gmux.c             |   18 +
+ drivers/staging/Kconfig                       |    2 +
+ drivers/staging/Makefile                      |    1 +
+ drivers/staging/apple-bce/Kconfig             |   18 +
+ drivers/staging/apple-bce/Makefile            |   28 +
+ drivers/staging/apple-bce/apple_bce.c         |  445 +++++++
+ drivers/staging/apple-bce/apple_bce.h         |   38 +
+ drivers/staging/apple-bce/audio/audio.c       |  711 ++++++++++
+ drivers/staging/apple-bce/audio/audio.h       |  125 ++
+ drivers/staging/apple-bce/audio/description.h |   42 +
+ drivers/staging/apple-bce/audio/pcm.c         |  308 +++++
+ drivers/staging/apple-bce/audio/pcm.h         |   16 +
+ drivers/staging/apple-bce/audio/protocol.c    |  347 +++++
+ drivers/staging/apple-bce/audio/protocol.h    |  147 +++
+ .../staging/apple-bce/audio/protocol_bce.c    |  226 ++++
+ .../staging/apple-bce/audio/protocol_bce.h    |   72 ++
+ drivers/staging/apple-bce/mailbox.c           |  151 +++
+ drivers/staging/apple-bce/mailbox.h           |   53 +
+ drivers/staging/apple-bce/queue.c             |  390 ++++++
+ drivers/staging/apple-bce/queue.h             |  177 +++
+ drivers/staging/apple-bce/queue_dma.c         |  220 ++++
+ drivers/staging/apple-bce/queue_dma.h         |   50 +
+ drivers/staging/apple-bce/vhci/command.h      |  204 +++
+ drivers/staging/apple-bce/vhci/queue.c        |  268 ++++
+ drivers/staging/apple-bce/vhci/queue.h        |   76 ++
+ drivers/staging/apple-bce/vhci/transfer.c     |  661 ++++++++++
+ drivers/staging/apple-bce/vhci/transfer.h     |   73 ++
+ drivers/staging/apple-bce/vhci/vhci.c         |  759 +++++++++++
+ drivers/staging/apple-bce/vhci/vhci.h         |   52 +
+ include/drm/drm_format_helper.h               |    3 +
+ include/linux/hid.h                           |    2 +
+ lib/test_printf.c                             |   20 +-
+ lib/vsprintf.c                                |   36 +-
+ scripts/checkpatch.pl                         |    2 +-
+ 59 files changed, 8368 insertions(+), 361 deletions(-)
+ create mode 100644 Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
+ create mode 100644 drivers/gpu/drm/tiny/appletbdrm.c
+ create mode 100644 drivers/hid/hid-appletb-bl.c
+ create mode 100644 drivers/hid/hid-appletb-kbd.c
+ create mode 100644 drivers/staging/apple-bce/Kconfig
+ create mode 100644 drivers/staging/apple-bce/Makefile
+ create mode 100644 drivers/staging/apple-bce/apple_bce.c
+ create mode 100644 drivers/staging/apple-bce/apple_bce.h
+ create mode 100644 drivers/staging/apple-bce/audio/audio.c
+ create mode 100644 drivers/staging/apple-bce/audio/audio.h
+ create mode 100644 drivers/staging/apple-bce/audio/description.h
+ create mode 100644 drivers/staging/apple-bce/audio/pcm.c
+ create mode 100644 drivers/staging/apple-bce/audio/pcm.h
+ create mode 100644 drivers/staging/apple-bce/audio/protocol.c
+ create mode 100644 drivers/staging/apple-bce/audio/protocol.h
+ create mode 100644 drivers/staging/apple-bce/audio/protocol_bce.c
+ create mode 100644 drivers/staging/apple-bce/audio/protocol_bce.h
+ create mode 100644 drivers/staging/apple-bce/mailbox.c
+ create mode 100644 drivers/staging/apple-bce/mailbox.h
+ create mode 100644 drivers/staging/apple-bce/queue.c
+ create mode 100644 drivers/staging/apple-bce/queue.h
+ create mode 100644 drivers/staging/apple-bce/queue_dma.c
+ create mode 100644 drivers/staging/apple-bce/queue_dma.h
+ create mode 100644 drivers/staging/apple-bce/vhci/command.h
+ create mode 100644 drivers/staging/apple-bce/vhci/queue.c
+ create mode 100644 drivers/staging/apple-bce/vhci/queue.h
+ create mode 100644 drivers/staging/apple-bce/vhci/transfer.c
+ create mode 100644 drivers/staging/apple-bce/vhci/transfer.h
+ create mode 100644 drivers/staging/apple-bce/vhci/vhci.c
+ create mode 100644 drivers/staging/apple-bce/vhci/vhci.h
+
+diff --git a/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
+new file mode 100644
+index 000000000000..2a19584d091e
+--- /dev/null
++++ b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
+@@ -0,0 +1,13 @@
++What:		/sys/bus/hid/drivers/hid-appletb-kbd/<dev>/mode
++Date:		September, 2023
++KernelVersion:	6.5
++Contact:	linux-input@vger.kernel.org
++Description:
++		The set of keys displayed on the Touch Bar.
++		Valid values are:
++		== =================
++		0  Escape key only
++		1  Function keys
++		2  Media/brightness keys
++		3  None
++		== =================
+diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
+index 4451ef501936..c726a846f752 100644
+--- a/Documentation/core-api/printk-formats.rst
++++ b/Documentation/core-api/printk-formats.rst
+@@ -632,6 +632,38 @@ Examples::
+ 	%p4cc	Y10  little-endian (0x20303159)
+ 	%p4cc	NV12 big-endian (0xb231564e)
+ 
++Generic FourCC code
++-------------------
++
++::
++	%p4c[hrbl]	gP00 (0x67503030)
++
++Print a generic FourCC code, as both ASCII characters and its numerical
++value as hexadecimal.
++
++The additional ``h``, ``r``, ``b``, and ``l`` specifiers are used to specify
++host, reversed, big or little endian order data respectively. Host endian
++order means the data is interpreted as a 32-bit integer and the most
++significant byte is printed first; that is, the character code as printed
++matches the byte order stored in memory on big-endian systems, and is reversed
++on little-endian systems.
++
++Passed by reference.
++
++Examples for a little-endian machine, given &(u32)0x67503030::
++
++	%p4ch	gP00 (0x67503030)
++	%p4cl	gP00 (0x67503030)
++	%p4cb	00Pg (0x30305067)
++	%p4cr	00Pg (0x30305067)
++
++Examples for a big-endian machine, given &(u32)0x67503030::
++
++	%p4ch	gP00 (0x67503030)
++	%p4cl	00Pg (0x30305067)
++	%p4cb	gP00 (0x67503030)
++	%p4cr	00Pg (0x30305067)
++
+ Rust
+ ----
+ 
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 0bcfbc58a9ab..affc58245cc1 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -6903,6 +6903,12 @@ S:	Supported
+ T:	git https://gitlab.freedesktop.org/drm/misc/kernel.git
+ F:	drivers/gpu/drm/sun4i/sun8i*
+ 
++DRM DRIVER FOR APPLE TOUCH BARS
++M:	Kerem Karabay <kekrby@gmail.com>
++L:	dri-devel@lists.freedesktop.org
++S:	Maintained
++F:	drivers/gpu/drm/tiny/appletbdrm.c
++
+ DRM DRIVER FOR ARM PL111 CLCD
+ S:	Orphan
+ T:	git https://gitlab.freedesktop.org/drm/misc/kernel.git
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+index ad5c05ee92f3..09c82a3e83f2 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+@@ -2237,6 +2237,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
+ 	int ret, retry = 0, i;
+ 	bool supports_atomic = false;
+ 
++	if (vga_switcheroo_client_probe_defer(pdev))
++		return -EPROBE_DEFER;
++
+ 	/* skip devices which are owned by radeon */
+ 	for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) {
+ 		if (amdgpu_unsupported_pciidlist[i] == pdev->device)
+diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
+index b1be458ed4dd..28c0e76a1e88 100644
+--- a/drivers/gpu/drm/drm_format_helper.c
++++ b/drivers/gpu/drm/drm_format_helper.c
+@@ -702,6 +702,57 @@ void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pi
+ }
+ EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb888);
+ 
++static void drm_fb_xrgb8888_to_bgr888_line(void *dbuf, const void *sbuf, unsigned int pixels)
++{
++	u8 *dbuf8 = dbuf;
++	const __le32 *sbuf32 = sbuf;
++	unsigned int x;
++	u32 pix;
++
++	for (x = 0; x < pixels; x++) {
++		pix = le32_to_cpu(sbuf32[x]);
++		/* write red-green-blue to output in little endianness */
++		*dbuf8++ = (pix & 0x00FF0000) >> 16;
++		*dbuf8++ = (pix & 0x0000FF00) >> 8;
++		*dbuf8++ = (pix & 0x000000FF) >> 0;
++	}
++}
++
++/**
++ * drm_fb_xrgb8888_to_bgr888 - Convert XRGB8888 to BGR888 clip buffer
++ * @dst: Array of BGR888 destination buffers
++ * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines
++ *             within @dst; can be NULL if scanlines are stored next to each other.
++ * @src: Array of XRGB8888 source buffers
++ * @fb: DRM framebuffer
++ * @clip: Clip rectangle area to copy
++ * @state: Transform and conversion state
++ *
++ * This function copies parts of a framebuffer to display memory and converts the
++ * color format during the process. Destination and framebuffer formats must match. The
++ * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at
++ * least as many entries as there are planes in @fb's format. Each entry stores the
++ * value for the format's respective color plane at the same index.
++ *
++ * This function does not apply clipping on @dst (i.e. the destination is at the
++ * top-left corner).
++ *
++ * Drivers can use this function for BGR888 devices that don't natively
++ * support XRGB8888.
++ */
++void drm_fb_xrgb8888_to_bgr888(struct iosys_map *dst, const unsigned int *dst_pitch,
++			       const struct iosys_map *src, const struct drm_framebuffer *fb,
++			       const struct drm_rect *clip, struct drm_format_conv_state *state)
++{
++	static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = {
++		3,
++	};
++
++	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state,
++		    drm_fb_xrgb8888_to_bgr888_line);
++}
++EXPORT_SYMBOL(drm_fb_xrgb8888_to_bgr888);
++
+ static void drm_fb_xrgb8888_to_argb8888_line(void *dbuf, const void *sbuf, unsigned int pixels)
+ {
+ 	__le32 *dbuf32 = dbuf;
+@@ -1035,6 +1086,9 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d
+ 		} else if (dst_format == DRM_FORMAT_RGB888) {
+ 			drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, clip, state);
+ 			return 0;
++		} else if (dst_format == DRM_FORMAT_BGR888) {
++			drm_fb_xrgb8888_to_bgr888(dst, dst_pitch, src, fb, clip, state);
++			return 0;
+ 		} else if (dst_format == DRM_FORMAT_ARGB8888) {
+ 			drm_fb_xrgb8888_to_argb8888(dst, dst_pitch, src, fb, clip, state);
+ 			return 0;
+diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
+index 5b6aabce4c32..fafc673d508e 100644
+--- a/drivers/gpu/drm/i915/display/intel_ddi.c
++++ b/drivers/gpu/drm/i915/display/intel_ddi.c
+@@ -4640,6 +4640,7 @@ intel_ddi_init_hdmi_connector(struct intel_digital_port *dig_port)
+ 
+ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port)
+ {
++	struct intel_display *display = to_intel_display(dig_port);
+ 	struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
+ 
+ 	if (dig_port->base.port != PORT_A)
+@@ -4648,6 +4649,9 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port)
+ 	if (dig_port->saved_port_bits & DDI_A_4_LANES)
+ 		return false;
+ 
++	if (intel_has_quirk(display, QUIRK_DDI_A_FORCE_4_LANES))
++		return true;
++
+ 	/* Broxton/Geminilake: Bspec says that DDI_A_4_LANES is the only
+ 	 *                     supported configuration
+ 	 */
+diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
+index 49a1ac4f5491..c8c10a6104c4 100644
+--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
++++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
+@@ -199,10 +199,10 @@ static int intelfb_create(struct drm_fb_helper *helper,
+ 	ifbdev->fb = NULL;
+ 
+ 	if (fb &&
+-	    (sizes->fb_width > fb->base.width ||
+-	     sizes->fb_height > fb->base.height)) {
++	    (sizes->fb_width != fb->base.width ||
++	     sizes->fb_height != fb->base.height)) {
+ 		drm_dbg_kms(&dev_priv->drm,
+-			    "BIOS fb too small (%dx%d), we require (%dx%d),"
++			    "BIOS fb not valid (%dx%d), we require (%dx%d),"
+ 			    " releasing it\n",
+ 			    fb->base.width, fb->base.height,
+ 			    sizes->fb_width, sizes->fb_height);
+diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c
+index dfd8b4960e6d..7232f9acd0a0 100644
+--- a/drivers/gpu/drm/i915/display/intel_quirks.c
++++ b/drivers/gpu/drm/i915/display/intel_quirks.c
+@@ -64,6 +64,18 @@ static void quirk_increase_ddi_disabled_time(struct intel_display *display)
+ 	drm_info(display->drm, "Applying Increase DDI Disabled quirk\n");
+ }
+ 
++/*
++ * In some cases, the firmware might not set the lane count to 4 (for example,
++ * when booting in some dual GPU Macs with the dGPU as the default GPU), this
++ * quirk is used to force it as otherwise it might not be possible to compute a
++ * valid link configuration.
++ */
++static void quirk_ddi_a_force_4_lanes(struct intel_display *display)
++{
++	intel_set_quirk(display, QUIRK_DDI_A_FORCE_4_LANES);
++	drm_info(display->drm, "Applying DDI A Forced 4 Lanes quirk\n");
++}
++
+ static void quirk_no_pps_backlight_power_hook(struct intel_display *display)
+ {
+ 	intel_set_quirk(display, QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK);
+@@ -229,6 +241,9 @@ static struct intel_quirk intel_quirks[] = {
+ 	{ 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time },
+ 	/* HP Notebook - 14-r206nv */
+ 	{ 0x0f31, 0x103c, 0x220f, quirk_invert_brightness },
++
++	/* Apple MacBookPro15,1 */
++	{ 0x3e9b, 0x106b, 0x0176, quirk_ddi_a_force_4_lanes },
+ };
+ 
+ static struct intel_dpcd_quirk intel_dpcd_quirks[] = {
+diff --git a/drivers/gpu/drm/i915/display/intel_quirks.h b/drivers/gpu/drm/i915/display/intel_quirks.h
+index cafdebda7535..a5296f82776e 100644
+--- a/drivers/gpu/drm/i915/display/intel_quirks.h
++++ b/drivers/gpu/drm/i915/display/intel_quirks.h
+@@ -20,6 +20,7 @@ enum intel_quirk_id {
+ 	QUIRK_LVDS_SSC_DISABLE,
+ 	QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK,
+ 	QUIRK_FW_SYNC_LEN,
++	QUIRK_DDI_A_FORCE_4_LANES,
+ };
+ 
+ void intel_init_quirks(struct intel_display *display);
+diff --git a/drivers/gpu/drm/tests/drm_format_helper_test.c b/drivers/gpu/drm/tests/drm_format_helper_test.c
+index 08992636ec05..35cd3405d045 100644
+--- a/drivers/gpu/drm/tests/drm_format_helper_test.c
++++ b/drivers/gpu/drm/tests/drm_format_helper_test.c
+@@ -60,6 +60,11 @@ struct convert_to_rgb888_result {
+ 	const u8 expected[TEST_BUF_SIZE];
+ };
+ 
++struct convert_to_bgr888_result {
++	unsigned int dst_pitch;
++	const u8 expected[TEST_BUF_SIZE];
++};
++
+ struct convert_to_argb8888_result {
+ 	unsigned int dst_pitch;
+ 	const u32 expected[TEST_BUF_SIZE];
+@@ -107,6 +112,7 @@ struct convert_xrgb8888_case {
+ 	struct convert_to_argb1555_result argb1555_result;
+ 	struct convert_to_rgba5551_result rgba5551_result;
+ 	struct convert_to_rgb888_result rgb888_result;
++	struct convert_to_bgr888_result bgr888_result;
+ 	struct convert_to_argb8888_result argb8888_result;
+ 	struct convert_to_xrgb2101010_result xrgb2101010_result;
+ 	struct convert_to_argb2101010_result argb2101010_result;
+@@ -151,6 +157,10 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = {
+ 			.dst_pitch = TEST_USE_DEFAULT_PITCH,
+ 			.expected = { 0x00, 0x00, 0xFF },
+ 		},
++		.bgr888_result = {
++			.dst_pitch = TEST_USE_DEFAULT_PITCH,
++			.expected = { 0xFF, 0x00, 0x00 },
++		},
+ 		.argb8888_result = {
+ 			.dst_pitch = TEST_USE_DEFAULT_PITCH,
+ 			.expected = { 0xFFFF0000 },
+@@ -217,6 +227,10 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = {
+ 			.dst_pitch = TEST_USE_DEFAULT_PITCH,
+ 			.expected = { 0x00, 0x00, 0xFF },
+ 		},
++		.bgr888_result = {
++			.dst_pitch = TEST_USE_DEFAULT_PITCH,
++			.expected = { 0xFF, 0x00, 0x00 },
++		},
+ 		.argb8888_result = {
+ 			.dst_pitch = TEST_USE_DEFAULT_PITCH,
+ 			.expected = { 0xFFFF0000 },
+@@ -330,6 +344,15 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = {
+ 				0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00,
+ 			},
+ 		},
++		.bgr888_result = {
++			.dst_pitch = TEST_USE_DEFAULT_PITCH,
++			.expected = {
++				0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00,
++				0xFF, 0x00, 0x00, 0x00, 0xFF, 0x00,
++				0x00, 0x00, 0xFF, 0xFF, 0x00, 0xFF,
++				0xFF, 0xFF, 0x00, 0x00, 0xFF, 0xFF,
++			},
++		},
+ 		.argb8888_result = {
+ 			.dst_pitch = TEST_USE_DEFAULT_PITCH,
+ 			.expected = {
+@@ -468,6 +491,17 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = {
+ 				0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 			},
+ 		},
++		.bgr888_result = {
++			.dst_pitch = 15,
++			.expected = {
++				0x0E, 0x44, 0x9C, 0x11, 0x4D, 0x05, 0xA8, 0xF3, 0x03,
++				0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++				0x6C, 0xF0, 0x73, 0x0E, 0x44, 0x9C, 0x11, 0x4D, 0x05,
++				0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++				0xA8, 0x03, 0x03, 0x6C, 0xF0, 0x73, 0x0E, 0x44, 0x9C,
++				0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++			},
++		},
+ 		.argb8888_result = {
+ 			.dst_pitch = 20,
+ 			.expected = {
+@@ -914,6 +948,52 @@ static void drm_test_fb_xrgb8888_to_rgb888(struct kunit *test)
+ 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
+ }
+ 
++static void drm_test_fb_xrgb8888_to_bgr888(struct kunit *test)
++{
++	const struct convert_xrgb8888_case *params = test->param_value;
++	const struct convert_to_bgr888_result *result = &params->bgr888_result;
++	size_t dst_size;
++	u8 *buf = NULL;
++	__le32 *xrgb8888 = NULL;
++	struct iosys_map dst, src;
++
++	struct drm_framebuffer fb = {
++		.format = drm_format_info(DRM_FORMAT_XRGB8888),
++		.pitches = { params->pitch, 0, 0 },
++	};
++
++	dst_size = conversion_buf_size(DRM_FORMAT_BGR888, result->dst_pitch,
++				       &params->clip, 0);
++	KUNIT_ASSERT_GT(test, dst_size, 0);
++
++	buf = kunit_kzalloc(test, dst_size, GFP_KERNEL);
++	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf);
++	iosys_map_set_vaddr(&dst, buf);
++
++	xrgb8888 = cpubuf_to_le32(test, params->xrgb8888, TEST_BUF_SIZE);
++	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xrgb8888);
++	iosys_map_set_vaddr(&src, xrgb8888);
++
++	/*
++	 * BGR888 expected results are already in little-endian
++	 * order, so there's no need to convert the test output.
++	 */
++	drm_fb_xrgb8888_to_bgr888(&dst, &result->dst_pitch, &src, &fb, &params->clip,
++				  &fmtcnv_state);
++	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
++
++	buf = dst.vaddr; /* restore original value of buf */
++	memset(buf, 0, dst_size);
++
++	int blit_result = 0;
++
++	blit_result = drm_fb_blit(&dst, &result->dst_pitch, DRM_FORMAT_BGR888, &src, &fb, &params->clip,
++				  &fmtcnv_state);
++
++	KUNIT_EXPECT_FALSE(test, blit_result);
++	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
++}
++
+ static void drm_test_fb_xrgb8888_to_argb8888(struct kunit *test)
+ {
+ 	const struct convert_xrgb8888_case *params = test->param_value;
+@@ -1851,6 +1931,7 @@ static struct kunit_case drm_format_helper_test_cases[] = {
+ 	KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_argb1555, convert_xrgb8888_gen_params),
+ 	KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_rgba5551, convert_xrgb8888_gen_params),
+ 	KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_rgb888, convert_xrgb8888_gen_params),
++	KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_bgr888, convert_xrgb8888_gen_params),
+ 	KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_argb8888, convert_xrgb8888_gen_params),
+ 	KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_xrgb2101010, convert_xrgb8888_gen_params),
+ 	KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_argb2101010, convert_xrgb8888_gen_params),
+diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig
+index f6889f649bc1..559a97bce12c 100644
+--- a/drivers/gpu/drm/tiny/Kconfig
++++ b/drivers/gpu/drm/tiny/Kconfig
+@@ -1,5 +1,17 @@
+ # SPDX-License-Identifier: GPL-2.0-only
+ 
++config DRM_APPLETBDRM
++	tristate "DRM support for Apple Touch Bars"
++	depends on DRM && USB && MMU
++	select DRM_KMS_HELPER
++	select DRM_GEM_SHMEM_HELPER
++	help
++	  Say Y here if you want support for the display of Touch Bars on x86
++	  MacBook Pros.
++
++	  To compile this driver as a module, choose M here: the
++	  module will be called appletbdrm.
++
+ config DRM_ARCPGU
+ 	tristate "ARC PGU"
+ 	depends on DRM && OF
+diff --git a/drivers/gpu/drm/tiny/Makefile b/drivers/gpu/drm/tiny/Makefile
+index 76dde89a044b..9a1b412e764a 100644
+--- a/drivers/gpu/drm/tiny/Makefile
++++ b/drivers/gpu/drm/tiny/Makefile
+@@ -1,5 +1,6 @@
+ # SPDX-License-Identifier: GPL-2.0-only
+ 
++obj-$(CONFIG_DRM_APPLETBDRM)		+= appletbdrm.o
+ obj-$(CONFIG_DRM_ARCPGU)		+= arcpgu.o
+ obj-$(CONFIG_DRM_BOCHS)			+= bochs.o
+ obj-$(CONFIG_DRM_CIRRUS_QEMU)		+= cirrus.o
+diff --git a/drivers/gpu/drm/tiny/appletbdrm.c b/drivers/gpu/drm/tiny/appletbdrm.c
+new file mode 100644
+index 000000000000..b9440ce0064e
+--- /dev/null
++++ b/drivers/gpu/drm/tiny/appletbdrm.c
+@@ -0,0 +1,624 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Apple Touch Bar DRM Driver
++ *
++ * Copyright (c) 2023 Kerem Karabay <kekrby@gmail.com>
++ */
++
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <asm/unaligned.h>
++
++#include <linux/usb.h>
++#include <linux/module.h>
++
++#include <drm/drm_drv.h>
++#include <drm/drm_fourcc.h>
++#include <drm/drm_probe_helper.h>
++#include <drm/drm_atomic_helper.h>
++#include <drm/drm_damage_helper.h>
++#include <drm/drm_format_helper.h>
++#include <drm/drm_gem_shmem_helper.h>
++#include <drm/drm_gem_atomic_helper.h>
++#include <drm/drm_simple_kms_helper.h>
++#include <drm/drm_gem_framebuffer_helper.h>
++
++#define _APPLETBDRM_FOURCC(s)		(((s)[0] << 24) | ((s)[1] << 16) | ((s)[2] << 8) | (s)[3])
++#define APPLETBDRM_FOURCC(s)		_APPLETBDRM_FOURCC(#s)
++
++#define APPLETBDRM_PIXEL_FORMAT		APPLETBDRM_FOURCC(RGBA) /* The actual format is BGR888 */
++#define APPLETBDRM_BITS_PER_PIXEL	24
++
++#define APPLETBDRM_MSG_CLEAR_DISPLAY	APPLETBDRM_FOURCC(CLRD)
++#define APPLETBDRM_MSG_GET_INFORMATION	APPLETBDRM_FOURCC(GINF)
++#define APPLETBDRM_MSG_UPDATE_COMPLETE	APPLETBDRM_FOURCC(UDCL)
++#define APPLETBDRM_MSG_SIGNAL_READINESS	APPLETBDRM_FOURCC(REDY)
++
++#define APPLETBDRM_BULK_MSG_TIMEOUT	1000
++
++#define drm_to_adev(_drm)		container_of(_drm, struct appletbdrm_device, drm)
++#define adev_to_udev(adev)		interface_to_usbdev(to_usb_interface(adev->dev))
++
++struct appletbdrm_device {
++	struct device *dev;
++
++	u8 in_ep;
++	u8 out_ep;
++
++	u32 width;
++	u32 height;
++
++	struct drm_device drm;
++	struct drm_display_mode mode;
++	struct drm_connector connector;
++	struct drm_simple_display_pipe pipe;
++
++	bool readiness_signal_received;
++};
++
++struct appletbdrm_request_header {
++	__le16 unk_00;
++	__le16 unk_02;
++	__le32 unk_04;
++	__le32 unk_08;
++	__le32 size;
++} __packed;
++
++struct appletbdrm_response_header {
++	u8 unk_00[16];
++	u32 msg;
++} __packed;
++
++struct appletbdrm_simple_request {
++	struct appletbdrm_request_header header;
++	u32 msg;
++	u8 unk_14[8];
++	__le32 size;
++} __packed;
++
++struct appletbdrm_information {
++	struct appletbdrm_response_header header;
++	u8 unk_14[12];
++	__le32 width;
++	__le32 height;
++	u8 bits_per_pixel;
++	__le32 bytes_per_row;
++	__le32 orientation;
++	__le32 bitmap_info;
++	u32 pixel_format;
++	__le32 width_inches;	/* floating point */
++	__le32 height_inches;	/* floating point */
++} __packed;
++
++struct appletbdrm_frame {
++	__le16 begin_x;
++	__le16 begin_y;
++	__le16 width;
++	__le16 height;
++	__le32 buf_size;
++	u8 buf[];
++} __packed;
++
++struct appletbdrm_fb_request_footer {
++	u8 unk_00[12];
++	__le32 unk_0c;
++	u8 unk_10[12];
++	__le32 unk_1c;
++	__le64 timestamp;
++	u8 unk_28[12];
++	__le32 unk_34;
++	u8 unk_38[20];
++	__le32 unk_4c;
++} __packed;
++
++struct appletbdrm_fb_request {
++	struct appletbdrm_request_header header;
++	__le16 unk_10;
++	u8 msg_id;
++	u8 unk_13[29];
++	/*
++	 * Contents of `data`:
++	 * - struct appletbdrm_frame frames[];
++	 * - struct appletbdrm_fb_request_footer footer;
++	 * - padding to make the total size a multiple of 16
++	 */
++	u8 data[];
++} __packed;
++
++struct appletbdrm_fb_request_response {
++	struct appletbdrm_response_header header;
++	u8 unk_14[12];
++	__le64 timestamp;
++} __packed;
++
++static int appletbdrm_send_request(struct appletbdrm_device *adev,
++				   struct appletbdrm_request_header *request, size_t size)
++{
++	struct usb_device *udev = adev_to_udev(adev);
++	struct drm_device *drm = &adev->drm;
++	int ret, actual_size;
++
++	ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, adev->out_ep),
++			   request, size, &actual_size, APPLETBDRM_BULK_MSG_TIMEOUT);
++	if (ret) {
++		drm_err(drm, "Failed to send message (%pe)\n", ERR_PTR(ret));
++		return ret;
++	}
++
++	if (actual_size != size) {	/* fewer/more bytes sent than requested */
++		drm_err(drm, "Actual size (%d) doesn't match expected size (%zu)\n",
++			actual_size, size);
++		return -EIO;
++	}
++
++	return ret;	/* ret is 0 here; non-zero was returned above */
++}
++
++static int appletbdrm_read_response(struct appletbdrm_device *adev,
++				    struct appletbdrm_response_header *response,
++				    size_t size, u32 expected_response)
++{
++	struct usb_device *udev = adev_to_udev(adev);
++	struct drm_device *drm = &adev->drm;
++	int ret, actual_size;
++
++retry:
++	ret = usb_bulk_msg(udev, usb_rcvbulkpipe(udev, adev->in_ep),
++			   response, size, &actual_size, APPLETBDRM_BULK_MSG_TIMEOUT);
++	if (ret) {
++		drm_err(drm, "Failed to read response (%pe)\n", ERR_PTR(ret));
++		return ret;
++	}
++
++	/*
++	 * The device responds to the first request sent in a particular
++	 * timeframe after the USB device configuration is set with a readiness
++	 * signal; read again in that case. A second such signal is an error.
++	 */
++	if (response->msg == APPLETBDRM_MSG_SIGNAL_READINESS) {
++		if (!adev->readiness_signal_received) {
++			adev->readiness_signal_received = true;
++			goto retry;
++		}
++
++		drm_err(drm, "Encountered unexpected readiness signal\n");
++		return -EIO;
++	}
++
++	if (actual_size != size) {	/* caller expects exactly size bytes */
++		drm_err(drm, "Actual size (%d) doesn't match expected size (%zu)\n",
++			actual_size, size);
++		return -EIO;
++	}
++
++	if (response->msg != expected_response) {
++		drm_err(drm, "Unexpected response from device (expected %p4ch found %p4ch)\n",
++			&expected_response, &response->msg);
++		return -EIO;
++	}
++
++	return 0;
++}
++
++static int appletbdrm_send_msg(struct appletbdrm_device *adev, u32 msg)
++{
++	struct appletbdrm_simple_request *request;
++	int ret;
++
++	request = kzalloc(sizeof(*request), GFP_KERNEL);
++	if (!request)
++		return -ENOMEM;
++
++	request->header.unk_00 = cpu_to_le16(2);
++	request->header.unk_02 = cpu_to_le16(0x1512);
++	request->header.size = cpu_to_le32(sizeof(*request) - sizeof(request->header));
++	request->msg = msg;
++	request->size = request->header.size;
++
++	ret = appletbdrm_send_request(adev, &request->header, sizeof(*request));
++
++	kfree(request);
++
++	return ret;
++}
++
++static int appletbdrm_clear_display(struct appletbdrm_device *adev)
++{
++	return appletbdrm_send_msg(adev, APPLETBDRM_MSG_CLEAR_DISPLAY);
++}
++
++static int appletbdrm_signal_readiness(struct appletbdrm_device *adev)
++{
++	return appletbdrm_send_msg(adev, APPLETBDRM_MSG_SIGNAL_READINESS);
++}
++
++static int appletbdrm_get_information(struct appletbdrm_device *adev)
++{
++	struct appletbdrm_information *info;
++	struct drm_device *drm = &adev->drm;
++	u8 bits_per_pixel;
++	u32 pixel_format;
++	int ret;
++
++	info = kzalloc(sizeof(*info), GFP_KERNEL);
++	if (!info)
++		return -ENOMEM;
++
++	ret = appletbdrm_send_msg(adev, APPLETBDRM_MSG_GET_INFORMATION);
++	if (ret)
++		goto free_info;	/* info is already allocated; do not leak it */
++
++	ret = appletbdrm_read_response(adev, &info->header, sizeof(*info),
++				       APPLETBDRM_MSG_GET_INFORMATION);
++	if (ret)
++		goto free_info;
++
++	bits_per_pixel = info->bits_per_pixel;
++	pixel_format = get_unaligned(&info->pixel_format);
++
++	adev->width = get_unaligned_le32(&info->width);
++	adev->height = get_unaligned_le32(&info->height);
++
++	if (bits_per_pixel != APPLETBDRM_BITS_PER_PIXEL) {
++		drm_err(drm, "Encountered unexpected bits per pixel value (%d)\n", bits_per_pixel);
++		ret = -EINVAL;
++		goto free_info;
++	}
++
++	if (pixel_format != APPLETBDRM_PIXEL_FORMAT) {
++		drm_err(drm, "Encountered unknown pixel format (%p4ch)\n", &pixel_format);
++		ret = -EINVAL;
++		goto free_info;
++	}
++
++free_info:
++	kfree(info);
++
++	return ret;
++}
++
++static u32 rect_size(struct drm_rect *rect)
++{
++	return drm_rect_width(rect) * drm_rect_height(rect) * (APPLETBDRM_BITS_PER_PIXEL / 8);
++}
++
++/* Send every damaged rectangle of the plane to the device in one framebuffer request. */
++static int appletbdrm_flush_damage(struct appletbdrm_device *adev,
++				   struct drm_plane_state *old_state,
++				   struct drm_plane_state *state)
++{
++	struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(state);
++	struct appletbdrm_fb_request_response *response;
++	struct appletbdrm_fb_request_footer *footer;
++	struct drm_atomic_helper_damage_iter iter;
++	struct drm_framebuffer *fb = state->fb;
++	struct appletbdrm_fb_request *request;
++	struct drm_device *drm = &adev->drm;
++	struct appletbdrm_frame *frame;
++	u64 timestamp = ktime_get_ns();
++	struct drm_rect damage;
++	size_t frames_size = 0;
++	size_t request_size;
++	int ret;
++
++	drm_atomic_helper_damage_iter_init(&iter, old_state, state);
++	drm_atomic_for_each_plane_damage(&iter, &damage)
++		frames_size += struct_size(frame, buf, rect_size(&damage));
++
++	if (!frames_size)
++		return 0;
++
++	request_size = ALIGN(sizeof(*request) + frames_size + sizeof(*footer), 16);
++
++	request = kzalloc(request_size, GFP_KERNEL);
++	if (!request)
++		return -ENOMEM;
++
++	response = kzalloc(sizeof(*response), GFP_KERNEL);
++	if (!response) {
++		ret = -ENOMEM;
++		goto free_request;
++	}
++
++	ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE);
++	if (ret) {
++		drm_err(drm, "Failed to start CPU framebuffer access (%pe)\n", ERR_PTR(ret));
++		goto free_response;
++	}
++
++	request->header.unk_00 = cpu_to_le16(2);
++	request->header.unk_02 = cpu_to_le16(0x12);
++	request->header.unk_04 = cpu_to_le32(9);
++	request->header.size = cpu_to_le32(request_size - sizeof(request->header));
++	request->unk_10 = cpu_to_le16(1);
++	request->msg_id = timestamp & 0xff;
++
++	frame = (struct appletbdrm_frame *)request->data;
++
++	drm_atomic_helper_damage_iter_init(&iter, old_state, state);
++	drm_atomic_for_each_plane_damage(&iter, &damage) {
++		struct iosys_map dst = IOSYS_MAP_INIT_VADDR(frame->buf);
++		u32 buf_size = rect_size(&damage);
++
++		/*
++		 * The coordinates need to be translated to the coordinate
++		 * system the device expects, see the comment in
++		 * appletbdrm_setup_mode_config
++		 */
++		frame->begin_x = cpu_to_le16(damage.y1);
++		frame->begin_y = cpu_to_le16(adev->height - damage.x2);
++		frame->width = cpu_to_le16(drm_rect_height(&damage));
++		frame->height = cpu_to_le16(drm_rect_width(&damage));
++		frame->buf_size = cpu_to_le32(buf_size);
++
++		ret = drm_fb_blit(&dst, NULL, DRM_FORMAT_BGR888,
++				  &shadow_plane_state->data[0], fb, &damage, &shadow_plane_state->fmtcnv_state);
++		if (ret) {
++			drm_err(drm, "Failed to copy damage clip (%pe)\n", ERR_PTR(ret));
++			goto end_fb_cpu_access;
++		}
++
++		frame = (void *)frame + struct_size(frame, buf, buf_size);
++	}
++
++	footer = (struct appletbdrm_fb_request_footer *)&request->data[frames_size];
++
++	footer->unk_0c = cpu_to_le32(0xfffe);
++	footer->unk_1c = cpu_to_le32(0x80001);
++	footer->unk_34 = cpu_to_le32(0x80002);
++	footer->unk_4c = cpu_to_le32(0xffff);
++	footer->timestamp = cpu_to_le64(timestamp);
++
++	ret = appletbdrm_send_request(adev, &request->header, request_size);
++	if (ret)
++		goto end_fb_cpu_access;
++
++	ret = appletbdrm_read_response(adev, &response->header, sizeof(*response),
++				       APPLETBDRM_MSG_UPDATE_COMPLETE);
++	if (ret)
++		goto end_fb_cpu_access;
++
++	if (response->timestamp != footer->timestamp) {
++		drm_err(drm, "Response timestamp (%llu) doesn't match request timestamp (%llu)\n",
++			le64_to_cpu(response->timestamp), timestamp);
++		ret = -EIO; /* a mismatched reply is a failure, not a success */
++	}
++
++end_fb_cpu_access:
++	drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
++free_response:
++	kfree(response);
++free_request:
++	kfree(request);
++
++	return ret;
++}
++
++static int appletbdrm_connector_helper_get_modes(struct drm_connector *connector)
++{
++	struct appletbdrm_device *adev = drm_to_adev(connector->dev);
++
++	return drm_connector_helper_get_modes_fixed(connector, &adev->mode);
++}
++
++static enum drm_mode_status appletbdrm_pipe_mode_valid(struct drm_simple_display_pipe *pipe,
++						       const struct drm_display_mode *mode)
++{
++	struct drm_crtc *crtc = &pipe->crtc;
++	struct appletbdrm_device *adev = drm_to_adev(crtc->dev);
++
++	return drm_crtc_helper_mode_valid_fixed(crtc, mode, &adev->mode);
++}
++
++/* Clear the display when the pipe is disabled, provided the device can be entered. */
++static void appletbdrm_pipe_disable(struct drm_simple_display_pipe *pipe)
++{
++	struct appletbdrm_device *adev = drm_to_adev(pipe->crtc.dev);
++	int cookie;
++
++	if (drm_dev_enter(&adev->drm, &cookie)) {
++		/* The result of the clear request is not checked */
++		appletbdrm_clear_display(adev);
++		drm_dev_exit(cookie);
++	}
++}
++
++/* Push the plane's damaged areas to the device on a plane update. */
++static void appletbdrm_pipe_update(struct drm_simple_display_pipe *pipe,
++				   struct drm_plane_state *old_state)
++{
++	struct appletbdrm_device *adev = drm_to_adev(pipe->crtc.dev);
++	int cookie;
++
++	/* Nothing is flushed while the CRTC is inactive or the device cannot be entered */
++	if (!pipe->crtc.state->active || !drm_dev_enter(&adev->drm, &cookie))
++		return;
++
++	appletbdrm_flush_damage(adev, old_state, pipe->plane.state);
++	drm_dev_exit(cookie);
++}
++
++static const u32 appletbdrm_formats[] = {
++	DRM_FORMAT_BGR888,
++	DRM_FORMAT_XRGB8888, /* emulated */
++};
++
++static const struct drm_mode_config_funcs appletbdrm_mode_config_funcs = {
++	.fb_create = drm_gem_fb_create_with_dirty,
++	.atomic_check = drm_atomic_helper_check,
++	.atomic_commit = drm_atomic_helper_commit,
++};
++
++static const struct drm_connector_funcs appletbdrm_connector_funcs = {
++	.reset = drm_atomic_helper_connector_reset,
++	.destroy = drm_connector_cleanup,
++	.fill_modes = drm_helper_probe_single_connector_modes,
++	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
++	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
++};
++
++static const struct drm_connector_helper_funcs appletbdrm_connector_helper_funcs = {
++	.get_modes = appletbdrm_connector_helper_get_modes,
++};
++
++static const struct drm_simple_display_pipe_funcs appletbdrm_pipe_funcs = {
++	DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS,
++	.update = appletbdrm_pipe_update,
++	.disable = appletbdrm_pipe_disable,
++	.mode_valid = appletbdrm_pipe_mode_valid,
++};
++
++DEFINE_DRM_GEM_FOPS(appletbdrm_drm_fops);
++
++static const struct drm_driver appletbdrm_drm_driver = {
++	DRM_GEM_SHMEM_DRIVER_OPS,
++	.name			= "appletbdrm",
++	.desc			= "Apple Touch Bar DRM Driver",
++	.date			= "20230910",
++	.major			= 1,
++	.minor			= 0,
++	.driver_features	= DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC,
++	.fops			= &appletbdrm_drm_fops,
++};
++
++static int appletbdrm_setup_mode_config(struct appletbdrm_device *adev)
++{
++	struct drm_connector *connector = &adev->connector;
++	struct drm_device *drm = &adev->drm;
++	struct device *dev = adev->dev;
++	int ret;
++
++	ret = drmm_mode_config_init(drm);
++	if (ret)
++		return dev_err_probe(dev, ret, "Failed to initialize mode configuration\n");
++
++	/*
++	 * The coordinate system used by the device is different from the
++	 * coordinate system of the framebuffer in that the x and y axes are
++	 * swapped, and that the y axis is inverted; so what the device reports
++	 * as the height is actually the width of the framebuffer and vice
++	 * versa
++	 */
++	drm->mode_config.min_width = 0;
++	drm->mode_config.min_height = 0;
++	drm->mode_config.max_width = max(adev->height, DRM_SHADOW_PLANE_MAX_WIDTH);
++	drm->mode_config.max_height = max(adev->width, DRM_SHADOW_PLANE_MAX_HEIGHT);
++	drm->mode_config.preferred_depth = APPLETBDRM_BITS_PER_PIXEL;
++	drm->mode_config.funcs = &appletbdrm_mode_config_funcs;
++
++	adev->mode = (struct drm_display_mode) {
++		DRM_MODE_INIT(60, adev->height, adev->width,
++			      DRM_MODE_RES_MM(adev->height, 218),
++			      DRM_MODE_RES_MM(adev->width, 218))
++	};
++
++	ret = drm_connector_init(drm, connector,
++				 &appletbdrm_connector_funcs, DRM_MODE_CONNECTOR_USB);
++	if (ret)
++		return dev_err_probe(dev, ret, "Failed to initialize connector\n");
++
++	drm_connector_helper_add(connector, &appletbdrm_connector_helper_funcs);
++
++	ret = drm_connector_set_panel_orientation(connector,
++						  DRM_MODE_PANEL_ORIENTATION_RIGHT_UP);
++	if (ret)
++		return dev_err_probe(dev, ret, "Failed to set panel orientation\n");
++
++	connector->display_info.non_desktop = true;
++	ret = drm_object_property_set_value(&connector->base,
++					    drm->mode_config.non_desktop_property, true);
++	if (ret)
++		return dev_err_probe(dev, ret, "Failed to set non-desktop property\n");
++
++	ret = drm_simple_display_pipe_init(drm, &adev->pipe, &appletbdrm_pipe_funcs,
++					   appletbdrm_formats, ARRAY_SIZE(appletbdrm_formats),
++					   NULL, &adev->connector);
++	if (ret)
++		return dev_err_probe(dev, ret, "Failed to initialize simple display pipe\n");
++
++	drm_plane_enable_fb_damage_clips(&adev->pipe.plane);
++
++	drm_mode_config_reset(drm);
++
++	ret = drm_dev_register(drm, 0);
++	if (ret)
++		return dev_err_probe(dev, ret, "Failed to register DRM device\n");
++
++	return 0;
++}
++
++static int appletbdrm_probe(struct usb_interface *intf,
++			    const struct usb_device_id *id)
++{
++	struct usb_endpoint_descriptor *bulk_in, *bulk_out;
++	struct device *dev = &intf->dev;
++	struct appletbdrm_device *adev;
++	int ret;
++
++	ret = usb_find_common_endpoints(intf->cur_altsetting, &bulk_in, &bulk_out, NULL, NULL);
++	if (ret)
++		return dev_err_probe(dev, ret, "Failed to find bulk endpoints\n");
++
++	adev = devm_drm_dev_alloc(dev, &appletbdrm_drm_driver, struct appletbdrm_device, drm);
++	if (IS_ERR(adev))
++		return PTR_ERR(adev);
++
++	adev->dev = dev;
++	adev->in_ep = bulk_in->bEndpointAddress;
++	adev->out_ep = bulk_out->bEndpointAddress;
++
++	usb_set_intfdata(intf, adev);
++
++	ret = appletbdrm_get_information(adev);
++	if (ret)
++		return dev_err_probe(dev, ret, "Failed to get display information\n");
++
++	ret = appletbdrm_signal_readiness(adev);
++	if (ret)
++		return dev_err_probe(dev, ret, "Failed to signal readiness\n");
++
++	ret = appletbdrm_clear_display(adev);
++	if (ret)
++		return dev_err_probe(dev, ret, "Failed to clear display\n");
++
++	return appletbdrm_setup_mode_config(adev);
++}
++
++static void appletbdrm_disconnect(struct usb_interface *intf)
++{
++	struct appletbdrm_device *adev = usb_get_intfdata(intf);
++	struct drm_device *drm = &adev->drm;
++
++	drm_dev_unplug(drm);
++	drm_atomic_helper_shutdown(drm);
++}
++
++static void appletbdrm_shutdown(struct usb_interface *intf)
++{
++	struct appletbdrm_device *adev = usb_get_intfdata(intf);
++
++	/*
++	 * The framebuffer needs to be cleared on shutdown since its content
++	 * persists across boots
++	 */
++	drm_atomic_helper_shutdown(&adev->drm);
++}
++
++static const struct usb_device_id appletbdrm_usb_id_table[] = {
++	{ USB_DEVICE_INTERFACE_CLASS(0x05ac, 0x8302, USB_CLASS_AUDIO_VIDEO) },
++	{}
++};
++MODULE_DEVICE_TABLE(usb, appletbdrm_usb_id_table);
++
++static struct usb_driver appletbdrm_usb_driver = {
++	.name		= "appletbdrm",
++	.probe		= appletbdrm_probe,
++	.disconnect	= appletbdrm_disconnect,
++	.shutdown	= appletbdrm_shutdown,
++	.id_table	= appletbdrm_usb_id_table,
++};
++module_usb_driver(appletbdrm_usb_driver);
++
++MODULE_AUTHOR("Kerem Karabay <kekrby@gmail.com>");
++MODULE_DESCRIPTION("Apple Touch Bar DRM Driver");
++MODULE_LICENSE("GPL");
+diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
+index 365e6ddbe90f..cf357cd3389d 100644
+--- a/drivers/gpu/vga/vga_switcheroo.c
++++ b/drivers/gpu/vga/vga_switcheroo.c
+@@ -438,12 +438,7 @@ find_active_client(struct list_head *head)
+ bool vga_switcheroo_client_probe_defer(struct pci_dev *pdev)
+ {
+ 	if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+-		/*
+-		 * apple-gmux is needed on pre-retina MacBook Pro
+-		 * to probe the panel if pdev is the inactive GPU.
+-		 */
+-		if (apple_gmux_present() && pdev != vga_default_device() &&
+-		    !vgasr_priv.handler_flags)
++		if (apple_gmux_present() && !vgasr_priv.handler_flags)
+ 			return true;
+ 	}
+ 
+diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
+index 08446c89eff6..35ef5d4ef068 100644
+--- a/drivers/hid/Kconfig
++++ b/drivers/hid/Kconfig
+@@ -148,6 +148,27 @@ config HID_APPLEIR
+ 
+ 	Say Y here if you want support for Apple infrared remote control.
+ 
++config HID_APPLETB_BL
++	tristate "Apple Touch Bar Backlight"
++	depends on BACKLIGHT_CLASS_DEVICE
++	help
++	  Say Y here if you want support for the backlight of Touch Bars on x86
++	  MacBook Pros.
++
++	  To compile this driver as a module, choose M here: the
++	  module will be called hid-appletb-bl.
++
++config HID_APPLETB_KBD
++	tristate "Apple Touch Bar Keyboard Mode"
++	depends on USB_HID
++	help
++	  Say Y here if you want support for the keyboard mode (escape,
++	  function, media and brightness keys) of Touch Bars on x86 MacBook
++	  Pros.
++
++	  To compile this driver as a module, choose M here: the
++	  module will be called hid-appletb-kbd.
++
+ config HID_ASUS
+ 	tristate "Asus"
+ 	depends on USB_HID
+@@ -723,6 +744,7 @@ config HID_MULTITOUCH
+ 	  Say Y here if you have one of the following devices:
+ 	  - 3M PCT touch screens
+ 	  - ActionStar dual touch panels
++	  - Touch Bars on x86 MacBook Pros
+ 	  - Atmel panels
+ 	  - Cando dual touch panels
+ 	  - Chunghwa panels
+diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
+index e40f1ddebbb7..d903c9a2629d 100644
+--- a/drivers/hid/Makefile
++++ b/drivers/hid/Makefile
+@@ -29,6 +29,8 @@ obj-$(CONFIG_HID_ALPS)		+= hid-alps.o
+ obj-$(CONFIG_HID_ACRUX)		+= hid-axff.o
+ obj-$(CONFIG_HID_APPLE)		+= hid-apple.o
+ obj-$(CONFIG_HID_APPLEIR)	+= hid-appleir.o
++obj-$(CONFIG_HID_APPLETB_BL)	+= hid-appletb-bl.o
++obj-$(CONFIG_HID_APPLETB_KBD)	+= hid-appletb-kbd.o
+ obj-$(CONFIG_HID_CREATIVE_SB0540)	+= hid-creative-sb0540.o
+ obj-$(CONFIG_HID_ASUS)		+= hid-asus.o
+ obj-$(CONFIG_HID_AUREAL)	+= hid-aureal.o
+diff --git a/drivers/hid/hid-appletb-bl.c b/drivers/hid/hid-appletb-bl.c
+new file mode 100644
+index 000000000000..819157686e59
+--- /dev/null
++++ b/drivers/hid/hid-appletb-bl.c
+@@ -0,0 +1,207 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Apple Touch Bar Backlight Driver
++ *
++ * Copyright (c) 2017-2018 Ronald Tschalär
++ * Copyright (c) 2022-2023 Kerem Karabay <kekrby@gmail.com>
++ */
++
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <linux/hid.h>
++#include <linux/backlight.h>
++#include <linux/device.h>
++
++#include "hid-ids.h"
++
++#define APPLETB_BL_ON			1
++#define APPLETB_BL_DIM			3
++#define APPLETB_BL_OFF			4
++
++#define HID_UP_APPLEVENDOR_TB_BL	0xff120000
++
++#define HID_VD_APPLE_TB_BRIGHTNESS	0xff120001
++#define HID_USAGE_AUX1			0xff120020
++#define HID_USAGE_BRIGHTNESS		0xff120021
++
++static int appletb_bl_def_brightness = 2;
++module_param_named(brightness, appletb_bl_def_brightness, int, 0444);
++MODULE_PARM_DESC(brightness, "Default brightness:\n"
++			 "    0 - Touchbar is off\n"
++			 "    1 - Dim brightness\n"
++			 "    [2] - Full brightness");
++
++struct appletb_bl {
++	struct hid_field *aux1_field, *brightness_field;
++	struct backlight_device *bdev;
++
++	bool full_on;
++};
++
++static const u8 appletb_bl_brightness_map[] = {
++	APPLETB_BL_OFF,
++	APPLETB_BL_DIM,
++	APPLETB_BL_ON,
++};
++
++static int appletb_bl_set_brightness(struct appletb_bl *bl, u8 brightness)
++{
++	struct hid_report *report = bl->brightness_field->report;
++	struct hid_device *hdev = report->device;
++	int ret;
++
++	ret = hid_set_field(bl->aux1_field, 0, 1);
++	if (ret) {
++		hid_err(hdev, "Failed to set auxiliary field (%pe)\n", ERR_PTR(ret));
++		return ret;
++	}
++
++	ret = hid_set_field(bl->brightness_field, 0, brightness);
++	if (ret) {
++		hid_err(hdev, "Failed to set brightness field (%pe)\n", ERR_PTR(ret));
++		return ret;
++	}
++
++	if (!bl->full_on) {
++		ret = hid_hw_power(hdev, PM_HINT_FULLON);
++		if (ret < 0) {
++			hid_err(hdev, "Device didn't power on (%pe)\n", ERR_PTR(ret));
++			return ret;
++		}
++
++		bl->full_on = true;
++	}
++
++	hid_hw_request(hdev, report, HID_REQ_SET_REPORT);
++
++	if (brightness == APPLETB_BL_OFF) {
++		hid_hw_power(hdev, PM_HINT_NORMAL);
++		bl->full_on = false;
++	}
++
++	return 0;
++}
++
++static int appletb_bl_update_status(struct backlight_device *bdev)
++{
++	struct appletb_bl *bl = bl_get_data(bdev);
++	u8 brightness;
++
++	if (backlight_is_blank(bdev))
++		brightness = APPLETB_BL_OFF;
++	else
++		brightness = appletb_bl_brightness_map[backlight_get_brightness(bdev)];
++
++	return appletb_bl_set_brightness(bl, brightness);
++}
++
++static const struct backlight_ops appletb_bl_backlight_ops = {
++	.options = BL_CORE_SUSPENDRESUME,
++	.update_status = appletb_bl_update_status,
++};
++
++/* Bind to the Touch Bar backlight HID interface and register it as a backlight device. */
++static int appletb_bl_probe(struct hid_device *hdev, const struct hid_device_id *id)
++{
++	struct hid_field *aux1_field, *brightness_field;
++	struct backlight_properties bl_props = { 0 };
++	struct device *dev = &hdev->dev;
++	struct appletb_bl *bl;
++	int ret;
++
++	ret = hid_parse(hdev);
++	if (ret)
++		return dev_err_probe(dev, ret, "HID parse failed\n");
++
++	aux1_field = hid_find_field(hdev, HID_FEATURE_REPORT,
++				    HID_VD_APPLE_TB_BRIGHTNESS, HID_USAGE_AUX1);
++
++	brightness_field = hid_find_field(hdev, HID_FEATURE_REPORT,
++					  HID_VD_APPLE_TB_BRIGHTNESS, HID_USAGE_BRIGHTNESS);
++
++	if (!aux1_field || !brightness_field)
++		return -ENODEV;
++
++	if (aux1_field->report != brightness_field->report)
++		return dev_err_probe(dev, -ENODEV, "Encountered unexpected report structure\n");
++
++	bl = devm_kzalloc(dev, sizeof(*bl), GFP_KERNEL);
++	if (!bl)
++		return -ENOMEM;
++
++	ret = hid_hw_start(hdev, HID_CONNECT_DRIVER);
++	if (ret)
++		return dev_err_probe(dev, ret, "HID hardware start failed\n");
++
++	ret = hid_hw_open(hdev);
++	if (ret) {
++		dev_err_probe(dev, ret, "HID hardware open failed\n");
++		goto stop_hw;
++	}
++
++	bl->aux1_field = aux1_field;
++	bl->brightness_field = brightness_field;
++
++	/* Module parameter values other than 0 and 1 select full brightness */
++	bl_props.type = BACKLIGHT_RAW;
++	bl_props.max_brightness = ARRAY_SIZE(appletb_bl_brightness_map) - 1;
++	bl_props.brightness = bl_props.max_brightness;
++	if (appletb_bl_def_brightness >= 0 && appletb_bl_def_brightness < bl_props.max_brightness)
++		bl_props.brightness = appletb_bl_def_brightness;
++
++	/* Program the hardware with the same level the registered properties carry */
++	ret = appletb_bl_set_brightness(bl, appletb_bl_brightness_map[bl_props.brightness]);
++	if (ret) {
++		dev_err_probe(dev, ret, "Failed to set default touch bar brightness\n");
++		goto close_hw;
++	}
++
++	bl->bdev = devm_backlight_device_register(dev, "appletb_backlight", dev, bl,
++						  &appletb_bl_backlight_ops, &bl_props);
++	if (IS_ERR(bl->bdev)) {
++		ret = PTR_ERR(bl->bdev);
++		dev_err_probe(dev, ret, "Failed to register backlight device\n");
++		goto close_hw;
++	}
++
++	hid_set_drvdata(hdev, bl);
++
++	return 0;
++
++close_hw:
++	hid_hw_close(hdev);
++stop_hw:
++	hid_hw_stop(hdev);
++
++	return ret;
++}
++
++static void appletb_bl_remove(struct hid_device *hdev)
++{
++	struct appletb_bl *bl = hid_get_drvdata(hdev);
++
++	appletb_bl_set_brightness(bl, APPLETB_BL_OFF);
++
++	hid_hw_close(hdev);
++	hid_hw_stop(hdev);
++}
++
++static const struct hid_device_id appletb_bl_hid_ids[] = {
++	/* MacBook Pro's 2018, 2019, with T2 chip: iBridge DFR Brightness */
++	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) },
++	{ }
++};
++MODULE_DEVICE_TABLE(hid, appletb_bl_hid_ids);
++
++static struct hid_driver appletb_bl_hid_driver = {
++	.name = "hid-appletb-bl",
++	.id_table = appletb_bl_hid_ids,
++	.probe = appletb_bl_probe,
++	.remove = appletb_bl_remove,
++};
++module_hid_driver(appletb_bl_hid_driver);
++
++MODULE_AUTHOR("Ronald Tschalär");
++MODULE_AUTHOR("Kerem Karabay <kekrby@gmail.com>");
++MODULE_DESCRIPTION("MacBookPro Touch Bar Backlight Driver");
++MODULE_LICENSE("GPL");
+diff --git a/drivers/hid/hid-appletb-kbd.c b/drivers/hid/hid-appletb-kbd.c
+new file mode 100644
+index 000000000000..c26b7a19a5e4
+--- /dev/null
++++ b/drivers/hid/hid-appletb-kbd.c
+@@ -0,0 +1,501 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Apple Touch Bar Keyboard Mode Driver
++ *
++ * Copyright (c) 2017-2018 Ronald Tschalär
++ * Copyright (c) 2022-2023 Kerem Karabay <kekrby@gmail.com>
++ * Copyright (c) 2024 Aditya Garg <gargaditya08@live.com>
++ */
++
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <linux/hid.h>
++#include <linux/usb.h>
++#include <linux/input.h>
++#include <linux/sysfs.h>
++#include <linux/bitops.h>
++#include <linux/module.h>
++#include <linux/string.h>
++#include <linux/backlight.h>
++#include <linux/timer.h>
++#include <linux/input/sparse-keymap.h>
++
++#include "hid-ids.h"
++
++#define APPLETB_KBD_MODE_ESC	0
++#define APPLETB_KBD_MODE_FN	1
++#define APPLETB_KBD_MODE_SPCL	2
++#define APPLETB_KBD_MODE_OFF	3
++#define APPLETB_KBD_MODE_MAX	APPLETB_KBD_MODE_OFF
++
++#define APPLETB_DEVID_KEYBOARD	1
++#define APPLETB_DEVID_TRACKPAD	2
++
++#define HID_USAGE_MODE		0x00ff0004
++
++static int appletb_tb_def_mode = APPLETB_KBD_MODE_SPCL;
++module_param_named(mode, appletb_tb_def_mode, int, 0444);
++MODULE_PARM_DESC(mode, "Default touchbar mode:\n"
++			 "    0 - escape key only\n"
++			 "    1 - function-keys\n"
++			 "    [2] - special keys");
++
++static bool appletb_tb_fn_toggle = true;
++module_param_named(fntoggle, appletb_tb_fn_toggle, bool, 0644);
++MODULE_PARM_DESC(fntoggle, "Switch between Fn and media controls on pressing Fn key");
++
++static bool appletb_tb_autodim = true;
++module_param_named(autodim, appletb_tb_autodim, bool, 0644);
++MODULE_PARM_DESC(autodim, "Automatically dim touchbar if left idle");
++
++static int appletb_tb_dim_timeout = 60;
++module_param_named(dim_timeout, appletb_tb_dim_timeout, int, 0644);
++MODULE_PARM_DESC(dim_timeout, "Dim timeout in sec");
++
++static int appletb_tb_idle_timeout = 15;
++module_param_named(idle_timeout, appletb_tb_idle_timeout, int, 0644);
++MODULE_PARM_DESC(idle_timeout, "Idle timeout in sec");
++
++struct appletb_kbd {
++	struct hid_field *mode_field;
++	struct input_handler inp_handler;
++	struct input_handle kbd_handle;
++	struct input_handle tpd_handle;
++	struct backlight_device *backlight_dev;
++	struct timer_list inactivity_timer;
++	bool has_dimmed;
++	bool has_turned_off;
++	u8 saved_mode;
++	u8 current_mode;
++};
++
++static const struct key_entry appletb_kbd_keymap[] = {
++	{ KE_KEY, KEY_ESC, { KEY_ESC } },
++	{ KE_KEY, KEY_F1,  { KEY_BRIGHTNESSDOWN } },
++	{ KE_KEY, KEY_F2,  { KEY_BRIGHTNESSUP } },
++	{ KE_KEY, KEY_F3,  { KEY_RESERVED } },
++	{ KE_KEY, KEY_F4,  { KEY_RESERVED } },
++	{ KE_KEY, KEY_F5,  { KEY_KBDILLUMDOWN } },
++	{ KE_KEY, KEY_F6,  { KEY_KBDILLUMUP } },
++	{ KE_KEY, KEY_F7,  { KEY_PREVIOUSSONG } },
++	{ KE_KEY, KEY_F8,  { KEY_PLAYPAUSE } },
++	{ KE_KEY, KEY_F9,  { KEY_NEXTSONG } },
++	{ KE_KEY, KEY_F10, { KEY_MUTE } },
++	{ KE_KEY, KEY_F11, { KEY_VOLUMEDOWN } },
++	{ KE_KEY, KEY_F12, { KEY_VOLUMEUP } },
++	{ KE_END, 0 }
++};
++
++static int appletb_kbd_set_mode(struct appletb_kbd *kbd, u8 mode)
++{
++	struct hid_report *report = kbd->mode_field->report;
++	struct hid_device *hdev = report->device;
++	int ret;
++
++	ret = hid_hw_power(hdev, PM_HINT_FULLON);
++	if (ret) {
++		hid_err(hdev, "Device didn't resume (%pe)\n", ERR_PTR(ret));
++		return ret;
++	}
++
++	ret = hid_set_field(kbd->mode_field, 0, mode);
++	if (ret) {
++		hid_err(hdev, "Failed to set mode field to %u (%pe)\n", mode, ERR_PTR(ret));
++		goto power_normal;
++	}
++
++	hid_hw_request(hdev, report, HID_REQ_SET_REPORT);
++
++	kbd->current_mode = mode;
++
++power_normal:
++	hid_hw_power(hdev, PM_HINT_NORMAL);
++
++	return ret;
++}
++
++static ssize_t mode_show(struct device *dev,
++			 struct device_attribute *attr, char *buf)
++{
++	struct appletb_kbd *kbd = dev_get_drvdata(dev);
++
++	return sysfs_emit(buf, "%d\n", kbd->current_mode);
++}
++
++static ssize_t mode_store(struct device *dev,
++			  struct device_attribute *attr,
++			  const char *buf, size_t size)
++{
++	struct appletb_kbd *kbd = dev_get_drvdata(dev);
++	u8 mode;
++	int ret;
++
++	ret = kstrtou8(buf, 0, &mode);
++	if (ret)
++		return ret;
++
++	if (mode > APPLETB_KBD_MODE_MAX)
++		return -EINVAL;
++
++	ret = appletb_kbd_set_mode(kbd, mode);
++
++	return ret < 0 ? ret : size;
++}
++static DEVICE_ATTR_RW(mode);
++
++static struct attribute *appletb_kbd_attrs[] = { /* file-local: keep it out of the global namespace */
++	&dev_attr_mode.attr,
++	NULL
++};
++ATTRIBUTE_GROUPS(appletb_kbd);
++
++static int appletb_tb_key_to_slot(unsigned int code)
++{
++	switch (code) {
++	case KEY_ESC:
++		return 0;
++	case KEY_F1 ... KEY_F10:
++		return code - KEY_F1 + 1;
++	case KEY_F11 ... KEY_F12:
++		return code - KEY_F11 + 11;
++
++	default:
++		return -EINVAL;
++	}
++}
++
++static void appletb_inactivity_timer(struct timer_list *t)
++{
++	struct appletb_kbd *kbd = from_timer(kbd, t, inactivity_timer);
++
++	if (kbd->backlight_dev && appletb_tb_autodim) {
++		if (!kbd->has_dimmed) {
++			backlight_device_set_brightness(kbd->backlight_dev, 1);
++			kbd->has_dimmed = true;
++			mod_timer(&kbd->inactivity_timer, jiffies + msecs_to_jiffies(appletb_tb_idle_timeout * 1000));
++		} else if (!kbd->has_turned_off) {
++			backlight_device_set_brightness(kbd->backlight_dev, 0);
++			kbd->has_turned_off = true;
++		}
++	}
++}
++
++static void reset_inactivity_timer(struct appletb_kbd *kbd)
++{
++	/* Restore full brightness if the bar was dimmed or turned off */
++	if (kbd->backlight_dev && (kbd->has_dimmed || kbd->has_turned_off))
++		backlight_device_set_brightness(kbd->backlight_dev, 2);
++	kbd->has_dimmed = false;
++	kbd->has_turned_off = false;
++	/* Re-arm on every event, not only after dimming, so activity postpones the dim */
++	if (appletb_tb_autodim)
++		mod_timer(&kbd->inactivity_timer, jiffies + msecs_to_jiffies(appletb_tb_dim_timeout * 1000));
++}
++
++static int appletb_kbd_hid_event(struct hid_device *hdev, struct hid_field *field,
++				      struct hid_usage *usage, __s32 value)
++{
++	struct appletb_kbd *kbd = hid_get_drvdata(hdev);
++	struct key_entry *translation;
++	struct input_dev *input;
++	int slot;
++
++	if ((usage->hid & HID_USAGE_PAGE) != HID_UP_KEYBOARD || usage->type != EV_KEY)
++		return 0;
++
++	input = field->hidinput->input;
++
++	/*
++	 * Skip non-touch-bar keys.
++	 *
++	 * Either the touch bar itself or usbhid generate a slew of key-down
++	 * events for all the meta keys. None of which we're at all interested
++	 * in.
++	 */
++	slot = appletb_tb_key_to_slot(usage->code);
++	if (slot < 0)
++		return 0;
++
++	reset_inactivity_timer(kbd);
++
++	translation = sparse_keymap_entry_from_scancode(input, usage->code);
++
++	if (translation && kbd->current_mode == APPLETB_KBD_MODE_SPCL) {
++		input_event(input, usage->type, translation->keycode, value);
++
++		return 1;
++	}
++
++	return kbd->current_mode == APPLETB_KBD_MODE_OFF;
++}
++
++static void appletb_kbd_inp_event(struct input_handle *handle, unsigned int type,
++			      unsigned int code, int value)
++{
++	struct appletb_kbd *kbd = handle->private;
++
++	reset_inactivity_timer(kbd);
++
++	if (type == EV_KEY && code == KEY_FN && appletb_tb_fn_toggle) {
++		if (value == 1) {
++			kbd->saved_mode = kbd->current_mode;
++			if (kbd->current_mode == APPLETB_KBD_MODE_SPCL)
++				appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_FN);
++			else if (kbd->current_mode == APPLETB_KBD_MODE_FN)
++				appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_SPCL);
++		} else if (value == 0) {
++			if (kbd->saved_mode != kbd->current_mode)
++				appletb_kbd_set_mode(kbd, kbd->saved_mode);
++		}
++	}
++}
++
++static int appletb_kbd_inp_connect(struct input_handler *handler,
++			       struct input_dev *dev,
++			       const struct input_device_id *id)
++{
++	struct appletb_kbd *kbd = handler->private;
++	struct input_handle *handle;
++	int rc;
++
++	if (id->driver_info == APPLETB_DEVID_KEYBOARD) {
++		handle = &kbd->kbd_handle;
++		handle->name = "tbkbd";
++	} else if (id->driver_info == APPLETB_DEVID_TRACKPAD) {
++		handle = &kbd->tpd_handle;
++		handle->name = "tbtpd";
++	} else {
++		return -ENOENT;
++	}
++
++	if (handle->dev)
++		return -EEXIST;
++
++	handle->open = 0;
++	handle->dev = input_get_device(dev);
++	handle->handler = handler;
++	handle->private = kbd;
++
++	rc = input_register_handle(handle);
++	if (rc)
++		goto err_free_dev;
++
++	rc = input_open_device(handle);
++	if (rc)
++		goto err_unregister_handle;
++
++	return 0;
++
++ err_unregister_handle:
++	input_unregister_handle(handle);
++ err_free_dev:
++	input_put_device(handle->dev);
++	handle->dev = NULL;
++	return rc;
++}
++
++static void appletb_kbd_inp_disconnect(struct input_handle *handle)
++{
++	input_close_device(handle);
++	input_unregister_handle(handle);
++
++	input_put_device(handle->dev);
++	handle->dev = NULL;
++}
++
++static int appletb_kbd_input_configured(struct hid_device *hdev, struct hid_input *hidinput)
++{
++	struct input_dev *input = hidinput->input;
++	int idx, ret;
++
++	/*
++	 * Clear various input capabilities that are blindly set by the hid
++	 * driver (usbkbd.c)
++	 */
++	memset(input->evbit, 0, sizeof(input->evbit));
++	memset(input->keybit, 0, sizeof(input->keybit));
++	memset(input->ledbit, 0, sizeof(input->ledbit));
++	__set_bit(EV_REP, input->evbit);
++	/* The event callback looks keys up in this keymap, so a failed setup is fatal */
++	ret = sparse_keymap_setup(input, appletb_kbd_keymap, NULL);
++	if (ret)
++		return ret;
++
++	for (idx = 0; appletb_kbd_keymap[idx].type != KE_END; idx++)
++		input_set_capability(input, EV_KEY, appletb_kbd_keymap[idx].code);
++
++	return 0;
++}
++
++static const struct input_device_id appletb_kbd_input_devices[] = {
++	{
++		.flags = INPUT_DEVICE_ID_MATCH_BUS |
++			INPUT_DEVICE_ID_MATCH_VENDOR |
++			INPUT_DEVICE_ID_MATCH_KEYBIT,
++		.bustype = BUS_USB,
++		.vendor = USB_VENDOR_ID_APPLE,
++		.keybit = { [BIT_WORD(KEY_FN)] = BIT_MASK(KEY_FN) },
++		.driver_info = APPLETB_DEVID_KEYBOARD,
++	},
++	{
++		.flags = INPUT_DEVICE_ID_MATCH_BUS |
++			INPUT_DEVICE_ID_MATCH_VENDOR |
++			INPUT_DEVICE_ID_MATCH_KEYBIT,
++		.bustype = BUS_USB,
++		.vendor = USB_VENDOR_ID_APPLE,
++		.keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) },
++		.driver_info = APPLETB_DEVID_TRACKPAD,
++	},
++	{ }
++};
++
++static bool appletb_kbd_match_internal_device(struct input_handler *handler,
++					  struct input_dev *inp_dev)
++{
++	struct device *dev = &inp_dev->dev;
++
++	/* Walk up to the owning USB device; in kernel: dev && !is_usb_device(dev) */
++	while (dev && !(dev->type && dev->type->name &&
++			!strcmp(dev->type->name, "usb_device")))
++		dev = dev->parent;
++
++	/*
++	 * Apple labels its internal keyboards and trackpads as such, so match
++	 * on the product name instead of an ever expanding product-id list.
++	 * The product string may be NULL, which is never a match.
++	 */
++	if (dev && to_usb_device(dev)->product)
++		return !!strstr(to_usb_device(dev)->product, "Internal Keyboard");
++
++	return false;
++}
++
++static int appletb_kbd_probe(struct hid_device *hdev, const struct hid_device_id *id)
++{
++	struct device *dev = &hdev->dev;
++	struct appletb_kbd *kbd;
++	int ret;
++
++	ret = hid_parse(hdev);
++	if (ret)
++		return dev_err_probe(dev, ret, "HID parse failed\n");
++
++	kbd = devm_kzalloc(dev, sizeof(*kbd), GFP_KERNEL);
++	if (!kbd)
++		return -ENOMEM;
++
++	kbd->mode_field = hid_find_field(hdev, HID_OUTPUT_REPORT, HID_GD_KEYBOARD, HID_USAGE_MODE);
++	if (!kbd->mode_field)
++		return -ENODEV;
++
++	/* The event callback reads drvdata and the timer, so both must exist before start */
++	timer_setup(&kbd->inactivity_timer, appletb_inactivity_timer, 0);
++	hid_set_drvdata(hdev, kbd);
++
++	ret = hid_hw_start(hdev, HID_CONNECT_HIDINPUT);
++	if (ret)
++		return dev_err_probe(dev, ret, "HID hw start failed\n");
++
++	ret = hid_hw_open(hdev);
++	if (ret) {
++		dev_err_probe(dev, ret, "HID hw open failed\n");
++		goto stop_hw;
++	}
++
++	kbd->backlight_dev = backlight_device_get_by_name("appletb_backlight");
++	if (!kbd->backlight_dev) /* not fatal: the timer callback checks for it */
++		dev_err_probe(dev, -ENODEV, "Failed to get backlight device\n");
++	mod_timer(&kbd->inactivity_timer, jiffies + msecs_to_jiffies(appletb_tb_dim_timeout * 1000));
++
++	ret = appletb_kbd_set_mode(kbd, appletb_tb_def_mode);
++	if (ret) {
++		dev_err_probe(dev, ret, "Failed to set touchbar mode\n");
++		goto close_hw;
++	}
++
++	kbd->inp_handler.event = appletb_kbd_inp_event;
++	kbd->inp_handler.connect = appletb_kbd_inp_connect;
++	kbd->inp_handler.disconnect = appletb_kbd_inp_disconnect;
++	kbd->inp_handler.name = "appletb";
++	kbd->inp_handler.id_table = appletb_kbd_input_devices;
++	kbd->inp_handler.match = appletb_kbd_match_internal_device;
++	kbd->inp_handler.private = kbd;
++
++	ret = input_register_handler(&kbd->inp_handler); /* last step: never needs unwinding */
++	if (ret) {
++		dev_err_probe(dev, ret, "Unable to register keyboard handler\n");
++		goto close_hw;
++	}
++
++	return 0;
++
++close_hw:
++	hid_hw_close(hdev);
++stop_hw:
++	hid_hw_stop(hdev);
++	del_timer_sync(&kbd->inactivity_timer); /* kbd is devm-freed after a failed probe */
++	if (kbd->backlight_dev)
++		put_device(&kbd->backlight_dev->dev);
++	return ret;
++}
++
++static void appletb_kbd_remove(struct hid_device *hdev)
++{
++	struct appletb_kbd *kbd = hid_get_drvdata(hdev);
++
++	appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF); /* leave the Touch Bar off */
++	input_unregister_handler(&kbd->inp_handler);
++	del_timer_sync(&kbd->inactivity_timer);
++	if (kbd->backlight_dev) /* drop the ref from backlight_device_get_by_name() */
++		put_device(&kbd->backlight_dev->dev);
++	hid_hw_close(hdev);
++	hid_hw_stop(hdev);
++}
++
++#ifdef CONFIG_PM
++static int appletb_kbd_suspend(struct hid_device *hdev, pm_message_t msg)
++{
++	struct appletb_kbd *kbd = hid_get_drvdata(hdev);
++
++	/* Save the active mode for appletb_kbd_reset_resume(), then switch off. */
++	kbd->saved_mode = kbd->current_mode;
++	appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF);
++	return 0; /* a failed mode switch does not fail the suspend */
++}
++
++static int appletb_kbd_reset_resume(struct hid_device *hdev)
++{
++	struct appletb_kbd *kbd = hid_get_drvdata(hdev);
++
++	/* Re-apply the mode recorded by appletb_kbd_suspend(); errors are ignored. */
++	appletb_kbd_set_mode(kbd, kbd->saved_mode);
++	return 0;
++}
++#endif
++
++static const struct hid_device_id appletb_kbd_hid_ids[] = {
++	/* MacBook Pro's 2018, 2019, with T2 chip: iBridge Display */
++	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) },
++	{ } /* terminating entry */
++};
++MODULE_DEVICE_TABLE(hid, appletb_kbd_hid_ids);
++
++static struct hid_driver appletb_kbd_hid_driver = {
++	.name = "hid-appletb-kbd",
++	.id_table = appletb_kbd_hid_ids,
++	.probe = appletb_kbd_probe,
++	.remove = appletb_kbd_remove,
++	.event = appletb_kbd_hid_event,
++	.input_configured = appletb_kbd_input_configured,
++#ifdef CONFIG_PM
++	.suspend = appletb_kbd_suspend,
++	.reset_resume = appletb_kbd_reset_resume, /* no plain .resume callback is set */
++#endif
++	.driver.dev_groups = appletb_kbd_groups, /* attribute groups for bound devices */
++};
++module_hid_driver(appletb_kbd_hid_driver);
++
++MODULE_AUTHOR("Ronald Tschalär");
++MODULE_AUTHOR("Kerem Karabay <kekrby@gmail.com>");
++MODULE_DESCRIPTION("MacBookPro Touch Bar Keyboard Mode Driver");
++MODULE_LICENSE("GPL");
+diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
+index 988d0acbdf04..caeba5487b69 100644
+--- a/drivers/hid/hid-core.c
++++ b/drivers/hid/hid-core.c
+@@ -1912,6 +1912,31 @@ int hid_set_field(struct hid_field *field, unsigned offset, __s32 value)
+ }
+ EXPORT_SYMBOL_GPL(hid_set_field);
+ 
++/* Return the first field carrying @usage in a @report_type report of @application, or NULL. */
++struct hid_field *hid_find_field(struct hid_device *hdev, unsigned int report_type,
++				 unsigned int application, unsigned int usage)
++{
++	struct hid_report_enum *re = &hdev->report_enum[report_type];
++	struct hid_report *report;
++	int f, u;
++
++	list_for_each_entry(report, &re->report_list, list) {
++		if (report->application != application)
++			continue;
++
++		for (f = 0; f < report->maxfield; f++) {
++			struct hid_field *field = report->field[f];
++
++			for (u = 0; u < field->maxusage; u++)
++				if (field->usage[u].hid == usage)
++					return field;
++		}
++	}
++
++	return NULL;
++}
++EXPORT_SYMBOL_GPL(hid_find_field);
++
+ static struct hid_report *hid_get_report(struct hid_report_enum *report_enum,
+ 		const u8 *data)
+ {
+diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c
+index 6e4ebc349e45..4e79fafeeafa 100644
+--- a/drivers/hid/hid-google-hammer.c
++++ b/drivers/hid/hid-google-hammer.c
+@@ -418,38 +418,15 @@ static int hammer_event(struct hid_device *hid, struct hid_field *field,
+ 	return 0;
+ }
+ 
+-static bool hammer_has_usage(struct hid_device *hdev, unsigned int report_type,
+-			unsigned application, unsigned usage)
+-{
+-	struct hid_report_enum *re = &hdev->report_enum[report_type];
+-	struct hid_report *report;
+-	int i, j;
+-
+-	list_for_each_entry(report, &re->report_list, list) {
+-		if (report->application != application)
+-			continue;
+-
+-		for (i = 0; i < report->maxfield; i++) {
+-			struct hid_field *field = report->field[i];
+-
+-			for (j = 0; j < field->maxusage; j++)
+-				if (field->usage[j].hid == usage)
+-					return true;
+-		}
+-	}
+-
+-	return false;
+-}
+-
+ static bool hammer_has_folded_event(struct hid_device *hdev)
+ {
+-	return hammer_has_usage(hdev, HID_INPUT_REPORT,
++	return !!hid_find_field(hdev, HID_INPUT_REPORT,
+ 				HID_GD_KEYBOARD, HID_USAGE_KBD_FOLDED);
+ }
+ 
+ static bool hammer_has_backlight_control(struct hid_device *hdev)
+ {
+-	return hammer_has_usage(hdev, HID_OUTPUT_REPORT,
++	return !!hid_find_field(hdev, HID_OUTPUT_REPORT,
+ 				HID_GD_KEYBOARD, HID_AD_BRIGHTNESS);
+ }
+ 
+diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
+index 847462650549..6c4cb3883955 100644
+--- a/drivers/hid/hid-multitouch.c
++++ b/drivers/hid/hid-multitouch.c
+@@ -72,6 +72,7 @@ MODULE_LICENSE("GPL");
+ #define MT_QUIRK_FORCE_MULTI_INPUT	BIT(20)
+ #define MT_QUIRK_DISABLE_WAKEUP		BIT(21)
+ #define MT_QUIRK_ORIENTATION_INVERT	BIT(22)
++#define MT_QUIRK_TOUCH_IS_TIPSTATE	BIT(23)
+ 
+ #define MT_INPUTMODE_TOUCHSCREEN	0x02
+ #define MT_INPUTMODE_TOUCHPAD		0x03
+@@ -145,6 +146,7 @@ struct mt_class {
+ 	__s32 sn_height;	/* Signal/noise ratio for height events */
+ 	__s32 sn_pressure;	/* Signal/noise ratio for pressure events */
+ 	__u8 maxcontacts;
++	bool is_direct;	/* true for touchscreens */
+ 	bool is_indirect;	/* true for touchpads */
+ 	bool export_all_inputs;	/* do not ignore mouse, keyboards, etc... */
+ };
+@@ -212,6 +214,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app);
+ #define MT_CLS_GOOGLE				0x0111
+ #define MT_CLS_RAZER_BLADE_STEALTH		0x0112
+ #define MT_CLS_SMART_TECH			0x0113
++#define MT_CLS_APPLE_TOUCHBAR			0x0114
+ 
+ #define MT_DEFAULT_MAXCONTACT	10
+ #define MT_MAX_MAXCONTACT	250
+@@ -396,6 +399,13 @@ static const struct mt_class mt_classes[] = {
+ 			MT_QUIRK_CONTACT_CNT_ACCURATE |
+ 			MT_QUIRK_SEPARATE_APP_REPORT,
+ 	},
++	{ .name = MT_CLS_APPLE_TOUCHBAR,
++		.quirks = MT_QUIRK_HOVERING |
++			MT_QUIRK_TOUCH_IS_TIPSTATE |
++			MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE,
++		.is_direct = true,
++		.maxcontacts = 11,
++	},
+ 	{ }
+ };
+ 
+@@ -489,9 +499,6 @@ static void mt_feature_mapping(struct hid_device *hdev,
+ 		if (!td->maxcontacts &&
+ 		    field->logical_maximum <= MT_MAX_MAXCONTACT)
+ 			td->maxcontacts = field->logical_maximum;
+-		if (td->mtclass.maxcontacts)
+-			/* check if the maxcontacts is given by the class */
+-			td->maxcontacts = td->mtclass.maxcontacts;
+ 
+ 		break;
+ 	case HID_DG_BUTTONTYPE:
+@@ -565,13 +572,13 @@ static struct mt_application *mt_allocate_application(struct mt_device *td,
+ 	mt_application->application = application;
+ 	INIT_LIST_HEAD(&mt_application->mt_usages);
+ 
+-	if (application == HID_DG_TOUCHSCREEN)
++	if (application == HID_DG_TOUCHSCREEN && !td->mtclass.is_indirect)
+ 		mt_application->mt_flags |= INPUT_MT_DIRECT;
+ 
+ 	/*
+ 	 * Model touchscreens providing buttons as touchpads.
+ 	 */
+-	if (application == HID_DG_TOUCHPAD) {
++	if (application == HID_DG_TOUCHPAD && !td->mtclass.is_direct) {
+ 		mt_application->mt_flags |= INPUT_MT_POINTER;
+ 		td->inputmode_value = MT_INPUTMODE_TOUCHPAD;
+ 	}
+@@ -635,7 +642,9 @@ static struct mt_report_data *mt_allocate_report_data(struct mt_device *td,
+ 
+ 		if (field->logical == HID_DG_FINGER || td->hdev->group != HID_GROUP_MULTITOUCH_WIN_8) {
+ 			for (n = 0; n < field->report_count; n++) {
+-				if (field->usage[n].hid == HID_DG_CONTACTID) {
++				unsigned int hid = field->usage[n].hid;
++
++				if (hid == HID_DG_CONTACTID || hid == HID_DG_TRANSDUCER_INDEX) {
+ 					rdata->is_mt_collection = true;
+ 					break;
+ 				}
+@@ -807,6 +816,15 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
+ 
+ 			MT_STORE_FIELD(confidence_state);
+ 			return 1;
++		case HID_DG_TOUCH:
++			/*
++			 * Legacy devices use TIPSWITCH and not TOUCH.
++			 * Let's just ignore this field unless the quirk is set.
++			 */
++			if (!(cls->quirks & MT_QUIRK_TOUCH_IS_TIPSTATE))
++				return -1;
++
++			fallthrough;
+ 		case HID_DG_TIPSWITCH:
+ 			if (field->application != HID_GD_SYSTEM_MULTIAXIS)
+ 				input_set_capability(hi->input,
+@@ -814,6 +832,7 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
+ 			MT_STORE_FIELD(tip_state);
+ 			return 1;
+ 		case HID_DG_CONTACTID:
++		case HID_DG_TRANSDUCER_INDEX:
+ 			MT_STORE_FIELD(contactid);
+ 			app->touches_by_report++;
+ 			return 1;
+@@ -869,10 +888,6 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
+ 		case HID_DG_CONTACTMAX:
+ 			/* contact max are global to the report */
+ 			return -1;
+-		case HID_DG_TOUCH:
+-			/* Legacy devices use TIPSWITCH and not TOUCH.
+-			 * Let's just ignore this field. */
+-			return -1;
+ 		}
+ 		/* let hid-input decide for the others */
+ 		return 0;
+@@ -1300,6 +1315,10 @@ static int mt_touch_input_configured(struct hid_device *hdev,
+ 	struct input_dev *input = hi->input;
+ 	int ret;
+ 
++	/* check if the maxcontacts is given by the class */
++	if (cls->maxcontacts)
++		td->maxcontacts = cls->maxcontacts;
++
+ 	if (!td->maxcontacts)
+ 		td->maxcontacts = MT_DEFAULT_MAXCONTACT;
+ 
+@@ -1307,6 +1326,9 @@ static int mt_touch_input_configured(struct hid_device *hdev,
+ 	if (td->serial_maybe)
+ 		mt_post_parse_default_settings(td, app);
+ 
++	if (cls->is_direct)
++		app->mt_flags |= INPUT_MT_DIRECT;
++
+ 	if (cls->is_indirect)
+ 		app->mt_flags |= INPUT_MT_POINTER;
+ 
+@@ -1758,6 +1780,15 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ 		}
+ 	}
+ 
++	ret = hid_parse(hdev);
++	if (ret != 0)
++		return ret;
++
++	if (mtclass->name == MT_CLS_APPLE_TOUCHBAR &&
++	    !hid_find_field(hdev, HID_INPUT_REPORT,
++			    HID_DG_TOUCHPAD, HID_DG_TRANSDUCER_INDEX))
++		return -ENODEV;
++
+ 	td = devm_kzalloc(&hdev->dev, sizeof(struct mt_device), GFP_KERNEL);
+ 	if (!td) {
+ 		dev_err(&hdev->dev, "cannot allocate multitouch data\n");
+@@ -1805,10 +1836,6 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ 
+ 	timer_setup(&td->release_timer, mt_expired_timeout, 0);
+ 
+-	ret = hid_parse(hdev);
+-	if (ret != 0)
+-		return ret;
+-
+ 	if (mtclass->quirks & MT_QUIRK_FIX_CONST_CONTACT_ID)
+ 		mt_fix_const_fields(hdev, HID_DG_CONTACTID);
+ 
+@@ -2277,6 +2304,11 @@ static const struct hid_device_id mt_devices[] = {
+ 		MT_USB_DEVICE(USB_VENDOR_ID_XIROKU,
+ 			USB_DEVICE_ID_XIROKU_CSR2) },
+ 
++	/* Apple Touch Bars */
++	{ .driver_data = MT_CLS_APPLE_TOUCHBAR,
++		HID_USB_DEVICE(USB_VENDOR_ID_APPLE,
++			       USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) },
++
+ 	/* Google MT devices */
+ 	{ .driver_data = MT_CLS_GOOGLE,
+ 		HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE,
+diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
+index e0bbf0c6345d..7c576d6540fe 100644
+--- a/drivers/hid/hid-quirks.c
++++ b/drivers/hid/hid-quirks.c
+@@ -328,8 +328,6 @@ static const struct hid_device_id hid_have_special_driver[] = {
+ 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) },
+ 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021) },
+ 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021) },
+-	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) },
+-	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) },
+ #endif
+ #if IS_ENABLED(CONFIG_HID_APPLEIR)
+ 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) },
+@@ -338,6 +336,12 @@ static const struct hid_device_id hid_have_special_driver[] = {
+ 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) },
+ 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL5) },
+ #endif
++#if IS_ENABLED(CONFIG_HID_APPLETB_BL)
++	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) },
++#endif
++#if IS_ENABLED(CONFIG_HID_APPLETB_KBD)
++	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) },
++#endif
+ #if IS_ENABLED(CONFIG_HID_ASUS)
+ 	{ HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD) },
+ 	{ HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD) },
+diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
+index fc6d6a9053ce..698f44794453 100644
+--- a/drivers/hwmon/applesmc.c
++++ b/drivers/hwmon/applesmc.c
+@@ -6,6 +6,7 @@
+  *
+  * Copyright (C) 2007 Nicolas Boichat <nicolas@boichat.ch>
+  * Copyright (C) 2010 Henrik Rydberg <rydberg@euromail.se>
++ * Copyright (C) 2019 Paul Pawlowski <paul@mrarm.io>
+  *
+  * Based on hdaps.c driver:
+  * Copyright (C) 2005 Robert Love <rml@novell.com>
+@@ -18,7 +19,7 @@
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+ 
+ #include <linux/delay.h>
+-#include <linux/platform_device.h>
++#include <linux/acpi.h>
+ #include <linux/input.h>
+ #include <linux/kernel.h>
+ #include <linux/slab.h>
+@@ -35,12 +36,24 @@
+ #include <linux/bits.h>
+ 
+ /* data port used by Apple SMC */
+-#define APPLESMC_DATA_PORT	0x300
++#define APPLESMC_DATA_PORT	0
+ /* command/status port used by Apple SMC */
+-#define APPLESMC_CMD_PORT	0x304
++#define APPLESMC_CMD_PORT	4
+ 
+ #define APPLESMC_NR_PORTS	32 /* 0x300-0x31f */
+ 
++#define APPLESMC_IOMEM_KEY_DATA	0
++#define APPLESMC_IOMEM_KEY_STATUS	0x4005
++#define APPLESMC_IOMEM_KEY_NAME	0x78
++#define APPLESMC_IOMEM_KEY_DATA_LEN	0x7D
++#define APPLESMC_IOMEM_KEY_SMC_ID	0x7E
++#define APPLESMC_IOMEM_KEY_CMD		0x7F
++#define APPLESMC_IOMEM_MIN_SIZE	0x4006
++
++#define APPLESMC_IOMEM_KEY_TYPE_CODE		0
++#define APPLESMC_IOMEM_KEY_TYPE_DATA_LEN	5
++#define APPLESMC_IOMEM_KEY_TYPE_FLAGS		6
++
+ #define APPLESMC_MAX_DATA_LENGTH 32
+ 
+ /* Apple SMC status bits */
+@@ -74,6 +87,7 @@
+ #define FAN_ID_FMT		"F%dID" /* r-o char[16] */
+ 
+ #define TEMP_SENSOR_TYPE	"sp78"
++#define FLOAT_TYPE		"flt "
+ 
+ /* List of keys used to read/write fan speeds */
+ static const char *const fan_speed_fmt[] = {
+@@ -83,6 +97,7 @@ static const char *const fan_speed_fmt[] = {
+ 	"F%dSf",		/* safe speed - not all models */
+ 	"F%dTg",		/* target speed (manual: rw) */
+ };
++#define FAN_MANUAL_FMT "F%dMd"
+ 
+ #define INIT_TIMEOUT_MSECS	5000	/* wait up to 5s for device init ... */
+ #define INIT_WAIT_MSECS		50	/* ... in 50ms increments */
+@@ -119,7 +134,7 @@ struct applesmc_entry {
+ };
+ 
+ /* Register lookup and registers common to all SMCs */
+-static struct applesmc_registers {
++struct applesmc_registers {
+ 	struct mutex mutex;		/* register read/write mutex */
+ 	unsigned int key_count;		/* number of SMC registers */
+ 	unsigned int fan_count;		/* number of fans */
+@@ -133,26 +148,38 @@ static struct applesmc_registers {
+ 	bool init_complete;		/* true when fully initialized */
+ 	struct applesmc_entry *cache;	/* cached key entries */
+ 	const char **index;		/* temperature key index */
+-} smcreg = {
+-	.mutex = __MUTEX_INITIALIZER(smcreg.mutex),
+ };
+ 
+-static const int debug;
+-static struct platform_device *pdev;
+-static s16 rest_x;
+-static s16 rest_y;
+-static u8 backlight_state[2];
++struct applesmc_device {
++	struct acpi_device *dev;
++	struct device *ldev;
++	struct applesmc_registers reg;
+ 
+-static struct device *hwmon_dev;
+-static struct input_dev *applesmc_idev;
++	bool port_base_set, iomem_base_set;
++	u16 port_base;
++	u8 *__iomem iomem_base;
++	u32 iomem_base_addr, iomem_base_size;
+ 
+-/*
+- * Last index written to key_at_index sysfs file, and value to use for all other
+- * key_at_index_* sysfs files.
+- */
+-static unsigned int key_at_index;
++	s16 rest_x;
++	s16 rest_y;
++
++	u8 backlight_state[2];
++
++	struct device *hwmon_dev;
++	struct input_dev *idev;
++
++	/*
++	 * Last index written to key_at_index sysfs file, and value to use for all other
++	 * key_at_index_* sysfs files.
++	 */
++	unsigned int key_at_index;
+ 
+-static struct workqueue_struct *applesmc_led_wq;
++	struct workqueue_struct *backlight_wq;
++	struct work_struct backlight_work;
++	struct led_classdev backlight_dev;
++};
++
++static const int debug;
+ 
+ /*
+  * Wait for specific status bits with a mask on the SMC.
+@@ -162,7 +189,7 @@ static struct workqueue_struct *applesmc_led_wq;
+  * run out past 500ms.
+  */
+ 
+-static int wait_status(u8 val, u8 mask)
++static int port_wait_status(struct applesmc_device *smc, u8 val, u8 mask)
+ {
+ 	u8 status;
+ 	int us;
+@@ -170,7 +197,7 @@ static int wait_status(u8 val, u8 mask)
+ 
+ 	us = APPLESMC_MIN_WAIT;
+ 	for (i = 0; i < 24 ; i++) {
+-		status = inb(APPLESMC_CMD_PORT);
++		status = inb(smc->port_base + APPLESMC_CMD_PORT);
+ 		if ((status & mask) == val)
+ 			return 0;
+ 		usleep_range(us, us * 2);
+@@ -180,13 +207,13 @@ static int wait_status(u8 val, u8 mask)
+ 	return -EIO;
+ }
+ 
+-/* send_byte - Write to SMC data port. Callers must hold applesmc_lock. */
++/* port_send_byte - Write to SMC data port. Callers must hold applesmc_lock. */
+ 
+-static int send_byte(u8 cmd, u16 port)
++static int port_send_byte(struct applesmc_device *smc, u8 cmd, u16 port)
+ {
+ 	int status;
+ 
+-	status = wait_status(0, SMC_STATUS_IB_CLOSED);
++	status = port_wait_status(smc, 0, SMC_STATUS_IB_CLOSED);
+ 	if (status)
+ 		return status;
+ 	/*
+@@ -195,24 +222,25 @@ static int send_byte(u8 cmd, u16 port)
+ 	 * this extra read may not happen if status returns both
+ 	 * simultaneously and this would appear to be required.
+ 	 */
+-	status = wait_status(SMC_STATUS_BUSY, SMC_STATUS_BUSY);
++	status = port_wait_status(smc, SMC_STATUS_BUSY, SMC_STATUS_BUSY);
+ 	if (status)
+ 		return status;
+ 
+-	outb(cmd, port);
++	outb(cmd, smc->port_base + port);
+ 	return 0;
+ }
+ 
+-/* send_command - Write a command to the SMC. Callers must hold applesmc_lock. */
++/* port_send_command - Write a command to the SMC. Callers must hold applesmc_lock. */
+ 
+-static int send_command(u8 cmd)
++static int port_send_command(struct applesmc_device *smc, u8 cmd)
+ {
+ 	int ret;
+ 
+-	ret = wait_status(0, SMC_STATUS_IB_CLOSED);
++	ret = port_wait_status(smc, 0, SMC_STATUS_IB_CLOSED);
+ 	if (ret)
+ 		return ret;
+-	outb(cmd, APPLESMC_CMD_PORT);
++
++	outb(cmd, smc->port_base + APPLESMC_CMD_PORT);
+ 	return 0;
+ }
+ 
+@@ -222,110 +250,304 @@ static int send_command(u8 cmd)
+  * If busy is stuck high after the command then the SMC is jammed.
+  */
+ 
+-static int smc_sane(void)
++static int port_smc_sane(struct applesmc_device *smc)
+ {
+ 	int ret;
+ 
+-	ret = wait_status(0, SMC_STATUS_BUSY);
++	ret = port_wait_status(smc, 0, SMC_STATUS_BUSY);
+ 	if (!ret)
+ 		return ret;
+-	ret = send_command(APPLESMC_READ_CMD);
++	ret = port_send_command(smc, APPLESMC_READ_CMD);
+ 	if (ret)
+ 		return ret;
+-	return wait_status(0, SMC_STATUS_BUSY);
++	return port_wait_status(smc, 0, SMC_STATUS_BUSY);
+ }
+ 
+-static int send_argument(const char *key)
++static int port_send_argument(struct applesmc_device *smc, const char *key)
+ {
+ 	int i;
+ 
+ 	for (i = 0; i < 4; i++)
+-		if (send_byte(key[i], APPLESMC_DATA_PORT))
++		if (port_send_byte(smc, key[i], APPLESMC_DATA_PORT))
+ 			return -EIO;
+ 	return 0;
+ }
+ 
+-static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len)
++static int port_read_smc(struct applesmc_device *smc, u8 cmd, const char *key,
++	u8 *buffer, u8 len)
+ {
+ 	u8 status, data = 0;
+ 	int i;
+ 	int ret;
+ 
+-	ret = smc_sane();
++	ret = port_smc_sane(smc);
+ 	if (ret)
+ 		return ret;
+ 
+-	if (send_command(cmd) || send_argument(key)) {
++	if (port_send_command(smc, cmd) || port_send_argument(smc, key)) {
+ 		pr_warn("%.4s: read arg fail\n", key);
+ 		return -EIO;
+ 	}
+ 
+ 	/* This has no effect on newer (2012) SMCs */
+-	if (send_byte(len, APPLESMC_DATA_PORT)) {
++	if (port_send_byte(smc, len, APPLESMC_DATA_PORT)) {
+ 		pr_warn("%.4s: read len fail\n", key);
+ 		return -EIO;
+ 	}
+ 
+ 	for (i = 0; i < len; i++) {
+-		if (wait_status(SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY,
++		if (port_wait_status(smc,
++				SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY,
+ 				SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY)) {
+ 			pr_warn("%.4s: read data[%d] fail\n", key, i);
+ 			return -EIO;
+ 		}
+-		buffer[i] = inb(APPLESMC_DATA_PORT);
++		buffer[i] = inb(smc->port_base + APPLESMC_DATA_PORT);
+ 	}
+ 
+ 	/* Read the data port until bit0 is cleared */
+ 	for (i = 0; i < 16; i++) {
+ 		udelay(APPLESMC_MIN_WAIT);
+-		status = inb(APPLESMC_CMD_PORT);
++		status = inb(smc->port_base + APPLESMC_CMD_PORT);
+ 		if (!(status & SMC_STATUS_AWAITING_DATA))
+ 			break;
+-		data = inb(APPLESMC_DATA_PORT);
++		data = inb(smc->port_base + APPLESMC_DATA_PORT);
+ 	}
+ 	if (i)
+ 		pr_warn("flushed %d bytes, last value is: %d\n", i, data);
+ 
+-	return wait_status(0, SMC_STATUS_BUSY);
++	return port_wait_status(smc, 0, SMC_STATUS_BUSY);
+ }
+ 
+-static int write_smc(u8 cmd, const char *key, const u8 *buffer, u8 len)
++static int port_write_smc(struct applesmc_device *smc, u8 cmd, const char *key,
++	const u8 *buffer, u8 len)
+ {
+ 	int i;
+ 	int ret;
+ 
+-	ret = smc_sane();
++	ret = port_smc_sane(smc);
+ 	if (ret)
+ 		return ret;
+ 
+-	if (send_command(cmd) || send_argument(key)) {
++	if (port_send_command(smc, cmd) || port_send_argument(smc, key)) {
+ 		pr_warn("%s: write arg fail\n", key);
+ 		return -EIO;
+ 	}
+ 
+-	if (send_byte(len, APPLESMC_DATA_PORT)) {
++	if (port_send_byte(smc, len, APPLESMC_DATA_PORT)) {
+ 		pr_warn("%.4s: write len fail\n", key);
+ 		return -EIO;
+ 	}
+ 
+ 	for (i = 0; i < len; i++) {
+-		if (send_byte(buffer[i], APPLESMC_DATA_PORT)) {
++		if (port_send_byte(smc, buffer[i], APPLESMC_DATA_PORT)) {
+ 			pr_warn("%s: write data fail\n", key);
+ 			return -EIO;
+ 		}
+ 	}
+ 
+-	return wait_status(0, SMC_STATUS_BUSY);
++	return port_wait_status(smc, 0, SMC_STATUS_BUSY);
+ }
+ 
+-static int read_register_count(unsigned int *count)
++static int port_get_smc_key_info(struct applesmc_device *smc,
++	const char *key, struct applesmc_entry *info)
+ {
+-	__be32 be;
+ 	int ret;
++	u8 raw[6];
+ 
+-	ret = read_smc(APPLESMC_READ_CMD, KEY_COUNT_KEY, (u8 *)&be, 4);
++	ret = port_read_smc(smc, APPLESMC_GET_KEY_TYPE_CMD, key, raw, 6);
+ 	if (ret)
+ 		return ret;
++	info->len = raw[0];
++	memcpy(info->type, &raw[1], 4);
++	info->flags = raw[5];
++	return 0;
++}
++
++
++/*
++ * MMIO based communication.
++ * TODO: Use updated mechanism for cmd timeout/retry
++ */
++
++static void iomem_clear_status(struct applesmc_device *smc)
++{
++	if (ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS)) /* skip the write if clear */
++		iowrite8(0, smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS);
++}
++
++/* Poll the MMIO status byte until bit 0x20 is set; -EIO after 24 unsuccessful polls. */
++static int iomem_wait_read(struct applesmc_device *smc)
++{
++	int delay_us = APPLESMC_MIN_WAIT;
++	int attempt;
++	u8 status;
++
++	for (attempt = 0; attempt < 24; attempt++) {
++		status = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS);
++		if (status & 0x20) /* NOTE(review): presumably "command done" - confirm */
++			return 0;
++		usleep_range(delay_us, delay_us * 2);
++		if (attempt > 9) /* start doubling the sleep after ten polls */
++			delay_us <<= 1;
++	}
++
++	dev_warn(smc->ldev, "%s... timeout\n", __func__);
++	return -EIO;
++}
++
++static int iomem_read_smc(struct applesmc_device *smc, u8 cmd, const char *key,
++	u8 *buffer, u8 len)
++{
++	u8 err, remote_len;
++	u32 key_int = *((u32 *) key); /* first four key bytes as one 32-bit word */
++	/* Post the request: clear status, then write key, SMC id and command. */
++	iomem_clear_status(smc);
++	iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME);
++	iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID);
++	iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
++
++	if (iomem_wait_read(smc))
++		return -EIO;
++	/* A non-zero value read back from the command register is an error. */
++	err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
++	if (err != 0) {
++		dev_warn(smc->ldev, "read_smc_mmio(%x %8x/%.4s) failed: %u\n",
++				cmd, key_int, key, err);
++		return -EIO;
++	}
++	/* Only READ checks the length register against @len; others use @len. */
++	if (cmd == APPLESMC_READ_CMD) {
++		remote_len = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_DATA_LEN);
++		if (remote_len != len) {
++			dev_warn(smc->ldev,
++				 "read_smc_mmio(%x %8x/%.4s) failed: buffer length mismatch (remote = %u, requested = %u)\n",
++				 cmd, key_int, key, remote_len, len);
++			return -EINVAL;
++		}
++	} else {
++		remote_len = len;
++	}
++
++	memcpy_fromio(buffer, smc->iomem_base + APPLESMC_IOMEM_KEY_DATA,
++			remote_len);
++
++	dev_dbg(smc->ldev, "read_smc_mmio(%x %8x/%.4s): buflen=%u reslen=%u\n",
++			cmd, key_int, key, len, remote_len);
++	print_hex_dump_bytes("read_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, remote_len);
++	return 0;
++}
++
++static int iomem_get_smc_key_type(struct applesmc_device *smc, const char *key,
++	struct applesmc_entry *e)
++{
++	u8 err;
++	u8 cmd = APPLESMC_GET_KEY_TYPE_CMD;
++	u32 key_int = *((u32 *) key);
++	/* Post a GET_KEY_TYPE request for the four-byte key. */
++	iomem_clear_status(smc);
++	iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME);
++	iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID);
++	iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
++
++	if (iomem_wait_read(smc))
++		return -EIO;
++
++	err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
++	if (err != 0) {
++		dev_warn(smc->ldev, "get_smc_key_type_mmio(%.4s) failed: %u\n", key, err);
++		return -EIO;
++	}
++	/* Fill @e from the KEY_TYPE_* offsets of the MMIO window. */
++	e->len = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_DATA_LEN);
++	*((uint32_t *) e->type) = ioread32(
++			smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_CODE);
++	e->flags = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_FLAGS);
++
++	dev_dbg(smc->ldev, "get_smc_key_type_mmio(%.4s): len=%u type=%.4s flags=%x\n",
++		key, e->len, e->type, e->flags);
++	return 0;
++}
++
++static int iomem_write_smc(struct applesmc_device *smc, u8 cmd, const char *key,
++	const u8 *buffer, u8 len)
++{
++	u8 err;
++	u32 key_int = *((u32 *) key);
++	/* Stage key, payload and length first; the command is written last. */
++	iomem_clear_status(smc);
++	iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME);
++	memcpy_toio(smc->iomem_base + APPLESMC_IOMEM_KEY_DATA, buffer, len);
++	iowrite32(len, smc->iomem_base + APPLESMC_IOMEM_KEY_DATA_LEN);
++	iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID);
++	iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
++
++	if (iomem_wait_read(smc))
++		return -EIO;
++	/* A non-zero value read back from the command register is an error. */
++	err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
++	if (err != 0) {
++		dev_warn(smc->ldev, "write_smc_mmio(%x %.4s) failed: %u\n", cmd, key, err);
++		print_hex_dump_bytes("write_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, len);
++		return -EIO;
++	}
++
++	dev_dbg(smc->ldev, "write_smc_mmio(%x %.4s): buflen=%u\n", cmd, key, len);
++	print_hex_dump_bytes("write_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, len);
++	return 0;
++}
++
++
++static int read_smc(struct applesmc_device *smc, const char *key,
++	u8 *buffer, u8 len)
++{
++	/* Read via MMIO when iomem_base_set is flagged, via the I/O ports otherwise. */
++	if (smc->iomem_base_set)
++		return iomem_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len);
++	return port_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len);
++}
++
++static int write_smc(struct applesmc_device *smc, const char *key,
++	const u8 *buffer, u8 len)
++{
++	/* Write via MMIO when iomem_base_set is flagged, via the I/O ports otherwise. */
++	if (smc->iomem_base_set)
++		return iomem_write_smc(smc, APPLESMC_WRITE_CMD, key, buffer, len);
++	return port_write_smc(smc, APPLESMC_WRITE_CMD, key, buffer, len);
++}
++
++static int get_smc_key_by_index(struct applesmc_device *smc,
++	unsigned int index, char *key)
++{
++	/* The index is passed in the key argument as a big-endian 32-bit value. */
++	__be32 be_index = cpu_to_be32(index);
++	const char *arg = (const char *)&be_index;
++
++	if (smc->iomem_base_set)
++		return iomem_read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD,
++				      arg, (u8 *)key, 4);
++	return port_read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD,
++			     arg, (u8 *)key, 4);
++}
++
++static int get_smc_key_info(struct applesmc_device *smc, const char *key,
++	struct applesmc_entry *info)
++{
++	/* Query via MMIO when iomem_base_set is flagged, via the I/O ports otherwise. */
++	if (smc->iomem_base_set)
++		return iomem_get_smc_key_type(smc, key, info);
++	return port_get_smc_key_info(smc, key, info);
++}
++
++static int read_register_count(struct applesmc_device *smc,
++	unsigned int *count)
++{
++	__be32 be;
++	int ret;
++
++	ret = read_smc(smc, KEY_COUNT_KEY, (u8 *)&be, 4);
++	if (ret < 0)
++		return ret;
+ 
+ 	*count = be32_to_cpu(be);
+ 	return 0;
+@@ -338,76 +560,73 @@ static int read_register_count(unsigned int *count)
+  * All functions below are concurrency safe - callers should NOT hold lock.
+  */
+ 
+-static int applesmc_read_entry(const struct applesmc_entry *entry,
+-			       u8 *buf, u8 len)
++static int applesmc_read_entry(struct applesmc_device *smc,
++	const struct applesmc_entry *entry, u8 *buf, u8 len)
+ {
+ 	int ret;
+ 
+ 	if (entry->len != len)
+ 		return -EINVAL;
+-	mutex_lock(&smcreg.mutex);
+-	ret = read_smc(APPLESMC_READ_CMD, entry->key, buf, len);
+-	mutex_unlock(&smcreg.mutex);
++	mutex_lock(&smc->reg.mutex);
++	ret = read_smc(smc, entry->key, buf, len);
++	mutex_unlock(&smc->reg.mutex);
+ 
+ 	return ret;
+ }
+ 
+-static int applesmc_write_entry(const struct applesmc_entry *entry,
+-				const u8 *buf, u8 len)
++static int applesmc_write_entry(struct applesmc_device *smc,
++	const struct applesmc_entry *entry, const u8 *buf, u8 len)
+ {
+ 	int ret;
+ 
+ 	if (entry->len != len)
+ 		return -EINVAL;
+-	mutex_lock(&smcreg.mutex);
+-	ret = write_smc(APPLESMC_WRITE_CMD, entry->key, buf, len);
+-	mutex_unlock(&smcreg.mutex);
++	mutex_lock(&smc->reg.mutex);
++	ret = write_smc(smc, entry->key, buf, len);
++	mutex_unlock(&smc->reg.mutex);
+ 	return ret;
+ }
+ 
+-static const struct applesmc_entry *applesmc_get_entry_by_index(int index)
++static const struct applesmc_entry *applesmc_get_entry_by_index(
++	struct applesmc_device *smc, int index)
+ {
+-	struct applesmc_entry *cache = &smcreg.cache[index];
+-	u8 key[4], info[6];
+-	__be32 be;
++	struct applesmc_entry *cache = &smc->reg.cache[index];
++	char key[4];
+ 	int ret = 0;
+ 
+ 	if (cache->valid)
+ 		return cache;
+ 
+-	mutex_lock(&smcreg.mutex);
++	mutex_lock(&smc->reg.mutex);
+ 
+ 	if (cache->valid)
+ 		goto out;
+-	be = cpu_to_be32(index);
+-	ret = read_smc(APPLESMC_GET_KEY_BY_INDEX_CMD, (u8 *)&be, key, 4);
++	ret = get_smc_key_by_index(smc, index, key);
+ 	if (ret)
+ 		goto out;
+-	ret = read_smc(APPLESMC_GET_KEY_TYPE_CMD, key, info, 6);
++	memcpy(cache->key, key, 4);
++
++	ret = get_smc_key_info(smc, key, cache);
+ 	if (ret)
+ 		goto out;
+-
+-	memcpy(cache->key, key, 4);
+-	cache->len = info[0];
+-	memcpy(cache->type, &info[1], 4);
+-	cache->flags = info[5];
+ 	cache->valid = true;
+ 
+ out:
+-	mutex_unlock(&smcreg.mutex);
++	mutex_unlock(&smc->reg.mutex);
+ 	if (ret)
+ 		return ERR_PTR(ret);
+ 	return cache;
+ }
+ 
+-static int applesmc_get_lower_bound(unsigned int *lo, const char *key)
++static int applesmc_get_lower_bound(struct applesmc_device *smc,
++	unsigned int *lo, const char *key)
+ {
+-	int begin = 0, end = smcreg.key_count;
++	int begin = 0, end = smc->reg.key_count;
+ 	const struct applesmc_entry *entry;
+ 
+ 	while (begin != end) {
+ 		int middle = begin + (end - begin) / 2;
+-		entry = applesmc_get_entry_by_index(middle);
++		entry = applesmc_get_entry_by_index(smc, middle);
+ 		if (IS_ERR(entry)) {
+ 			*lo = 0;
+ 			return PTR_ERR(entry);
+@@ -422,16 +641,17 @@ static int applesmc_get_lower_bound(unsigned int *lo, const char *key)
+ 	return 0;
+ }
+ 
+-static int applesmc_get_upper_bound(unsigned int *hi, const char *key)
++static int applesmc_get_upper_bound(struct applesmc_device *smc,
++	unsigned int *hi, const char *key)
+ {
+-	int begin = 0, end = smcreg.key_count;
++	int begin = 0, end = smc->reg.key_count;
+ 	const struct applesmc_entry *entry;
+ 
+ 	while (begin != end) {
+ 		int middle = begin + (end - begin) / 2;
+-		entry = applesmc_get_entry_by_index(middle);
++		entry = applesmc_get_entry_by_index(smc, middle);
+ 		if (IS_ERR(entry)) {
+-			*hi = smcreg.key_count;
++			*hi = smc->reg.key_count;
+ 			return PTR_ERR(entry);
+ 		}
+ 		if (strcmp(key, entry->key) < 0)
+@@ -444,50 +664,54 @@ static int applesmc_get_upper_bound(unsigned int *hi, const char *key)
+ 	return 0;
+ }
+ 
+-static const struct applesmc_entry *applesmc_get_entry_by_key(const char *key)
++static const struct applesmc_entry *applesmc_get_entry_by_key(
++	struct applesmc_device *smc, const char *key)
+ {
+ 	int begin, end;
+ 	int ret;
+ 
+-	ret = applesmc_get_lower_bound(&begin, key);
++	ret = applesmc_get_lower_bound(smc, &begin, key);
+ 	if (ret)
+ 		return ERR_PTR(ret);
+-	ret = applesmc_get_upper_bound(&end, key);
++	ret = applesmc_get_upper_bound(smc, &end, key);
+ 	if (ret)
+ 		return ERR_PTR(ret);
+ 	if (end - begin != 1)
+ 		return ERR_PTR(-EINVAL);
+ 
+-	return applesmc_get_entry_by_index(begin);
++	return applesmc_get_entry_by_index(smc, begin);
+ }
+ 
+-static int applesmc_read_key(const char *key, u8 *buffer, u8 len)
++static int applesmc_read_key(struct applesmc_device *smc,
++	const char *key, u8 *buffer, u8 len)
+ {
+ 	const struct applesmc_entry *entry;
+ 
+-	entry = applesmc_get_entry_by_key(key);
++	entry = applesmc_get_entry_by_key(smc, key);
+ 	if (IS_ERR(entry))
+ 		return PTR_ERR(entry);
+ 
+-	return applesmc_read_entry(entry, buffer, len);
++	return applesmc_read_entry(smc, entry, buffer, len);
+ }
+ 
+-static int applesmc_write_key(const char *key, const u8 *buffer, u8 len)
++static int applesmc_write_key(struct applesmc_device *smc,
++	const char *key, const u8 *buffer, u8 len)
+ {
+ 	const struct applesmc_entry *entry;
+ 
+-	entry = applesmc_get_entry_by_key(key);
++	entry = applesmc_get_entry_by_key(smc, key);
+ 	if (IS_ERR(entry))
+ 		return PTR_ERR(entry);
+ 
+-	return applesmc_write_entry(entry, buffer, len);
++	return applesmc_write_entry(smc, entry, buffer, len);
+ }
+ 
+-static int applesmc_has_key(const char *key, bool *value)
++static int applesmc_has_key(struct applesmc_device *smc,
++	const char *key, bool *value)
+ {
+ 	const struct applesmc_entry *entry;
+ 
+-	entry = applesmc_get_entry_by_key(key);
++	entry = applesmc_get_entry_by_key(smc, key);
+ 	if (IS_ERR(entry) && PTR_ERR(entry) != -EINVAL)
+ 		return PTR_ERR(entry);
+ 
+@@ -498,12 +722,13 @@ static int applesmc_has_key(const char *key, bool *value)
+ /*
+  * applesmc_read_s16 - Read 16-bit signed big endian register
+  */
+-static int applesmc_read_s16(const char *key, s16 *value)
++static int applesmc_read_s16(struct applesmc_device *smc,
++	const char *key, s16 *value)
+ {
+ 	u8 buffer[2];
+ 	int ret;
+ 
+-	ret = applesmc_read_key(key, buffer, 2);
++	ret = applesmc_read_key(smc, key, buffer, 2);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -511,31 +736,68 @@ static int applesmc_read_s16(const char *key, s16 *value)
+ 	return 0;
+ }
+ 
++/*
++ * applesmc_float_to_u32 - integral part of the IEEE-754 single encoded in @d
++ * (T2 fan keys hold floats). < 1.0 or negative -> 0; above u32 range -> ~0.
++ */
++static inline u32 applesmc_float_to_u32(u32 d)
++{
++	u8 sign = (u8) ((d >> 31) & 1);
++	s32 exp = (s32) ((d >> 23) & 0xff) - 0x7f;
++	u32 mant = (d & ((1u << 23) - 1)) | (1u << 23); /* implicit leading 1 */
++
++	if (sign || exp < 0)
++		return 0;
++	if (exp > 31)
++		return ~(u32)0;
++
++	return exp > 23 ? mant << (exp - 23) : mant >> (23 - exp);
++}
++
++/*
++ * applesmc_u32_to_float - encode the integer @d as IEEE-754 single bits.
++ * Bits that do not fit in the 24-bit significand are dropped (truncation).
++ */
++static inline u32 applesmc_u32_to_float(u32 d)
++{
++	u32 dc = d, bc = 0, exp;
++
++	if (!d)
++		return 0;
++
++	while (dc >>= 1)
++		++bc;
++	exp = 0x7f + bc;
++
++	return (u32) ((exp << 23) |
++		((bc > 23 ? d >> (bc - 23) : d << (23 - bc)) & ((1u << 23) - 1)));
++}
+ /*
+  * applesmc_device_init - initialize the accelerometer.  Can sleep.
+  */
+-static void applesmc_device_init(void)
++static void applesmc_device_init(struct applesmc_device *smc)
+ {
+ 	int total;
+ 	u8 buffer[2];
+ 
+-	if (!smcreg.has_accelerometer)
++	if (!smc->reg.has_accelerometer)
+ 		return;
+ 
+ 	for (total = INIT_TIMEOUT_MSECS; total > 0; total -= INIT_WAIT_MSECS) {
+-		if (!applesmc_read_key(MOTION_SENSOR_KEY, buffer, 2) &&
++		if (!applesmc_read_key(smc, MOTION_SENSOR_KEY, buffer, 2) &&
+ 				(buffer[0] != 0x00 || buffer[1] != 0x00))
+ 			return;
+ 		buffer[0] = 0xe0;
+ 		buffer[1] = 0x00;
+-		applesmc_write_key(MOTION_SENSOR_KEY, buffer, 2);
++		applesmc_write_key(smc, MOTION_SENSOR_KEY, buffer, 2);
+ 		msleep(INIT_WAIT_MSECS);
+ 	}
+ 
+ 	pr_warn("failed to init the device\n");
+ }
+ 
+-static int applesmc_init_index(struct applesmc_registers *s)
++static int applesmc_init_index(struct applesmc_device *smc,
++	struct applesmc_registers *s)
+ {
+ 	const struct applesmc_entry *entry;
+ 	unsigned int i;
+@@ -548,7 +810,7 @@ static int applesmc_init_index(struct applesmc_registers *s)
+ 		return -ENOMEM;
+ 
+ 	for (i = s->temp_begin; i < s->temp_end; i++) {
+-		entry = applesmc_get_entry_by_index(i);
++		entry = applesmc_get_entry_by_index(smc, i);
+ 		if (IS_ERR(entry))
+ 			continue;
+ 		if (strcmp(entry->type, TEMP_SENSOR_TYPE))
+@@ -562,9 +824,9 @@ static int applesmc_init_index(struct applesmc_registers *s)
+ /*
+  * applesmc_init_smcreg_try - Try to initialize register cache. Idempotent.
+  */
+-static int applesmc_init_smcreg_try(void)
++static int applesmc_init_smcreg_try(struct applesmc_device *smc)
+ {
+-	struct applesmc_registers *s = &smcreg;
++	struct applesmc_registers *s = &smc->reg;
+ 	bool left_light_sensor = false, right_light_sensor = false;
+ 	unsigned int count;
+ 	u8 tmp[1];
+@@ -573,7 +835,7 @@ static int applesmc_init_smcreg_try(void)
+ 	if (s->init_complete)
+ 		return 0;
+ 
+-	ret = read_register_count(&count);
++	ret = read_register_count(smc, &count);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -590,35 +852,35 @@ static int applesmc_init_smcreg_try(void)
+ 	if (!s->cache)
+ 		return -ENOMEM;
+ 
+-	ret = applesmc_read_key(FANS_COUNT, tmp, 1);
++	ret = applesmc_read_key(smc, FANS_COUNT, tmp, 1);
+ 	if (ret)
+ 		return ret;
+ 	s->fan_count = tmp[0];
+ 	if (s->fan_count > 10)
+ 		s->fan_count = 10;
+ 
+-	ret = applesmc_get_lower_bound(&s->temp_begin, "T");
++	ret = applesmc_get_lower_bound(smc, &s->temp_begin, "T");
+ 	if (ret)
+ 		return ret;
+-	ret = applesmc_get_lower_bound(&s->temp_end, "U");
++	ret = applesmc_get_lower_bound(smc, &s->temp_end, "U");
+ 	if (ret)
+ 		return ret;
+ 	s->temp_count = s->temp_end - s->temp_begin;
+ 
+-	ret = applesmc_init_index(s);
++	ret = applesmc_init_index(smc, s);
+ 	if (ret)
+ 		return ret;
+ 
+-	ret = applesmc_has_key(LIGHT_SENSOR_LEFT_KEY, &left_light_sensor);
++	ret = applesmc_has_key(smc, LIGHT_SENSOR_LEFT_KEY, &left_light_sensor);
+ 	if (ret)
+ 		return ret;
+-	ret = applesmc_has_key(LIGHT_SENSOR_RIGHT_KEY, &right_light_sensor);
++	ret = applesmc_has_key(smc, LIGHT_SENSOR_RIGHT_KEY, &right_light_sensor);
+ 	if (ret)
+ 		return ret;
+-	ret = applesmc_has_key(MOTION_SENSOR_KEY, &s->has_accelerometer);
++	ret = applesmc_has_key(smc, MOTION_SENSOR_KEY, &s->has_accelerometer);
+ 	if (ret)
+ 		return ret;
+-	ret = applesmc_has_key(BACKLIGHT_KEY, &s->has_key_backlight);
++	ret = applesmc_has_key(smc, BACKLIGHT_KEY, &s->has_key_backlight);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -634,13 +896,13 @@ static int applesmc_init_smcreg_try(void)
+ 	return 0;
+ }
+ 
+-static void applesmc_destroy_smcreg(void)
++static void applesmc_destroy_smcreg(struct applesmc_device *smc)
+ {
+-	kfree(smcreg.index);
+-	smcreg.index = NULL;
+-	kfree(smcreg.cache);
+-	smcreg.cache = NULL;
+-	smcreg.init_complete = false;
++	kfree(smc->reg.index);
++	smc->reg.index = NULL;
++	kfree(smc->reg.cache);
++	smc->reg.cache = NULL;
++	smc->reg.init_complete = false;
+ }
+ 
+ /*
+@@ -649,12 +911,12 @@ static void applesmc_destroy_smcreg(void)
+  * Retries until initialization is successful, or the operation times out.
+  *
+  */
+-static int applesmc_init_smcreg(void)
++static int applesmc_init_smcreg(struct applesmc_device *smc)
+ {
+ 	int ms, ret;
+ 
+ 	for (ms = 0; ms < INIT_TIMEOUT_MSECS; ms += INIT_WAIT_MSECS) {
+-		ret = applesmc_init_smcreg_try();
++		ret = applesmc_init_smcreg_try(smc);
+ 		if (!ret) {
+ 			if (ms)
+ 				pr_info("init_smcreg() took %d ms\n", ms);
+@@ -663,50 +925,223 @@ static int applesmc_init_smcreg(void)
+ 		msleep(INIT_WAIT_MSECS);
+ 	}
+ 
+-	applesmc_destroy_smcreg();
++	applesmc_destroy_smcreg(smc);
+ 
+ 	return ret;
+ }
+ 
+ /* Device model stuff */
+-static int applesmc_probe(struct platform_device *dev)
++
++static int applesmc_init_resources(struct applesmc_device *smc);
++static void applesmc_free_resources(struct applesmc_device *smc);
++static int applesmc_create_modules(struct applesmc_device *smc);
++static void applesmc_destroy_modules(struct applesmc_device *smc);
++
++static int applesmc_add(struct acpi_device *dev)
+ {
++	struct applesmc_device *smc;
+ 	int ret;
+ 
+-	ret = applesmc_init_smcreg();
++	smc = kzalloc(sizeof(struct applesmc_device), GFP_KERNEL);
++	if (!smc)
++		return -ENOMEM;
++	smc->dev = dev;
++	smc->ldev = &dev->dev;
++	mutex_init(&smc->reg.mutex);
++
++	dev_set_drvdata(&dev->dev, smc);
++
++	ret = applesmc_init_resources(smc);
+ 	if (ret)
+-		return ret;
++		goto out_mem;
++
++	ret = applesmc_init_smcreg(smc);
++	if (ret)
++		goto out_res;
++
++	applesmc_device_init(smc);
++
++	ret = applesmc_create_modules(smc);
++	if (ret)
++		goto out_reg;
++
++	return 0;
++
++out_reg:
++	applesmc_destroy_smcreg(smc);
++out_res:
++	applesmc_free_resources(smc);
++out_mem:
++	dev_set_drvdata(&dev->dev, NULL);
++	mutex_destroy(&smc->reg.mutex);
++	kfree(smc);
++
++	return ret;
++}
++
++static void applesmc_remove(struct acpi_device *dev)
++{
++	struct applesmc_device *smc = dev_get_drvdata(&dev->dev);
++
++	applesmc_destroy_modules(smc);
++	applesmc_destroy_smcreg(smc);
++	applesmc_free_resources(smc);
+ 
+-	applesmc_device_init();
++	mutex_destroy(&smc->reg.mutex);
++	kfree(smc);
++
++	return;
++}
++
++static acpi_status applesmc_walk_resources(struct acpi_resource *res,
++	void *data)
++{
++	struct applesmc_device *smc = data;	/* callback context */
++
++	switch (res->type) {
++	case ACPI_RESOURCE_TYPE_IO:
++		if (!smc->port_base_set) {	/* keep only the first I/O range */
++			if (res->data.io.address_length < APPLESMC_NR_PORTS)
++				return AE_ERROR;	/* too small for the SMC ports */
++			smc->port_base = res->data.io.minimum;
++			smc->port_base_set = true;
++		}
++		return AE_OK;
++
++	case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
++		if (!smc->iomem_base_set) {	/* keep only the first memory range */
++			if (res->data.fixed_memory32.address_length <
++					APPLESMC_IOMEM_MIN_SIZE) {
++				dev_warn(smc->ldev, "found iomem but it's too small: %u\n",
++						 res->data.fixed_memory32.address_length);
++				return AE_OK;	/* skipped, but not treated as an error */
++			}
++			smc->iomem_base_addr = res->data.fixed_memory32.address;
++			smc->iomem_base_size = res->data.fixed_memory32.address_length;
++			smc->iomem_base_set = true;
++		}
++		return AE_OK;
++
++	case ACPI_RESOURCE_TYPE_END_TAG:
++		if (smc->port_base_set)
++			return AE_OK;
++		else
++			return AE_NOT_FOUND;	/* no usable I/O range was seen */
++
++	default:
++		return AE_OK;
++	}
++}
++
++static int applesmc_try_enable_iomem(struct applesmc_device *smc);
++
++/* Collect the SMC's _CRS resources, claim its I/O ports and probe for MMIO. */
++static int applesmc_init_resources(struct applesmc_device *smc)
++{
++	acpi_status status;
++
++	status = acpi_walk_resources(smc->dev->handle, METHOD_NAME__CRS,
++			applesmc_walk_resources, smc);
++	if (ACPI_FAILURE(status))
++		return -ENXIO;
++
++	if (!request_region(smc->port_base, APPLESMC_NR_PORTS, "applesmc"))
++		return -ENXIO;
++
++	/* MMIO is optional: on failure only the flag is cleared, init succeeds. */
++	if (smc->iomem_base_set && applesmc_try_enable_iomem(smc))
++		smc->iomem_base_set = false;
++
++	return 0;
++}
++
++static int applesmc_try_enable_iomem(struct applesmc_device *smc)
++{
++	u8 test_val, ldkn_version;
++
++	dev_dbg(smc->ldev, "Trying to enable iomem based communication\n");
++	smc->iomem_base = ioremap(smc->iomem_base_addr, smc->iomem_base_size);
++	if (!smc->iomem_base)
++		goto out;	/* nothing mapped, nothing to undo */
++
++	/* Apple's driver does this check for some reason */
++	test_val = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS);
++	if (test_val == 0xff) {
++		dev_warn(smc->ldev,
++			 "iomem enable failed: initial status is 0xff (is %x)\n",
++			 test_val);
++		goto out_iomem;
++	}
++
++	if (read_smc(smc, "LDKN", &ldkn_version, 1)) {	/* non-zero means failure */
++		dev_warn(smc->ldev, "iomem enable failed: ldkn read failed\n");
++		goto out_iomem;
++	}
++
++	if (ldkn_version < 2) {	/* versions below 2 are rejected */
++		dev_warn(smc->ldev,
++			 "iomem enable failed: ldkn version %u is less than minimum (2)\n",
++			 ldkn_version);
++		goto out_iomem;
++	}
+ 
+ 	return 0;
++
++out_iomem:
++	iounmap(smc->iomem_base);	/* drop the mapping made above */
++
++out:
++	return -ENXIO;	/* every failure path reports -ENXIO */
++}
++
++static void applesmc_free_resources(struct applesmc_device *smc)
++{
++	if (smc->iomem_base_set)
++		iounmap(smc->iomem_base);
++	release_region(smc->port_base, APPLESMC_NR_PORTS);
+ }
+ 
+ /* Synchronize device with memorized backlight state */
+ static int applesmc_pm_resume(struct device *dev)
+ {
+-	if (smcreg.has_key_backlight)
+-		applesmc_write_key(BACKLIGHT_KEY, backlight_state, 2);
++	struct applesmc_device *smc = dev_get_drvdata(dev);
++
++	if (smc->reg.has_key_backlight)
++		applesmc_write_key(smc, BACKLIGHT_KEY, smc->backlight_state, 2);
++
+ 	return 0;
+ }
+ 
+ /* Reinitialize device on resume from hibernation */
+ static int applesmc_pm_restore(struct device *dev)
+ {
+-	applesmc_device_init();
++	struct applesmc_device *smc = dev_get_drvdata(dev);
++
++	applesmc_device_init(smc);
++
+ 	return applesmc_pm_resume(dev);
+ }
+ 
++static const struct acpi_device_id applesmc_ids[] = {
++	{"APP0001", 0},
++	{"", 0},
++};
++
+ static const struct dev_pm_ops applesmc_pm_ops = {
+ 	.resume = applesmc_pm_resume,
+ 	.restore = applesmc_pm_restore,
+ };
+ 
+-static struct platform_driver applesmc_driver = {
+-	.probe = applesmc_probe,
+-	.driver	= {
+-		.name = "applesmc",
+-		.pm = &applesmc_pm_ops,
++static struct acpi_driver applesmc_driver = {
++	.name = "applesmc",
++	.class = "applesmc",
++	.ids = applesmc_ids,
++	.ops = {
++		.add = applesmc_add,
++		.remove = applesmc_remove
++	},
++	.drv = {
++		.pm = &applesmc_pm_ops
+ 	},
+ };
+ 
+@@ -714,25 +1149,26 @@ static struct platform_driver applesmc_driver = {
+  * applesmc_calibrate - Set our "resting" values.  Callers must
+  * hold applesmc_lock.
+  */
+-static void applesmc_calibrate(void)
++static void applesmc_calibrate(struct applesmc_device *smc)
+ {
+-	applesmc_read_s16(MOTION_SENSOR_X_KEY, &rest_x);
+-	applesmc_read_s16(MOTION_SENSOR_Y_KEY, &rest_y);
+-	rest_x = -rest_x;
++	applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &smc->rest_x);
++	applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &smc->rest_y);
++	smc->rest_x = -smc->rest_x;
+ }
+ 
+ static void applesmc_idev_poll(struct input_dev *idev)
+ {
++	struct applesmc_device *smc = dev_get_drvdata(idev->dev.parent); /* set on parent */
+ 	s16 x, y;
+ 
+-	if (applesmc_read_s16(MOTION_SENSOR_X_KEY, &x))
++	if (applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &x))
+ 		return;
+-	if (applesmc_read_s16(MOTION_SENSOR_Y_KEY, &y))
++	if (applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &y))
+ 		return;
+ 
+ 	x = -x;
+-	input_report_abs(idev, ABS_X, x - rest_x);
+-	input_report_abs(idev, ABS_Y, y - rest_y);
++	input_report_abs(idev, ABS_X, x - smc->rest_x);
++	input_report_abs(idev, ABS_Y, y - smc->rest_y);
+ 	input_sync(idev);
+ }
+ 
+@@ -747,16 +1183,17 @@ static ssize_t applesmc_name_show(struct device *dev,
+ static ssize_t applesmc_position_show(struct device *dev,
+ 				   struct device_attribute *attr, char *buf)
+ {
++	struct applesmc_device *smc = dev_get_drvdata(dev);
+ 	int ret;
+ 	s16 x, y, z;
+ 
+-	ret = applesmc_read_s16(MOTION_SENSOR_X_KEY, &x);
++	ret = applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &x);
+ 	if (ret)
+ 		goto out;
+-	ret = applesmc_read_s16(MOTION_SENSOR_Y_KEY, &y);
++	ret = applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &y);
+ 	if (ret)
+ 		goto out;
+-	ret = applesmc_read_s16(MOTION_SENSOR_Z_KEY, &z);
++	ret = applesmc_read_s16(smc, MOTION_SENSOR_Z_KEY, &z);
+ 	if (ret)
+ 		goto out;
+ 
+@@ -770,6 +1207,7 @@ static ssize_t applesmc_position_show(struct device *dev,
+ static ssize_t applesmc_light_show(struct device *dev,
+ 				struct device_attribute *attr, char *sysfsbuf)
+ {
++	struct applesmc_device *smc = dev_get_drvdata(dev);
+ 	const struct applesmc_entry *entry;
+ 	static int data_length;
+ 	int ret;
+@@ -777,7 +1215,7 @@ static ssize_t applesmc_light_show(struct device *dev,
+ 	u8 buffer[10];
+ 
+ 	if (!data_length) {
+-		entry = applesmc_get_entry_by_key(LIGHT_SENSOR_LEFT_KEY);
++		entry = applesmc_get_entry_by_key(smc, LIGHT_SENSOR_LEFT_KEY);
+ 		if (IS_ERR(entry))
+ 			return PTR_ERR(entry);
+ 		if (entry->len > 10)
+@@ -786,7 +1224,7 @@ static ssize_t applesmc_light_show(struct device *dev,
+ 		pr_info("light sensor data length set to %d\n", data_length);
+ 	}
+ 
+-	ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, data_length);
++	ret = applesmc_read_key(smc, LIGHT_SENSOR_LEFT_KEY, buffer, data_length);
+ 	if (ret)
+ 		goto out;
+ 	/* newer macbooks report a single 10-bit bigendian value */
+@@ -796,7 +1234,7 @@ static ssize_t applesmc_light_show(struct device *dev,
+ 	}
+ 	left = buffer[2];
+ 
+-	ret = applesmc_read_key(LIGHT_SENSOR_RIGHT_KEY, buffer, data_length);
++	ret = applesmc_read_key(smc, LIGHT_SENSOR_RIGHT_KEY, buffer, data_length);
+ 	if (ret)
+ 		goto out;
+ 	right = buffer[2];
+@@ -812,7 +1250,8 @@ static ssize_t applesmc_light_show(struct device *dev,
+ static ssize_t applesmc_show_sensor_label(struct device *dev,
+ 			struct device_attribute *devattr, char *sysfsbuf)
+ {
+-	const char *key = smcreg.index[to_index(devattr)];
++	struct applesmc_device *smc = dev_get_drvdata(dev);
++	const char *key = smc->reg.index[to_index(devattr)];
+ 
+ 	return sysfs_emit(sysfsbuf, "%s\n", key);
+ }
+@@ -821,12 +1260,13 @@ static ssize_t applesmc_show_sensor_label(struct device *dev,
+ static ssize_t applesmc_show_temperature(struct device *dev,
+ 			struct device_attribute *devattr, char *sysfsbuf)
+ {
+-	const char *key = smcreg.index[to_index(devattr)];
++	struct applesmc_device *smc = dev_get_drvdata(dev);
++	const char *key = smc->reg.index[to_index(devattr)];
+ 	int ret;
+ 	s16 value;
+ 	int temp;
+ 
+-	ret = applesmc_read_s16(key, &value);
++	ret = applesmc_read_s16(smc, key, &value);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -838,6 +1278,8 @@ static ssize_t applesmc_show_temperature(struct device *dev,
+ static ssize_t applesmc_show_fan_speed(struct device *dev,
+ 				struct device_attribute *attr, char *sysfsbuf)
+ {
++	struct applesmc_device *smc = dev_get_drvdata(dev);
++	const struct applesmc_entry *entry;
+ 	int ret;
+ 	unsigned int speed = 0;
+ 	char newkey[5];
+@@ -846,11 +1288,21 @@ static ssize_t applesmc_show_fan_speed(struct device *dev,
+ 	scnprintf(newkey, sizeof(newkey), fan_speed_fmt[to_option(attr)],
+ 		  to_index(attr));
+ 
+-	ret = applesmc_read_key(newkey, buffer, 2);
++	entry = applesmc_get_entry_by_key(smc, newkey);
++	if (IS_ERR(entry))
++		return PTR_ERR(entry);
++
++	if (!strcmp(entry->type, FLOAT_TYPE)) {
++		ret = applesmc_read_entry(smc, entry, (u8 *) &speed, 4);
++		speed = applesmc_float_to_u32(speed);
++	} else {
++		ret = applesmc_read_entry(smc, entry, buffer, 2);
++		speed = ((buffer[0] << 8 | buffer[1]) >> 2);
++	}
++
+ 	if (ret)
+ 		return ret;
+ 
+-	speed = ((buffer[0] << 8 | buffer[1]) >> 2);
+ 	return sysfs_emit(sysfsbuf, "%u\n", speed);
+ }
+ 
+@@ -858,6 +1310,8 @@ static ssize_t applesmc_store_fan_speed(struct device *dev,
+ 					struct device_attribute *attr,
+ 					const char *sysfsbuf, size_t count)
+ {
++	struct applesmc_device *smc = dev_get_drvdata(dev);
++	const struct applesmc_entry *entry;
+ 	int ret;
+ 	unsigned long speed;
+ 	char newkey[5];
+@@ -869,9 +1323,18 @@ static ssize_t applesmc_store_fan_speed(struct device *dev,
+ 	scnprintf(newkey, sizeof(newkey), fan_speed_fmt[to_option(attr)],
+ 		  to_index(attr));
+ 
+-	buffer[0] = (speed >> 6) & 0xff;
+-	buffer[1] = (speed << 2) & 0xff;
+-	ret = applesmc_write_key(newkey, buffer, 2);
++	entry = applesmc_get_entry_by_key(smc, newkey);
++	if (IS_ERR(entry))
++		return PTR_ERR(entry);
++
++	if (!strcmp(entry->type, FLOAT_TYPE)) {
++		speed = applesmc_u32_to_float(speed);
++		ret = applesmc_write_entry(smc, entry, (u8 *) &speed, 4);
++	} else {
++		buffer[0] = (speed >> 6) & 0xff;
++		buffer[1] = (speed << 2) & 0xff;
++		ret = applesmc_write_key(smc, newkey, buffer, 2);
++	}
+ 
+ 	if (ret)
+ 		return ret;
+@@ -882,15 +1345,30 @@ static ssize_t applesmc_store_fan_speed(struct device *dev,
+ static ssize_t applesmc_show_fan_manual(struct device *dev,
+ 			struct device_attribute *attr, char *sysfsbuf)
+ {
++	struct applesmc_device *smc = dev_get_drvdata(dev);
+ 	int ret;
+ 	u16 manual = 0;
+ 	u8 buffer[2];
++	char newkey[5];
++	bool has_newkey = false;
++
++	scnprintf(newkey, sizeof(newkey), FAN_MANUAL_FMT, to_index(attr));
++
++	ret = applesmc_has_key(smc, newkey, &has_newkey);
++	if (ret)
++		return ret;
++
++	if (has_newkey) {
++		ret = applesmc_read_key(smc, newkey, buffer, 1);
++		manual = buffer[0];
++	} else {
++		ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2);
++		manual = ((buffer[0] << 8 | buffer[1]) >> to_index(attr)) & 0x01;
++	}
+ 
+-	ret = applesmc_read_key(FANS_MANUAL, buffer, 2);
+ 	if (ret)
+ 		return ret;
+ 
+-	manual = ((buffer[0] << 8 | buffer[1]) >> to_index(attr)) & 0x01;
+ 	return sysfs_emit(sysfsbuf, "%d\n", manual);
+ }
+ 
+@@ -898,29 +1376,42 @@ static ssize_t applesmc_store_fan_manual(struct device *dev,
+ 					 struct device_attribute *attr,
+ 					 const char *sysfsbuf, size_t count)
+ {
++	struct applesmc_device *smc = dev_get_drvdata(dev);
+ 	int ret;
+ 	u8 buffer[2];
++	char newkey[5];
++	bool has_newkey = false;
+ 	unsigned long input;
+ 	u16 val;
+ 
+ 	if (kstrtoul(sysfsbuf, 10, &input) < 0)
+ 		return -EINVAL;
+ 
+-	ret = applesmc_read_key(FANS_MANUAL, buffer, 2);
++	scnprintf(newkey, sizeof(newkey), FAN_MANUAL_FMT, to_index(attr));
++
++	ret = applesmc_has_key(smc, newkey, &has_newkey);
+ 	if (ret)
+-		goto out;
++		return ret;
+ 
+-	val = (buffer[0] << 8 | buffer[1]);
++	if (has_newkey) {
++		buffer[0] = input & 1;
++		ret = applesmc_write_key(smc, newkey, buffer, 1);
++	} else {
++		ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2);
++		val = (buffer[0] << 8 | buffer[1]);
++		if (ret)
++			goto out;
+ 
+-	if (input)
+-		val = val | (0x01 << to_index(attr));
+-	else
+-		val = val & ~(0x01 << to_index(attr));
++		if (input)
++			val = val | (0x01 << to_index(attr));
++		else
++			val = val & ~(0x01 << to_index(attr));
+ 
+-	buffer[0] = (val >> 8) & 0xFF;
+-	buffer[1] = val & 0xFF;
++		buffer[0] = (val >> 8) & 0xFF;
++		buffer[1] = val & 0xFF;
+ 
+-	ret = applesmc_write_key(FANS_MANUAL, buffer, 2);
++		ret = applesmc_write_key(smc, FANS_MANUAL, buffer, 2);
++	}
+ 
+ out:
+ 	if (ret)
+@@ -932,13 +1423,14 @@ static ssize_t applesmc_store_fan_manual(struct device *dev,
+ static ssize_t applesmc_show_fan_position(struct device *dev,
+ 				struct device_attribute *attr, char *sysfsbuf)
+ {
++	struct applesmc_device *smc = dev_get_drvdata(dev);
+ 	int ret;
+ 	char newkey[5];
+ 	u8 buffer[17];
+ 
+ 	scnprintf(newkey, sizeof(newkey), FAN_ID_FMT, to_index(attr));
+ 
+-	ret = applesmc_read_key(newkey, buffer, 16);
++	ret = applesmc_read_key(smc, newkey, buffer, 16);
+ 	buffer[16] = 0;
+ 
+ 	if (ret)
+@@ -950,43 +1442,79 @@ static ssize_t applesmc_show_fan_position(struct device *dev,
+ static ssize_t applesmc_calibrate_show(struct device *dev,
+ 				struct device_attribute *attr, char *sysfsbuf)
+ {
+-	return sysfs_emit(sysfsbuf, "(%d,%d)\n", rest_x, rest_y);
++	struct applesmc_device *smc = dev_get_drvdata(dev);
++
++	return sysfs_emit(sysfsbuf, "(%d,%d)\n", smc->rest_x, smc->rest_y);
+ }
+ 
+ static ssize_t applesmc_calibrate_store(struct device *dev,
+ 	struct device_attribute *attr, const char *sysfsbuf, size_t count)
+ {
+-	applesmc_calibrate();
++	struct applesmc_device *smc = dev_get_drvdata(dev);
++
++	applesmc_calibrate(smc);
+ 
+ 	return count;
+ }
+ 
+ static void applesmc_backlight_set(struct work_struct *work)
+ {
+-	applesmc_write_key(BACKLIGHT_KEY, backlight_state, 2);
++	struct applesmc_device *smc = container_of(work, struct applesmc_device, backlight_work);
++
++	applesmc_write_key(smc, BACKLIGHT_KEY, smc->backlight_state, 2);
+ }
+-static DECLARE_WORK(backlight_work, &applesmc_backlight_set);
+ 
+ static void applesmc_brightness_set(struct led_classdev *led_cdev,
+ 						enum led_brightness value)
+ {
++	struct applesmc_device *smc = container_of(led_cdev, struct applesmc_device, backlight_dev);
+ 	int ret;
+ 
+-	backlight_state[0] = value;
+-	ret = queue_work(applesmc_led_wq, &backlight_work);
++	smc->backlight_state[0] = value;	/* written to the SMC by backlight_work */
++	ret = queue_work(smc->backlight_wq, &smc->backlight_work);
+ 
+ 	if (debug && (!ret))
+ 		dev_dbg(led_cdev->dev, "work was already on the queue.\n");
+ }
+ 
++/*
++ * applesmc_BCLM_store - set the battery charge limit (0-100) via SMC key "BCLM".
++ */
++static ssize_t applesmc_BCLM_store(struct device *dev,
++		struct device_attribute *attr, const char *sysfsbuf, size_t count)
++{
++	struct applesmc_device *smc = dev_get_drvdata(dev);
++	u8 val;
++
++	if (kstrtou8(sysfsbuf, 10, &val) < 0 || val > 100)
++		return -EINVAL;
++
++	if (applesmc_write_key(smc, "BCLM", &val, 1))
++		return -ENODEV;
++	return count;
++}
++
++/* applesmc_BCLM_show - print the byte read from SMC key "BCLM" in decimal. */
++static ssize_t applesmc_BCLM_show(struct device *dev,
++		struct device_attribute *attr, char *sysfsbuf)
++{
++	struct applesmc_device *smc = dev_get_drvdata(dev);
++	u8 limit;
++
++	if (applesmc_read_key(smc, "BCLM", &limit, 1))
++		return -ENODEV;
++	return sysfs_emit(sysfsbuf, "%d\n", limit);
++}
++
+ static ssize_t applesmc_key_count_show(struct device *dev,
+ 				struct device_attribute *attr, char *sysfsbuf)
+ {
++	struct applesmc_device *smc = dev_get_drvdata(dev);
+ 	int ret;
+ 	u8 buffer[4];
+ 	u32 count;
+ 
+-	ret = applesmc_read_key(KEY_COUNT_KEY, buffer, 4);
++	ret = applesmc_read_key(smc, KEY_COUNT_KEY, buffer, 4);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -998,13 +1526,14 @@ static ssize_t applesmc_key_count_show(struct device *dev,
+ static ssize_t applesmc_key_at_index_read_show(struct device *dev,
+ 				struct device_attribute *attr, char *sysfsbuf)
+ {
++	struct applesmc_device *smc = dev_get_drvdata(dev);
+ 	const struct applesmc_entry *entry;
+ 	int ret;
+ 
+-	entry = applesmc_get_entry_by_index(key_at_index);
++	entry = applesmc_get_entry_by_index(smc, smc->key_at_index);
+ 	if (IS_ERR(entry))
+ 		return PTR_ERR(entry);
+-	ret = applesmc_read_entry(entry, sysfsbuf, entry->len);
++	ret = applesmc_read_entry(smc, entry, sysfsbuf, entry->len);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -1014,9 +1543,10 @@ static ssize_t applesmc_key_at_index_read_show(struct device *dev,
+ static ssize_t applesmc_key_at_index_data_length_show(struct device *dev,
+ 				struct device_attribute *attr, char *sysfsbuf)
+ {
++	struct applesmc_device *smc = dev_get_drvdata(dev);
+ 	const struct applesmc_entry *entry;
+ 
+-	entry = applesmc_get_entry_by_index(key_at_index);
++	entry = applesmc_get_entry_by_index(smc, smc->key_at_index);
+ 	if (IS_ERR(entry))
+ 		return PTR_ERR(entry);
+ 
+@@ -1026,9 +1556,10 @@ static ssize_t applesmc_key_at_index_data_length_show(struct device *dev,
+ static ssize_t applesmc_key_at_index_type_show(struct device *dev,
+ 				struct device_attribute *attr, char *sysfsbuf)
+ {
++	struct applesmc_device *smc = dev_get_drvdata(dev);
+ 	const struct applesmc_entry *entry;
+ 
+-	entry = applesmc_get_entry_by_index(key_at_index);
++	entry = applesmc_get_entry_by_index(smc, smc->key_at_index);
+ 	if (IS_ERR(entry))
+ 		return PTR_ERR(entry);
+ 
+@@ -1038,9 +1569,10 @@ static ssize_t applesmc_key_at_index_type_show(struct device *dev,
+ static ssize_t applesmc_key_at_index_name_show(struct device *dev,
+ 				struct device_attribute *attr, char *sysfsbuf)
+ {
++	struct applesmc_device *smc = dev_get_drvdata(dev);
+ 	const struct applesmc_entry *entry;
+ 
+-	entry = applesmc_get_entry_by_index(key_at_index);
++	entry = applesmc_get_entry_by_index(smc, smc->key_at_index);
+ 	if (IS_ERR(entry))
+ 		return PTR_ERR(entry);
+ 
+@@ -1050,28 +1582,25 @@ static ssize_t applesmc_key_at_index_name_show(struct device *dev,
+ static ssize_t applesmc_key_at_index_show(struct device *dev,
+ 				struct device_attribute *attr, char *sysfsbuf)
+ {
+-	return sysfs_emit(sysfsbuf, "%d\n", key_at_index);
++	struct applesmc_device *smc = dev_get_drvdata(dev);
++
++	return sysfs_emit(sysfsbuf, "%d\n", smc->key_at_index);
+ }
+ 
+ static ssize_t applesmc_key_at_index_store(struct device *dev,
+ 	struct device_attribute *attr, const char *sysfsbuf, size_t count)
+ {
++	struct applesmc_device *smc = dev_get_drvdata(dev);
+ 	unsigned long newkey;
+ 
+ 	if (kstrtoul(sysfsbuf, 10, &newkey) < 0
+-	    || newkey >= smcreg.key_count)
++	    || newkey >= smc->reg.key_count)
+ 		return -EINVAL;
+ 
+-	key_at_index = newkey;
++	smc->key_at_index = newkey;
+ 	return count;
+ }
+ 
+-static struct led_classdev applesmc_backlight = {
+-	.name			= "smc::kbd_backlight",
+-	.default_trigger	= "nand-disk",
+-	.brightness_set		= applesmc_brightness_set,
+-};
+-
+ static struct applesmc_node_group info_group[] = {
+ 	{ "name", applesmc_name_show },
+ 	{ "key_count", applesmc_key_count_show },
+@@ -1111,19 +1640,25 @@ static struct applesmc_node_group temp_group[] = {
+ 	{ }
+ };
+ 
++static struct applesmc_node_group BCLM_group[] = {
++	{ "battery_charge_limit", applesmc_BCLM_show, applesmc_BCLM_store },
++	{ }
++};
++
+ /* Module stuff */
+ 
+ /*
+  * applesmc_destroy_nodes - remove files and free associated memory
+  */
+-static void applesmc_destroy_nodes(struct applesmc_node_group *groups)
++static void applesmc_destroy_nodes(struct applesmc_device *smc,
++	struct applesmc_node_group *groups)
+ {
+ 	struct applesmc_node_group *grp;
+ 	struct applesmc_dev_attr *node;
+ 
+ 	for (grp = groups; grp->nodes; grp++) {
+ 		for (node = grp->nodes; node->sda.dev_attr.attr.name; node++)
+-			sysfs_remove_file(&pdev->dev.kobj,
++			sysfs_remove_file(&smc->dev->dev.kobj,
+ 					  &node->sda.dev_attr.attr);
+ 		kfree(grp->nodes);
+ 		grp->nodes = NULL;
+@@ -1133,7 +1668,8 @@ static void applesmc_destroy_nodes(struct applesmc_node_group *groups)
+ /*
+  * applesmc_create_nodes - create a two-dimensional group of sysfs files
+  */
+-static int applesmc_create_nodes(struct applesmc_node_group *groups, int num)
++static int applesmc_create_nodes(struct applesmc_device *smc,
++	struct applesmc_node_group *groups, int num)
+ {
+ 	struct applesmc_node_group *grp;
+ 	struct applesmc_dev_attr *node;
+@@ -1157,7 +1693,7 @@ static int applesmc_create_nodes(struct applesmc_node_group *groups, int num)
+ 			sysfs_attr_init(attr);
+ 			attr->name = node->name;
+ 			attr->mode = 0444 | (grp->store ? 0200 : 0);
+-			ret = sysfs_create_file(&pdev->dev.kobj, attr);
++			ret = sysfs_create_file(&smc->dev->dev.kobj, attr);
+ 			if (ret) {
+ 				attr->name = NULL;
+ 				goto out;
+@@ -1167,57 +1703,56 @@ static int applesmc_create_nodes(struct applesmc_node_group *groups, int num)
+ 
+ 	return 0;
+ out:
+-	applesmc_destroy_nodes(groups);
++	applesmc_destroy_nodes(smc, groups);
+ 	return ret;
+ }
+ 
+ /* Create accelerometer resources */
+-static int applesmc_create_accelerometer(void)
++static int applesmc_create_accelerometer(struct applesmc_device *smc)
+ {
+ 	int ret;
+-
+-	if (!smcreg.has_accelerometer)
++	if (!smc->reg.has_accelerometer)
+ 		return 0;
+ 
+-	ret = applesmc_create_nodes(accelerometer_group, 1);
++	ret = applesmc_create_nodes(smc, accelerometer_group, 1);
+ 	if (ret)
+ 		goto out;
+ 
+-	applesmc_idev = input_allocate_device();
+-	if (!applesmc_idev) {
++	smc->idev = input_allocate_device();
++	if (!smc->idev) {
+ 		ret = -ENOMEM;
+ 		goto out_sysfs;
+ 	}
+ 
+ 	/* initial calibrate for the input device */
+-	applesmc_calibrate();
++	applesmc_calibrate(smc);
+ 
+ 	/* initialize the input device */
+-	applesmc_idev->name = "applesmc";
+-	applesmc_idev->id.bustype = BUS_HOST;
+-	applesmc_idev->dev.parent = &pdev->dev;
+-	input_set_abs_params(applesmc_idev, ABS_X,
++	smc->idev->name = "applesmc";
++	smc->idev->id.bustype = BUS_HOST;
++	smc->idev->dev.parent = &smc->dev->dev;
++	input_set_abs_params(smc->idev, ABS_X,
+ 			-256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT);
+-	input_set_abs_params(applesmc_idev, ABS_Y,
++	input_set_abs_params(smc->idev, ABS_Y,
+ 			-256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT);
+ 
+-	ret = input_setup_polling(applesmc_idev, applesmc_idev_poll);
++	ret = input_setup_polling(smc->idev, applesmc_idev_poll);
+ 	if (ret)
+ 		goto out_idev;
+ 
+-	input_set_poll_interval(applesmc_idev, APPLESMC_POLL_INTERVAL);
++	input_set_poll_interval(smc->idev, APPLESMC_POLL_INTERVAL);
+ 
+-	ret = input_register_device(applesmc_idev);
++	ret = input_register_device(smc->idev);
+ 	if (ret)
+ 		goto out_idev;
+ 
+ 	return 0;
+ 
+ out_idev:
+-	input_free_device(applesmc_idev);
++	input_free_device(smc->idev);
+ 
+ out_sysfs:
+-	applesmc_destroy_nodes(accelerometer_group);
++	applesmc_destroy_nodes(smc, accelerometer_group);
+ 
+ out:
+ 	pr_warn("driver init failed (ret=%d)!\n", ret);
+@@ -1225,44 +1760,55 @@ static int applesmc_create_accelerometer(void)
+ }
+ 
+ /* Release all resources used by the accelerometer */
+-static void applesmc_release_accelerometer(void)
++static void applesmc_release_accelerometer(struct applesmc_device *smc)
+ {
+-	if (!smcreg.has_accelerometer)
++	if (!smc->reg.has_accelerometer)
+ 		return;
+-	input_unregister_device(applesmc_idev);
+-	applesmc_destroy_nodes(accelerometer_group);
++	input_unregister_device(smc->idev);
++	applesmc_destroy_nodes(smc, accelerometer_group);
+ }
+ 
+-static int applesmc_create_light_sensor(void)
++static int applesmc_create_light_sensor(struct applesmc_device *smc)
+ {
+-	if (!smcreg.num_light_sensors)
++	if (!smc->reg.num_light_sensors)
+ 		return 0;
+-	return applesmc_create_nodes(light_sensor_group, 1);
++	return applesmc_create_nodes(smc, light_sensor_group, 1);
+ }
+ 
+-static void applesmc_release_light_sensor(void)
++static void applesmc_release_light_sensor(struct applesmc_device *smc)
+ {
+-	if (!smcreg.num_light_sensors)
++	if (!smc->reg.num_light_sensors)
+ 		return;
+-	applesmc_destroy_nodes(light_sensor_group);
++	applesmc_destroy_nodes(smc, light_sensor_group);
+ }
+ 
+-static int applesmc_create_key_backlight(void)
++static int applesmc_create_key_backlight(struct applesmc_device *smc)
+ {
+-	if (!smcreg.has_key_backlight)
++	int ret;
++
++	if (!smc->reg.has_key_backlight)
+ 		return 0;
+-	applesmc_led_wq = create_singlethread_workqueue("applesmc-led");
+-	if (!applesmc_led_wq)
++	smc->backlight_wq = create_singlethread_workqueue("applesmc-led");
++	if (!smc->backlight_wq)
+ 		return -ENOMEM;
+-	return led_classdev_register(&pdev->dev, &applesmc_backlight);
++
++	INIT_WORK(&smc->backlight_work, applesmc_backlight_set);
++	smc->backlight_dev.name = "smc::kbd_backlight";
++	smc->backlight_dev.default_trigger = "nand-disk";
++	smc->backlight_dev.brightness_set = applesmc_brightness_set;
++	ret = led_classdev_register(&smc->dev->dev, &smc->backlight_dev);
++	if (ret)
++		destroy_workqueue(smc->backlight_wq);
++
++	return ret;
+ }
+ 
+-static void applesmc_release_key_backlight(void)
++static void applesmc_release_key_backlight(struct applesmc_device *smc)
+ {
+-	if (!smcreg.has_key_backlight)
++	if (!smc->reg.has_key_backlight)
+ 		return;
+-	led_classdev_unregister(&applesmc_backlight);
+-	destroy_workqueue(applesmc_led_wq);
++	led_classdev_unregister(&smc->backlight_dev);
++	destroy_workqueue(smc->backlight_wq);
+ }
+ 
+ static int applesmc_dmi_match(const struct dmi_system_id *id)
+@@ -1291,6 +1837,10 @@ static const struct dmi_system_id applesmc_whitelist[] __initconst = {
+ 	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+ 	  DMI_MATCH(DMI_PRODUCT_NAME, "Macmini") },
+ 	},
++	{ applesmc_dmi_match, "Apple iMacPro", {
++	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
++	  DMI_MATCH(DMI_PRODUCT_NAME, "iMacPro") },
++	},
+ 	{ applesmc_dmi_match, "Apple MacPro", {
+ 	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+ 	  DMI_MATCH(DMI_PRODUCT_NAME, "MacPro") },
+@@ -1306,90 +1856,91 @@ static const struct dmi_system_id applesmc_whitelist[] __initconst = {
+ 	{ .ident = NULL }
+ };
+ 
+-static int __init applesmc_init(void)
++static int applesmc_create_modules(struct applesmc_device *smc)
+ {
+ 	int ret;
+ 
+-	if (!dmi_check_system(applesmc_whitelist)) {
+-		pr_warn("supported laptop not found!\n");
+-		ret = -ENODEV;
+-		goto out;
+-	}
+-
+-	if (!request_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS,
+-								"applesmc")) {
+-		ret = -ENXIO;
+-		goto out;
+-	}
+-
+-	ret = platform_driver_register(&applesmc_driver);
+-	if (ret)
+-		goto out_region;
+-
+-	pdev = platform_device_register_simple("applesmc", APPLESMC_DATA_PORT,
+-					       NULL, 0);
+-	if (IS_ERR(pdev)) {
+-		ret = PTR_ERR(pdev);
+-		goto out_driver;
+-	}
+-
+-	/* create register cache */
+-	ret = applesmc_init_smcreg();
++	ret = applesmc_create_nodes(smc, info_group, 1);
+ 	if (ret)
+-		goto out_device;
+-
+-	ret = applesmc_create_nodes(info_group, 1);
++		goto out;
++	ret = applesmc_create_nodes(smc, BCLM_group, 1);
+ 	if (ret)
+-		goto out_smcreg;
++		goto out_info;
+ 
+-	ret = applesmc_create_nodes(fan_group, smcreg.fan_count);
++	ret = applesmc_create_nodes(smc, fan_group, smc->reg.fan_count);
+ 	if (ret)
+-		goto out_info;
++		goto out_bclm;
+ 
+-	ret = applesmc_create_nodes(temp_group, smcreg.index_count);
++	ret = applesmc_create_nodes(smc, temp_group, smc->reg.index_count);
+ 	if (ret)
+ 		goto out_fans;
+ 
+-	ret = applesmc_create_accelerometer();
++	ret = applesmc_create_accelerometer(smc);
+ 	if (ret)
+ 		goto out_temperature;
+ 
+-	ret = applesmc_create_light_sensor();
++	ret = applesmc_create_light_sensor(smc);
+ 	if (ret)
+ 		goto out_accelerometer;
+ 
+-	ret = applesmc_create_key_backlight();
++	ret = applesmc_create_key_backlight(smc);
+ 	if (ret)
+ 		goto out_light_sysfs;
+ 
+-	hwmon_dev = hwmon_device_register(&pdev->dev);
+-	if (IS_ERR(hwmon_dev)) {
+-		ret = PTR_ERR(hwmon_dev);
++	smc->hwmon_dev = hwmon_device_register(&smc->dev->dev);
++	if (IS_ERR(smc->hwmon_dev)) {
++		ret = PTR_ERR(smc->hwmon_dev);
+ 		goto out_light_ledclass;
+ 	}
+ 
+ 	return 0;
+ 
+ out_light_ledclass:
+-	applesmc_release_key_backlight();
++	applesmc_release_key_backlight(smc);
+ out_light_sysfs:
+-	applesmc_release_light_sensor();
++	applesmc_release_light_sensor(smc);
+ out_accelerometer:
+-	applesmc_release_accelerometer();
++	applesmc_release_accelerometer(smc);
+ out_temperature:
+-	applesmc_destroy_nodes(temp_group);
++	applesmc_destroy_nodes(smc, temp_group);
+ out_fans:
+-	applesmc_destroy_nodes(fan_group);
++	applesmc_destroy_nodes(smc, fan_group);
++out_bclm:
++	applesmc_destroy_nodes(smc, BCLM_group);
+ out_info:
+-	applesmc_destroy_nodes(info_group);
+-out_smcreg:
+-	applesmc_destroy_smcreg();
+-out_device:
+-	platform_device_unregister(pdev);
+-out_driver:
+-	platform_driver_unregister(&applesmc_driver);
+-out_region:
+-	release_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS);
++	applesmc_destroy_nodes(smc, info_group);
++out:
++	return ret;
++}
++
++static void applesmc_destroy_modules(struct applesmc_device *smc)
++{
++	hwmon_device_unregister(smc->hwmon_dev);
++	applesmc_release_key_backlight(smc);
++	applesmc_release_light_sensor(smc);
++	applesmc_release_accelerometer(smc);
++	applesmc_destroy_nodes(smc, temp_group);
++	applesmc_destroy_nodes(smc, fan_group);
++	applesmc_destroy_nodes(smc, BCLM_group);
++	applesmc_destroy_nodes(smc, info_group);
++}
++
++static int __init applesmc_init(void)
++{
++	int ret;
++
++	if (!dmi_check_system(applesmc_whitelist)) {
++		pr_warn("supported laptop not found!\n");
++		ret = -ENODEV;
++		goto out;
++	}
++
++	ret = acpi_bus_register_driver(&applesmc_driver);
++	if (ret)
++		goto out;
++
++	return 0;
++
+ out:
+ 	pr_warn("driver init failed (ret=%d)!\n", ret);
+ 	return ret;
+@@ -1397,23 +1948,14 @@ static int __init applesmc_init(void)
+ 
+ static void __exit applesmc_exit(void)
+ {
+-	hwmon_device_unregister(hwmon_dev);
+-	applesmc_release_key_backlight();
+-	applesmc_release_light_sensor();
+-	applesmc_release_accelerometer();
+-	applesmc_destroy_nodes(temp_group);
+-	applesmc_destroy_nodes(fan_group);
+-	applesmc_destroy_nodes(info_group);
+-	applesmc_destroy_smcreg();
+-	platform_device_unregister(pdev);
+-	platform_driver_unregister(&applesmc_driver);
+-	release_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS);
++	acpi_bus_unregister_driver(&applesmc_driver);
+ }
+ 
+ module_init(applesmc_init);
+ module_exit(applesmc_exit);
+ 
+ MODULE_AUTHOR("Nicolas Boichat");
++MODULE_AUTHOR("Paul Pawlowski");
+ MODULE_DESCRIPTION("Apple SMC");
+ MODULE_LICENSE("GPL v2");
+ MODULE_DEVICE_TABLE(dmi, applesmc_whitelist);
+diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c
+index 10a03a566905..8c3ccd98ba93 100644
+--- a/drivers/input/mouse/bcm5974.c
++++ b/drivers/input/mouse/bcm5974.c
+@@ -83,6 +83,24 @@
+ #define USB_DEVICE_ID_APPLE_WELLSPRING9_ISO	0x0273
+ #define USB_DEVICE_ID_APPLE_WELLSPRING9_JIS	0x0274
+ 
++/* T2-Attached Devices */
++/* MacbookAir8,1 (2018) */
++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K	0x027a
++/* MacbookPro15,2 (2018) */
++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132	0x027b
++/* MacbookPro15,1 (2018) */
++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680	0x027c
++/* MacbookPro15,4 (2019) */
++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213	0x027d
++/* MacbookPro16,2 (2020) */
++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K	0x027e
++/* MacbookPro16,3 (2020) */
++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223	0x027f
++/* MacbookAir9,1 (2020) */
++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K	0x0280
++/* MacbookPro16,1 (2019)*/
++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F	0x0340
++
+ #define BCM5974_DEVICE(prod) {					\
+ 	.match_flags = (USB_DEVICE_ID_MATCH_DEVICE |		\
+ 			USB_DEVICE_ID_MATCH_INT_CLASS |		\
+@@ -147,6 +165,22 @@ static const struct usb_device_id bcm5974_table[] = {
+ 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI),
+ 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_ISO),
+ 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_JIS),
++	/* MacbookAir8,1 */
++	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K),
++	/* MacbookPro15,2 */
++	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132),
++	/* MacbookPro15,1 */
++	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680),
++	/* MacbookPro15,4 */
++	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213),
++	/* MacbookPro16,2 */
++	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K),
++	/* MacbookPro16,3 */
++	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223),
++	/* MacbookAir9,1 */
++	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K),
++	/* MacbookPro16,1 */
++	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F),
+ 	/* Terminating entry */
+ 	{}
+ };
+@@ -483,6 +517,110 @@ static const struct bcm5974_config bcm5974_config_table[] = {
+ 		{ SN_COORD, -203, 6803 },
+ 		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
+ 	},
++	{
++		USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K,
++		0,
++		0,
++		HAS_INTEGRATED_BUTTON,
++		0, sizeof(struct bt_data),
++		0x83, DATAFORMAT(TYPE4),
++		{ SN_PRESSURE, 0, 300 },
++		{ SN_WIDTH, 0, 2048 },
++		{ SN_COORD, -6243, 6749 },
++		{ SN_COORD, -170, 7685 },
++		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
++	},
++	{
++		USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132,
++		0,
++		0,
++		HAS_INTEGRATED_BUTTON,
++		0, sizeof(struct bt_data),
++		0x83, DATAFORMAT(TYPE4),
++		{ SN_PRESSURE, 0, 300 },
++		{ SN_WIDTH, 0, 2048 },
++		{ SN_COORD, -6243, 6749 },
++		{ SN_COORD, -170, 7685 },
++		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
++	},
++	{
++		USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680,
++		0,
++		0,
++		HAS_INTEGRATED_BUTTON,
++		0, sizeof(struct bt_data),
++		0x83, DATAFORMAT(TYPE4),
++		{ SN_PRESSURE, 0, 300 },
++		{ SN_WIDTH, 0, 2048 },
++		{ SN_COORD, -7456, 7976 },
++		{ SN_COORD, -1768, 7685 },
++		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
++	},
++	{
++		USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213,
++		0,
++		0,
++		HAS_INTEGRATED_BUTTON,
++		0, sizeof(struct bt_data),
++		0x83, DATAFORMAT(TYPE4),
++		{ SN_PRESSURE, 0, 300 },
++		{ SN_WIDTH, 0, 2048 },
++		{ SN_COORD, -6243, 6749 },
++		{ SN_COORD, -170, 7685 },
++		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
++	},
++	{
++		USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K,
++		0,
++		0,
++		HAS_INTEGRATED_BUTTON,
++		0, sizeof(struct bt_data),
++		0x83, DATAFORMAT(TYPE4),
++		{ SN_PRESSURE, 0, 300 },
++		{ SN_WIDTH, 0, 2048 },
++		{ SN_COORD, -7823, 8329 },
++		{ SN_COORD, -370, 7925 },
++		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
++	},
++	{
++		USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223,
++		0,
++		0,
++		HAS_INTEGRATED_BUTTON,
++		0, sizeof(struct bt_data),
++		0x83, DATAFORMAT(TYPE4),
++		{ SN_PRESSURE, 0, 300 },
++		{ SN_WIDTH, 0, 2048 },
++		{ SN_COORD, -6243, 6749 },
++		{ SN_COORD, -170, 7685 },
++		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
++	},
++	{
++		USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K,
++		0,
++		0,
++		HAS_INTEGRATED_BUTTON,
++		0, sizeof(struct bt_data),
++		0x83, DATAFORMAT(TYPE4),
++		{ SN_PRESSURE, 0, 300 },
++		{ SN_WIDTH, 0, 2048 },
++		{ SN_COORD, -6243, 6749 },
++		{ SN_COORD, -170, 7685 },
++		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
++	},
++	{
++		USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F,
++		0,
++		0,
++		HAS_INTEGRATED_BUTTON,
++		0, sizeof(struct bt_data),
++		0x83, DATAFORMAT(TYPE4),
++		{ SN_PRESSURE, 0, 300 },
++		{ SN_WIDTH, 0, 2048 },
++		{ SN_COORD, -8916, 9918 },
++		{ SN_COORD, -1934, 9835 },
++		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
++	},
+ 	{}
+ };
+ 
+diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
+index 78748e8d2dba..2b2b558cebe6 100644
+--- a/drivers/pci/vgaarb.c
++++ b/drivers/pci/vgaarb.c
+@@ -143,6 +143,7 @@ void vga_set_default_device(struct pci_dev *pdev)
+ 	pci_dev_put(vga_default);
+ 	vga_default = pci_dev_get(pdev);
+ }
++EXPORT_SYMBOL_GPL(vga_set_default_device);
+ 
+ /**
+  * vga_remove_vgacon - deactivate VGA console
+diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
+index 1417e230edbd..e69785af8e1d 100644
+--- a/drivers/platform/x86/apple-gmux.c
++++ b/drivers/platform/x86/apple-gmux.c
+@@ -21,6 +21,7 @@
+ #include <linux/delay.h>
+ #include <linux/pci.h>
+ #include <linux/vga_switcheroo.h>
++#include <linux/vgaarb.h>
+ #include <linux/debugfs.h>
+ #include <acpi/video.h>
+ #include <asm/io.h>
+@@ -107,6 +108,10 @@ struct apple_gmux_config {
+ 
+ # define MMIO_GMUX_MAX_BRIGHTNESS	0xffff
+ 
++static bool force_igd; /* when set, gmux_probe() switches to the iGPU at 00:02.0 */
++module_param(force_igd, bool, 0);
++MODULE_PARM_DESC(force_igd, "Switch gpu to igd on module load. Make sure that you have apple-set-os set up and the iGPU is in `lspci -s 00:02.0`. (default: false) (bool)");
++
+ static u8 gmux_pio_read8(struct apple_gmux_data *gmux_data, int port)
+ {
+ 	return inb(gmux_data->iostart + port);
+@@ -945,6 +950,19 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
+ 	gmux_enable_interrupts(gmux_data);
+ 	gmux_read_switch_state(gmux_data);
+ 
++	if (force_igd) {
++		struct pci_dev *pdev = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(2, 0));
++
++		if (pdev) {
++			pr_info("Switching to IGD\n");
++			gmux_switchto(VGA_SWITCHEROO_IGD);
++			vga_set_default_device(pdev);
++			pci_dev_put(pdev); /* vga_set_default_device() holds its own reference */
++		} else {
++			pr_err("force_igd is true, but couldn't find iGPU at 00:02.0! Is apple-set-os working?\n");
++		}
++	}
++
+ 	/*
+ 	 * Retina MacBook Pros cannot switch the panel's AUX separately
+ 	 * and need eDP pre-calibration. They are distinguishable from
+diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
+index db4a392841b1..580df4ce4f9f 100644
+--- a/drivers/staging/Kconfig
++++ b/drivers/staging/Kconfig
+@@ -66,4 +66,6 @@ source "drivers/staging/fieldbus/Kconfig"
+ 
+ source "drivers/staging/vme_user/Kconfig"
+ 
++source "drivers/staging/apple-bce/Kconfig"
++
+ endif # STAGING
+diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
+index 5390879b5d1b..528be2d3b546 100644
+--- a/drivers/staging/Makefile
++++ b/drivers/staging/Makefile
+@@ -22,3 +22,4 @@ obj-$(CONFIG_GREYBUS)		+= greybus/
+ obj-$(CONFIG_BCM2835_VCHIQ)	+= vc04_services/
+ obj-$(CONFIG_XIL_AXIS_FIFO)	+= axis-fifo/
+ obj-$(CONFIG_FIELDBUS_DEV)     += fieldbus/
++obj-$(CONFIG_APPLE_BCE)		+= apple-bce/
+diff --git a/drivers/staging/apple-bce/Kconfig b/drivers/staging/apple-bce/Kconfig
+new file mode 100644
+index 000000000000..fe92bc441e89
+--- /dev/null
++++ b/drivers/staging/apple-bce/Kconfig
+@@ -0,0 +1,18 @@
++config APPLE_BCE
++	tristate "Apple BCE driver (VHCI and Audio support)"
++	default m
++	depends on X86
++	select SOUND
++	select SND
++	select SND_PCM
++	select SND_JACK
++	help
++	  VHCI and audio support on Apple MacBooks with the T2 Chip.
++	  This driver is divided into three components:
++	    - BCE (Buffer Copy Engine): which establishes a basic communication
++	      channel with the T2 chip. This component is required by the other two:
++	      - VHCI (Virtual Host Controller Interface): Access to keyboard, mouse
++	        and other system devices depend on this virtual USB host controller
++	      - Audio: a driver for the T2 audio interface.
++	 
++	  If "M" is selected, the module will be called apple-bce.
+diff --git a/drivers/staging/apple-bce/Makefile b/drivers/staging/apple-bce/Makefile
+new file mode 100644
+index 000000000000..8cfbd3f64af6
+--- /dev/null
++++ b/drivers/staging/apple-bce/Makefile
+@@ -0,0 +1,28 @@
++modname := apple-bce
++obj-$(CONFIG_APPLE_BCE) += $(modname).o
++
++apple-bce-objs := apple_bce.o mailbox.o queue.o queue_dma.o vhci/vhci.o vhci/queue.o vhci/transfer.o audio/audio.o audio/protocol.o audio/protocol_bce.o audio/pcm.o
++
++MY_CFLAGS += -DWITHOUT_NVME_PATCH
++#MY_CFLAGS += -g -DDEBUG
++ccflags-y += ${MY_CFLAGS}
++CC += ${MY_CFLAGS}
++
++KVERSION := $(KERNELRELEASE)
++ifeq ($(origin KERNELRELEASE), undefined)
++KVERSION := $(shell uname -r)
++endif
++
++KDIR := /lib/modules/$(KVERSION)/build
++PWD := $(shell pwd)
++
++.PHONY: all
++
++all:
++	$(MAKE) -C $(KDIR) M=$(PWD) modules
++
++clean:
++	$(MAKE) -C $(KDIR) M=$(PWD) clean
++
++install:
++	$(MAKE) -C $(KDIR) M=$(PWD) modules_install
+diff --git a/drivers/staging/apple-bce/apple_bce.c b/drivers/staging/apple-bce/apple_bce.c
+new file mode 100644
+index 000000000000..4fd2415d7028
+--- /dev/null
++++ b/drivers/staging/apple-bce/apple_bce.c
+@@ -0,0 +1,445 @@
++#include "apple_bce.h"
++#include <linux/module.h>
++#include <linux/crc32.h>
++#include "audio/audio.h"
++#include <linux/version.h>
++
++static dev_t bce_chrdev;
++static struct class *bce_class;
++
++struct apple_bce_device *global_bce;
++
++static int bce_create_command_queues(struct apple_bce_device *bce);
++static void bce_free_command_queues(struct apple_bce_device *bce);
++static irqreturn_t bce_handle_mb_irq(int irq, void *dev);
++static irqreturn_t bce_handle_dma_irq(int irq, void *dev);
++static int bce_fw_version_handshake(struct apple_bce_device *bce);
++static int bce_register_command_queue(struct apple_bce_device *bce, struct bce_queue_memcfg *cfg, int is_sq);
++
++static int apple_bce_probe(struct pci_dev *dev, const struct pci_device_id *id)
++{
++    struct apple_bce_device *bce = NULL;
++    int status = 0;
++    int nvec;
++
++    pr_info("apple-bce: capturing our device\n");
++    /* Claim the PCI function; vectors 0 (mailbox) and 4 (DMA) are requested below, hence nvec >= 5. */
++    if (pci_enable_device(dev))
++        return -ENODEV;
++    if (pci_request_regions(dev, "apple-bce")) {
++        status = -ENODEV;
++        goto fail;
++    }
++    pci_set_master(dev);
++    nvec = pci_alloc_irq_vectors(dev, 1, 8, PCI_IRQ_MSI);
++    if (nvec < 5) {
++        status = -EINVAL;
++        goto fail;
++    }
++
++    bce = kzalloc(sizeof(struct apple_bce_device), GFP_KERNEL);
++    if (!bce) {
++        status = -ENOMEM;
++        goto fail;
++    }
++
++    bce->pci = dev;
++    pci_set_drvdata(dev, bce);
++
++    bce->devt = bce_chrdev;
++    bce->dev = device_create(bce_class, &dev->dev, bce->devt, NULL, "apple-bce");
++    if (IS_ERR_OR_NULL(bce->dev)) {
++        status = PTR_ERR(bce->dev); /* error code returned by device_create() */
++        goto fail;
++    }
++    /* BAR 4 backs the mailbox and timestamp helpers initialised below; BAR 2 is kept in reg_mem_dma. */
++    bce->reg_mem_mb = pci_iomap(dev, 4, 0);
++    bce->reg_mem_dma = pci_iomap(dev, 2, 0);
++
++    if (IS_ERR_OR_NULL(bce->reg_mem_mb) || IS_ERR_OR_NULL(bce->reg_mem_dma)) {
++        dev_warn(&dev->dev, "apple-bce: Failed to pci_iomap required regions\n");
++        goto fail;
++    }
++
++    bce_mailbox_init(&bce->mbox, bce->reg_mem_mb);
++    bce_timestamp_init(&bce->timestamp, bce->reg_mem_mb);
++
++    spin_lock_init(&bce->queues_lock);
++    ida_init(&bce->queue_ida);
++
++    if ((status = pci_request_irq(dev, 0, bce_handle_mb_irq, NULL, dev, "bce_mbox")))
++        goto fail;
++    if ((status = pci_request_irq(dev, 4, NULL, bce_handle_dma_irq, dev, "bce_dma")))
++        goto fail_interrupt_0;
++
++    if ((status = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(37)))) {
++        dev_warn(&dev->dev, "dma: Setting mask failed\n");
++        goto fail_interrupt;
++    }
++
++    /* Gets the function 0's interface. This is needed because Apple only accepts DMA on our function if function 0
++       is a bus master, so we need to work around this. */
++    bce->pci0 = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
++#ifndef WITHOUT_NVME_PATCH
++    if ((status = pci_enable_device_mem(bce->pci0))) {
++        dev_warn(&dev->dev, "apple-bce: failed to enable function 0\n");
++        goto fail_dev0;
++    }
++#endif
++    pci_set_master(bce->pci0);
++
++    bce_timestamp_start(&bce->timestamp, true);
++
++    if ((status = bce_fw_version_handshake(bce)))
++        goto fail_ts;
++    pr_info("apple-bce: handshake done\n");
++
++    if ((status = bce_create_command_queues(bce))) {
++        pr_info("apple-bce: Creating command queues failed\n");
++        goto fail_ts;
++    }
++
++    global_bce = bce;
++
++    bce_vhci_create(bce, &bce->vhci);
++
++    return 0;
++
++fail_ts:
++    bce_timestamp_stop(&bce->timestamp);
++#ifndef WITHOUT_NVME_PATCH
++    pci_disable_device(bce->pci0);
++fail_dev0:
++#endif
++    pci_dev_put(bce->pci0);
++fail_interrupt:
++    pci_free_irq(dev, 4, dev);
++fail_interrupt_0:
++    pci_free_irq(dev, 0, dev);
++fail:
++    if (bce && bce->dev) {
++        device_destroy(bce_class, bce->devt);
++
++        if (!IS_ERR_OR_NULL(bce->reg_mem_mb))
++            pci_iounmap(dev, bce->reg_mem_mb);
++        if (!IS_ERR_OR_NULL(bce->reg_mem_dma))
++            pci_iounmap(dev, bce->reg_mem_dma);
++
++        kfree(bce);
++    }
++
++    pci_free_irq_vectors(dev);
++    pci_release_regions(dev);
++    pci_disable_device(dev);
++    /* status can still be 0 here (e.g. the pci_iomap failure path); never report success from a failed probe. */
++    if (!status)
++        status = -EINVAL;
++    return status;
++}
++
++static int bce_create_command_queues(struct apple_bce_device *bce)
++{
++    int status;
++    struct bce_queue_memcfg *cfg = NULL; /* NULL so the error path can kfree() it unconditionally */
++
++    bce->cmd_cq = bce_alloc_cq(bce, 0, 0x20);
++    bce->cmd_cmdq = bce_alloc_cmdq(bce, 1, 0x20);
++    if (bce->cmd_cq == NULL || bce->cmd_cmdq == NULL) {
++        status = -ENOMEM;
++        goto err;
++    }
++    bce->queues[0] = (struct bce_queue *) bce->cmd_cq;
++    bce->queues[1] = (struct bce_queue *) bce->cmd_cmdq->sq;
++    /* cfg is a scratch descriptor: it is filled in and registered once for the CQ, then once for the SQ. */
++    cfg = kzalloc(sizeof(struct bce_queue_memcfg), GFP_KERNEL);
++    if (!cfg) {
++        status = -ENOMEM;
++        goto err;
++    }
++    bce_get_cq_memcfg(bce->cmd_cq, cfg);
++    if ((status = bce_register_command_queue(bce, cfg, false)))
++        goto err;
++    bce_get_sq_memcfg(bce->cmd_cmdq->sq, bce->cmd_cq, cfg);
++    if ((status = bce_register_command_queue(bce, cfg, true)))
++        goto err;
++    kfree(cfg);
++    return 0;
++
++err:
++    kfree(cfg); /* no-op when cfg was never allocated */
++    if (bce->cmd_cq)
++        bce_free_cq(bce, bce->cmd_cq);
++    if (bce->cmd_cmdq)
++        bce_free_cmdq(bce, bce->cmd_cmdq);
++    return status;
++}
++
++static void bce_free_command_queues(struct apple_bce_device *bce)
++{
++    bce_free_cq(bce, bce->cmd_cq); /* releases what bce_create_command_queues() allocated */
++    bce_free_cmdq(bce, bce->cmd_cmdq);
++    bce->cmd_cq = NULL; /* only the CQ pointer and its queues[0] slot are cleared here */
++    bce->queues[0] = NULL;
++}
++
++static irqreturn_t bce_handle_mb_irq(int irq, void *dev)
++{
++    struct apple_bce_device *bce = pci_get_drvdata(dev); /* dev is the struct pci_dev given as dev_id in probe */
++    bce_mailbox_handle_interrupt(&bce->mbox); /* vector 0 ("bce_mbox"): all work is delegated to the mailbox */
++    return IRQ_HANDLED;
++}
++
++static irqreturn_t bce_handle_dma_irq(int irq, void *dev)
++{
++    int i;
++    struct apple_bce_device *bce = pci_get_drvdata(dev); /* dev is the struct pci_dev given as dev_id in probe */
++    spin_lock(&bce->queues_lock); /* held for the whole scan of the queue table */
++    for (i = 0; i < BCE_MAX_QUEUE_COUNT; i++) /* vector 4 ("bce_dma"): service every completion queue in the table */
++        if (bce->queues[i] && bce->queues[i]->type == BCE_QUEUE_CQ)
++            bce_handle_cq_completions(bce, (struct bce_queue_cq *) bce->queues[i]);
++    spin_unlock(&bce->queues_lock);
++    return IRQ_HANDLED;
++}
++
++static int bce_fw_version_handshake(struct apple_bce_device *bce)
++{
++    u64 result;
++    int status;
++    /* Send BC_PROTOCOL_VERSION over the mailbox; the reply must echo the same message type and value. */
++    if ((status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_SET_FW_PROTOCOL_VERSION, BC_PROTOCOL_VERSION),
++            &result)))
++        return status;
++    if (BCE_MB_TYPE(result) != BCE_MB_SET_FW_PROTOCOL_VERSION ||
++        BCE_MB_VALUE(result) != BC_PROTOCOL_VERSION) {
++        pr_err("apple-bce: FW version handshake failed %x:%llx\n", BCE_MB_TYPE(result), BCE_MB_VALUE(result));
++        return -EINVAL;
++    }
++    return 0;
++}
++
++static int bce_register_command_queue(struct apple_bce_device *bce, struct bce_queue_memcfg *cfg, int is_sq)
++{
++    int status;
++    int cmd_type;
++    u64 result;
++    // Register one command queue via the mailbox. OS X uses a bidirectional DMA mapping here, but that's not really needed
++    dma_addr_t a = dma_map_single(&bce->pci->dev, cfg, sizeof(struct bce_queue_memcfg), DMA_TO_DEVICE);
++    if (dma_mapping_error(&bce->pci->dev, a))
++        return -ENOMEM;
++    cmd_type = is_sq ? BCE_MB_REGISTER_COMMAND_SQ : BCE_MB_REGISTER_COMMAND_CQ;
++    status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(cmd_type, a), &result); /* payload is the DMA address of cfg */
++    dma_unmap_single(&bce->pci->dev, a, sizeof(struct bce_queue_memcfg), DMA_TO_DEVICE); /* mapping lives only for this call */
++    if (status)
++        return status;
++    if (BCE_MB_TYPE(result) != BCE_MB_REGISTER_COMMAND_QUEUE_REPLY)
++        return -EINVAL;
++    return 0;
++}
++
++static void apple_bce_remove(struct pci_dev *dev)
++{
++    struct apple_bce_device *bce = pci_get_drvdata(dev);
++    bce->is_being_removed = true;
++    /* Undo apple_bce_probe() roughly in reverse order, starting with the VHCI layer. */
++    bce_vhci_destroy(&bce->vhci);
++
++    bce_timestamp_stop(&bce->timestamp);
++#ifndef WITHOUT_NVME_PATCH
++    pci_disable_device(bce->pci0);
++#endif
++    pci_dev_put(bce->pci0); /* drops the reference taken by pci_get_slot() in probe */
++    pci_free_irq(dev, 0, dev);
++    pci_free_irq(dev, 4, dev);
++    bce_free_command_queues(bce); /* both IRQs are already freed at this point */
++    pci_iounmap(dev, bce->reg_mem_mb);
++    pci_iounmap(dev, bce->reg_mem_dma);
++    device_destroy(bce_class, bce->devt);
++    pci_free_irq_vectors(dev);
++    pci_release_regions(dev);
++    pci_disable_device(dev);
++    kfree(bce);
++}
++
++static int bce_save_state_and_sleep(struct apple_bce_device *bce)
++{
++    int attempt, status = 0;
++    u64 resp;
++    dma_addr_t dma_addr;
++    void *dma_ptr = NULL;
++    size_t size = max(PAGE_SIZE, 4096UL);
++    /* Up to five tries: the device may reject the buffer and request a larger one. Without a mailbox error, fall back to BCE_MB_SLEEP_NO_STATE. */
++    for (attempt = 0; attempt < 5; ++attempt) {
++        pr_debug("apple-bce: suspend: attempt %i, buffer size %zu\n", attempt, size);
++        dma_ptr = dma_alloc_coherent(&bce->pci->dev, size, &dma_addr, GFP_KERNEL);
++        if (!dma_ptr) {
++            pr_err("apple-bce: suspend failed (data alloc failed)\n");
++            break;
++        }
++        BUG_ON((dma_addr % 4096) != 0);
++        status = bce_mailbox_send(&bce->mbox,
++                BCE_MB_MSG(BCE_MB_SAVE_STATE_AND_SLEEP, (dma_addr & ~(4096LLU - 1)) | (size / 4096)), &resp);
++        if (status) {
++            pr_err("apple-bce: suspend failed (mailbox send)\n");
++            break;
++        }
++        if (BCE_MB_TYPE(resp) == BCE_MB_SAVE_RESTORE_STATE_COMPLETE) {
++            bce->saved_data_dma_addr = dma_addr; /* buffer is kept until bce_restore_state_and_wake() frees it */
++            bce->saved_data_dma_ptr = dma_ptr;
++            bce->saved_data_dma_size = size;
++            return 0;
++        } else if (BCE_MB_TYPE(resp) == BCE_MB_SAVE_STATE_AND_SLEEP_FAILURE) {
++            dma_free_coherent(&bce->pci->dev, size, dma_ptr, dma_addr);
++            /* The 0x10ff magic value was extracted from Apple's driver */
++            size = (BCE_MB_VALUE(resp) + 0x10ff) & ~(4096LLU - 1);
++            pr_debug("apple-bce: suspend: device requested a larger buffer (%zu)\n", size);
++            dma_ptr = NULL; /* already freed: keeps the cleanup after the loop from freeing it again */
++        } else {
++            pr_err("apple-bce: suspend failed (invalid device response)\n");
++            status = -EINVAL;
++            break;
++        }
++    }
++    if (dma_ptr)
++        dma_free_coherent(&bce->pci->dev, size, dma_ptr, dma_addr);
++    if (!status)
++        return bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_SLEEP_NO_STATE, 0), &resp);
++    return status;
++}
++
++static int bce_restore_state_and_wake(struct apple_bce_device *bce)
++{
++    int status;
++    u64 resp;
++    if (!bce->saved_data_dma_ptr) { /* no state buffer was kept at suspend: plain wake-up */
++        if ((status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_RESTORE_NO_STATE, 0), &resp))) {
++            pr_err("apple-bce: resume with no state failed (mailbox send)\n");
++            return status;
++        }
++        if (BCE_MB_TYPE(resp) != BCE_MB_RESTORE_NO_STATE) {
++            pr_err("apple-bce: resume with no state failed (invalid device response)\n");
++            return -EINVAL;
++        }
++        return 0;
++    }
++    /* Hand the saved buffer back: 4096-aligned DMA address in the upper bits, size in 4096-byte units in the lower bits. */
++    if ((status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_RESTORE_STATE_AND_WAKE,
++            (bce->saved_data_dma_addr & ~(4096LLU - 1)) | (bce->saved_data_dma_size / 4096)), &resp))) {
++        pr_err("apple-bce: resume with state failed (mailbox send)\n");
++        goto finish_with_state;
++    }
++    if (BCE_MB_TYPE(resp) != BCE_MB_SAVE_RESTORE_STATE_COMPLETE) {
++        pr_err("apple-bce: resume with state failed (invalid device response)\n");
++        status = -EINVAL;
++        goto finish_with_state;
++    }
++
++finish_with_state:
++    dma_free_coherent(&bce->pci->dev, bce->saved_data_dma_size, bce->saved_data_dma_ptr, bce->saved_data_dma_addr); /* freed on success and on failure */
++    bce->saved_data_dma_ptr = NULL;
++    return status;
++}
++
++static int apple_bce_suspend(struct device *dev)
++{
++    struct apple_bce_device *bce = pci_get_drvdata(to_pci_dev(dev));
++    int status;
++
++    bce_timestamp_stop(&bce->timestamp);
++    /* Ask the device to save its state and sleep; any error is returned to the caller unchanged. */
++    if ((status = bce_save_state_and_sleep(bce)))
++        return status;
++
++    return 0;
++}
++
++static int apple_bce_resume(struct device *dev)
++{
++    struct apple_bce_device *bce = pci_get_drvdata(to_pci_dev(dev));
++    int status;
++    /* Re-enable bus mastering on our function and on function 0 before talking to the device. */
++    pci_set_master(bce->pci);
++    pci_set_master(bce->pci0);
++
++    if ((status = bce_restore_state_and_wake(bce)))
++        return status;
++
++    bce_timestamp_start(&bce->timestamp, false); /* probe passes true here; resume passes false */
++
++    return 0;
++}
++
++static struct pci_device_id apple_bce_ids[  ] = {
++        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x1801) },
++        { 0, },
++};
++
++MODULE_DEVICE_TABLE(pci, apple_bce_ids);
++
++struct dev_pm_ops apple_bce_pci_driver_pm = {
++        .suspend = apple_bce_suspend,
++        .resume = apple_bce_resume
++};
++struct pci_driver apple_bce_pci_driver = {
++        .name = "apple-bce",
++        .id_table = apple_bce_ids,
++        .probe = apple_bce_probe,
++        .remove = apple_bce_remove,
++        .driver = {
++                .pm = &apple_bce_pci_driver_pm
++        }
++};
++
++
++static int __init apple_bce_module_init(void)
++{
++    int result;
++    if ((result = alloc_chrdev_region(&bce_chrdev, 0, 1, "apple-bce")))
++        goto fail_chrdev;
++#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0)
++    bce_class = class_create(THIS_MODULE, "apple-bce");
++#else
++    bce_class = class_create("apple-bce");
++#endif
++    if (IS_ERR(bce_class)) {
++        result = PTR_ERR(bce_class);
++        goto fail_class;
++    }
++    if ((result = bce_vhci_module_init())) {
++        pr_err("apple-bce: bce-vhci init failed\n");
++        goto fail_class;
++    }
++
++    result = pci_register_driver(&apple_bce_pci_driver);
++    if (result)
++        goto fail_drv;
++
++    aaudio_module_init();
++
++    return 0;
++    /* NOTE(review): fail_drv calls bce_vhci_module_exit() from __init code - confirm it is not annotated __exit. */
++fail_drv:
++    bce_vhci_module_exit(); /* registration failed: undo bce_vhci_module_init(); there is no driver to unregister */
++fail_class:
++    class_destroy(bce_class);
++fail_chrdev:
++    unregister_chrdev_region(bce_chrdev, 1);
++    if (!result)
++        result = -EINVAL;
++    return result;
++}
++static void __exit apple_bce_module_exit(void)
++{
++    pci_unregister_driver(&apple_bce_pci_driver);
++    /* With the PCI driver unregistered, release what apple_bce_module_init() set up. */
++    aaudio_module_exit();
++    bce_vhci_module_exit();
++    class_destroy(bce_class);
++    unregister_chrdev_region(bce_chrdev, 1);
++}
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("MrARM");
++MODULE_DESCRIPTION("Apple BCE Driver");
++MODULE_VERSION("0.01");
++module_init(apple_bce_module_init);
++module_exit(apple_bce_module_exit);
+diff --git a/drivers/staging/apple-bce/apple_bce.h b/drivers/staging/apple-bce/apple_bce.h
+new file mode 100644
+index 000000000000..f13ab8d5742e
+--- /dev/null
++++ b/drivers/staging/apple-bce/apple_bce.h
+@@ -0,0 +1,38 @@
++#pragma once
++
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "mailbox.h"
++#include "queue.h"
++#include "vhci/vhci.h"
++
++#define BC_PROTOCOL_VERSION 0x20001
++#define BCE_MAX_QUEUE_COUNT 0x100
++
++#define BCE_QUEUE_USER_MIN 2
++#define BCE_QUEUE_USER_MAX (BCE_MAX_QUEUE_COUNT - 1)
++
++struct apple_bce_device {
++    struct pci_dev *pci, *pci0;
++    dev_t devt;
++    struct device *dev;
++    void __iomem *reg_mem_mb;
++    void __iomem *reg_mem_dma;
++    struct bce_mailbox mbox;
++    struct bce_timestamp timestamp;
++    struct bce_queue *queues[BCE_MAX_QUEUE_COUNT];
++    struct spinlock queues_lock;
++    struct ida queue_ida;
++    struct bce_queue_cq *cmd_cq;
++    struct bce_queue_cmdq *cmd_cmdq;
++    struct bce_queue_sq *int_sq_list[BCE_MAX_QUEUE_COUNT];
++    bool is_being_removed;
++
++    dma_addr_t saved_data_dma_addr;
++    void *saved_data_dma_ptr;
++    size_t saved_data_dma_size;
++
++    struct bce_vhci vhci;
++};
++
++extern struct apple_bce_device *global_bce;
+\ No newline at end of file
+diff --git a/drivers/staging/apple-bce/audio/audio.c b/drivers/staging/apple-bce/audio/audio.c
+new file mode 100644
+index 000000000000..bd16ddd16c1d
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/audio.c
+@@ -0,0 +1,711 @@
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include <linux/module.h>
++#include <linux/random.h>
++#include <sound/core.h>
++#include <sound/initval.h>
++#include <sound/pcm.h>
++#include <sound/jack.h>
++#include "audio.h"
++#include "pcm.h"
++#include <linux/version.h>
++
++static int aaudio_alsa_index = SNDRV_DEFAULT_IDX1;
++static char *aaudio_alsa_id = SNDRV_DEFAULT_STR1;
++
++static dev_t aaudio_chrdev;
++static struct class *aaudio_class;
++
++static int aaudio_init_cmd(struct aaudio_device *a);
++static int aaudio_init_bs(struct aaudio_device *a);
++static void aaudio_init_dev(struct aaudio_device *a, aaudio_device_id_t dev_id);
++static void aaudio_free_dev(struct aaudio_subdevice *sdev);
++
++static int aaudio_probe(struct pci_dev *dev, const struct pci_device_id *id)
++{
++    struct aaudio_device *aaudio = NULL;
++    struct aaudio_subdevice *sdev = NULL;
++    int status = 0;
++    u32 cfg;
++    /* Claims the PCI function, creates the "aaudio" device, then brings up the remote side and the ALSA card. */
++    pr_info("aaudio: capturing our device\n");
++    if (pci_enable_device(dev))
++        return -ENODEV;
++    if (pci_request_regions(dev, "aaudio")) {
++        status = -ENODEV;
++        goto fail_disable; /* the regions were not obtained, so they must not be released */
++    }
++    pci_set_master(dev);
++
++    aaudio = kzalloc(sizeof(struct aaudio_device), GFP_KERNEL);
++    if (!aaudio) {
++        status = -ENOMEM;
++        goto fail;
++    }
++
++    aaudio->bce = global_bce;
++    if (!aaudio->bce) {
++        dev_warn(&dev->dev, "aaudio: No BCE available\n");
++        status = -EINVAL;
++        goto fail;
++    }
++
++    aaudio->pci = dev;
++    pci_set_drvdata(dev, aaudio);
++    aaudio->devt = aaudio_chrdev;
++    aaudio->dev = device_create(aaudio_class, &dev->dev, aaudio->devt, NULL, "aaudio");
++    if (IS_ERR_OR_NULL(aaudio->dev)) {
++        status = PTR_ERR(aaudio->dev); /* report the device_create() error, not the class pointer */
++        goto fail;
++    }
++    device_link_add(aaudio->dev, aaudio->bce->dev, DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_CONSUMER);
++    init_completion(&aaudio->remote_alive);
++    INIT_LIST_HEAD(&aaudio->subdevice_list);
++
++    /* Init: set an unknown flag in the bitset */
++    if (pci_read_config_dword(dev, 4, &cfg))
++        dev_warn(&dev->dev, "aaudio: pci_read_config_dword fail\n");
++    if (pci_write_config_dword(dev, 4, cfg | 6u))
++        dev_warn(&dev->dev, "aaudio: pci_write_config_dword fail\n");
++
++    dev_info(aaudio->dev, "aaudio: bs len = %llx\n", pci_resource_len(dev, 0));
++    aaudio->reg_mem_bs_dma = pci_resource_start(dev, 0);
++    aaudio->reg_mem_bs = pci_iomap(dev, 0, 0);
++    aaudio->reg_mem_cfg = pci_iomap(dev, 4, 0);
++    aaudio->reg_mem_gpr = (u32 __iomem *) ((u8 __iomem *) aaudio->reg_mem_cfg + 0xC000);
++
++    if (IS_ERR_OR_NULL(aaudio->reg_mem_bs) || IS_ERR_OR_NULL(aaudio->reg_mem_cfg)) {
++        dev_warn(&dev->dev, "aaudio: Failed to pci_iomap required regions\n");
++        goto fail;
++    }
++
++    if (aaudio_bce_init(aaudio)) {
++        dev_warn(&dev->dev, "aaudio: Failed to init BCE command transport\n");
++        goto fail;
++    }
++
++    if (snd_card_new(aaudio->dev, aaudio_alsa_index, aaudio_alsa_id, THIS_MODULE, 0, &aaudio->card)) {
++        dev_err(&dev->dev, "aaudio: Failed to create ALSA card\n");
++        goto fail;
++    }
++
++    strcpy(aaudio->card->shortname, "Apple T2 Audio");
++    strcpy(aaudio->card->longname, "Apple T2 Audio");
++    strcpy(aaudio->card->mixername, "Apple T2 Audio");
++    /* Dynamic alsa ids start at 100 */
++    aaudio->next_alsa_id = 100;
++
++    if (aaudio_init_cmd(aaudio)) {
++        dev_err(&dev->dev, "aaudio: Failed to initialize over BCE\n");
++        goto fail_snd;
++    }
++
++    if (aaudio_init_bs(aaudio)) {
++        dev_err(&dev->dev, "aaudio: Failed to initialize BufferStruct\n");
++        goto fail_snd;
++    }
++
++    if ((status = aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_ON))) {
++        dev_err(&dev->dev, "Failed to set remote access\n");
++        goto fail_snd; /* a bare return here would leak the card, mappings, regions and aaudio */
++    }
++
++    if (snd_card_register(aaudio->card)) {
++        dev_err(&dev->dev, "aaudio: Failed to register ALSA sound device\n");
++        goto fail_snd;
++    }
++
++    list_for_each_entry(sdev, &aaudio->subdevice_list, list) {
++        struct aaudio_buffer_struct_device *dev = &aaudio->bs->devices[sdev->buf_id];
++        if (sdev->out_stream_cnt == 1 && !strcmp(dev->name, "Speaker")) {
++            struct snd_pcm_hardware *hw = sdev->out_streams[0].alsa_hw_desc;
++            if (hw) /* stays NULL when no hardware description was built for the stream */
++                snprintf(aaudio->card->driver, sizeof(aaudio->card->driver) / sizeof(char), "AppleT2x%d", hw->channels_min);
++        }
++    }
++
++    return 0;
++
++fail_snd:
++    snd_card_free(aaudio->card);
++    while (!list_empty(&aaudio->subdevice_list)) { /* same subdevice teardown as aaudio_remove() */
++        sdev = list_first_entry(&aaudio->subdevice_list, struct aaudio_subdevice, list);
++        list_del(&sdev->list);
++        aaudio_free_dev(sdev);
++    }
++fail:
++    if (aaudio) { /* NULL when the allocation itself failed; all fields are read before kfree() */
++        if (!IS_ERR_OR_NULL(aaudio->reg_mem_bs))
++            pci_iounmap(dev, aaudio->reg_mem_bs);
++        if (!IS_ERR_OR_NULL(aaudio->reg_mem_cfg))
++            pci_iounmap(dev, aaudio->reg_mem_cfg);
++        if (!IS_ERR_OR_NULL(aaudio->dev))
++            device_destroy(aaudio_class, aaudio->devt);
++        kfree(aaudio);
++    }
++    pci_release_regions(dev);
++fail_disable:
++    pci_disable_device(dev);
++    if (!status)
++        status = -EINVAL;
++    return status;
++}
++
++
++
++static void aaudio_remove(struct pci_dev *dev)
++{
++    struct aaudio_subdevice *sdev;
++    struct aaudio_device *aaudio = pci_get_drvdata(dev); /* stored by aaudio_probe() via pci_set_drvdata() */
++    /* PCI remove callback: free the ALSA card and all subdevices, then release the PCI resources and aaudio. */
++    snd_card_free(aaudio->card);
++    while (!list_empty(&aaudio->subdevice_list)) {
++        sdev = list_first_entry(&aaudio->subdevice_list, struct aaudio_subdevice, list);
++        list_del(&sdev->list);
++        aaudio_free_dev(sdev);
++    }
++    pci_iounmap(dev, aaudio->reg_mem_bs);
++    pci_iounmap(dev, aaudio->reg_mem_cfg);
++    device_destroy(aaudio_class, aaudio->devt);
++    pci_free_irq_vectors(dev); /* NOTE(review): aaudio_probe() allocates no IRQ vectors itself — confirm this is needed */
++    pci_release_regions(dev);
++    pci_disable_device(dev);
++    kfree(aaudio);
++}
++
++static int aaudio_suspend(struct device *dev)
++{
++    struct aaudio_device *aaudio = pci_get_drvdata(to_pci_dev(dev));
++    /* Suspend callback: switch remote access off (a failure is only logged), then disable the PCI device. */
++    if (aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_OFF))
++        dev_warn(aaudio->dev, "Failed to reset remote access\n");
++
++    pci_disable_device(aaudio->pci);
++    return 0;
++}
++
++static int aaudio_resume(struct device *dev)
++{
++    int status;
++    struct aaudio_device *aaudio = pci_get_drvdata(to_pci_dev(dev));
++    /* Resume callback: re-enable the PCI device and bus mastering, then switch remote access back on. */
++    if ((status = pci_enable_device(aaudio->pci)))
++        return status;
++    pci_set_master(aaudio->pci);
++
++    if ((status = aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_ON))) {
++        dev_err(aaudio->dev, "Failed to set remote access\n");
++        return status; /* the error from the remote-access command is propagated to the PM core */
++    }
++
++    return 0;
++}
++
++static int aaudio_init_cmd(struct aaudio_device *a)
++{
++    int status;
++    struct aaudio_send_ctx sctx;
++    struct aaudio_msg buf;
++    u64 dev_cnt, dev_i;
++    aaudio_device_id_t *dev_l;
++    /* Handshake with the remote, then create a subdevice for every device id it reports. Returns 0 or an error. */
++    if ((status = aaudio_send(a, &sctx, 500,
++                              aaudio_msg_write_alive_notification, 1, 3))) {
++        dev_err(a->dev, "Sending alive notification failed\n");
++        return status;
++    }
++    /* remote_alive is completed by aaudio_handle_notification() when the ALIVE notification arrives. */
++    if (wait_for_completion_timeout(&a->remote_alive, msecs_to_jiffies(500)) == 0) {
++        dev_err(a->dev, "Timed out waiting for remote\n");
++        return -ETIMEDOUT;
++    }
++    dev_info(a->dev, "Continuing init\n");
++
++    buf = aaudio_reply_alloc();
++    if ((status = aaudio_cmd_get_device_list(a, &buf, &dev_l, &dev_cnt))) {
++        dev_err(a->dev, "Failed to get device list\n");
++        aaudio_reply_free(&buf);
++        return status;
++    }
++    for (dev_i = 0; dev_i < dev_cnt; ++dev_i) /* aaudio_init_dev() returns void, so per-device failures do not abort init */
++        aaudio_init_dev(a, dev_l[dev_i]);
++    aaudio_reply_free(&buf); /* presumably dev_l points into buf, hence the free only after the loop — verify */
++
++    return 0;
++}
++
++static void aaudio_init_stream_info(struct aaudio_subdevice *sdev, struct aaudio_stream *strm);
++static void aaudio_handle_jack_connection_change(struct aaudio_subdevice *sdev);
++
++static void aaudio_init_dev(struct aaudio_device *a, aaudio_device_id_t dev_id)
++{
++    struct aaudio_subdevice *sdev;
++    struct aaudio_msg buf = aaudio_reply_alloc();
++    u64 uid_len, stream_cnt, i;
++    aaudio_object_id_t *stream_list;
++    char *uid;
++    /* Queries one remote device (uid, latencies, streams) and, on success, appends it to a->subdevice_list. */
++    sdev = kzalloc(sizeof(struct aaudio_subdevice), GFP_KERNEL);
++    if (!sdev)
++        goto fail; /* the fail path frees buf; kfree(NULL) is a no-op */
++    if (aaudio_cmd_get_property(a, &buf, dev_id, dev_id, AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_UID, 0),
++            NULL, 0, (void **) &uid, &uid_len) || uid_len > AAUDIO_DEVICE_MAX_UID_LEN) {
++        dev_err(a->dev, "Failed to get device uid for device %llx\n", dev_id);
++        goto fail;
++    }
++    dev_info(a->dev, "Remote device %llx %.*s\n", dev_id, (int) uid_len, uid);
++    sdev->a = a;
++    INIT_LIST_HEAD(&sdev->list);
++    sdev->dev_id = dev_id;
++    sdev->buf_id = AAUDIO_BUFFER_ID_NONE;
++    strncpy(sdev->uid, uid, uid_len);
++    sdev->uid[uid_len] = '\0'; /* uid[] holds AAUDIO_DEVICE_MAX_UID_LEN + 1 bytes, so index uid_len is the last valid slot */
++
++    if (aaudio_cmd_get_primitive_property(a, dev_id, dev_id,
++            AAUDIO_PROP(AAUDIO_PROP_SCOPE_INPUT, AAUDIO_PROP_LATENCY, 0), NULL, 0, &sdev->in_latency, sizeof(u32)))
++        dev_warn(a->dev, "Failed to query device input latency\n");
++    if (aaudio_cmd_get_primitive_property(a, dev_id, dev_id,
++            AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, AAUDIO_PROP_LATENCY, 0), NULL, 0, &sdev->out_latency, sizeof(u32)))
++        dev_warn(a->dev, "Failed to query device output latency\n");
++
++    if (aaudio_cmd_get_input_stream_list(a, &buf, dev_id, &stream_list, &stream_cnt)) {
++        dev_err(a->dev, "Failed to get input stream list for device %llx\n", dev_id);
++        goto fail;
++    }
++    if (stream_cnt > AAUDIO_DEIVCE_MAX_INPUT_STREAMS) {
++        dev_warn(a->dev, "Device %s input stream count %llu is larger than the supported count of %u\n",
++                sdev->uid, stream_cnt, AAUDIO_DEIVCE_MAX_INPUT_STREAMS);
++        stream_cnt = AAUDIO_DEIVCE_MAX_INPUT_STREAMS;
++    }
++    sdev->in_stream_cnt = stream_cnt;
++    for (i = 0; i < stream_cnt; i++) {
++        sdev->in_streams[i].id = stream_list[i];
++        sdev->in_streams[i].buffer_cnt = 0;
++        aaudio_init_stream_info(sdev, &sdev->in_streams[i]);
++        sdev->in_streams[i].latency += sdev->in_latency;
++    }
++
++    if (aaudio_cmd_get_output_stream_list(a, &buf, dev_id, &stream_list, &stream_cnt)) {
++        dev_err(a->dev, "Failed to get output stream list for device %llx\n", dev_id);
++        goto fail;
++    }
++    if (stream_cnt > AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS) {
++        dev_warn(a->dev, "Device %s output stream count %llu is larger than the supported count of %u\n",
++                 sdev->uid, stream_cnt, AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS);
++        stream_cnt = AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS;
++    }
++    sdev->out_stream_cnt = stream_cnt;
++    for (i = 0; i < stream_cnt; i++) {
++        sdev->out_streams[i].id = stream_list[i];
++        sdev->out_streams[i].buffer_cnt = 0;
++        aaudio_init_stream_info(sdev, &sdev->out_streams[i]);
++        sdev->out_streams[i].latency += sdev->out_latency; /* output streams carry the device's output latency */
++    }
++
++    if (sdev->is_pcm)
++        aaudio_create_pcm(sdev);
++    /* Headphone Jack status */
++    if (!strcmp(sdev->uid, "Codec Output")) {
++        if (snd_jack_new(a->card, sdev->uid, SND_JACK_HEADPHONE, &sdev->jack, true, false))
++            dev_warn(a->dev, "Failed to create an attached jack for %s\n", sdev->uid);
++        aaudio_cmd_property_listener(a, sdev->dev_id, sdev->dev_id,
++                AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, AAUDIO_PROP_JACK_PLUGGED, 0));
++        aaudio_handle_jack_connection_change(sdev);
++    }
++
++    aaudio_reply_free(&buf);
++
++    list_add_tail(&sdev->list, &a->subdevice_list);
++    return;
++
++fail:
++    aaudio_reply_free(&buf);
++    kfree(sdev);
++}
++
++static void aaudio_init_stream_info(struct aaudio_subdevice *sdev, struct aaudio_stream *strm)
++{
++    if (aaudio_cmd_get_primitive_property(sdev->a, sdev->dev_id, strm->id,
++            AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_PHYS_FORMAT, 0), NULL, 0,
++            &strm->desc, sizeof(strm->desc)))
++        dev_warn(sdev->a->dev, "Failed to query stream descriptor\n"); /* only logged; execution continues */
++    if (aaudio_cmd_get_primitive_property(sdev->a, sdev->dev_id, strm->id,
++            AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_LATENCY, 0), NULL, 0, &strm->latency, sizeof(u32)))
++        dev_warn(sdev->a->dev, "Failed to query stream latency\n"); /* only logged; execution continues */
++    if (strm->desc.format_id == AAUDIO_FORMAT_LPCM)
++        sdev->is_pcm = true; /* the flag is per subdevice and is never cleared here */
++}
++
++static void aaudio_free_dev(struct aaudio_subdevice *sdev)
++{
++    size_t idx;
++    /* Release the per-stream allocations first; kfree() ignores NULL, so
++     * streams that never received a descriptor or buffer list are fine. */
++    for (idx = 0; idx < sdev->in_stream_cnt; idx++) {
++        kfree(sdev->in_streams[idx].alsa_hw_desc);
++        kfree(sdev->in_streams[idx].buffers);
++    }
++    /* Output streams own the same two allocations. */
++    for (idx = 0; idx < sdev->out_stream_cnt; idx++) {
++        kfree(sdev->out_streams[idx].alsa_hw_desc);
++        kfree(sdev->out_streams[idx].buffers);
++    }
++    /* Finally free the subdevice itself. */
++    kfree(sdev);
++}
++
++static struct aaudio_subdevice *aaudio_find_dev_by_dev_id(struct aaudio_device *a, aaudio_device_id_t dev_id)
++{
++    struct aaudio_subdevice *cur; /* cursor over a->subdevice_list */
++    list_for_each_entry(cur, &a->subdevice_list, list) {
++        if (cur->dev_id == dev_id)
++            return cur;
++    }
++    return NULL; /* no subdevice carries this id */
++}
++
++static struct aaudio_subdevice *aaudio_find_dev_by_uid(struct aaudio_device *a, const char *uid)
++{
++    struct aaudio_subdevice *cur; /* cursor over a->subdevice_list */
++    list_for_each_entry(cur, &a->subdevice_list, list) {
++        if (strcmp(uid, cur->uid) == 0)
++            return cur;
++    }
++    return NULL; /* no subdevice has a matching uid string */
++}
++
++static void aaudio_init_bs_stream(struct aaudio_device *a, struct aaudio_stream *strm,
++        struct aaudio_buffer_struct_stream *bs_strm);
++static void aaudio_init_bs_stream_host(struct aaudio_device *a, struct aaudio_stream *strm,
++        struct aaudio_buffer_struct_stream *bs_strm);
++
++static int aaudio_init_bs(struct aaudio_device *a)
++{
++    int i, j;
++    struct aaudio_buffer_struct_device *dev;
++    struct aaudio_subdevice *sdev;
++    u32 ver, sig, bs_base;
++    /* Locates the BufferStruct through the GPR registers, then pairs its device entries with our subdevices. */
++    ver = ioread32(&a->reg_mem_gpr[0]);
++    if (ver < 3) {
++        dev_err(a->dev, "aaudio: Bad GPR version (%u)\n", ver);
++        return -EINVAL;
++    }
++    sig = ioread32(&a->reg_mem_gpr[1]);
++    if (sig != AAUDIO_SIG) {
++        dev_err(a->dev, "aaudio: Bad GPR sig (%x)\n", sig);
++        return -EINVAL;
++    }
++    bs_base = ioread32(&a->reg_mem_gpr[2]);
++    a->bs = (struct aaudio_buffer_struct *) ((u8 *) a->reg_mem_bs + bs_base);
++    if (a->bs->signature != AAUDIO_SIG) {
++        dev_err(a->dev, "aaudio: Bad BufferStruct sig (%x)\n", a->bs->signature);
++        return -EINVAL;
++    }
++    dev_info(a->dev, "aaudio: BufferStruct ver = %i\n", a->bs->version);
++    dev_info(a->dev, "aaudio: Num devices = %i\n", a->bs->num_devices);
++    for (i = 0; i < a->bs->num_devices && i < ARRAY_SIZE(a->bs->devices); i++) { /* never index past devices[] */
++        dev = &a->bs->devices[i];
++        dev_info(a->dev, "aaudio: Device %i %s\n", i, dev->name);
++        sdev = aaudio_find_dev_by_uid(a, dev->name);
++        if (!sdev) {
++            dev_err(a->dev, "aaudio: Subdevice not found for BufferStruct device %s\n", dev->name);
++            continue;
++        }
++        sdev->buf_id = (u8) i;
++        dev->num_input_streams = 0;
++        for (j = 0; j < dev->num_output_streams; j++) {
++            dev_info(a->dev, "aaudio: Device %i Stream %i: Output; Buffer Count = %i\n", i, j,
++                     dev->output_streams[j].num_buffers);
++            if (j < sdev->out_stream_cnt)
++                aaudio_init_bs_stream(a, &sdev->out_streams[j], &dev->output_streams[j]);
++        }
++    }
++
++    list_for_each_entry(sdev, &a->subdevice_list, list) {
++        if (sdev->buf_id != AAUDIO_BUFFER_ID_NONE)
++            continue;
++        if (i >= ARRAY_SIZE(a->bs->devices))
++            break; /* the BufferStruct device table is full; remaining subdevices keep AAUDIO_BUFFER_ID_NONE */
++        sdev->buf_id = i;
++        dev_info(a->dev, "aaudio: Created device %i %s\n", i, sdev->uid);
++        strscpy(a->bs->devices[i].name, sdev->uid, sizeof(a->bs->devices[i].name)); /* uid may be longer than name[] */
++        a->bs->devices[i].num_input_streams = 0;
++        a->bs->devices[i].num_output_streams = 0;
++        a->bs->num_devices = ++i;
++    }
++    list_for_each_entry(sdev, &a->subdevice_list, list) {
++        if (sdev->in_stream_cnt == 1 && sdev->buf_id != AAUDIO_BUFFER_ID_NONE) { /* skip subdevices without a slot */
++            dev_info(a->dev, "aaudio: Device %i Host Stream; Input\n", sdev->buf_id);
++            aaudio_init_bs_stream_host(a, &sdev->in_streams[0], &a->bs->devices[sdev->buf_id].input_streams[0]);
++            a->bs->devices[sdev->buf_id].num_input_streams = 1;
++            wmb();
++            if (aaudio_cmd_set_input_stream_address_ranges(a, sdev->dev_id)) {
++                dev_err(a->dev, "aaudio: Failed to set input stream address ranges\n");
++            }
++        }
++    }
++
++    return 0;
++}
++
++static void aaudio_init_bs_stream(struct aaudio_device *a, struct aaudio_stream *strm,
++                                  struct aaudio_buffer_struct_stream *bs_strm)
++{
++    size_t i;
++    strm->buffer_cnt = bs_strm->num_buffers; /* count published in the BufferStruct; clamped to the driver limit below */
++    if (bs_strm->num_buffers > AAUDIO_DEIVCE_MAX_BUFFER_COUNT) {
++        dev_warn(a->dev, "BufferStruct buffer count %u exceeds driver limit of %u\n", bs_strm->num_buffers,
++                AAUDIO_DEIVCE_MAX_BUFFER_COUNT);
++        strm->buffer_cnt = AAUDIO_DEIVCE_MAX_BUFFER_COUNT;
++    }
++    if (!strm->buffer_cnt)
++        return;
++    strm->buffers = kmalloc_array(strm->buffer_cnt, sizeof(struct aaudio_dma_buf), GFP_KERNEL);
++    if (!strm->buffers) {
++        dev_err(a->dev, "Buffer list allocation failed\n");
++        strm->buffer_cnt = 0; /* never advertise buffers that do not exist */
++        return;
++    }
++    for (i = 0; i < strm->buffer_cnt; i++) { /* addresses in the BufferStruct are offsets into the mapped BAR */
++        strm->buffers[i].dma_addr = a->reg_mem_bs_dma + (dma_addr_t) bs_strm->buffers[i].address;
++        strm->buffers[i].ptr = a->reg_mem_bs + bs_strm->buffers[i].address;
++        strm->buffers[i].size = bs_strm->buffers[i].size;
++    }
++    if (strm->buffer_cnt == 1) {
++        strm->alsa_hw_desc = kmalloc(sizeof(struct snd_pcm_hardware), GFP_KERNEL);
++        if (strm->alsa_hw_desc && aaudio_create_hw_info(&strm->desc, strm->alsa_hw_desc, strm->buffers[0].size)) {
++            kfree(strm->alsa_hw_desc); /* description could not be built: leave the stream without one */
++            strm->alsa_hw_desc = NULL;
++        }
++    }
++}
++
++static void aaudio_init_bs_stream_host(struct aaudio_device *a, struct aaudio_stream *strm,
++        struct aaudio_buffer_struct_stream *bs_strm)
++{
++    size_t size;
++    dma_addr_t dma_addr;
++    void *dma_ptr;
++    size = strm->desc.bytes_per_packet * 16640;
++    /* Allocate the bookkeeping array first so a later failure cannot strand DMA memory already published to the device. */
++    strm->buffers = kmalloc_array(1, sizeof(struct aaudio_dma_buf), GFP_KERNEL);
++    if (!strm->buffers) {
++        dev_err(a->dev, "Buffer list allocation failed\n");
++        return;
++    }
++    dma_ptr = dma_alloc_coherent(&a->pci->dev, size, &dma_addr, GFP_KERNEL);
++    if (!dma_ptr) {
++        dev_err(a->dev, "dma_alloc_coherent failed\n");
++        kfree(strm->buffers);
++        strm->buffers = NULL;
++        return;
++    }
++    memset(dma_ptr, 0, size); /* cleared before the address is written into the shared BufferStruct */
++    bs_strm->buffers[0].address = dma_addr;
++    bs_strm->buffers[0].size = size;
++    bs_strm->num_buffers = 1;
++    strm->buffer_cnt = 1;
++    strm->buffers[0].dma_addr = dma_addr;
++    strm->buffers[0].ptr = dma_ptr;
++    strm->buffers[0].size = size;
++    strm->alsa_hw_desc = kmalloc(sizeof(struct snd_pcm_hardware), GFP_KERNEL);
++    if (strm->alsa_hw_desc && aaudio_create_hw_info(&strm->desc, strm->alsa_hw_desc, strm->buffers[0].size)) {
++        kfree(strm->alsa_hw_desc); /* description could not be built: leave the stream without one */
++        strm->alsa_hw_desc = NULL;
++    }
++}
++
++static void aaudio_handle_prop_change(struct aaudio_device *a, struct aaudio_msg *msg);
++
++void aaudio_handle_notification(struct aaudio_device *a, struct aaudio_msg *msg)
++{
++    struct aaudio_send_ctx sctx;
++    struct aaudio_msg_base base;
++    if (aaudio_msg_read_base(msg, &base)) /* message base could not be read: drop it silently */
++        return;
++    switch (base.msg) { /* dispatch on the message id read from the base */
++        case AAUDIO_MSG_NOTIFICATION_BOOT:
++            dev_info(a->dev, "Received boot notification from remote\n");
++
++            /* Resend the alive notify */
++            if (aaudio_send(a, &sctx, 500,
++                    aaudio_msg_write_alive_notification, 1, 3)) {
++                pr_err("Sending alive notification failed\n");
++            }
++            break;
++        case AAUDIO_MSG_NOTIFICATION_ALIVE:
++            dev_info(a->dev, "Received alive notification from remote\n");
++            complete_all(&a->remote_alive); /* releases the wait in aaudio_init_cmd() */
++            break;
++        case AAUDIO_MSG_PROPERTY_CHANGED:
++            aaudio_handle_prop_change(a, msg); /* handled later from a work item */
++            break;
++        default:
++            dev_info(a->dev, "Unhandled notification %i", base.msg);
++            break;
++    }
++}
++
++struct aaudio_prop_change_work_struct {
++    struct work_struct ws;
++    struct aaudio_device *a;
++    aaudio_device_id_t dev;
++    aaudio_object_id_t obj;
++    struct aaudio_prop_addr prop;
++};
++
++static void aaudio_handle_jack_connection_change(struct aaudio_subdevice *sdev)
++{
++    u32 plugged;
++    if (!sdev->jack) /* no jack was created for this subdevice, so there is nothing to report */
++        return;
++    /* NOTE: Apple made the plug status scoped to the input and output streams. This makes no sense for us, so I just
++     * always pick the OUTPUT status. */
++    if (aaudio_cmd_get_primitive_property(sdev->a, sdev->dev_id, sdev->dev_id,
++            AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, AAUDIO_PROP_JACK_PLUGGED, 0), NULL, 0, &plugged, sizeof(plugged))) {
++        dev_err(sdev->a->dev, "Failed to get jack enable status\n");
++        return;
++    }
++    dev_dbg(sdev->a->dev, "Jack is now %s\n", plugged ? "plugged" : "unplugged");
++    snd_jack_report(sdev->jack, plugged ? sdev->jack->type : 0); /* report the jack's own type mask when plugged, 0 otherwise */
++}
++
++void aaudio_handle_prop_change_work(struct work_struct *ws)
++{
++    struct aaudio_prop_change_work_struct *work = container_of(ws, struct aaudio_prop_change_work_struct, ws);
++    struct aaudio_subdevice *sdev;
++    /* Work handler for a property-changed notification; owns and frees the work item allocated by the scheduler. */
++    sdev = aaudio_find_dev_by_dev_id(work->a, work->dev);
++    if (!sdev) {
++        dev_err(work->a->dev, "Property notification change: device not found\n");
++        goto done;
++    }
++    dev_dbg(work->a->dev, "Property changed for device: %s\n", sdev->uid);
++    /* Only the output-scope jack-plugged property is acted on; every other change is ignored. */
++    if (work->prop.scope == AAUDIO_PROP_SCOPE_OUTPUT && work->prop.selector == AAUDIO_PROP_JACK_PLUGGED) {
++        aaudio_handle_jack_connection_change(sdev);
++    }
++
++done:
++    kfree(work);
++}
++
++void aaudio_handle_prop_change(struct aaudio_device *a, struct aaudio_msg *msg)
++{
++    /* NOTE: Deferred to a work item: the handler must query the device, which is impossible from the reply parsing context. */
++    struct aaudio_prop_change_work_struct *work = kmalloc(sizeof(struct aaudio_prop_change_work_struct), GFP_KERNEL);
++    if (!work)
++        return; /* out of memory: drop this notification instead of dereferencing NULL */
++    work->a = a;
++    INIT_WORK(&work->ws, aaudio_handle_prop_change_work);
++    aaudio_msg_read_property_changed(msg, &work->dev, &work->obj, &work->prop);
++    schedule_work(&work->ws); /* the work handler frees work */
++}
++
++#define aaudio_send_cmd_response(a, sctx, msg, fn, ...) do { /* do/while (0): expands to exactly one statement */ \
++    if (aaudio_send_with_tag(a, sctx, ((struct aaudio_msg_header *) (msg)->data)->tag, 500, fn, ##__VA_ARGS__)) \
++        pr_err("aaudio: Failed to reply to a command\n"); } while (0)
++
++void aaudio_handle_cmd_timestamp(struct aaudio_device *a, struct aaudio_msg *msg)
++{
++    ktime_t time_os = ktime_get_boottime();
++    struct aaudio_send_ctx sctx;
++    struct aaudio_subdevice *sdev;
++    u64 devid, timestamp, update_seed;
++    aaudio_msg_read_update_timestamp(msg, &devid, &timestamp, &update_seed);
++    dev_dbg(a->dev, "Received timestamp update for dev=%llx ts=%llx seed=%llx\n", devid, timestamp, update_seed);
++    /* Forward the remote timestamp to the matching subdevice, then always acknowledge the command. */
++    sdev = aaudio_find_dev_by_dev_id(a, devid);
++    if (sdev) /* the lookup returns NULL for an unknown device id; there is then nothing to update */
++        aaudio_handle_timestamp(sdev, time_os, timestamp);
++    aaudio_send_cmd_response(a, &sctx, msg,
++            aaudio_msg_write_update_timestamp_response);
++}
++
++void aaudio_handle_command(struct aaudio_device *a, struct aaudio_msg *msg)
++{
++    struct aaudio_msg_base base;
++    if (aaudio_msg_read_base(msg, &base)) /* message base could not be read: drop it silently */
++        return;
++    switch (base.msg) { /* AAUDIO_MSG_UPDATE_TIMESTAMP is the only command handled */
++        case AAUDIO_MSG_UPDATE_TIMESTAMP:
++            aaudio_handle_cmd_timestamp(a, msg);
++            break;
++        default:
++            dev_info(a->dev, "Unhandled device command %i", base.msg);
++            break;
++    }
++}
++
++static struct pci_device_id aaudio_ids[  ] = {
++        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x1803) },
++        { 0, },
++};
++
++struct dev_pm_ops aaudio_pci_driver_pm = {
++        .suspend = aaudio_suspend,
++        .resume = aaudio_resume
++};
++struct pci_driver aaudio_pci_driver = {
++        .name = "aaudio",
++        .id_table = aaudio_ids,
++        .probe = aaudio_probe,
++        .remove = aaudio_remove,
++        .driver = {
++                .pm = &aaudio_pci_driver_pm
++        }
++};
++
++
++int aaudio_module_init(void)
++{
++    int result;
++    if ((result = alloc_chrdev_region(&aaudio_chrdev, 0, 1, "aaudio")))
++        return result; /* nothing has been set up yet, so there is nothing to unwind */
++#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0)
++    aaudio_class = class_create(THIS_MODULE, "aaudio");
++#else
++    aaudio_class = class_create("aaudio");
++#endif
++    if (IS_ERR(aaudio_class)) {
++        result = PTR_ERR(aaudio_class);
++        goto fail_chrdev; /* there is no valid class to destroy */
++    }
++
++    /* Register the PCI driver last: if this fails, only the class and the chrdev
++     * region exist, so the unwind path must not call pci_unregister_driver(). */
++    result = pci_register_driver(&aaudio_pci_driver);
++    if (result)
++        goto fail_class;
++    return 0;
++
++fail_class:
++    class_destroy(aaudio_class);
++fail_chrdev:
++    unregister_chrdev_region(aaudio_chrdev, 1);
++    if (!result)
++        result = -EINVAL;
++    return result;
++}
++
++void aaudio_module_exit(void)
++{
++    pci_unregister_driver(&aaudio_pci_driver); /* undoes aaudio_module_init() in reverse order */
++    class_destroy(aaudio_class);
++    unregister_chrdev_region(aaudio_chrdev, 1);
++}
++
++struct aaudio_alsa_pcm_id_mapping aaudio_alsa_id_mappings[] = {
++        {"Speaker", 0},
++        {"Digital Mic", 1},
++        {"Codec Output", 2},
++        {"Codec Input", 3},
++        {"Bridge Loopback", 4},
++        {}
++};
++
++module_param_named(index, aaudio_alsa_index, int, 0444);
++MODULE_PARM_DESC(index, "Index value for Apple Internal Audio soundcard.");
++module_param_named(id, aaudio_alsa_id, charp, 0444);
++MODULE_PARM_DESC(id, "ID string for Apple Internal Audio soundcard.");
+diff --git a/drivers/staging/apple-bce/audio/audio.h b/drivers/staging/apple-bce/audio/audio.h
+new file mode 100644
+index 000000000000..004bc1e22ea4
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/audio.h
+@@ -0,0 +1,125 @@
++#ifndef AAUDIO_H
++#define AAUDIO_H
++
++#include <linux/types.h>
++#include <sound/pcm.h>
++#include "../apple_bce.h"
++#include "protocol_bce.h"
++#include "description.h"
++
++#define AAUDIO_SIG 0x19870423
++
++#define AAUDIO_DEVICE_MAX_UID_LEN 128
++#define AAUDIO_DEIVCE_MAX_INPUT_STREAMS 1
++#define AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS 1
++#define AAUDIO_DEIVCE_MAX_BUFFER_COUNT 1
++
++#define AAUDIO_BUFFER_ID_NONE 0xffu
++
++struct snd_card;
++struct snd_pcm;
++struct snd_pcm_hardware;
++struct snd_jack;
++
++struct __attribute__((packed)) __attribute__((aligned(4))) aaudio_buffer_struct_buffer {
++    size_t address; /* NOTE(review): size_t width is per-architecture; this layout is overlaid on device memory — confirm 64-bit is assumed */
++    size_t size;
++    size_t pad[4];
++};
++struct aaudio_buffer_struct_stream {
++    u8 num_buffers;
++    struct aaudio_buffer_struct_buffer buffers[100];
++    char filler[32];
++};
++struct aaudio_buffer_struct_device {
++    char name[128];
++    u8 num_input_streams;
++    u8 num_output_streams;
++    struct aaudio_buffer_struct_stream input_streams[5];
++    struct aaudio_buffer_struct_stream output_streams[5];
++    char filler[128];
++};
++struct aaudio_buffer_struct {
++    u32 version;
++    u32 signature;
++    u32 flags;
++    u8 num_devices;
++    struct aaudio_buffer_struct_device devices[20];
++};
++
++struct aaudio_device;
++struct aaudio_dma_buf {
++    dma_addr_t dma_addr;
++    void *ptr;
++    size_t size;
++};
++struct aaudio_stream {
++    aaudio_object_id_t id;
++    size_t buffer_cnt;
++    struct aaudio_dma_buf *buffers;
++
++    struct aaudio_apple_description desc;
++    struct snd_pcm_hardware *alsa_hw_desc;
++    u32 latency;
++
++    bool waiting_for_first_ts;
++
++    ktime_t remote_timestamp;
++    snd_pcm_sframes_t frame_min;
++    int started;
++};
++struct aaudio_subdevice {
++    struct aaudio_device *a;
++    struct list_head list;
++    aaudio_device_id_t dev_id;
++    u32 in_latency, out_latency;
++    u8 buf_id;
++    int alsa_id;
++    char uid[AAUDIO_DEVICE_MAX_UID_LEN + 1];
++    size_t in_stream_cnt;
++    struct aaudio_stream in_streams[AAUDIO_DEIVCE_MAX_INPUT_STREAMS];
++    size_t out_stream_cnt;
++    struct aaudio_stream out_streams[AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS];
++    bool is_pcm;
++    struct snd_pcm *pcm;
++    struct snd_jack *jack;
++};
++struct aaudio_alsa_pcm_id_mapping {
++    const char *name;
++    int alsa_id;
++};
++
++struct aaudio_device {
++    struct pci_dev *pci;
++    dev_t devt;
++    struct device *dev;
++    void __iomem *reg_mem_bs;
++    dma_addr_t reg_mem_bs_dma;
++    void __iomem *reg_mem_cfg;
++
++    u32 __iomem *reg_mem_gpr;
++
++    struct aaudio_buffer_struct *bs;
++
++    struct apple_bce_device *bce;
++    struct aaudio_bce bcem;
++
++    struct snd_card *card;
++
++    struct list_head subdevice_list;
++    int next_alsa_id;
++
++    struct completion remote_alive;
++};
++
++void aaudio_handle_notification(struct aaudio_device *a, struct aaudio_msg *msg);
++void aaudio_handle_prop_change_work(struct work_struct *ws);
++void aaudio_handle_cmd_timestamp(struct aaudio_device *a, struct aaudio_msg *msg);
++void aaudio_handle_command(struct aaudio_device *a, struct aaudio_msg *msg);
++
++int aaudio_module_init(void);
++void aaudio_module_exit(void);
++
++extern struct aaudio_alsa_pcm_id_mapping aaudio_alsa_id_mappings[];
++
++#endif //AAUDIO_H
+diff --git a/drivers/staging/apple-bce/audio/description.h b/drivers/staging/apple-bce/audio/description.h
+new file mode 100644
+index 000000000000..dfef3ab68f27
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/description.h
+@@ -0,0 +1,42 @@
++#ifndef AAUDIO_DESCRIPTION_H
++#define AAUDIO_DESCRIPTION_H
++
++#include <linux/types.h>
++
++struct aaudio_apple_description {
++    u64 sample_rate_double;
++    u32 format_id;
++    u32 format_flags;
++    u32 bytes_per_packet;
++    u32 frames_per_packet;
++    u32 bytes_per_frame;
++    u32 channels_per_frame;
++    u32 bits_per_channel;
++    u32 reserved;
++};
++
++enum {
++    AAUDIO_FORMAT_LPCM = 0x6c70636d  // 'lpcm'
++};
++
++enum {
++    AAUDIO_FORMAT_FLAG_FLOAT = 1,
++    AAUDIO_FORMAT_FLAG_BIG_ENDIAN = 2,
++    AAUDIO_FORMAT_FLAG_SIGNED = 4,
++    AAUDIO_FORMAT_FLAG_PACKED = 8,
++    AAUDIO_FORMAT_FLAG_ALIGNED_HIGH = 16,
++    AAUDIO_FORMAT_FLAG_NON_INTERLEAVED = 32,
++    AAUDIO_FORMAT_FLAG_NON_MIXABLE = 64
++};
++
++/* Truncate an IEEE-754 double passed as raw bits to u64 using integer math only; <1 or negative -> 0, overflow -> ~0. */
++static inline u64 aaudio_double_to_u64(u64 d)
++{
++    s32 exp = (s32) ((d >> 52) & 0x7ff) - 1023;
++    u64 mant = (d & ((1ULL << 52) - 1)) | (1ULL << 52); /* fraction with the implicit leading 1 restored */
++    if ((d >> 63) || exp < 0)
++        return 0;
++    return exp > 63 ? ~0ULL : (exp > 52 ? mant << (exp - 52) : mant >> (52 - exp));
++}
++
++#endif //AAUDIO_DESCRIPTION_H
+diff --git a/drivers/staging/apple-bce/audio/pcm.c b/drivers/staging/apple-bce/audio/pcm.c
+new file mode 100644
+index 000000000000..1026e10a9ac5
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/pcm.c
+@@ -0,0 +1,308 @@
++#include "pcm.h"
++#include "audio.h"
++
++static u64 aaudio_get_alsa_fmtbit(struct aaudio_apple_description *desc)
++{
++    if (desc->format_flags & AAUDIO_FORMAT_FLAG_FLOAT) {
++        if (desc->bits_per_channel == 32) {
++            if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN)
++                return SNDRV_PCM_FMTBIT_FLOAT_BE;
++            else
++                return SNDRV_PCM_FMTBIT_FLOAT_LE;
++        } else if (desc->bits_per_channel == 64) {
++            if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN)
++                return SNDRV_PCM_FMTBIT_FLOAT64_BE;
++            else
++                return SNDRV_PCM_FMTBIT_FLOAT64_LE;
++        } else {
++            pr_err("aaudio: unsupported bits per channel for float format: %u\n", desc->bits_per_channel);
++            return 0;
++        }
++    }
++#define DEFINE_BPC_OPTION(val, b) \
++    case val: \
++        if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN) { \
++            if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED) \
++                return SNDRV_PCM_FMTBIT_S ## b ## BE; \
++            else \
++                return SNDRV_PCM_FMTBIT_U ## b ## BE; \
++        } else { \
++            if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED) \
++                return SNDRV_PCM_FMTBIT_S ## b ## LE; \
++            else \
++                return SNDRV_PCM_FMTBIT_U ## b ## LE; \
++        }
++    if (desc->format_flags & AAUDIO_FORMAT_FLAG_PACKED) {
++        switch (desc->bits_per_channel) {
++            case 8:
++            case 16:
++            case 32:
++                break;
++            DEFINE_BPC_OPTION(24, 24_3)
++            default:
++                pr_err("aaudio: unsupported bits per channel for packed format: %u\n", desc->bits_per_channel);
++                return 0;
++        }
++    }
++    if (desc->format_flags & AAUDIO_FORMAT_FLAG_ALIGNED_HIGH) {
++        switch (desc->bits_per_channel) {
++            DEFINE_BPC_OPTION(24, 32_)
++            default:
++                pr_err("aaudio: unsupported bits per channel for high-aligned format: %u\n", desc->bits_per_channel);
++                return 0;
++        }
++    }
++    switch (desc->bits_per_channel) {
++        case 8:
++            if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED)
++                return SNDRV_PCM_FMTBIT_S8;
++            else
++                return SNDRV_PCM_FMTBIT_U8;
++        DEFINE_BPC_OPTION(16, 16_)
++        DEFINE_BPC_OPTION(24, 24_)
++        DEFINE_BPC_OPTION(32, 32_)
++        default:
++            pr_err("aaudio: unsupported bits per channel: %u\n", desc->bits_per_channel);
++            return 0;
++    }
++}
++/* Fill *alsa_hw from the device's format description; returns 0, or -EINVAL if the description is unusable. */
++int aaudio_create_hw_info(struct aaudio_apple_description *desc, struct snd_pcm_hardware *alsa_hw, size_t buf_size)
++{
++    uint rate;
++    alsa_hw->info = (SNDRV_PCM_INFO_MMAP |
++                     SNDRV_PCM_INFO_BLOCK_TRANSFER |
++                     SNDRV_PCM_INFO_MMAP_VALID |
++                     SNDRV_PCM_INFO_DOUBLE);
++    if (desc->format_flags & AAUDIO_FORMAT_FLAG_NON_MIXABLE)
++        pr_warn("aaudio: unsupported hw flag: NON_MIXABLE\n");
++    if (!(desc->format_flags & AAUDIO_FORMAT_FLAG_NON_INTERLEAVED))
++        alsa_hw->info |= SNDRV_PCM_INFO_INTERLEAVED;
++    alsa_hw->formats = aaudio_get_alsa_fmtbit(desc);
++    if (!alsa_hw->formats || !desc->bytes_per_packet) /* unknown format, or periods_* below would divide by zero */
++        return -EINVAL;
++    rate = (uint) aaudio_double_to_u64(desc->sample_rate_double);
++    alsa_hw->rates = snd_pcm_rate_to_rate_bit(rate);
++    alsa_hw->rate_min = rate;
++    alsa_hw->rate_max = rate;
++    alsa_hw->channels_min = desc->channels_per_frame;
++    alsa_hw->channels_max = desc->channels_per_frame;
++    alsa_hw->buffer_bytes_max = buf_size;
++    alsa_hw->period_bytes_min = desc->bytes_per_packet;
++    alsa_hw->period_bytes_max = desc->bytes_per_packet;
++    alsa_hw->periods_min = (uint) (buf_size / desc->bytes_per_packet);
++    alsa_hw->periods_max = (uint) (buf_size / desc->bytes_per_packet);
++    pr_debug("aaudio_create_hw_info: format = %llu, rate = %u/%u. channels = %u, periods = %u, period size = %zu\n",
++            alsa_hw->formats, alsa_hw->rate_min, alsa_hw->rates, alsa_hw->channels_min, alsa_hw->periods_min,
++            alsa_hw->period_bytes_min);
++    return 0;
++}
++
++/* Map an ALSA substream to the driver's stream slot: out_streams for playback, in_streams otherwise. */
++static struct aaudio_stream *aaudio_pcm_stream(struct snd_pcm_substream *substream)
++{
++    struct aaudio_subdevice *owner = snd_pcm_substream_chip(substream);
++    struct aaudio_stream *slots = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ?
++            owner->out_streams : owner->in_streams;
++    return &slots[substream->number];
++}
++
++static int aaudio_pcm_open(struct snd_pcm_substream *substream)
++{
++    pr_debug("aaudio_pcm_open\n");
++    substream->runtime->hw = *aaudio_pcm_stream(substream)->alsa_hw_desc;
++
++    return 0;
++}
++
++static int aaudio_pcm_close(struct snd_pcm_substream *substream)
++{
++    pr_debug("aaudio_pcm_close\n");
++    return 0;
++}
++
++static int aaudio_pcm_prepare(struct snd_pcm_substream *substream)
++{
++    return 0;
++}
++
++static int aaudio_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params)
++{
++    struct aaudio_stream *astream = aaudio_pcm_stream(substream);
++    pr_debug("aaudio_pcm_hw_params\n");
++
++    if (!astream->buffer_cnt || !astream->buffers)
++        return -EINVAL;
++
++    substream->runtime->dma_area = astream->buffers[0].ptr;
++    substream->runtime->dma_addr = astream->buffers[0].dma_addr;
++    substream->runtime->dma_bytes = astream->buffers[0].size;
++    return 0;
++}
++
++static int aaudio_pcm_hw_free(struct snd_pcm_substream *substream)
++{
++    pr_debug("aaudio_pcm_hw_free\n");
++    return 0;
++}
++
++/* Start device I/O; for playback, save the already-written part of the DMA buffer and restore it after the start. */
++static void aaudio_pcm_start(struct snd_pcm_substream *substream)
++{
++    struct aaudio_subdevice *sdev = snd_pcm_substream_chip(substream);
++    struct aaudio_stream *stream = aaudio_pcm_stream(substream);
++    void *buf = NULL;
++    size_t s = 0;
++    ktime_t time_start, time_end;
++    time_start = ktime_get();
++
++    if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
++        s = frames_to_bytes(substream->runtime, substream->runtime->control->appl_ptr);
++        buf = kmalloc(s, GFP_KERNEL);
++        if (buf) /* on allocation failure, start anyway without the backup instead of dereferencing NULL */
++            memcpy_fromio(buf, substream->runtime->dma_area, s);
++        time_end = ktime_get();
++        pr_debug("aaudio: Backed up the buffer in %lluns [%li]\n", ktime_to_ns(time_end - time_start),
++                substream->runtime->control->appl_ptr);
++    }
++
++    stream->waiting_for_first_ts = true;
++    stream->frame_min = stream->latency;
++
++    aaudio_cmd_start_io(sdev->a, sdev->dev_id);
++    if (buf) {
++        memcpy_toio(substream->runtime->dma_area, buf, s);
++        kfree(buf); /* the backup is only needed across the start command */
++    }
++    time_end = ktime_get();
++    pr_debug("aaudio: Started the audio device in %lluns\n", ktime_to_ns(time_end - time_start));
++}
++
++static int aaudio_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
++{
++    struct aaudio_subdevice *sdev = snd_pcm_substream_chip(substream);
++    struct aaudio_stream *stream = aaudio_pcm_stream(substream);
++    pr_debug("aaudio_pcm_trigger %x\n", cmd);
++
++    /* We only supports triggers on the #0 buffer */
++    if (substream->number != 0)
++        return 0;
++    switch (cmd) {
++        case SNDRV_PCM_TRIGGER_START:
++            aaudio_pcm_start(substream);
++            stream->started = 1;
++            break;
++        case SNDRV_PCM_TRIGGER_STOP:
++            aaudio_cmd_stop_io(sdev->a, sdev->dev_id);
++            stream->started = 0;
++            break;
++        default:
++            return -EINVAL;
++    }
++    return 0;
++}
++/* .pointer callback: estimate the current frame position from the time elapsed since the last remote timestamp. */
++static snd_pcm_uframes_t aaudio_pcm_pointer(struct snd_pcm_substream *substream)
++{
++    struct aaudio_stream *stream = aaudio_pcm_stream(substream);
++    ktime_t time_from_start;
++    snd_pcm_sframes_t frames;
++    snd_pcm_sframes_t buffer_time_length;
++
++    if (!stream->started || stream->waiting_for_first_ts) {
++        pr_warn("aaudio_pcm_pointer while not started\n");
++        return 0;
++    }
++
++    /* Approximate the pointer based on the last received timestamp */
++    time_from_start = ktime_get_boottime() - stream->remote_timestamp;
++    buffer_time_length = NSEC_PER_SEC * substream->runtime->buffer_size / substream->runtime->rate; /* ns per full buffer */
++    frames = (ktime_to_ns(time_from_start) % buffer_time_length) * substream->runtime->buffer_size / buffer_time_length;
++    if (ktime_to_ns(time_from_start) < buffer_time_length) { /* less than one buffer since the timestamp */
++        if (frames < stream->frame_min)
++            frames = stream->frame_min; /* never report a position below the recorded minimum */
++        else
++            stream->frame_min = 0;
++    } else {
++        if (ktime_to_ns(time_from_start) < 2 * buffer_time_length)
++            stream->frame_min = frames; /* estimate wrapped once: remember it as the new minimum */
++        else
++            stream->frame_min = 0; /* Heavy desync */
++    }
++    frames -= stream->latency;
++    if (frames < 0) /* add the smallest multiple of buffer_size that makes the position non-negative */
++        frames += ((-frames - 1) / substream->runtime->buffer_size + 1) * substream->runtime->buffer_size;
++    return (snd_pcm_uframes_t) frames;
++}
++
++static struct snd_pcm_ops aaudio_pcm_ops = {
++        .open =        aaudio_pcm_open,
++        .close =       aaudio_pcm_close,
++        .ioctl =       snd_pcm_lib_ioctl,
++        .hw_params =   aaudio_pcm_hw_params,
++        .hw_free =     aaudio_pcm_hw_free,
++        .prepare =     aaudio_pcm_prepare,
++        .trigger =     aaudio_pcm_trigger,
++        .pointer =     aaudio_pcm_pointer,
++        .mmap    =     snd_pcm_lib_mmap_iomem
++};
++
++/* Create the ALSA PCM for a subdevice and install the shared ops; returns 0 or a negative errno. */
++int aaudio_create_pcm(struct aaudio_subdevice *sdev)
++{
++    struct snd_pcm *pcm;
++    struct aaudio_alsa_pcm_id_mapping *id_mapping;
++    int err;
++
++    if (!sdev->is_pcm || (sdev->in_stream_cnt == 0 && sdev->out_stream_cnt == 0))
++        return -EINVAL;
++
++    for (id_mapping = aaudio_alsa_id_mappings; id_mapping->name; id_mapping++) {
++        if (!strcmp(sdev->uid, id_mapping->name)) {
++            sdev->alsa_id = id_mapping->alsa_id;
++            break;
++        }
++    }
++    if (!id_mapping->name) /* uid not in the fixed mapping table: hand out the next free id */
++        sdev->alsa_id = sdev->a->next_alsa_id++;
++    err = snd_pcm_new(sdev->a->card, sdev->uid, sdev->alsa_id,
++            (int) sdev->out_stream_cnt, (int) sdev->in_stream_cnt, &pcm);
++    if (err < 0)
++        return err;
++    pcm->private_data = sdev;
++    pcm->nonatomic = 1;
++    sdev->pcm = pcm;
++    strscpy(pcm->name, sdev->uid, sizeof(pcm->name)); /* bounded: no length limit on uid is visible here */
++    snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &aaudio_pcm_ops);
++    snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &aaudio_pcm_ops);
++    return 0;
++}
++
++/* Store the newest timestamp; the first one after a start only clears the wait flag, later ones signal a period. */
++static void aaudio_handle_stream_timestamp(struct snd_pcm_substream *substream, ktime_t timestamp)
++{
++    struct aaudio_stream *stream = aaudio_pcm_stream(substream);
++    unsigned long flags;
++    bool first;
++
++    snd_pcm_stream_lock_irqsave(substream, flags);
++    stream->remote_timestamp = timestamp;
++    first = stream->waiting_for_first_ts;
++    stream->waiting_for_first_ts = false;
++    snd_pcm_stream_unlock_irqrestore(substream, flags);
++
++    if (!first) /* period notification is issued outside the stream lock */
++        snd_pcm_period_elapsed(substream);
++}
++
++/* Forward a timestamp update to the first playback and first capture substream of the subdevice's PCM. */
++void aaudio_handle_timestamp(struct aaudio_subdevice *sdev, ktime_t os_timestamp, u64 dev_timestamp)
++{
++    struct snd_pcm_substream *play = sdev->pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream;
++    struct snd_pcm_substream *capt;
++    if (play) /* NOTE(review): playback is fed dev_timestamp, capture os_timestamp - confirm this is intended */
++        aaudio_handle_stream_timestamp(play, dev_timestamp);
++    capt = sdev->pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream;
++    if (capt)
++        aaudio_handle_stream_timestamp(capt, os_timestamp);
++}
+diff --git a/drivers/staging/apple-bce/audio/pcm.h b/drivers/staging/apple-bce/audio/pcm.h
+new file mode 100644
+index 000000000000..ea5f35fbe408
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/pcm.h
+@@ -0,0 +1,16 @@
++#ifndef AAUDIO_PCM_H
++#define AAUDIO_PCM_H
++
++#include <linux/types.h>
++#include <linux/ktime.h>
++
++struct aaudio_subdevice;
++struct aaudio_apple_description;
++struct snd_pcm_hardware;
++
++int aaudio_create_hw_info(struct aaudio_apple_description *desc, struct snd_pcm_hardware *alsa_hw, size_t buf_size);
++int aaudio_create_pcm(struct aaudio_subdevice *sdev);
++
++void aaudio_handle_timestamp(struct aaudio_subdevice *sdev, ktime_t os_timestamp, u64 dev_timestamp);
++
++#endif //AAUDIO_PCM_H
+diff --git a/drivers/staging/apple-bce/audio/protocol.c b/drivers/staging/apple-bce/audio/protocol.c
+new file mode 100644
+index 000000000000..2314813aeead
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/protocol.c
+@@ -0,0 +1,347 @@
++#include "protocol.h"
++#include "protocol_bce.h"
++#include "audio.h"
++
++int aaudio_msg_read_base(struct aaudio_msg *msg, struct aaudio_msg_base *base)
++{
++    if (msg->size < sizeof(struct aaudio_msg_header) + sizeof(struct aaudio_msg_base) * 2)
++        return -EINVAL;
++    *base = *((struct aaudio_msg_base *) ((struct aaudio_msg_header *) msg->data + 1));
++    return 0;
++}
++
++#define READ_START(type) \
++    size_t offset = sizeof(struct aaudio_msg_header) + sizeof(struct aaudio_msg_base); (void)offset; \
++    if (((struct aaudio_msg_base *) ((struct aaudio_msg_header *) msg->data + 1))->msg != type) \
++        return -EINVAL;
++#define READ_DEVID_VAR(devid) *devid = ((struct aaudio_msg_header *) msg->data)->device_id
++#define READ_VAL(type) ({ offset += sizeof(type); *((type *) ((u8 *) msg->data + offset - sizeof(type))); })
++#define READ_VAR(type, var) *var = READ_VAL(type)
++
++int aaudio_msg_read_start_io_response(struct aaudio_msg *msg)
++{
++    READ_START(AAUDIO_MSG_START_IO_RESPONSE);
++    return 0;
++}
++
++int aaudio_msg_read_stop_io_response(struct aaudio_msg *msg)
++{
++    READ_START(AAUDIO_MSG_STOP_IO_RESPONSE);
++    return 0;
++}
++
++int aaudio_msg_read_update_timestamp(struct aaudio_msg *msg, aaudio_device_id_t *devid,
++        u64 *timestamp, u64 *update_seed)
++{
++    READ_START(AAUDIO_MSG_UPDATE_TIMESTAMP);
++    READ_DEVID_VAR(devid);
++    READ_VAR(u64, timestamp);
++    READ_VAR(u64, update_seed);
++    return 0;
++}
++
++int aaudio_msg_read_get_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj,
++        struct aaudio_prop_addr *prop, void **data, u64 *data_size)
++{
++    READ_START(AAUDIO_MSG_GET_PROPERTY_RESPONSE);
++    READ_VAR(aaudio_object_id_t, obj);
++    READ_VAR(u32, &prop->element);
++    READ_VAR(u32, &prop->scope);
++    READ_VAR(u32, &prop->selector);
++    READ_VAR(u64, data_size);
++    *data = ((u8 *) msg->data + offset);
++    /* offset += data_size; */
++    return 0;
++}
++
++int aaudio_msg_read_set_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj)
++{
++    READ_START(AAUDIO_MSG_SET_PROPERTY_RESPONSE);
++    READ_VAR(aaudio_object_id_t, obj);
++    return 0;
++}
++
++int aaudio_msg_read_property_listener_response(struct aaudio_msg *msg, aaudio_object_id_t *obj,
++        struct aaudio_prop_addr *prop)
++{
++    READ_START(AAUDIO_MSG_PROPERTY_LISTENER_RESPONSE);
++    READ_VAR(aaudio_object_id_t, obj);
++    READ_VAR(u32, &prop->element);
++    READ_VAR(u32, &prop->scope);
++    READ_VAR(u32, &prop->selector);
++    return 0;
++}
++
++int aaudio_msg_read_property_changed(struct aaudio_msg *msg, aaudio_device_id_t *devid, aaudio_object_id_t *obj,
++        struct aaudio_prop_addr *prop)
++{
++    READ_START(AAUDIO_MSG_PROPERTY_CHANGED);
++    READ_DEVID_VAR(devid);
++    READ_VAR(aaudio_object_id_t, obj);
++    READ_VAR(u32, &prop->element);
++    READ_VAR(u32, &prop->scope);
++    READ_VAR(u32, &prop->selector);
++    return 0;
++}
++
++int aaudio_msg_read_set_input_stream_address_ranges_response(struct aaudio_msg *msg)
++{
++    READ_START(AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES_RESPONSE);
++    return 0;
++}
++
++int aaudio_msg_read_get_input_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt)
++{
++    READ_START(AAUDIO_MSG_GET_INPUT_STREAM_LIST_RESPONSE);
++    READ_VAR(u64, str_cnt);
++    *str_l = (aaudio_device_id_t *) ((u8 *) msg->data + offset);
++    /* offset += str_cnt * sizeof(aaudio_object_id_t); */
++    return 0;
++}
++
++int aaudio_msg_read_get_output_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt)
++{
++    READ_START(AAUDIO_MSG_GET_OUTPUT_STREAM_LIST_RESPONSE);
++    READ_VAR(u64, str_cnt);
++    *str_l = (aaudio_device_id_t *) ((u8 *) msg->data + offset);
++    /* offset += str_cnt * sizeof(aaudio_object_id_t); */
++    return 0;
++}
++
++int aaudio_msg_read_set_remote_access_response(struct aaudio_msg *msg)
++{
++    READ_START(AAUDIO_MSG_SET_REMOTE_ACCESS_RESPONSE);
++    return 0;
++}
++
++int aaudio_msg_read_get_device_list_response(struct aaudio_msg *msg, aaudio_device_id_t **dev_l, u64 *dev_cnt)
++{
++    READ_START(AAUDIO_MSG_GET_DEVICE_LIST_RESPONSE);
++    READ_VAR(u64, dev_cnt);
++    *dev_l = (aaudio_device_id_t *) ((u8 *) msg->data + offset);
++    /* offset += dev_cnt * sizeof(aaudio_device_id_t); */
++    return 0;
++}
++
++#define WRITE_START_OF_TYPE(typev, devid) \
++    size_t offset = sizeof(struct aaudio_msg_header); (void) offset; \
++    ((struct aaudio_msg_header *) msg->data)->type = (typev); \
++    ((struct aaudio_msg_header *) msg->data)->device_id = (devid);
++#define WRITE_START_COMMAND(devid) WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_COMMAND, devid)
++#define WRITE_START_RESPONSE() WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_RESPONSE, 0)
++#define WRITE_START_NOTIFICATION() WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_NOTIFICATION, 0)
++#define WRITE_VAL(type, value) { *((type *) ((u8 *) msg->data + offset)) = value; offset += sizeof(type); } /* advance by the stored width */
++#define WRITE_BIN(value, size) { memcpy((u8 *) msg->data + offset, value, size); offset += size; }
++#define WRITE_BASE(type) WRITE_VAL(u32, type) WRITE_VAL(u32, 0)
++#define WRITE_END() { msg->size = offset; }
++
++void aaudio_msg_write_start_io(struct aaudio_msg *msg, aaudio_device_id_t dev)
++{
++    WRITE_START_COMMAND(dev);
++    WRITE_BASE(AAUDIO_MSG_START_IO);
++    WRITE_END();
++}
++
++void aaudio_msg_write_stop_io(struct aaudio_msg *msg, aaudio_device_id_t dev)
++{
++    WRITE_START_COMMAND(dev);
++    WRITE_BASE(AAUDIO_MSG_STOP_IO);
++    WRITE_END();
++}
++
++void aaudio_msg_write_get_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
++        struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size)
++{
++    WRITE_START_COMMAND(dev);
++    WRITE_BASE(AAUDIO_MSG_GET_PROPERTY);
++    WRITE_VAL(aaudio_object_id_t, obj);
++    WRITE_VAL(u32, prop.element);
++    WRITE_VAL(u32, prop.scope);
++    WRITE_VAL(u32, prop.selector);
++    WRITE_VAL(u64, qualifier_size);
++    WRITE_BIN(qualifier, qualifier_size);
++    WRITE_END();
++}
++
++void aaudio_msg_write_set_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
++        struct aaudio_prop_addr prop, void *data, u64 data_size, void *qualifier, u64 qualifier_size)
++{
++    WRITE_START_COMMAND(dev);
++    WRITE_BASE(AAUDIO_MSG_SET_PROPERTY);
++    WRITE_VAL(aaudio_object_id_t, obj);
++    WRITE_VAL(u32, prop.element);
++    WRITE_VAL(u32, prop.scope);
++    WRITE_VAL(u32, prop.selector);
++    WRITE_VAL(u64, data_size);
++    WRITE_BIN(data, data_size);
++    WRITE_VAL(u64, qualifier_size);
++    WRITE_BIN(qualifier, qualifier_size);
++    WRITE_END();
++}
++
++void aaudio_msg_write_property_listener(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
++        struct aaudio_prop_addr prop)
++{
++    WRITE_START_COMMAND(dev);
++    WRITE_BASE(AAUDIO_MSG_PROPERTY_LISTENER);
++    WRITE_VAL(aaudio_object_id_t, obj);
++    WRITE_VAL(u32, prop.element);
++    WRITE_VAL(u32, prop.scope);
++    WRITE_VAL(u32, prop.selector);
++    WRITE_END();
++}
++
++void aaudio_msg_write_set_input_stream_address_ranges(struct aaudio_msg *msg, aaudio_device_id_t devid)
++{
++    WRITE_START_COMMAND(devid);
++    WRITE_BASE(AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES);
++    WRITE_END();
++}
++
++void aaudio_msg_write_get_input_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid)
++{
++    WRITE_START_COMMAND(devid);
++    WRITE_BASE(AAUDIO_MSG_GET_INPUT_STREAM_LIST);
++    WRITE_END();
++}
++
++void aaudio_msg_write_get_output_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid)
++{
++    WRITE_START_COMMAND(devid);
++    WRITE_BASE(AAUDIO_MSG_GET_OUTPUT_STREAM_LIST);
++    WRITE_END();
++}
++
++void aaudio_msg_write_set_remote_access(struct aaudio_msg *msg, u64 mode)
++{
++    WRITE_START_COMMAND(0);
++    WRITE_BASE(AAUDIO_MSG_SET_REMOTE_ACCESS);
++    WRITE_VAL(u64, mode);
++    WRITE_END();
++}
++
++void aaudio_msg_write_alive_notification(struct aaudio_msg *msg, u32 proto_ver, u32 msg_ver)
++{
++    WRITE_START_NOTIFICATION();
++    WRITE_BASE(AAUDIO_MSG_NOTIFICATION_ALIVE);
++    WRITE_VAL(u32, proto_ver);
++    WRITE_VAL(u32, msg_ver);
++    WRITE_END();
++}
++
++void aaudio_msg_write_update_timestamp_response(struct aaudio_msg *msg)
++{
++    WRITE_START_RESPONSE();
++    WRITE_BASE(AAUDIO_MSG_UPDATE_TIMESTAMP_RESPONSE);
++    WRITE_END();
++}
++
++void aaudio_msg_write_get_device_list(struct aaudio_msg *msg)
++{
++    WRITE_START_COMMAND(0);
++    WRITE_BASE(AAUDIO_MSG_GET_DEVICE_LIST);
++    WRITE_END();
++}
++
++#define CMD_SHARED_VARS_NO_REPLY \
++    int status = 0; \
++    struct aaudio_send_ctx sctx;
++#define CMD_SHARED_VARS \
++    CMD_SHARED_VARS_NO_REPLY \
++    struct aaudio_msg reply = aaudio_reply_alloc(); \
++    struct aaudio_msg *buf = &reply;
++#define CMD_SEND_REQUEST(fn, ...) \
++    if ((status = aaudio_send_cmd_sync(a, &sctx, buf, 500, fn, ##__VA_ARGS__))) \
++        return status;
++#define CMD_DEF_SHARED_AND_SEND(fn, ...) /* declares and allocates reply; releases it again if the send fails */ \
++    CMD_SHARED_VARS \
++    if ((status = aaudio_send_cmd_sync(a, &sctx, buf, 500, fn, ##__VA_ARGS__))) { aaudio_reply_free(&reply); return status; }
++#define CMD_DEF_SHARED_NO_REPLY_AND_SEND(fn, ...) \
++    CMD_SHARED_VARS_NO_REPLY \
++    CMD_SEND_REQUEST(fn, ##__VA_ARGS__);
++#define CMD_HNDL_REPLY_NO_FREE(fn, ...) \
++    status = fn(buf, ##__VA_ARGS__); \
++    return status;
++#define CMD_HNDL_REPLY_AND_FREE(fn, ...) \
++    status = fn(buf, ##__VA_ARGS__); \
++    aaudio_reply_free(&reply); \
++    return status;
++
++int aaudio_cmd_start_io(struct aaudio_device *a, aaudio_device_id_t devid)
++{
++    CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_start_io, devid);
++    CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_start_io_response);
++}
++int aaudio_cmd_stop_io(struct aaudio_device *a, aaudio_device_id_t devid)
++{
++    CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_stop_io, devid);
++    CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_stop_io_response);
++}
++int aaudio_cmd_get_property(struct aaudio_device *a, struct aaudio_msg *buf,
++        aaudio_device_id_t devid, aaudio_object_id_t obj,
++        struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void **data, u64 *data_size)
++{
++    CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_property, devid, obj, prop, qualifier, qualifier_size);
++    CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_property_response, &obj, &prop, data, data_size);
++}
++int aaudio_cmd_get_primitive_property(struct aaudio_device *a,
++        aaudio_device_id_t devid, aaudio_object_id_t obj,
++        struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size)
++{
++    int status;
++    struct aaudio_msg reply = aaudio_reply_alloc();
++    void *r_data;
++    u64 r_data_size;
++    if ((status = aaudio_cmd_get_property(a, &reply, devid, obj, prop, qualifier, qualifier_size,
++            &r_data, &r_data_size)))
++        goto finish;
++    if (r_data_size != data_size) {
++        status = -EINVAL;
++        goto finish;
++    }
++    memcpy(data, r_data, data_size);
++finish:
++    aaudio_reply_free(&reply);
++    return status;
++}
++int aaudio_cmd_set_property(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj,
++        struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size)
++{
++    CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_property, devid, obj, prop, data, data_size,
++            qualifier, qualifier_size);
++    CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_property_response, &obj);
++}
++int aaudio_cmd_property_listener(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj,
++        struct aaudio_prop_addr prop)
++{
++    CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_property_listener, devid, obj, prop);
++    CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_property_listener_response, &obj, &prop);
++}
++int aaudio_cmd_set_input_stream_address_ranges(struct aaudio_device *a, aaudio_device_id_t devid)
++{
++    CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_input_stream_address_ranges, devid);
++    CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_input_stream_address_ranges_response);
++}
++int aaudio_cmd_get_input_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid,
++        aaudio_object_id_t **str_l, u64 *str_cnt)
++{
++    CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_input_stream_list, devid);
++    CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_input_stream_list_response, str_l, str_cnt);
++}
++int aaudio_cmd_get_output_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid,
++        aaudio_object_id_t **str_l, u64 *str_cnt)
++{
++    CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_output_stream_list, devid);
++    CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_output_stream_list_response, str_l, str_cnt);
++}
++int aaudio_cmd_set_remote_access(struct aaudio_device *a, u64 mode)
++{
++    CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_remote_access, mode);
++    CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_remote_access_response);
++}
++int aaudio_cmd_get_device_list(struct aaudio_device *a, struct aaudio_msg *buf,
++        aaudio_device_id_t **dev_l, u64 *dev_cnt)
++{
++    CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_device_list);
++    CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_device_list_response, dev_l, dev_cnt);
++}
+\ No newline at end of file
+diff --git a/drivers/staging/apple-bce/audio/protocol.h b/drivers/staging/apple-bce/audio/protocol.h
+new file mode 100644
+index 000000000000..3427486f3f57
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/protocol.h
+@@ -0,0 +1,147 @@
++#ifndef AAUDIO_PROTOCOL_H
++#define AAUDIO_PROTOCOL_H
++
++#include <linux/types.h>
++
++struct aaudio_device;
++
++typedef u64 aaudio_device_id_t;
++typedef u64 aaudio_object_id_t;
++
++struct aaudio_msg {
++    void *data;
++    size_t size;
++};
++
++struct __attribute__((packed)) aaudio_msg_header {
++    char tag[4];
++    u8 type;
++    aaudio_device_id_t device_id; // Idk, use zero for commands?
++};
++struct __attribute__((packed)) aaudio_msg_base {
++    u32 msg;
++    u32 status;
++};
++
++struct aaudio_prop_addr {
++    u32 scope;
++    u32 selector;
++    u32 element;
++};
++#define AAUDIO_PROP(scope, sel, el) (struct aaudio_prop_addr) { scope, sel, el }
++
++enum {
++    AAUDIO_MSG_TYPE_COMMAND = 1,
++    AAUDIO_MSG_TYPE_RESPONSE = 2,
++    AAUDIO_MSG_TYPE_NOTIFICATION = 3
++};
++
++enum {
++    AAUDIO_MSG_START_IO = 0,
++    AAUDIO_MSG_START_IO_RESPONSE = 1,
++    AAUDIO_MSG_STOP_IO = 2,
++    AAUDIO_MSG_STOP_IO_RESPONSE = 3,
++    AAUDIO_MSG_UPDATE_TIMESTAMP = 4,
++    AAUDIO_MSG_GET_PROPERTY = 7,
++    AAUDIO_MSG_GET_PROPERTY_RESPONSE = 8,
++    AAUDIO_MSG_SET_PROPERTY = 9,
++    AAUDIO_MSG_SET_PROPERTY_RESPONSE = 10,
++    AAUDIO_MSG_PROPERTY_LISTENER = 11,
++    AAUDIO_MSG_PROPERTY_LISTENER_RESPONSE = 12,
++    AAUDIO_MSG_PROPERTY_CHANGED = 13,
++    AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES = 18,
++    AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES_RESPONSE = 19,
++    AAUDIO_MSG_GET_INPUT_STREAM_LIST = 24,
++    AAUDIO_MSG_GET_INPUT_STREAM_LIST_RESPONSE = 25,
++    AAUDIO_MSG_GET_OUTPUT_STREAM_LIST = 26,
++    AAUDIO_MSG_GET_OUTPUT_STREAM_LIST_RESPONSE = 27,
++    AAUDIO_MSG_SET_REMOTE_ACCESS = 32,
++    AAUDIO_MSG_SET_REMOTE_ACCESS_RESPONSE = 33,
++    AAUDIO_MSG_UPDATE_TIMESTAMP_RESPONSE = 34,
++
++    AAUDIO_MSG_NOTIFICATION_ALIVE = 100,
++    AAUDIO_MSG_GET_DEVICE_LIST = 101,
++    AAUDIO_MSG_GET_DEVICE_LIST_RESPONSE = 102,
++    AAUDIO_MSG_NOTIFICATION_BOOT = 104
++};
++
++enum {
++    AAUDIO_REMOTE_ACCESS_OFF = 0,
++    AAUDIO_REMOTE_ACCESS_ON = 2
++};
++
++enum {
++    AAUDIO_PROP_SCOPE_GLOBAL = 0x676c6f62, // 'glob'
++    AAUDIO_PROP_SCOPE_INPUT  = 0x696e7074, // 'inpt'
++    AAUDIO_PROP_SCOPE_OUTPUT = 0x6f757470  // 'outp'
++};
++
++enum {
++    AAUDIO_PROP_UID          = 0x75696420, // 'uid '
++    AAUDIO_PROP_BOOL_VALUE   = 0x6263766c, // 'bcvl'
++    AAUDIO_PROP_JACK_PLUGGED = 0x6a61636b, // 'jack'
++    AAUDIO_PROP_SEL_VOLUME   = 0x64656176, // 'deav'
++    AAUDIO_PROP_LATENCY      = 0x6c746e63, // 'ltnc'
++    AAUDIO_PROP_PHYS_FORMAT  = 0x70667420  // 'pft '
++};
++
++int aaudio_msg_read_base(struct aaudio_msg *msg, struct aaudio_msg_base *base);
++
++int aaudio_msg_read_start_io_response(struct aaudio_msg *msg);
++int aaudio_msg_read_stop_io_response(struct aaudio_msg *msg);
++int aaudio_msg_read_update_timestamp(struct aaudio_msg *msg, aaudio_device_id_t *devid,
++        u64 *timestamp, u64 *update_seed);
++int aaudio_msg_read_get_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj,
++        struct aaudio_prop_addr *prop, void **data, u64 *data_size);
++int aaudio_msg_read_set_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj);
++int aaudio_msg_read_property_listener_response(struct aaudio_msg *msg,aaudio_object_id_t *obj,
++        struct aaudio_prop_addr *prop);
++int aaudio_msg_read_property_changed(struct aaudio_msg *msg, aaudio_device_id_t *devid, aaudio_object_id_t *obj,
++        struct aaudio_prop_addr *prop);
++int aaudio_msg_read_set_input_stream_address_ranges_response(struct aaudio_msg *msg);
++int aaudio_msg_read_get_input_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt);
++int aaudio_msg_read_get_output_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt);
++int aaudio_msg_read_set_remote_access_response(struct aaudio_msg *msg);
++int aaudio_msg_read_get_device_list_response(struct aaudio_msg *msg, aaudio_device_id_t **dev_l, u64 *dev_cnt);
++
++void aaudio_msg_write_start_io(struct aaudio_msg *msg, aaudio_device_id_t dev);
++void aaudio_msg_write_stop_io(struct aaudio_msg *msg, aaudio_device_id_t dev);
++void aaudio_msg_write_get_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
++        struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size);
++void aaudio_msg_write_set_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
++        struct aaudio_prop_addr prop, void *data, u64 data_size, void *qualifier, u64 qualifier_size);
++void aaudio_msg_write_property_listener(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
++        struct aaudio_prop_addr prop);
++void aaudio_msg_write_set_input_stream_address_ranges(struct aaudio_msg *msg, aaudio_device_id_t devid);
++void aaudio_msg_write_get_input_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid);
++void aaudio_msg_write_get_output_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid);
++void aaudio_msg_write_set_remote_access(struct aaudio_msg *msg, u64 mode);
++void aaudio_msg_write_alive_notification(struct aaudio_msg *msg, u32 proto_ver, u32 msg_ver);
++void aaudio_msg_write_update_timestamp_response(struct aaudio_msg *msg);
++void aaudio_msg_write_get_device_list(struct aaudio_msg *msg);
++
++
++int aaudio_cmd_start_io(struct aaudio_device *a, aaudio_device_id_t devid);
++int aaudio_cmd_stop_io(struct aaudio_device *a, aaudio_device_id_t devid);
++int aaudio_cmd_get_property(struct aaudio_device *a, struct aaudio_msg *buf,
++        aaudio_device_id_t devid, aaudio_object_id_t obj,
++        struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void **data, u64 *data_size);
++int aaudio_cmd_get_primitive_property(struct aaudio_device *a,
++        aaudio_device_id_t devid, aaudio_object_id_t obj,
++        struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size);
++int aaudio_cmd_set_property(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj,
++        struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size);
++int aaudio_cmd_property_listener(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj,
++        struct aaudio_prop_addr prop);
++int aaudio_cmd_set_input_stream_address_ranges(struct aaudio_device *a, aaudio_device_id_t devid);
++int aaudio_cmd_get_input_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid,
++        aaudio_object_id_t **str_l, u64 *str_cnt);
++int aaudio_cmd_get_output_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid,
++        aaudio_object_id_t **str_l, u64 *str_cnt);
++int aaudio_cmd_set_remote_access(struct aaudio_device *a, u64 mode);
++int aaudio_cmd_get_device_list(struct aaudio_device *a, struct aaudio_msg *buf,
++        aaudio_device_id_t **dev_l, u64 *dev_cnt);
++
++
++
++#endif //AAUDIO_PROTOCOL_H
+diff --git a/drivers/staging/apple-bce/audio/protocol_bce.c b/drivers/staging/apple-bce/audio/protocol_bce.c
+new file mode 100644
+index 000000000000..28f2dfd44d67
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/protocol_bce.c
+@@ -0,0 +1,226 @@
++#include "protocol_bce.h"
++
++#include "audio.h"
++
++static void aaudio_bce_out_queue_completion(struct bce_queue_sq *sq);
++static void aaudio_bce_in_queue_completion(struct bce_queue_sq *sq);
++static int aaudio_bce_queue_init(struct aaudio_device *dev, struct aaudio_bce_queue *q, const char *name, int direction,
++                                 bce_sq_completion cfn);
++void aaudio_bce_in_queue_submit_pending(struct aaudio_bce_queue *q, size_t count);
++
++int aaudio_bce_init(struct aaudio_device *dev)
++{
++    int status;
++    struct aaudio_bce *bce = &dev->bcem;
++    bce->cq = bce_create_cq(dev->bce, 0x80);
++    spin_lock_init(&bce->spinlock);
++    if (!bce->cq)
++        return -EINVAL;
++    if ((status = aaudio_bce_queue_init(dev, &bce->qout, "com.apple.BridgeAudio.IntelToARM", DMA_TO_DEVICE,
++            aaudio_bce_out_queue_completion))) {
++        return status;
++    }
++    if ((status = aaudio_bce_queue_init(dev, &bce->qin, "com.apple.BridgeAudio.ARMToIntel", DMA_FROM_DEVICE,
++            aaudio_bce_in_queue_completion))) {
++        return status;
++    }
++    aaudio_bce_in_queue_submit_pending(&bce->qin, bce->qin.el_count);
++    return 0;
++}
++
++int aaudio_bce_queue_init(struct aaudio_device *dev, struct aaudio_bce_queue *q, const char *name, int direction,
++        bce_sq_completion cfn)
++{
++    q->cq = dev->bcem.cq;
++    q->el_size = AAUDIO_BCE_QUEUE_ELEMENT_SIZE;
++    q->el_count = AAUDIO_BCE_QUEUE_ELEMENT_COUNT;
++    /* NOTE: The Apple impl uses 0x80 as the queue size, however we use 21 (in fact 20) to simplify the impl */
++    q->sq = bce_create_sq(dev->bce, q->cq, name, (u32) (q->el_count + 1), direction, cfn, dev);
++    if (!q->sq)
++        return -EINVAL;
++
++    q->data = dma_alloc_coherent(&dev->bce->pci->dev, q->el_size * q->el_count, &q->dma_addr, GFP_KERNEL);
++    if (!q->data) {
++        bce_destroy_sq(dev->bce, q->sq);
++        return -EINVAL;
++    }
++    return 0;
++}
++
++static void aaudio_send_create_tag(struct aaudio_bce *b, int *tagn, char tag[4])
++{
++    char tag_zero[5];
++    b->tag_num = (b->tag_num + 1) % AAUDIO_BCE_QUEUE_TAG_COUNT;
++    *tagn = b->tag_num;
++    snprintf(tag_zero, 5, "S%03d", b->tag_num);
++    *((u32 *) tag) = *((u32 *) tag_zero);
++}
++
++int __aaudio_send_prepare(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, char *tag)
++{
++    int status;
++    size_t index;
++    void *dptr;
++    struct aaudio_msg_header *header;
++    if ((status = bce_reserve_submission(b->qout.sq, &ctx->timeout))) /* may wait up to ctx->timeout for a free slot */
++        return status; /* failure path: the spinlock has not been taken */
++    spin_lock_irqsave(&b->spinlock, ctx->irq_flags); /* stays held on return 0; __aaudio_send() releases it */
++    index = b->qout.data_tail;
++    dptr = (u8 *) b->qout.data + index * b->qout.el_size; /* element at the tail of the output buffer */
++    ctx->msg.data = dptr;
++    header = dptr;
++    if (tag)
++        *((u32 *) header->tag) = *((u32 *) tag); /* caller-supplied 4-byte tag, copied verbatim */
++    else
++        aaudio_send_create_tag(b, &ctx->tag_n, header->tag); /* generate the next "Snnn" tag and record its number */
++    return 0;
++}
++
++void __aaudio_send(struct aaudio_bce *b, struct aaudio_send_ctx *ctx)
++{
++    struct bce_qe_submission *s = bce_next_submission(b->qout.sq);
++#ifdef DEBUG
++    pr_debug("aaudio: Sending command data\n");
++    print_hex_dump(KERN_DEBUG, "aaudio:OUT ", DUMP_PREFIX_NONE, 32, 1, ctx->msg.data, ctx->msg.size, true);
++#endif
++    bce_set_submission_single(s, b->qout.dma_addr + (dma_addr_t) (ctx->msg.data - b->qout.data), ctx->msg.size);
++    bce_submit_to_device(b->qout.sq);
++    b->qout.data_tail = (b->qout.data_tail + 1) % b->qout.el_count;
++    spin_unlock_irqrestore(&b->spinlock, ctx->irq_flags);
++}
++
++int __aaudio_send_cmd_sync(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, struct aaudio_msg *reply)
++{
++    struct aaudio_bce_queue_entry ent;
++    DECLARE_COMPLETION_ONSTACK(cmpl);
++    ent.msg = reply;
++    ent.cmpl = &cmpl;
++    b->pending_entries[ctx->tag_n] = &ent;
++    __aaudio_send(b, ctx); /* unlocks the spinlock */
++    ctx->timeout = wait_for_completion_timeout(&cmpl, ctx->timeout);
++    if (ctx->timeout == 0) {
++        /* Remove the pending queue entry; this will be normally handled by the completion route but
++         * during a timeout it won't */
++        spin_lock_irqsave(&b->spinlock, ctx->irq_flags);
++        if (b->pending_entries[ctx->tag_n] == &ent)
++            b->pending_entries[ctx->tag_n] = NULL;
++        spin_unlock_irqrestore(&b->spinlock, ctx->irq_flags);
++        return -ETIMEDOUT;
++    }
++    return 0;
++}
++
++static void aaudio_handle_reply(struct aaudio_bce *b, struct aaudio_msg *reply)
++{
++    const char *tag;
++    int tagn;
++    unsigned long irq_flags;
++    char tag_zero[5];
++    struct aaudio_bce_queue_entry *entry;
++    /* Match a device reply to the pending synchronous command carrying the same "Snnn" tag and wake its waiter. */
++    tag = ((struct aaudio_msg_header *) reply->data)->tag;
++    if (tag[0] != 'S') {
++        pr_err("aaudio_handle_reply: Unexpected tag: %.4s\n", tag);
++        return;
++    }
++    memcpy(tag_zero, tag, 4); /* the wire tag is not NUL-terminated; copy it out before parsing */
++    tag_zero[4] = 0;
++    /* kstrtoint() accepts a sign, so a device-supplied tag such as "S-12" must never index pending_entries[]. */
++    if (kstrtoint(&tag_zero[1], 10, &tagn) || tagn < 0 || tagn >= AAUDIO_BCE_QUEUE_TAG_COUNT) {
++        pr_err("aaudio_handle_reply: Tag parse failed: %.4s\n", tag);
++        return;
++    }
++
++    spin_lock_irqsave(&b->spinlock, irq_flags);
++    entry = b->pending_entries[tagn];
++    if (entry) {
++        if (reply->size < entry->msg->size) /* never copy more than the device actually sent */
++            entry->msg->size = reply->size;
++        memcpy(entry->msg->data, reply->data, entry->msg->size);
++        complete(entry->cmpl);
++        b->pending_entries[tagn] = NULL; /* entry lives on the waiter's stack; drop the reference under the lock */
++    } else {
++        pr_err("aaudio_handle_reply: No queued item found for tag: %.4s\n", tag);
++    }
++    spin_unlock_irqrestore(&b->spinlock, irq_flags);
++}
++
++static void aaudio_bce_out_queue_completion(struct bce_queue_sq *sq)
++{
++    while (bce_next_completion(sq)) {
++        //pr_info("aaudio: Send confirmed\n");
++        bce_notify_submission_complete(sq);
++    }
++}
++
++static void aaudio_bce_in_queue_handle_msg(struct aaudio_device *a, struct aaudio_msg *msg);
++
++static void aaudio_bce_in_queue_completion(struct bce_queue_sq *sq)
++{
++    struct aaudio_msg msg;
++    struct aaudio_device *dev = sq->userdata;
++    struct aaudio_bce_queue *q = &dev->bcem.qin;
++    struct bce_sq_completion_data *c;
++    size_t cnt = 0;
++
++    mb();
++    while ((c = bce_next_completion(sq))) {
++        msg.data = (u8 *) q->data + q->data_head * q->el_size;
++        msg.size = c->data_size;
++#ifdef DEBUG
++        pr_debug("aaudio: Received command data %llx\n", c->data_size);
++        print_hex_dump(KERN_DEBUG, "aaudio:IN ", DUMP_PREFIX_NONE, 32, 1, msg.data, min(msg.size, 128UL), true);
++#endif
++        aaudio_bce_in_queue_handle_msg(dev, &msg);
++
++        q->data_head = (q->data_head + 1) % q->el_count;
++
++        bce_notify_submission_complete(sq);
++        ++cnt;
++    }
++    aaudio_bce_in_queue_submit_pending(q, cnt);
++}
++
++static void aaudio_bce_in_queue_handle_msg(struct aaudio_device *a, struct aaudio_msg *msg)
++{
++    struct aaudio_msg_header *header = (struct aaudio_msg_header *) msg->data; /* only dereferenced after the size check */
++    if (msg->size < sizeof(struct aaudio_msg_header)) {
++        pr_err("aaudio: Msg size smaller than header (%lx)\n", msg->size); /* newline added so the record is terminated */
++        return;
++    }
++    if (header->type == AAUDIO_MSG_TYPE_RESPONSE) { /* dispatch on the header's message type */
++        aaudio_handle_reply(&a->bcem, msg);
++    } else if (header->type == AAUDIO_MSG_TYPE_COMMAND) {
++        aaudio_handle_command(a, msg);
++    } else if (header->type == AAUDIO_MSG_TYPE_NOTIFICATION) {
++        aaudio_handle_notification(a, msg);
++    } /* any other type is silently ignored */
++}
++
++void aaudio_bce_in_queue_submit_pending(struct aaudio_bce_queue *q, size_t count)
++{
++    struct bce_qe_submission *s;
++    while (count--) {
++        if (bce_reserve_submission(q->sq, NULL)) {
++            pr_err("aaudio: Failed to reserve an event queue submission\n");
++            break;
++        }
++        s = bce_next_submission(q->sq);
++        bce_set_submission_single(s, q->dma_addr + (dma_addr_t) (q->data_tail * q->el_size), q->el_size);
++        q->data_tail = (q->data_tail + 1) % q->el_count;
++    }
++    bce_submit_to_device(q->sq);
++}
++
++struct aaudio_msg aaudio_reply_alloc(void)
++{
++    struct aaudio_msg ret;
++    ret.size = AAUDIO_BCE_QUEUE_ELEMENT_SIZE;
++    ret.data = kmalloc(ret.size, GFP_KERNEL);
++    return ret;
++}
++
++void aaudio_reply_free(struct aaudio_msg *reply)
++{
++    kfree(reply->data);
++}
+diff --git a/drivers/staging/apple-bce/audio/protocol_bce.h b/drivers/staging/apple-bce/audio/protocol_bce.h
+new file mode 100644
+index 000000000000..14d26c05ddf9
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/protocol_bce.h
+@@ -0,0 +1,72 @@
++#ifndef AAUDIO_PROTOCOL_BCE_H
++#define AAUDIO_PROTOCOL_BCE_H
++
++#include "protocol.h"
++#include "../queue.h"
++
++#define AAUDIO_BCE_QUEUE_ELEMENT_SIZE 0x1000
++#define AAUDIO_BCE_QUEUE_ELEMENT_COUNT 20
++
++#define AAUDIO_BCE_QUEUE_TAG_COUNT 1000
++
++struct aaudio_device;
++
++struct aaudio_bce_queue_entry {
++    struct aaudio_msg *msg;
++    struct completion *cmpl;
++};
++struct aaudio_bce_queue {
++    struct bce_queue_cq *cq;
++    struct bce_queue_sq *sq;
++    void *data;
++    dma_addr_t dma_addr;
++    size_t data_head, data_tail;
++    size_t el_size, el_count;
++};
++struct aaudio_bce {
++    struct bce_queue_cq *cq;
++    struct aaudio_bce_queue qin;
++    struct aaudio_bce_queue qout;
++    int tag_num;
++    struct aaudio_bce_queue_entry *pending_entries[AAUDIO_BCE_QUEUE_TAG_COUNT];
++    struct spinlock spinlock;
++};
++
++struct aaudio_send_ctx {
++    int status;
++    int tag_n;
++    unsigned long irq_flags;
++    struct aaudio_msg msg;
++    unsigned long timeout;
++};
++
++int aaudio_bce_init(struct aaudio_device *dev);
++int __aaudio_send_prepare(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, char *tag);
++void __aaudio_send(struct aaudio_bce *b, struct aaudio_send_ctx *ctx);
++int __aaudio_send_cmd_sync(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, struct aaudio_msg *reply);
++
++#define aaudio_send_with_tag(a, ctx, tag, tout, fn, ...) ({ \
++    (ctx)->timeout = msecs_to_jiffies(tout); \
++    (ctx)->status = __aaudio_send_prepare(&(a)->bcem, (ctx), (tag)); \
++    if (!(ctx)->status) { \
++        fn(&(ctx)->msg, ##__VA_ARGS__); \
++        __aaudio_send(&(a)->bcem, (ctx)); \
++    } \
++    (ctx)->status; \
++})
++#define aaudio_send(a, ctx, tout, fn, ...) aaudio_send_with_tag(a, ctx, NULL, tout, fn, ##__VA_ARGS__)
++
++#define aaudio_send_cmd_sync(a, ctx, reply, tout, fn, ...) ({ \
++    (ctx)->timeout = msecs_to_jiffies(tout); \
++    (ctx)->status = __aaudio_send_prepare(&(a)->bcem, (ctx), NULL); \
++    if (!(ctx)->status) { \
++        fn(&(ctx)->msg, ##__VA_ARGS__); \
++        (ctx)->status = __aaudio_send_cmd_sync(&(a)->bcem, (ctx), (reply)); \
++    } \
++    (ctx)->status; \
++})
++
++struct aaudio_msg aaudio_reply_alloc(void);
++void aaudio_reply_free(struct aaudio_msg *reply);
++
++#endif //AAUDIO_PROTOCOL_BCE_H
+diff --git a/drivers/staging/apple-bce/mailbox.c b/drivers/staging/apple-bce/mailbox.c
+new file mode 100644
+index 000000000000..e24bd35215c0
+--- /dev/null
++++ b/drivers/staging/apple-bce/mailbox.c
+@@ -0,0 +1,151 @@
++#include "mailbox.h"
++#include <linux/atomic.h>
++#include "apple_bce.h"
++
++#define REG_MBOX_OUT_BASE 0x820
++#define REG_MBOX_REPLY_COUNTER 0x108
++#define REG_MBOX_REPLY_BASE 0x810
++#define REG_TIMESTAMP_BASE 0xC000
++
++#define BCE_MBOX_TIMEOUT_MS 200
++
++void bce_mailbox_init(struct bce_mailbox *mb, void __iomem *reg_mb)
++{
++    mb->reg_mb = reg_mb;
++    init_completion(&mb->mb_completion);
++}
++
++int bce_mailbox_send(struct bce_mailbox *mb, u64 msg, u64* recv)
++{
++    u32 __iomem *regb;
++
++    if (atomic_cmpxchg(&mb->mb_status, 0, 1) != 0) {
++        return -EEXIST; // We don't support two messages at once
++    }
++    reinit_completion(&mb->mb_completion);
++
++    pr_debug("bce_mailbox_send: %llx\n", msg);
++    regb = (u32*) ((u8*) mb->reg_mb + REG_MBOX_OUT_BASE);
++    iowrite32((u32) msg, regb);
++    iowrite32((u32) (msg >> 32), regb + 1);
++    iowrite32(0, regb + 2);
++    iowrite32(0, regb + 3);
++
++    wait_for_completion_timeout(&mb->mb_completion, msecs_to_jiffies(BCE_MBOX_TIMEOUT_MS));
++    if (atomic_read(&mb->mb_status) != 2) { // Didn't get the reply
++        atomic_set(&mb->mb_status, 0);
++        return -ETIMEDOUT;
++    }
++
++    *recv = mb->mb_result;
++    pr_debug("bce_mailbox_send: reply %llx\n", *recv);
++
++    atomic_set(&mb->mb_status, 0);
++    return 0;
++}
++
++static int bce_mailbox_retrive_response(struct bce_mailbox *mb)
++{
++    u32 __iomem *regb;
++    u32 lo, hi;
++    int count, counter;
++    u32 res = ioread32((u8*) mb->reg_mb + REG_MBOX_REPLY_COUNTER);
++    count = (res >> 20) & 0xf;
++    counter = count;
++    pr_debug("bce_mailbox_retrive_response count=%i\n", count);
++    while (counter--) {
++        regb = (u32*) ((u8*) mb->reg_mb + REG_MBOX_REPLY_BASE);
++        lo = ioread32(regb);
++        hi = ioread32(regb + 1);
++        ioread32(regb + 2);
++        ioread32(regb + 3);
++        pr_debug("bce_mailbox_retrive_response %llx\n", ((u64) hi << 32) | lo);
++        mb->mb_result = ((u64) hi << 32) | lo;
++    }
++    return count > 0 ? 0 : -ENODATA;
++}
++
++int bce_mailbox_handle_interrupt(struct bce_mailbox *mb)
++{
++    int status = bce_mailbox_retrive_response(mb);
++    if (!status) {
++        atomic_set(&mb->mb_status, 2);
++        complete(&mb->mb_completion);
++    }
++    return status;
++}
++
++static void bc_send_timestamp(struct timer_list *tl);
++
++void bce_timestamp_init(struct bce_timestamp *ts, void __iomem *reg)
++{
++    u32 __iomem *regb;
++
++    spin_lock_init(&ts->stop_sl);
++    ts->stopped = false;
++
++    ts->reg = reg;
++
++    regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE);
++
++    ioread32(regb);
++    mb();
++
++    timer_setup(&ts->timer, bc_send_timestamp, 0);
++}
++
++void bce_timestamp_start(struct bce_timestamp *ts, bool is_initial)
++{
++    unsigned long flags;
++    u32 __iomem *regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE);
++
++    if (is_initial) {
++        iowrite32((u32) -4, regb + 2);
++        iowrite32((u32) -1, regb);
++    } else {
++        iowrite32((u32) -3, regb + 2);
++        iowrite32((u32) -1, regb);
++    }
++
++    spin_lock_irqsave(&ts->stop_sl, flags);
++    ts->stopped = false;
++    spin_unlock_irqrestore(&ts->stop_sl, flags);
++    mod_timer(&ts->timer, jiffies + msecs_to_jiffies(150));
++}
++
++void bce_timestamp_stop(struct bce_timestamp *ts)
++{
++    unsigned long flags;
++    u32 __iomem *regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE);
++
++    spin_lock_irqsave(&ts->stop_sl, flags);
++    ts->stopped = true;
++    spin_unlock_irqrestore(&ts->stop_sl, flags);
++    del_timer_sync(&ts->timer);
++
++    iowrite32((u32) -2, regb + 2);
++    iowrite32((u32) -1, regb);
++}
++
++static void bc_send_timestamp(struct timer_list *tl)
++{
++    struct bce_timestamp *ts;
++    unsigned long flags;
++    u32 __iomem *regb;
++    ktime_t bt;
++
++    ts = container_of(tl, struct bce_timestamp, timer);
++    regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE);
++    local_irq_save(flags);
++    ioread32(regb + 2);
++    mb();
++    bt = ktime_get_boottime();
++    iowrite32((u32) bt, regb + 2);
++    iowrite32((u32) (bt >> 32), regb);
++
++    spin_lock(&ts->stop_sl);
++    if (!ts->stopped)
++        mod_timer(&ts->timer, jiffies + msecs_to_jiffies(150));
++    spin_unlock(&ts->stop_sl);
++    local_irq_restore(flags);
++}
+\ No newline at end of file
+diff --git a/drivers/staging/apple-bce/mailbox.h b/drivers/staging/apple-bce/mailbox.h
+new file mode 100644
+index 000000000000..f3323f95ba51
+--- /dev/null
++++ b/drivers/staging/apple-bce/mailbox.h
+@@ -0,0 +1,53 @@
++#ifndef BCE_MAILBOX_H
++#define BCE_MAILBOX_H
++
++#include <linux/completion.h>
++#include <linux/pci.h>
++#include <linux/timer.h>
++
++struct bce_mailbox {
++    void __iomem *reg_mb;
++
++    atomic_t mb_status; // possible statuses: 0 (no msg), 1 (has active msg), 2 (got reply)
++    struct completion mb_completion;
++    uint64_t mb_result;
++};
++
++enum bce_message_type {
++    BCE_MB_REGISTER_COMMAND_SQ = 0x7,            // to-device
++    BCE_MB_REGISTER_COMMAND_CQ = 0x8,            // to-device
++    BCE_MB_REGISTER_COMMAND_QUEUE_REPLY = 0xB,   // to-host
++    BCE_MB_SET_FW_PROTOCOL_VERSION = 0xC,        // both
++    BCE_MB_SLEEP_NO_STATE = 0x14,                // to-device
++    BCE_MB_RESTORE_NO_STATE = 0x15,              // to-device
++    BCE_MB_SAVE_STATE_AND_SLEEP = 0x17,          // to-device
++    BCE_MB_RESTORE_STATE_AND_WAKE = 0x18,        // to-device
++    BCE_MB_SAVE_STATE_AND_SLEEP_FAILURE = 0x19,  // from-device
++    BCE_MB_SAVE_RESTORE_STATE_COMPLETE = 0x1A,   // from-device
++};
++
++#define BCE_MB_MSG(type, value) (((u64) (type) << 58) | ((value) & 0x3FFFFFFFFFFFFFFLL)) /* type above bit 58, value below */
++#define BCE_MB_TYPE(v) ((u32) ((v) >> 58)) /* argument parenthesized so expressions such as (a | b) expand correctly */
++#define BCE_MB_VALUE(v) ((v) & 0x3FFFFFFFFFFFFFFLL)
++
++void bce_mailbox_init(struct bce_mailbox *mb, void __iomem *reg_mb);
++
++int bce_mailbox_send(struct bce_mailbox *mb, u64 msg, u64* recv);
++
++int bce_mailbox_handle_interrupt(struct bce_mailbox *mb);
++
++
++struct bce_timestamp {
++    void __iomem *reg;
++    struct timer_list timer;
++    struct spinlock stop_sl;
++    bool stopped;
++};
++
++void bce_timestamp_init(struct bce_timestamp *ts, void __iomem *reg);
++
++void bce_timestamp_start(struct bce_timestamp *ts, bool is_initial);
++
++void bce_timestamp_stop(struct bce_timestamp *ts);
++
++#endif //BCE_MAILBOX_H
+diff --git a/drivers/staging/apple-bce/queue.c b/drivers/staging/apple-bce/queue.c
+new file mode 100644
+index 000000000000..bc9cd3bc6f0c
+--- /dev/null
++++ b/drivers/staging/apple-bce/queue.c
+@@ -0,0 +1,390 @@
++#include "queue.h"
++#include "apple_bce.h"
++
++#define REG_DOORBELL_BASE 0x44000
++
++struct bce_queue_cq *bce_alloc_cq(struct apple_bce_device *dev, int qid, u32 el_count)
++{
++    struct bce_queue_cq *q = kzalloc(sizeof(*q), GFP_KERNEL); /* descriptor only; the DMA ring is allocated below */
++    if (!q) /* kzalloc() may fail; the fields must not be written through a NULL pointer */
++        return NULL;
++    q->qid = qid;
++    q->type = BCE_QUEUE_CQ;
++    q->el_count = el_count;
++    q->data = dma_alloc_coherent(&dev->pci->dev, el_count * sizeof(struct bce_qe_completion), &q->dma_handle, GFP_KERNEL);
++    if (!q->data) {
++        pr_err("DMA queue memory alloc failed\n");
++        kfree(q);
++        return NULL; /* NULL is the only failure indication callers get */
++    }
++    return q;
++}
++
++void bce_get_cq_memcfg(struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg)
++{
++    cfg->qid = (u16) cq->qid;
++    cfg->el_count = (u16) cq->el_count;
++    cfg->vector_or_cq = 0;
++    cfg->_pad = 0;
++    cfg->addr = cq->dma_handle;
++    cfg->length = cq->el_count * sizeof(struct bce_qe_completion);
++}
++
++void bce_free_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq)
++{
++    dma_free_coherent(&dev->pci->dev, cq->el_count * sizeof(struct bce_qe_completion), cq->data, cq->dma_handle);
++    kfree(cq);
++}
++
++static void bce_handle_cq_completion(struct apple_bce_device *dev, struct bce_qe_completion *e, size_t *ce)
++{
++    struct bce_queue *target;
++    struct bce_queue_sq *target_sq;
++    struct bce_sq_completion_data *cmpl;
++    if (e->qid >= BCE_MAX_QUEUE_COUNT) {
++        pr_err("Device sent a response for qid (%u) >= BCE_MAX_QUEUE_COUNT\n", e->qid);
++        return;
++    }
++    target = dev->queues[e->qid];
++    if (!target || target->type != BCE_QUEUE_SQ) {
++        pr_err("Device sent a response for qid (%u), which does not exist\n", e->qid);
++        return;
++    }
++    target_sq = (struct bce_queue_sq *) target;
++    if (target_sq->completion_tail != e->completion_index) {
++        pr_err("Completion index mismatch; this is likely going to make this driver unusable\n");
++        return;
++    }
++    if (!target_sq->has_pending_completions) {
++        target_sq->has_pending_completions = true;
++        dev->int_sq_list[(*ce)++] = target_sq;
++    }
++    cmpl = &target_sq->completion_data[e->completion_index];
++    cmpl->status = e->status;
++    cmpl->data_size = e->data_size;
++    cmpl->result = e->result;
++    wmb();
++    target_sq->completion_tail = (target_sq->completion_tail + 1) % target_sq->el_count;
++}
++
++void bce_handle_cq_completions(struct apple_bce_device *dev, struct bce_queue_cq *cq)
++{
++    size_t ce = 0;
++    struct bce_qe_completion *e;
++    struct bce_queue_sq *sq;
++    e = bce_cq_element(cq, cq->index);
++    if (!(e->flags & BCE_COMPLETION_FLAG_PENDING))
++        return;
++    mb();
++    while (true) {
++        e = bce_cq_element(cq, cq->index);
++        if (!(e->flags & BCE_COMPLETION_FLAG_PENDING))
++            break;
++        // pr_info("apple-bce: compl: %i: %i %llx %llx", e->qid, e->status, e->data_size, e->result);
++        bce_handle_cq_completion(dev, e, &ce);
++        e->flags = 0;
++        cq->index = (cq->index + 1) % cq->el_count;
++    }
++    mb();
++    iowrite32(cq->index, (u32 *) ((u8 *) dev->reg_mem_dma +  REG_DOORBELL_BASE) + cq->qid);
++    while (ce) {
++        --ce;
++        sq = dev->int_sq_list[ce];
++        sq->completion(sq);
++        sq->has_pending_completions = false;
++    }
++}
++
++
++struct bce_queue_sq *bce_alloc_sq(struct apple_bce_device *dev, int qid, u32 el_size, u32 el_count,
++        bce_sq_completion compl, void *userdata)
++{
++    struct bce_queue_sq *q = kzalloc(sizeof(*q), GFP_KERNEL); /* zeroed, so q->data starts out NULL */
++    if (!q) /* kzalloc() may fail; bail out before touching any field */
++        return NULL;
++    q->qid = qid;
++    q->type = BCE_QUEUE_SQ;
++    q->el_size = el_size;
++    q->el_count = el_count;
++    q->completion = compl;
++    q->userdata = userdata;
++    q->reg_mem_dma = dev->reg_mem_dma;
++    atomic_set(&q->available_commands, el_count - 1); /* one element fewer than the ring size may be outstanding */
++    init_completion(&q->available_command_completion);
++    atomic_set(&q->available_command_completion_waiting_count, 0);
++    q->completion_data = kcalloc(el_count, sizeof(struct bce_sq_completion_data), GFP_KERNEL); /* overflow-checked */
++    q->data = q->completion_data ? dma_alloc_coherent(&dev->pci->dev, el_count * el_size, &q->dma_handle, GFP_KERNEL) : NULL;
++    if (q->data) /* both allocations succeeded */
++        return q;
++    pr_err("DMA queue memory alloc failed\n"); /* also reached when the completion_data array could not be allocated */
++    kfree(q->completion_data); /* kfree(NULL) is a no-op; previously leaked when the DMA allocation failed */
++    kfree(q);
++    return NULL;
++}
++
++void bce_get_sq_memcfg(struct bce_queue_sq *sq, struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg)
++{
++    cfg->qid = (u16) sq->qid;
++    cfg->el_count = (u16) sq->el_count;
++    cfg->vector_or_cq = (u16) cq->qid;
++    cfg->_pad = 0;
++    cfg->addr = sq->dma_handle;
++    cfg->length = sq->el_count * sq->el_size;
++}
++
++void bce_free_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq)
++{
++    dma_free_coherent(&dev->pci->dev, sq->el_count * sq->el_size, sq->data, sq->dma_handle);
++    kfree(sq->completion_data); /* allocated in bce_alloc_sq(); was leaked on every SQ teardown */
++    kfree(sq);
++}
++
++/* Take one submission slot; if none is free, wait up to *timeout jiffies (NULL or 0: fail at once with -EAGAIN). */
++int bce_reserve_submission(struct bce_queue_sq *sq, unsigned long *timeout)
++{
++    while (atomic_dec_if_positive(&sq->available_commands) < 0) {
++        if (!timeout || !*timeout)
++            return -EAGAIN;
++        atomic_inc(&sq->available_command_completion_waiting_count);
++        *timeout = wait_for_completion_timeout(&sq->available_command_completion, *timeout);
++        if (!*timeout && atomic_dec_if_positive(&sq->available_command_completion_waiting_count) < 0)
++            try_wait_for_completion(&sq->available_command_completion); /* consume the pending completion */
++    }
++    return 0;
++}
++
++void bce_cancel_submission_reservation(struct bce_queue_sq *sq)
++{
++    atomic_inc(&sq->available_commands);
++}
++
++void *bce_next_submission(struct bce_queue_sq *sq)
++{
++    void *ret = bce_sq_element(sq, sq->tail);
++    sq->tail = (sq->tail + 1) % sq->el_count;
++    return ret;
++}
++
++void bce_submit_to_device(struct bce_queue_sq *sq)
++{
++    mb();
++    iowrite32(sq->tail, (u32 *) ((u8 *) sq->reg_mem_dma +  REG_DOORBELL_BASE) + sq->qid);
++}
++
++void bce_notify_submission_complete(struct bce_queue_sq *sq)
++{
++    sq->head = (sq->head + 1) % sq->el_count;
++    atomic_inc(&sq->available_commands);
++    if (atomic_dec_if_positive(&sq->available_command_completion_waiting_count) >= 0) {
++        complete(&sq->available_command_completion);
++    }
++}
++
++void bce_set_submission_single(struct bce_qe_submission *element, dma_addr_t addr, size_t size)
++{
++    element->addr = addr;
++    element->length = size;
++    element->segl_addr = element->segl_length = 0;
++}
++
++static void bce_cmdq_completion(struct bce_queue_sq *q);
++
++struct bce_queue_cmdq *bce_alloc_cmdq(struct apple_bce_device *dev, int qid, u32 el_count)
++{
++    /* Builds a command queue: wrapper struct, per-slot result table, then the underlying SQ. NULL on failure. */
++    struct bce_queue_cmdq *q = kzalloc(sizeof(*q), GFP_KERNEL);
++    if (!q)
++        return NULL;
++    spin_lock_init(&q->lck);
++    q->tres = kcalloc(el_count, sizeof(struct bce_queue_cmdq_result_el *), GFP_KERNEL);
++    /* The table is allocated first so that a failure never leaves an SQ behind (it used to leak here). */
++    if (q->tres)
++        q->sq = bce_alloc_sq(dev, qid, BCE_CMD_SIZE, el_count, bce_cmdq_completion, q);
++    if (q->sq) /* still NULL from kzalloc() when the table allocation failed */
++        return q;
++    kfree(q->tres);
++    kfree(q);
++    return NULL;
++}
++
++void bce_free_cmdq(struct apple_bce_device *dev, struct bce_queue_cmdq *cmdq)
++{
++    bce_free_sq(dev, cmdq->sq);
++    kfree(cmdq->tres);
++    kfree(cmdq);
++}
++
++void bce_cmdq_completion(struct bce_queue_sq *q)
++{
++    struct bce_queue_cmdq_result_el *el;
++    struct bce_queue_cmdq *cmdq = q->userdata;
++    struct bce_sq_completion_data *result;
++
++    spin_lock(&cmdq->lck);
++    while ((result = bce_next_completion(q))) {
++        el = cmdq->tres[cmdq->sq->head];
++        if (el) {
++            el->result = result->result;
++            el->status = result->status;
++            mb();
++            complete(&el->cmpl);
++        } else {
++            pr_err("apple-bce: Unexpected command queue completion\n");
++        }
++        cmdq->tres[cmdq->sq->head] = NULL;
++        bce_notify_submission_complete(q);
++    }
++    spin_unlock(&cmdq->lck);
++}
++
++static __always_inline void *bce_cmd_start(struct bce_queue_cmdq *cmdq, struct bce_queue_cmdq_result_el *res)
++{
++    void *ret;
++    unsigned long timeout;
++    init_completion(&res->cmpl); /* completed by bce_cmdq_completion() when the matching result arrives */
++    mb();
++
++    timeout = msecs_to_jiffies(1000L * 60 * 5); /* wait for up to ~5 minutes */
++    if (bce_reserve_submission(cmdq->sq, &timeout))
++        return NULL; /* no slot reserved; cmdq->lck is NOT held on this path */
++
++    spin_lock(&cmdq->lck); /* stays held on return; bce_cmd_finish() releases it after submitting */
++    cmdq->tres[cmdq->sq->tail] = res; /* result slot is indexed by the ring position of this submission */
++    ret = bce_next_submission(cmdq->sq); /* returns the element at tail and advances tail */
++    return ret;
++}
++
++static __always_inline void bce_cmd_finish(struct bce_queue_cmdq *cmdq, struct bce_queue_cmdq_result_el *res)
++{
++    bce_submit_to_device(cmdq->sq);
++    spin_unlock(&cmdq->lck);
++
++    wait_for_completion(&res->cmpl);
++    mb();
++}
++
++u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg *cfg, const char *name, bool isdirout)
++{
++    struct bce_queue_cmdq_result_el res;
++    struct bce_cmdq_register_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res);
++    if (!cmd)
++        return (u32) -1;
++    cmd->cmd = BCE_CMD_REGISTER_MEMORY_QUEUE;
++    cmd->flags = (u16) ((name ? 2 : 0) | (isdirout ? 1 : 0));
++    cmd->qid = cfg->qid;
++    cmd->el_count = cfg->el_count;
++    cmd->vector_or_cq = cfg->vector_or_cq;
++    memset(cmd->name, 0, sizeof(cmd->name));
++    if (name) {
++        cmd->name_len = (u16) min(strlen(name), (size_t) sizeof(cmd->name));
++        memcpy(cmd->name, name, cmd->name_len);
++    } else {
++        cmd->name_len = 0;
++    }
++    cmd->addr = cfg->addr;
++    cmd->length = cfg->length;
++
++    bce_cmd_finish(cmdq, &res);
++    return res.status;
++}
++
++u32 bce_cmd_unregister_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid)
++{
++    struct bce_queue_cmdq_result_el res;
++    struct bce_cmdq_simple_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res);
++    if (!cmd)
++        return (u32) -1;
++    cmd->cmd = BCE_CMD_UNREGISTER_MEMORY_QUEUE;
++    cmd->flags = 0;
++    cmd->qid = qid;
++    bce_cmd_finish(cmdq, &res);
++    return res.status;
++}
++
++u32 bce_cmd_flush_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid)
++{
++    struct bce_queue_cmdq_result_el res;
++    struct bce_cmdq_simple_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res);
++    if (!cmd)
++        return (u32) -1;
++    cmd->cmd = BCE_CMD_FLUSH_MEMORY_QUEUE;
++    cmd->flags = 0;
++    cmd->qid = qid;
++    bce_cmd_finish(cmdq, &res);
++    return res.status;
++}
++
++
++struct bce_queue_cq *bce_create_cq(struct apple_bce_device *dev, u32 el_count)
++{
++    struct bce_queue_cq *cq;
++    struct bce_queue_memcfg cfg;
++    int qid = ida_simple_get(&dev->queue_ida, BCE_QUEUE_USER_MIN, BCE_QUEUE_USER_MAX, GFP_KERNEL);
++    if (qid < 0)
++        return NULL;
++    cq = bce_alloc_cq(dev, qid, el_count);
++    if (cq) {
++        bce_get_cq_memcfg(cq, &cfg);
++        if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, NULL, false) == 0) { /* 0 means the device accepted the queue */
++            dev->queues[qid] = (struct bce_queue *) cq;
++            return cq;
++        }
++        pr_err("apple-bce: CQ registration failed (%i)\n", qid);
++        bce_free_cq(dev, cq);
++    }
++    ida_simple_remove(&dev->queue_ida, (uint) qid); /* every failure after ida_simple_get() must give the id back */
++    return NULL;
++}
++
++struct bce_queue_sq *bce_create_sq(struct apple_bce_device *dev, struct bce_queue_cq *cq, const char *name, u32 el_count,
++        int direction, bce_sq_completion compl, void *userdata)
++{
++    struct bce_queue_sq *sq;
++    struct bce_queue_memcfg cfg;
++    int qid;
++    if (cq == NULL)
++        return NULL; /* cq can not be null */
++    if (name == NULL)
++        return NULL; /* name can not be null */
++    if (direction != DMA_TO_DEVICE && direction != DMA_FROM_DEVICE)
++        return NULL; /* unsupported direction */
++    qid = ida_simple_get(&dev->queue_ida, BCE_QUEUE_USER_MIN, BCE_QUEUE_USER_MAX, GFP_KERNEL);
++    if (qid < 0)
++        return NULL;
++    sq = bce_alloc_sq(dev, qid, sizeof(struct bce_qe_submission), el_count, compl, userdata);
++    if (sq) {
++        bce_get_sq_memcfg(sq, cq, &cfg);
++        if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, name, direction != DMA_FROM_DEVICE) == 0) { /* last arg: isdirout */
++            spin_lock(&dev->queues_lock);
++            dev->queues[qid] = (struct bce_queue *) sq;
++            spin_unlock(&dev->queues_lock);
++            return sq;
++        }
++        pr_err("apple-bce: SQ registration failed (%i)\n", qid);
++        bce_free_sq(dev, sq);
++    }
++    ida_simple_remove(&dev->queue_ida, (uint) qid); /* also reached when bce_alloc_sq() fails, which used to leak the id */
++    return NULL;
++}
++/* Unregister @cq (skipped while dev->is_being_removed), clear its dev->queues[] slot, release its id and free it. */
++void bce_destroy_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq)
++{
++    if (!dev->is_being_removed && bce_cmd_unregister_memory_queue(dev->cmd_cmdq, (u16) cq->qid))
++        pr_err("apple-bce: CQ unregister failed\n"); /* newline so the record is not left open */
++    spin_lock(&dev->queues_lock);
++    dev->queues[cq->qid] = NULL;
++    spin_unlock(&dev->queues_lock);
++    ida_simple_remove(&dev->queue_ida, (uint) cq->qid);
++    bce_free_cq(dev, cq);
++}
++/* Unregister @sq (skipped while dev->is_being_removed), clear its dev->queues[] slot, release its id and free it. */
++void bce_destroy_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq)
++{
++    if (!dev->is_being_removed && bce_cmd_unregister_memory_queue(dev->cmd_cmdq, (u16) sq->qid))
++        pr_err("apple-bce: SQ unregister failed\n"); /* this is the SQ path; the log used to say CQ */
++    spin_lock(&dev->queues_lock);
++    dev->queues[sq->qid] = NULL;
++    spin_unlock(&dev->queues_lock);
++    ida_simple_remove(&dev->queue_ida, (uint) sq->qid);
++    bce_free_sq(dev, sq);
++}
+\ No newline at end of file
+diff --git a/drivers/staging/apple-bce/queue.h b/drivers/staging/apple-bce/queue.h
+new file mode 100644
+index 000000000000..8368ac5dfca8
+--- /dev/null
++++ b/drivers/staging/apple-bce/queue.h
+@@ -0,0 +1,177 @@
++#ifndef BCE_QUEUE_H
++#define BCE_QUEUE_H
++
++#include <linux/completion.h>
++#include <linux/pci.h>
++
++#define BCE_CMD_SIZE 0x40
++
++struct apple_bce_device;
++
++enum bce_queue_type {
++    BCE_QUEUE_CQ, BCE_QUEUE_SQ
++};
++struct bce_queue { /* shares its leading fields with bce_queue_cq/bce_queue_sq; dev->queues[] stores both cast to this */
++    int qid;
++    int type; /* presumably an enum bce_queue_type value - confirm */
++};
++struct bce_queue_cq { /* completion queue; qid/type mirror struct bce_queue */
++    int qid;
++    int type;
++    u32 el_count;
++    dma_addr_t dma_handle;
++    void *data; /* indexed as an array of struct bce_qe_completion by bce_cq_element() */
++
++    u32 index;
++};
++struct bce_queue_sq;
++typedef void (*bce_sq_completion)(struct bce_queue_sq *q);
++struct bce_sq_completion_data {
++    u32 status;
++    u64 data_size;
++    u64 result;
++};
++struct bce_queue_sq { /* submission queue; qid/type mirror struct bce_queue */
++    int qid;
++    int type;
++    u32 el_size; /* byte stride between elements, see bce_sq_element() */
++    u32 el_count; /* bce_next_completion() wraps completion_cidx at this count */
++    dma_addr_t dma_handle;
++    void *data; /* element storage addressed by bce_sq_element() */
++    void *userdata; /* owner pointer read back by completion callbacks (the vhci event queue stores itself here) */
++    void __iomem *reg_mem_dma;
++
++    atomic_t available_commands;
++    struct completion available_command_completion;
++    atomic_t available_command_completion_waiting_count;
++    u32 head, tail;
++
++    u32 completion_cidx, completion_tail; /* bce_next_completion() consumes at cidx until it equals tail */
++    struct bce_sq_completion_data *completion_data; /* array indexed by completion_cidx */
++    bool has_pending_completions;
++    bce_sq_completion completion; /* callback taking the queue; presumably run when completions arrive - confirm */
++};
++
++struct bce_queue_cmdq_result_el {
++    struct completion cmpl;
++    u32 status;
++    u64 result;
++};
++struct bce_queue_cmdq {
++    struct bce_queue_sq *sq;
++    struct spinlock lck;
++    struct bce_queue_cmdq_result_el **tres;
++};
++
++struct bce_queue_memcfg {
++    u16 qid;
++    u16 el_count;
++    u16 vector_or_cq;
++    u16 _pad;
++    u64 addr;
++    u64 length;
++};
++
++enum bce_qe_completion_status {
++    BCE_COMPLETION_SUCCESS = 0,
++    BCE_COMPLETION_ERROR = 1,
++    BCE_COMPLETION_ABORTED = 2,
++    BCE_COMPLETION_NO_SPACE = 3,
++    BCE_COMPLETION_OVERRUN = 4
++};
++enum bce_qe_completion_flags {
++    BCE_COMPLETION_FLAG_PENDING = 0x8000
++};
++struct bce_qe_completion {
++    u64 result;
++    u64 data_size;
++    u16 qid;
++    u16 completion_index;
++    u16 status; // bce_qe_completion_status
++    u16 flags;  // bce_qe_completion_flags
++};
++
++struct bce_qe_submission {
++    u64 length;
++    u64 addr;
++
++    u64 segl_addr;
++    u64 segl_length;
++};
++
++enum bce_cmdq_command {
++    BCE_CMD_REGISTER_MEMORY_QUEUE = 0x20,
++    BCE_CMD_UNREGISTER_MEMORY_QUEUE = 0x30,
++    BCE_CMD_FLUSH_MEMORY_QUEUE = 0x40,
++    BCE_CMD_SET_MEMORY_QUEUE_PROPERTY = 0x50
++};
++struct bce_cmdq_simple_memory_queue_cmd {
++    u16 cmd; // bce_cmdq_command
++    u16 flags;
++    u16 qid;
++};
++struct bce_cmdq_register_memory_queue_cmd {
++    u16 cmd; // bce_cmdq_command
++    u16 flags;
++    u16 qid;
++    u16 _pad;
++    u16 el_count;
++    u16 vector_or_cq;
++    u16 _pad2;
++    u16 name_len;
++    char name[0x20];
++    u64 addr;
++    u64 length;
++};
++/* Address of the @i-th submission element; consecutive elements are q->el_size bytes apart. */
++static __always_inline void *bce_sq_element(struct bce_queue_sq *q, int i) {
++    return &((u8 *) q->data)[q->el_size * i];
++}
++static __always_inline void *bce_cq_element(struct bce_queue_cq *q, int i) {
++    return &((struct bce_qe_completion *) q->data)[i]; /* q->data is treated as an array of completion entries */
++}
++/* Pop the next completion record of @sq; returns NULL once completion_cidx has caught up with completion_tail. */
++static __always_inline struct bce_sq_completion_data *bce_next_completion(struct bce_queue_sq *sq) {
++    struct bce_sq_completion_data *res;
++    rmb(); /* read barrier before sampling completion_tail; presumably pairs with the producer's write - confirm */
++    if (sq->completion_cidx == sq->completion_tail)
++        return NULL;
++    res = &sq->completion_data[sq->completion_cidx];
++    sq->completion_cidx = (sq->completion_cidx + 1) % sq->el_count; /* ring wrap */
++    return res;
++}
++
++struct bce_queue_cq *bce_alloc_cq(struct apple_bce_device *dev, int qid, u32 el_count);
++void bce_get_cq_memcfg(struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg);
++void bce_free_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq);
++void bce_handle_cq_completions(struct apple_bce_device *dev, struct bce_queue_cq *cq);
++
++struct bce_queue_sq *bce_alloc_sq(struct apple_bce_device *dev, int qid, u32 el_size, u32 el_count,
++        bce_sq_completion compl, void *userdata);
++void bce_get_sq_memcfg(struct bce_queue_sq *sq, struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg);
++void bce_free_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq);
++int bce_reserve_submission(struct bce_queue_sq *sq, unsigned long *timeout);
++void bce_cancel_submission_reservation(struct bce_queue_sq *sq);
++void *bce_next_submission(struct bce_queue_sq *sq);
++void bce_submit_to_device(struct bce_queue_sq *sq);
++void bce_notify_submission_complete(struct bce_queue_sq *sq);
++
++void bce_set_submission_single(struct bce_qe_submission *element, dma_addr_t addr, size_t size);
++
++struct bce_queue_cmdq *bce_alloc_cmdq(struct apple_bce_device *dev, int qid, u32 el_count);
++void bce_free_cmdq(struct apple_bce_device *dev, struct bce_queue_cmdq *cmdq);
++
++u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg *cfg, const char *name, bool isdirout);
++u32 bce_cmd_unregister_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid);
++u32 bce_cmd_flush_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid);
++
++
++/* User API - Creates and registers the queue */
++
++struct bce_queue_cq *bce_create_cq(struct apple_bce_device *dev, u32 el_count);
++struct bce_queue_sq *bce_create_sq(struct apple_bce_device *dev, struct bce_queue_cq *cq, const char *name, u32 el_count,
++        int direction, bce_sq_completion compl, void *userdata);
++void bce_destroy_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq);
++void bce_destroy_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq);
++
++#endif //BCE_QUEUE_H
+diff --git a/drivers/staging/apple-bce/queue_dma.c b/drivers/staging/apple-bce/queue_dma.c
+new file mode 100644
+index 000000000000..b236613285c0
+--- /dev/null
++++ b/drivers/staging/apple-bce/queue_dma.c
+@@ -0,0 +1,220 @@
++#include "queue_dma.h"
++#include <linux/vmalloc.h>
++#include <linux/mm.h>
++#include "queue.h"
++
++static int bce_alloc_scatterlist_from_vm(struct sg_table *tbl, void *data, size_t len);
++static struct bce_segment_list_element_hostinfo *bce_map_segment_list(
++        struct device *dev, struct scatterlist *pages, int pagen);
++static void bce_unmap_segement_list(struct device *dev, struct bce_segment_list_element_hostinfo *list);
++/* Map @scatterlist for DMA and, when it has more than one entry, build the device segment list describing it. */
++int bce_map_dma_buffer(struct device *dev, struct bce_dma_buffer *buf, struct sg_table scatterlist,
++        enum dma_data_direction dir)
++{
++    int cnt;
++
++    buf->direction = dir;
++    buf->scatterlist = scatterlist;
++    buf->seglist_hostinfo = NULL;
++
++    cnt = dma_map_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir);
++    if (cnt != buf->scatterlist.nents) {
++        pr_err("apple-bce: DMA scatter list mapping returned an unexpected count: %i\n", cnt);
++        if (cnt > 0) /* 0 means dma_map_sg() failed and nothing is mapped, so there is nothing to unmap */
++            dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir);
++        return -EIO;
++    }
++    if (cnt == 1) /* a single entry is addressed directly, no segment list is needed */
++        return 0;
++    buf->seglist_hostinfo = bce_map_segment_list(dev, buf->scatterlist.sgl, buf->scatterlist.nents);
++    if (!buf->seglist_hostinfo) {
++        pr_err("apple-bce: Creating segment list failed\n");
++        dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir);
++        return -EIO;
++    }
++    return 0;
++}
++/* Map a vmalloc'ed buffer: build a page-based sg_table for it and hand that to bce_map_dma_buffer(). */
++int bce_map_dma_buffer_vm(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
++                          enum dma_data_direction dir)
++{
++    struct sg_table table;
++    int err = bce_alloc_scatterlist_from_vm(&table, data, len);
++
++    if (err)
++        return err;
++    err = bce_map_dma_buffer(dev, buf, table, dir);
++    if (err) /* the table was not taken over, so release it here */
++        sg_free_table(&table);
++    return err;
++}
++
++int bce_map_dma_buffer_km(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
++                          enum dma_data_direction dir)
++{
++    /* Kernel memory is continuous which is great for us. */
++    int status;
++    struct sg_table scatterlist;
++    if ((status = sg_alloc_table(&scatterlist, 1, GFP_KERNEL))) {
++        sg_free_table(&scatterlist);
++        return status;
++    }
++    sg_set_buf(scatterlist.sgl, data, (uint) len);
++    if ((status = bce_map_dma_buffer(dev, buf, scatterlist, dir))) {
++        sg_free_table(&scatterlist);
++        return status;
++    }
++    return 0;
++}
++/* Undo bce_map_dma_buffer(): unmap the scatterlist and tear down the segment list (a NULL list is a no-op). */
++void bce_unmap_dma_buffer(struct device *dev, struct bce_dma_buffer *buf)
++{
++    dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, buf->direction);
++    bce_unmap_segement_list(dev, buf->seglist_hostinfo); /* NOTE(review): the sg_table is not freed here - confirm who does */
++}
++
++/* Build an sg_table covering the vmalloc'ed range [data, data + len); returns 0 or a negative errno. */
++static int bce_alloc_scatterlist_from_vm(struct sg_table *tbl, void *data, size_t len)
++{
++    int status;
++    struct page **pages;
++    size_t i, off, start_page, end_page, page_count;
++    off        = (size_t) data % PAGE_SIZE;
++    start_page = (size_t) data  / PAGE_SIZE;
++    end_page   = ((size_t) data + len - 1) / PAGE_SIZE;
++    page_count = end_page - start_page + 1;
++
++    /* kvmalloc_array() picks kmalloc or vmalloc by size and rejects a multiplication overflow */
++    pages = kvmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
++    if (!pages)
++        return -ENOMEM;
++
++    for (i = 0; i < page_count; i++)
++        pages[i] = vmalloc_to_page((void *) ((start_page + i) * PAGE_SIZE));
++
++    /*
++     * No sg_free_table() on failure: sg_alloc_table_from_pages() cleans up after
++     * itself and may leave @tbl unwritten, and the caller passes an uninitialised
++     * stack variable, so freeing it here could act on garbage pointers.
++     */
++    status = sg_alloc_table_from_pages(tbl, pages, page_count, (unsigned int) off, len, GFP_KERNEL);
++
++    kvfree(pages);
++    return status;
++}
++/* Elements that fit in a list page after its bce_segment_list_header, and in a further page that has no header. */
++#define BCE_ELEMENTS_PER_PAGE ((PAGE_SIZE - sizeof(struct bce_segment_list_header)) \
++                               / sizeof(struct bce_segment_list_element))
++#define BCE_ELEMENTS_PER_ADDITIONAL_PAGE (PAGE_SIZE / sizeof(struct bce_segment_list_element))
++/* Describe the @pagen mapped entries of @pages in chained segment-list pages and DMA-map those; NULL on failure. */
++static struct bce_segment_list_element_hostinfo *bce_map_segment_list(
++        struct device *dev, struct scatterlist *pages, int pagen)
++{
++    size_t ptr, pptr = 0;
++    struct bce_segment_list_header theader; /* a temp header, to store the initial seg */
++    struct bce_segment_list_header *header = &theader;
++    struct bce_segment_list_element *el = NULL, *el_end = NULL;
++    struct bce_segment_list_element_hostinfo *out = NULL, *pout, *out_root = NULL, **link = &out_root;
++    struct scatterlist *sg;
++    int i;
++    for_each_sg(pages, sg, pagen, i) {
++        if (el >= el_end) {
++            /* allocate a new page, this will be also done for the first element */
++            ptr = __get_free_page(GFP_KERNEL);
++            if (!ptr)
++                goto error;
++            if (pptr && ptr == pptr + PAGE_SIZE) {
++                /* the new page directly follows the previous one: extend the current list */
++                out->page_count++;
++                header->element_count += BCE_ELEMENTS_PER_ADDITIONAL_PAGE;
++                el_end += BCE_ELEMENTS_PER_ADDITIONAL_PAGE;
++            } else {
++                pout = kmalloc(sizeof(*pout), GFP_KERNEL);
++                if (!pout) {
++                    free_page(ptr); /* not referenced by any hostinfo yet */
++                    goto error;
++                }
++                header = (void *) ptr;
++                header->element_count = BCE_ELEMENTS_PER_PAGE;
++                header->data_size = 0;
++                header->next_segl_addr = 0;
++                header->next_segl_length = 0;
++                el = (void *) (header + 1);
++                el_end = el + BCE_ELEMENTS_PER_PAGE;
++                pout->page_start = (void *) ptr;
++                pout->page_count = 1;
++                pout->dma_start = DMA_MAPPING_ERROR;
++                pout->next = NULL;
++                *link = out = pout; /* append to the host-side list */
++                link = &out->next;
++            }
++            pptr = ptr;
++        }
++        el->addr = sg->dma_address;
++        el->length = sg->length;
++        header->data_size += el->length;
++        ++el; /* advance, otherwise every entry overwrites the first slot and el never reaches el_end */
++    }
++
++    /* DMA map */
++    out = out_root;
++    pout = NULL;
++    while (out) {
++        out->dma_start = dma_map_single(dev, out->page_start, out->page_count * PAGE_SIZE, DMA_TO_DEVICE);
++        if (dma_mapping_error(dev, out->dma_start))
++            goto error;
++        if (pout) {
++            header = pout->page_start;
++            header->next_segl_addr = out->dma_start;
++            header->next_segl_length = out->page_count * PAGE_SIZE;
++        }
++        pout = out;
++        out = out->next;
++    }
++    return out_root;
++
++    error:
++    bce_unmap_segement_list(dev, out_root);
++    return NULL;
++}
++static void bce_unmap_segement_list(struct device *dev, struct bce_segment_list_element_hostinfo *list)
++{
++    struct bce_segment_list_element_hostinfo *next;
++    for (; list; list = next) { /* per node: unmap it, free its pages, free the node itself */
++        if (list->dma_start != DMA_MAPPING_ERROR)
++            dma_unmap_single(dev, list->dma_start, list->page_count * PAGE_SIZE, DMA_TO_DEVICE);
++        while (list->page_count--) /* pages were allocated one at a time, so free them one at a time */
++            free_page((unsigned long) list->page_start + list->page_count * PAGE_SIZE);
++        next = list->next;
++        kfree(list);
++    }
++}
++/* Fill @element for @buf at @offset: a direct address without a segment list, otherwise a segment-list reference. */
++int bce_set_submission_buf(struct bce_qe_submission *element, struct bce_dma_buffer *buf, size_t offset, size_t length)
++{
++    struct bce_segment_list_element_hostinfo *seg;
++    struct bce_segment_list_header *seg_header;
++
++    seg = buf->seglist_hostinfo;
++    if (!seg) { /* bce_map_dma_buffer() leaves this NULL when the mapping has a single entry */
++        element->addr = buf->scatterlist.sgl->dma_address + offset;
++        element->length = length;
++        element->segl_addr = 0;
++        element->segl_length = 0;
++        return 0;
++    }
++
++    while (seg) { /* skip whole lists until @offset falls inside one, reducing it by each skipped data_size */
++        seg_header = seg->page_start;
++        if (offset <= seg_header->data_size) /* NOTE(review): '<=' also accepts offset == data_size - confirm */
++            break;
++        offset -= seg_header->data_size;
++        seg = seg->next;
++    }
++    if (!seg)
++        return -EINVAL; /* @offset lies beyond the data described by all lists */
++    element->addr = offset;
++    element->length = buf->scatterlist.sgl->dma_length; /* NOTE(review): @length is unused on this path - confirm */
++    element->segl_addr = seg->dma_start;
++    element->segl_length = seg->page_count * PAGE_SIZE;
++    return 0;
++}
+\ No newline at end of file
+diff --git a/drivers/staging/apple-bce/queue_dma.h b/drivers/staging/apple-bce/queue_dma.h
+new file mode 100644
+index 000000000000..f8a57e50e7a3
+--- /dev/null
++++ b/drivers/staging/apple-bce/queue_dma.h
+@@ -0,0 +1,50 @@
++#ifndef BCE_QUEUE_DMA_H
++#define BCE_QUEUE_DMA_H
++
++#include <linux/pci.h>
++
++struct bce_qe_submission;
++
++struct bce_segment_list_header { /* placed at the start of a segment-list page, elements follow it */
++    u64 element_count; /* bce_map_segment_list() stores the element capacity of the list's pages here */
++    u64 data_size; /* running sum of the element lengths added to this list */
++
++    u64 next_segl_addr; /* DMA address of the following list, left 0 on the last one */
++    u64 next_segl_length; /* byte size of the following list's pages, left 0 on the last one */
++};
++struct bce_segment_list_element { /* one scatterlist entry as written by bce_map_segment_list() */
++    u64 addr; /* the entry's dma_address */
++    u64 length; /* the entry's length */
++};
++
++struct bce_segment_list_element_hostinfo { /* host-side record of one segment list, chained through next */
++    struct bce_segment_list_element_hostinfo *next;
++    void *page_start; /* first page of the list; it begins with a struct bce_segment_list_header */
++    size_t page_count; /* number of adjacent pages that make up this list */
++    dma_addr_t dma_start; /* dma_map_single() handle; DMA_MAPPING_ERROR until mapped */
++};
++
++
++struct bce_dma_buffer { /* state recorded by bce_map_dma_buffer() and consumed by bce_unmap_dma_buffer() */
++    enum dma_data_direction direction;
++    struct sg_table scatterlist; /* copy of the table passed to bce_map_dma_buffer() */
++    struct bce_segment_list_element_hostinfo *seglist_hostinfo; /* NULL when the mapping has a single entry */
++};
++
++/* NOTE: Takes ownership of the sg_table if it succeeds. Ownership is not transferred on failure. */
++int bce_map_dma_buffer(struct device *dev, struct bce_dma_buffer *buf, struct sg_table scatterlist,
++        enum dma_data_direction dir);
++
++/* Creates a buffer from virtual memory (vmalloc) */
++int bce_map_dma_buffer_vm(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
++        enum dma_data_direction dir);
++
++/* Creates a buffer from kernel memory (kmalloc) */
++int bce_map_dma_buffer_km(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
++                          enum dma_data_direction dir);
++
++void bce_unmap_dma_buffer(struct device *dev, struct bce_dma_buffer *buf);
++
++int bce_set_submission_buf(struct bce_qe_submission *element, struct bce_dma_buffer *buf, size_t offset, size_t length);
++
++#endif //BCE_QUEUE_DMA_H
+diff --git a/drivers/staging/apple-bce/vhci/command.h b/drivers/staging/apple-bce/vhci/command.h
+new file mode 100644
+index 000000000000..26619e0bccfa
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/command.h
+@@ -0,0 +1,204 @@
++#ifndef BCE_VHCI_COMMAND_H
++#define BCE_VHCI_COMMAND_H
++
++#include "queue.h"
++#include <linux/jiffies.h>
++#include <linux/usb.h>
++
++#define BCE_VHCI_CMD_TIMEOUT_SHORT msecs_to_jiffies(2000)
++#define BCE_VHCI_CMD_TIMEOUT_LONG msecs_to_jiffies(30000)
++
++#define BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2 2
++#define BCE_VHCI_BULK_MAX_ACTIVE_URBS (1 << BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2)
++
++typedef u8 bce_vhci_port_t;
++typedef u8 bce_vhci_device_t;
++
++enum bce_vhci_command {
++    BCE_VHCI_CMD_CONTROLLER_ENABLE = 1,
++    BCE_VHCI_CMD_CONTROLLER_DISABLE = 2,
++    BCE_VHCI_CMD_CONTROLLER_START = 3,
++    BCE_VHCI_CMD_CONTROLLER_PAUSE = 4,
++
++    BCE_VHCI_CMD_PORT_POWER_ON = 0x10,
++    BCE_VHCI_CMD_PORT_POWER_OFF = 0x11,
++    BCE_VHCI_CMD_PORT_RESUME = 0x12,
++    BCE_VHCI_CMD_PORT_SUSPEND = 0x13,
++    BCE_VHCI_CMD_PORT_RESET = 0x14,
++    BCE_VHCI_CMD_PORT_DISABLE = 0x15,
++    BCE_VHCI_CMD_PORT_STATUS = 0x16,
++
++    BCE_VHCI_CMD_DEVICE_CREATE = 0x30,
++    BCE_VHCI_CMD_DEVICE_DESTROY = 0x31,
++
++    BCE_VHCI_CMD_ENDPOINT_CREATE = 0x40,
++    BCE_VHCI_CMD_ENDPOINT_DESTROY = 0x41,
++    BCE_VHCI_CMD_ENDPOINT_SET_STATE = 0x42,
++    BCE_VHCI_CMD_ENDPOINT_RESET = 0x44,
++
++    /* Device to host only */
++    BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE = 0x43,
++    BCE_VHCI_CMD_TRANSFER_REQUEST = 0x1000,
++    BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS = 0x1005
++};
++
++enum bce_vhci_endpoint_state {
++    BCE_VHCI_ENDPOINT_ACTIVE = 0,
++    BCE_VHCI_ENDPOINT_PAUSED = 1,
++    BCE_VHCI_ENDPOINT_STALLED = 2
++};
++/* Send CONTROLLER_ENABLE with param1 = 0x7100 | busNum; on status 0 store the low 16 bits of res.param2 in *portMask. */
++static inline int bce_vhci_cmd_controller_enable(struct bce_vhci_command_queue *q, u8 busNum, u16 *portMask)
++{
++    int status;
++    struct bce_vhci_message cmd = { 0 }, res; /* zeroed: the whole struct is queued, unused fields must not be stack garbage */
++    cmd.cmd = BCE_VHCI_CMD_CONTROLLER_ENABLE;
++    cmd.param1 = 0x7100u | busNum;
++    status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++    if (!status)
++        *portMask = (u16) res.param2;
++    return status;
++}
++static inline int bce_vhci_cmd_controller_disable(struct bce_vhci_command_queue *q)
++{
++    struct bce_vhci_message cmd = { 0 }, res; /* zeroed: status/param1/param2 are unused but still queued */
++    cmd.cmd = BCE_VHCI_CMD_CONTROLLER_DISABLE;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++static inline int bce_vhci_cmd_controller_start(struct bce_vhci_command_queue *q)
++{
++    struct bce_vhci_message cmd = { 0 }, res; /* zeroed: status/param1/param2 are unused but still queued */
++    cmd.cmd = BCE_VHCI_CMD_CONTROLLER_START;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++static inline int bce_vhci_cmd_controller_pause(struct bce_vhci_command_queue *q)
++{
++    struct bce_vhci_message cmd = { 0 }, res; /* zeroed: status/param1/param2 are unused but still queued */
++    cmd.cmd = BCE_VHCI_CMD_CONTROLLER_PAUSE;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++/* Send PORT_POWER_ON for @port with the short timeout; returns the execute status. */
++static inline int bce_vhci_cmd_port_power_on(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
++{
++    struct bce_vhci_message cmd = { 0 }, res; /* zeroed: unused fields must not be stack garbage */
++    cmd.cmd = BCE_VHCI_CMD_PORT_POWER_ON;
++    cmd.param1 = port;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++static inline int bce_vhci_cmd_port_power_off(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
++{
++    struct bce_vhci_message cmd = { 0 }, res; /* zeroed: unused fields must not be stack garbage */
++    cmd.cmd = BCE_VHCI_CMD_PORT_POWER_OFF;
++    cmd.param1 = port;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++static inline int bce_vhci_cmd_port_resume(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
++{
++    struct bce_vhci_message cmd = { 0 }, res; /* zeroed: unused fields must not be stack garbage */
++    cmd.cmd = BCE_VHCI_CMD_PORT_RESUME;
++    cmd.param1 = port;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++static inline int bce_vhci_cmd_port_suspend(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
++{
++    struct bce_vhci_message cmd = { 0 }, res; /* zeroed: unused fields must not be stack garbage */
++    cmd.cmd = BCE_VHCI_CMD_PORT_SUSPEND;
++    cmd.param1 = port;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++static inline int bce_vhci_cmd_port_reset(struct bce_vhci_command_queue *q, bce_vhci_port_t port, u32 timeout)
++{
++    struct bce_vhci_message cmd = { 0 }, res; /* zeroed: the status field is unused but still queued */
++    cmd.cmd = BCE_VHCI_CMD_PORT_RESET;
++    cmd.param1 = port;
++    cmd.param2 = timeout;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++static inline int bce_vhci_cmd_port_disable(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
++{
++    struct bce_vhci_message cmd = { 0 }, res; /* zeroed: unused fields must not be stack garbage */
++    cmd.cmd = BCE_VHCI_CMD_PORT_DISABLE;
++    cmd.param1 = port;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++static inline int bce_vhci_cmd_port_status(struct bce_vhci_command_queue *q, bce_vhci_port_t port,
++        u32 clearFlags, u32 *resStatus)
++{
++    int status;
++    struct bce_vhci_message cmd = { 0 }, res; /* zeroed: the status field is unused but still queued */
++    cmd.cmd = BCE_VHCI_CMD_PORT_STATUS;
++    cmd.param1 = port;
++    cmd.param2 = clearFlags & 0x560000; /* only the bits in this mask are passed on */
++    status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++    if (status >= 0) /* unlike the other wrappers, any non-negative status publishes the result */
++        *resStatus = (u32) res.param2;
++    return status;
++}
++/* Send DEVICE_CREATE for @port; on status 0 store res.param2 (truncated to bce_vhci_device_t) in *dev. */
++static inline int bce_vhci_cmd_device_create(struct bce_vhci_command_queue *q, bce_vhci_port_t port,
++        bce_vhci_device_t *dev)
++{
++    int status;
++    struct bce_vhci_message cmd = { 0 }, res; /* zeroed: unused fields must not be stack garbage */
++    cmd.cmd = BCE_VHCI_CMD_DEVICE_CREATE;
++    cmd.param1 = port;
++    status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++    if (!status)
++        *dev = (bce_vhci_device_t) res.param2;
++    return status;
++}
++static inline int bce_vhci_cmd_device_destroy(struct bce_vhci_command_queue *q, bce_vhci_device_t dev)
++{
++    struct bce_vhci_message cmd = { 0 }, res; /* zeroed: unused fields must not be stack garbage */
++    cmd.cmd = BCE_VHCI_CMD_DEVICE_DESTROY;
++    cmd.param1 = dev;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++/* Send ENDPOINT_CREATE for @dev, packing the endpoint address into param1 and type/sizes/interval into param2. */
++static inline int bce_vhci_cmd_endpoint_create(struct bce_vhci_command_queue *q, bce_vhci_device_t dev,
++        struct usb_endpoint_descriptor *desc)
++{
++    struct bce_vhci_message cmd = { 0 }, res; /* zeroed: the status field is unused but still queued */
++    int endpoint_type = usb_endpoint_type(desc);
++    int maxp = usb_endpoint_maxp(desc);
++    int maxp_burst = usb_endpoint_maxp_mult(desc) * maxp;
++    u8 max_active_requests_pow2 = 0;
++    cmd.cmd = BCE_VHCI_CMD_ENDPOINT_CREATE;
++    cmd.param1 = dev | ((desc->bEndpointAddress & 0x8Fu) << 8);
++    if (endpoint_type == USB_ENDPOINT_XFER_BULK)
++        max_active_requests_pow2 = BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2;
++    cmd.param2 = endpoint_type | ((max_active_requests_pow2 & 0xf) << 4) | (maxp << 16) | ((u64) maxp_burst << 32);
++    if (endpoint_type == USB_ENDPOINT_XFER_INT)
++        cmd.param2 |= (desc->bInterval - 1) << 8; /* NOTE(review): bInterval == 0 would shift -1 - confirm it cannot occur */
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++static inline int bce_vhci_cmd_endpoint_destroy(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint)
++{
++    struct bce_vhci_message cmd = { 0 }, res; /* zeroed: unused fields must not be stack garbage */
++    cmd.cmd = BCE_VHCI_CMD_ENDPOINT_DESTROY;
++    cmd.param1 = dev | (endpoint << 8);
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++static inline int bce_vhci_cmd_endpoint_set_state(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint,
++        enum bce_vhci_endpoint_state newState, enum bce_vhci_endpoint_state *retState)
++{
++    int status;
++    struct bce_vhci_message cmd = { 0 }, res = { 0 }; /* res too: it is read below on paths that may not have filled it */
++    cmd.cmd = BCE_VHCI_CMD_ENDPOINT_SET_STATE;
++    cmd.param1 = dev | (endpoint << 8);
++    cmd.param2 = (u64) newState;
++    status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++    if (status != BCE_VHCI_INTERNAL_ERROR && status != BCE_VHCI_NO_POWER)
++        *retState = (enum bce_vhci_endpoint_state) res.param2;
++    return status;
++}
++static inline int bce_vhci_cmd_endpoint_reset(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint)
++{
++    struct bce_vhci_message cmd = { 0 }, res; /* zeroed: unused fields must not be stack garbage */
++    cmd.cmd = BCE_VHCI_CMD_ENDPOINT_RESET;
++    cmd.param1 = dev | (endpoint << 8);
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++
++
++#endif //BCE_VHCI_COMMAND_H
+diff --git a/drivers/staging/apple-bce/vhci/queue.c b/drivers/staging/apple-bce/vhci/queue.c
+new file mode 100644
+index 000000000000..7b0b5027157b
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/queue.c
+@@ -0,0 +1,268 @@
++#include "queue.h"
++#include "vhci.h"
++#include "../apple_bce.h"
++
++
++static void bce_vhci_message_queue_completion(struct bce_queue_sq *sq);
++/* Create the CQ, the DMA_TO_DEVICE SQ and the coherent message buffer of a message queue; returns 0 or -EINVAL. */
++int bce_vhci_message_queue_create(struct bce_vhci *vhci, struct bce_vhci_message_queue *ret, const char *name)
++{
++    int status;
++    ret->cq = bce_create_cq(vhci->dev, VHCI_EVENT_QUEUE_EL_COUNT);
++    if (!ret->cq)
++        return -EINVAL;
++    ret->sq = bce_create_sq(vhci->dev, ret->cq, name, VHCI_EVENT_QUEUE_EL_COUNT, DMA_TO_DEVICE,
++                            bce_vhci_message_queue_completion, ret);
++    if (!ret->sq) {
++        status = -EINVAL;
++        goto fail_cq;
++    }
++    ret->data = dma_alloc_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT,
++                                   &ret->dma_addr, GFP_KERNEL);
++    if (!ret->data) {
++        status = -EINVAL; /* allocation failure is reported with the same code as the queue failures */
++        goto fail_sq;
++    }
++    return 0;
++
++fail_sq: /* unwind in reverse order of creation */
++    bce_destroy_sq(vhci->dev, ret->sq);
++    ret->sq = NULL;
++fail_cq:
++    bce_destroy_cq(vhci->dev, ret->cq);
++    ret->cq = NULL; /* lets bce_vhci_message_queue_destroy() see the queue as not created */
++    return status;
++}
++/* Tear down a message queue; the queues are destroyed first so they are gone before the message buffer is freed. */
++void bce_vhci_message_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_message_queue *q)
++{
++    if (!q->cq)
++        return;
++    bce_destroy_sq(vhci->dev, q->sq);
++    bce_destroy_cq(vhci->dev, q->cq);
++    dma_free_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT,
++                      q->data, q->dma_addr);
++}
++/* Copy *req into the buffer slot of the next submission and submit it; no reservation is made in this function. */
++void bce_vhci_message_queue_write(struct bce_vhci_message_queue *q, struct bce_vhci_message *req)
++{
++    int sidx;
++    struct bce_qe_submission *s;
++    sidx = q->sq->tail; /* sampled before bce_next_submission() and used as the slot index in q->data */
++    s = bce_next_submission(q->sq);
++    pr_debug("bce-vhci: Send message: %x s=%x p1=%x p2=%llx\n", req->cmd, req->status, req->param1, req->param2);
++    q->data[sidx] = *req; /* whole-struct copy into the DMA buffer */
++    bce_set_submission_single(s, q->dma_addr + sizeof(struct bce_vhci_message) * sidx,
++            sizeof(struct bce_vhci_message));
++    bce_submit_to_device(q->sq);
++}
++/* SQ completion callback of message queues: acknowledge every pending completion, the records themselves are unused. */
++static void bce_vhci_message_queue_completion(struct bce_queue_sq *sq)
++{
++    while (bce_next_completion(sq))
++        bce_notify_submission_complete(sq);
++}
++
++
++
++static void bce_vhci_event_queue_completion(struct bce_queue_sq *sq);
++/* Create a DMA_FROM_DEVICE SQ on vhci->ev_cq with callback @compl, allocate its buffer and post the initial slots. */
++int __bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name,
++                                  bce_sq_completion compl)
++{
++    ret->vhci = vhci;
++
++    ret->sq = bce_create_sq(vhci->dev, vhci->ev_cq, name, VHCI_EVENT_QUEUE_EL_COUNT, DMA_FROM_DEVICE, compl, ret);
++    if (!ret->sq)
++        return -EINVAL;
++    ret->data = dma_alloc_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT,
++                                   &ret->dma_addr, GFP_KERNEL);
++    if (!ret->data) {
++        bce_destroy_sq(vhci->dev, ret->sq);
++        ret->sq = NULL; /* bce_vhci_event_queue_destroy() treats a NULL sq as "nothing to do" */
++        return -EINVAL;
++    }
++
++    init_completion(&ret->queue_empty_completion);
++    bce_vhci_event_queue_submit_pending(ret, VHCI_EVENT_PENDING_COUNT);
++    return 0;
++}
++/* Create an event queue whose completions are dispatched to @cb by bce_vhci_event_queue_completion(). */
++int bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name,
++        bce_vhci_event_queue_callback cb)
++{
++    ret->cb = cb; /* stored before the queue exists, the completion handler calls it */
++    return __bce_vhci_event_queue_create(vhci, ret, name, bce_vhci_event_queue_completion);
++}
++/* Tear down an event queue; the SQ is destroyed first so it is gone before its message buffer is freed. */
++void bce_vhci_event_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_event_queue *q)
++{
++    if (!q->sq)
++        return;
++    bce_destroy_sq(vhci->dev, q->sq);
++    dma_free_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT,
++                      q->data, q->dma_addr);
++}
++/* SQ completion callback of event queues: pass each completed message to ev->cb, then re-post the consumed slots. */
++static void bce_vhci_event_queue_completion(struct bce_queue_sq *sq)
++{
++    struct bce_sq_completion_data *cd;
++    struct bce_vhci_event_queue *ev = sq->userdata;
++    struct bce_vhci_message *msg;
++    size_t cnt = 0;
++
++    while ((cd = bce_next_completion(sq))) {
++        if (cd->status == BCE_COMPLETION_ABORTED) { /* We flushed the queue */
++            bce_notify_submission_complete(sq);
++            continue;
++        }
++        msg = &ev->data[sq->head];
++        pr_debug("bce-vhci: Got event: %x s=%x p1=%x p2=%llx\n", msg->cmd, msg->status, msg->param1, msg->param2);
++        ev->cb(ev, msg);
++
++        bce_notify_submission_complete(sq);
++        ++cnt; /* aborted entries are not counted, so they are not re-posted below */
++    }
++    bce_vhci_event_queue_submit_pending(ev, cnt);
++    if (atomic_read(&sq->available_commands) == sq->el_count - 1)
++        complete(&ev->queue_empty_completion); /* bce_vhci_event_queue_pause() waits on this */
++}
++/* Post up to @count receive slots of @q, each pointing at its own message-sized slice of the queue buffer. */
++void bce_vhci_event_queue_submit_pending(struct bce_vhci_event_queue *q, size_t count)
++{
++    int idx;
++    struct bce_qe_submission *s;
++    while (count--) {
++        if (bce_reserve_submission(q->sq, NULL)) { /* NULL timeout pointer */
++            pr_err("bce-vhci: Failed to reserve an event queue submission\n");
++            break;
++        }
++        idx = q->sq->tail; /* slot index, sampled before bce_next_submission() */
++        s = bce_next_submission(q->sq);
++        bce_set_submission_single(s,
++                                  q->dma_addr + idx * sizeof(struct bce_vhci_message), sizeof(struct bce_vhci_message));
++    }
++    bce_submit_to_device(q->sq); /* called even when nothing was queued above */
++}
++/* Flush the event queue and wait, for at most 5 s in total, until no submissions are outstanding. */
++void bce_vhci_event_queue_pause(struct bce_vhci_event_queue *q)
++{
++    unsigned long timeout;
++    reinit_completion(&q->queue_empty_completion);
++    if (bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, q->sq->qid))
++        pr_warn("bce-vhci: failed to flush event queue\n");
++    timeout = msecs_to_jiffies(5000);
++    while (atomic_read(&q->sq->available_commands) != q->sq->el_count - 1) { /* el_count - 1 is the idle level */
++        timeout = wait_for_completion_timeout(&q->queue_empty_completion, timeout); /* keeps the remaining time */
++        if (timeout == 0) {
++            pr_err("bce-vhci: waiting for queue to be flushed timed out\n");
++            break;
++        }
++    }
++}
++
++/*
++ * Re-post VHCI_EVENT_PENDING_COUNT receive buffers on a drained event queue.
++ * Refuses (with an error log) if the queue still has submissions outstanding.
++ */
++void bce_vhci_event_queue_resume(struct bce_vhci_event_queue *q)
++{
++    if (atomic_read(&q->sq->available_commands) == q->sq->el_count - 1) {
++        bce_vhci_event_queue_submit_pending(q, VHCI_EVENT_PENDING_COUNT);
++        return;
++    }
++    pr_err("bce-vhci: resume of a queue with pending submissions\n");
++}
++
++/*
++ * Initialise a command queue on top of an existing message queue: set up the
++ * locks and the completion, and start with no reply buffer registered.
++ */
++void bce_vhci_command_queue_create(struct bce_vhci_command_queue *ret, struct bce_vhci_message_queue *mq)
++{
++    /* Synchronisation primitives first; none of these steps depend on each other. */
++    mutex_init(&ret->mutex);
++    spin_lock_init(&ret->completion_lock);
++    init_completion(&ret->completion.completion);
++
++    /* No command is in flight yet. */
++    ret->completion.result = NULL;
++    ret->mq = mq;
++}
++
++/*
++ * Tear down a command queue. A waiter that is still registered gets a zeroed
++ * reply with status BCE_VHCI_ABORT and is woken up; the mutex is then taken
++ * and released once before being destroyed, so an executor that still holds
++ * it has left by the time the mutex goes away.
++ */
++void bce_vhci_command_queue_destroy(struct bce_vhci_command_queue *cq)
++{
++    struct bce_vhci_message *pending;
++
++    spin_lock(&cq->completion_lock);
++    pending = cq->completion.result;
++    if (pending != NULL) {
++        memset(pending, 0, sizeof(*pending));
++        pending->status = BCE_VHCI_ABORT;
++        complete(&cq->completion.completion);
++        cq->completion.result = NULL;
++    }
++    spin_unlock(&cq->completion_lock);
++
++    /* Lock/unlock pair acts as a barrier against a concurrent execute(). */
++    mutex_lock(&cq->mutex);
++    mutex_unlock(&cq->mutex);
++    mutex_destroy(&cq->mutex);
++}
++
++/*
++ * Hand a reply message to the command currently waiting on `cq`, if any.
++ * The message is copied into the registered reply buffer, the waiter is
++ * woken, and the buffer is unregistered. Without a waiter the message is
++ * silently dropped.
++ */
++void bce_vhci_command_queue_deliver_completion(struct bce_vhci_command_queue *cq, struct bce_vhci_message *msg)
++{
++    struct bce_vhci_message *dest;
++
++    spin_lock(&cq->completion_lock);
++    dest = cq->completion.result;
++    if (dest != NULL) {
++        *dest = *msg;
++        complete(&cq->completion.completion);
++        cq->completion.result = NULL;
++    }
++    spin_unlock(&cq->completion_lock);
++}
++
++/*
++ * Send `req` on the command message queue and wait for the matching reply.
++ *
++ * The reply is copied into `res` by bce_vhci_command_queue_deliver_completion().
++ * If no reply arrives within `timeout`, a cancellation (the same request with
++ * bit 0x4000 set in cmd) is sent and waited for.
++ *
++ * Returns 0 when the reply status is BCE_VHCI_SUCCESS, the (positive) reply
++ * status otherwise, a reservation error from bce_reserve_submission(),
++ * -ETIMEDOUT when the command was cancelled or the cancellation itself timed
++ * out, or -EIO when the reply does not belong to the request.
++ *
++ * The caller must hold cq->mutex (see bce_vhci_command_queue_execute()).
++ */
++static int __bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req,
++        struct bce_vhci_message *res, unsigned long timeout)
++{
++    int status;
++    struct bce_vhci_command_queue_completion *c;
++    struct bce_vhci_message creq;
++    c = &cq->completion;
++
++    if ((status = bce_reserve_submission(cq->mq->sq, &timeout)))
++        return status;
++
++    /* Register the reply buffer before the request can possibly be answered. */
++    spin_lock(&cq->completion_lock);
++    c->result = res;
++    reinit_completion(&c->completion);
++    spin_unlock(&cq->completion_lock);
++
++    bce_vhci_message_queue_write(cq->mq, req);
++
++    if (!wait_for_completion_timeout(&c->completion, timeout)) {
++        /* we ran out of time, send cancellation */
++        pr_debug("bce-vhci: command timed out req=%x\n", req->cmd);
++        if ((status = bce_reserve_submission(cq->mq->sq, &timeout))) {
++            /*
++             * We are giving up on this command: unregister the reply buffer so
++             * a late reply is not copied into memory the caller no longer owns.
++             */
++            spin_lock(&cq->completion_lock);
++            c->result = NULL;
++            spin_unlock(&cq->completion_lock);
++            return status;
++        }
++
++        creq = *req;
++        creq.cmd |= 0x4000;
++        bce_vhci_message_queue_write(cq->mq, &creq);
++
++        /* NOTE(review): 1000 is passed as raw jiffies here, not milliseconds - confirm intent. */
++        if (!wait_for_completion_timeout(&c->completion, 1000)) {
++            pr_err("bce-vhci: Possible desync, cmd cancel timed out\n");
++
++            spin_lock(&cq->completion_lock);
++            c->result = NULL;
++            spin_unlock(&cq->completion_lock);
++            return -ETIMEDOUT;
++        }
++        /* The device acknowledged the cancellation itself. */
++        if ((res->cmd & ~0x8000) == creq.cmd)
++            return -ETIMEDOUT;
++        /* reply for the previous command most likely arrived */
++    }
++
++    /* Replies are matched on the command code with bit 0x8000 masked off. */
++    if ((res->cmd & ~0x8000) != req->cmd) {
++        pr_err("bce-vhci: Possible desync, cmd reply mismatch req=%x, res=%x\n", req->cmd, res->cmd);
++        return -EIO;
++    }
++    if (res->status == BCE_VHCI_SUCCESS)
++        return 0;
++    return res->status;
++}
++
++/*
++ * Serialised entry point for issuing a command: runs
++ * __bce_vhci_command_queue_execute() with cq->mutex held and returns its
++ * result unchanged.
++ */
++int bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req,
++                                   struct bce_vhci_message *res, unsigned long timeout)
++{
++    int ret;
++
++    mutex_lock(&cq->mutex);
++    ret = __bce_vhci_command_queue_execute(cq, req, res, timeout);
++    mutex_unlock(&cq->mutex);
++
++    return ret;
++}
+diff --git a/drivers/staging/apple-bce/vhci/queue.h b/drivers/staging/apple-bce/vhci/queue.h
+new file mode 100644
+index 000000000000..adb705b6ba1d
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/queue.h
+@@ -0,0 +1,76 @@
++#ifndef BCE_VHCI_QUEUE_H
++#define BCE_VHCI_QUEUE_H
++
++#include <linux/completion.h>
++#include "../queue.h"
++
++#define VHCI_EVENT_QUEUE_EL_COUNT 256
++#define VHCI_EVENT_PENDING_COUNT 32
++
++struct bce_vhci;
++struct bce_vhci_event_queue;
++
++/*
++ * Status codes carried in bce_vhci_message.status.
++ *
++ * The command queue maps BCE_VHCI_SUCCESS to a 0 return value and hands any
++ * other status back to the caller as-is. BCE_VHCI_ABORT is also written by
++ * the driver itself when a command queue is destroyed with a waiter pending.
++ * Zero is not a member of this enum.
++ */
++enum bce_vhci_message_status {
++    BCE_VHCI_SUCCESS = 1,
++    BCE_VHCI_ERROR = 2,
++    BCE_VHCI_USB_PIPE_STALL = 3,
++    BCE_VHCI_ABORT = 4,
++    BCE_VHCI_BAD_ARGUMENT = 5,
++    BCE_VHCI_OVERRUN = 6,
++    BCE_VHCI_INTERNAL_ERROR = 7,
++    BCE_VHCI_NO_POWER = 8,
++    BCE_VHCI_UNSUPPORTED = 9
++};
++/*
++ * One VHCI message as exchanged through the message and event queues.
++ * The event queue addresses its buffer slots as
++ * index * sizeof(struct bce_vhci_message).
++ */
++struct bce_vhci_message {
++    /* Command code; replies are matched with bit 0x8000 masked off, cancellations set bit 0x4000. */
++    u16 cmd;
++    u16 status; // bce_vhci_message_status
++    /* Command-specific; for transfer requests: (endpoint address << 8) | device address. */
++    u32 param1;
++    /* Command-specific; for transfer requests: the transfer length in bytes. */
++    u64 param2;
++};
++
++/*
++ * Outgoing message queue: a completion queue / submission queue pair plus
++ * the message buffer written by bce_vhci_message_queue_write().
++ */
++struct bce_vhci_message_queue {
++    struct bce_queue_cq *cq;
++    struct bce_queue_sq *sq;
++    /* presumably the CPU view and DMA address of the same message array - TODO confirm in queue.c */
++    struct bce_vhci_message *data;
++    dma_addr_t dma_addr;
++};
++/* Handler invoked once for every message received on an event queue. */
++typedef void (*bce_vhci_event_queue_callback)(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg);
++/*
++ * Incoming event queue. Receive buffers are slots of `data`; the slot for a
++ * submission is addressed through `dma_addr` plus the slot index.
++ */
++struct bce_vhci_event_queue {
++    struct bce_vhci *vhci;
++    struct bce_queue_sq *sq;
++    /* Message slots, indexed by the submission queue head when a completion is processed. */
++    struct bce_vhci_message *data;
++    dma_addr_t dma_addr;
++    bce_vhci_event_queue_callback cb;
++    /* Completed when all but one submission slot are available again (queue drained). */
++    struct completion queue_empty_completion;
++};
++/* Rendezvous between a command waiting for its reply and the reply delivery path. */
++struct bce_vhci_command_queue_completion {
++    /* Where the reply is copied to; NULL while no command is waiting. */
++    struct bce_vhci_message *result;
++    /* Signalled once `result` has been filled in (or the queue is destroyed). */
++    struct completion completion;
++};
++/* Synchronous command channel layered on a message queue; one command in flight at a time. */
++struct bce_vhci_command_queue {
++    struct bce_vhci_message_queue *mq;
++    struct bce_vhci_command_queue_completion completion;
++    /* Protects completion.result. */
++    struct spinlock completion_lock;
++    /* Serialises bce_vhci_command_queue_execute() callers. */
++    struct mutex mutex;
++};
++
++/* Message queue: creation, teardown and posting of a single message. */
++int bce_vhci_message_queue_create(struct bce_vhci *vhci, struct bce_vhci_message_queue *ret, const char *name);
++void bce_vhci_message_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_message_queue *q);
++void bce_vhci_message_queue_write(struct bce_vhci_message_queue *q, struct bce_vhci_message *req);
++
++/*
++ * Event queue. submit_pending() posts `count` receive buffers; pause() flushes
++ * the queue and waits for it to drain; resume() re-posts
++ * VHCI_EVENT_PENDING_COUNT buffers on a drained queue.
++ */
++int __bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name,
++        bce_sq_completion compl);
++int bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name,
++        bce_vhci_event_queue_callback cb);
++void bce_vhci_event_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_event_queue *q);
++void bce_vhci_event_queue_submit_pending(struct bce_vhci_event_queue *q, size_t count);
++void bce_vhci_event_queue_pause(struct bce_vhci_event_queue *q);
++void bce_vhci_event_queue_resume(struct bce_vhci_event_queue *q);
++
++/*
++ * Command queue. execute() sends `req`, waits up to `timeout` for the reply in
++ * `res`, and returns 0 on BCE_VHCI_SUCCESS, the reply status otherwise, or a
++ * negative errno. deliver_completion() feeds a received reply to the waiter.
++ */
++void bce_vhci_command_queue_create(struct bce_vhci_command_queue *ret, struct bce_vhci_message_queue *mq);
++void bce_vhci_command_queue_destroy(struct bce_vhci_command_queue *cq);
++int bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req,
++        struct bce_vhci_message *res, unsigned long timeout);
++void bce_vhci_command_queue_deliver_completion(struct bce_vhci_command_queue *cq, struct bce_vhci_message *msg);
++
++#endif //BCE_VHCI_QUEUE_H
+diff --git a/drivers/staging/apple-bce/vhci/transfer.c b/drivers/staging/apple-bce/vhci/transfer.c
+new file mode 100644
+index 000000000000..8226363d69c8
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/transfer.c
+@@ -0,0 +1,661 @@
++#include "transfer.h"
++#include "../queue.h"
++#include "vhci.h"
++#include "../apple_bce.h"
++#include <linux/usb/hcd.h>
++
++static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq);
++static void bce_vhci_transfer_queue_giveback(struct bce_vhci_transfer_queue *q);
++static void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q);
++
++static int bce_vhci_urb_init(struct bce_vhci_urb *vurb);
++static int bce_vhci_urb_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg);
++static int bce_vhci_urb_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c);
++
++static void bce_vhci_transfer_queue_reset_w(struct work_struct *work);
++
++/*
++ * Initialise the transfer queue `q` for endpoint `endp` of device `dev_addr`.
++ *
++ * The queue starts active, not stalled and not paused by anyone. Bulk
++ * endpoints may have up to BCE_VHCI_BULK_MAX_ACTIVE_URBS URBs in flight, all
++ * other endpoint types one. One completion queue is created, plus an IN
++ * and/or OUT submission queue depending on `dir` (DMA_BIDIRECTIONAL creates
++ * both).
++ *
++ * NOTE(review): the results of bce_create_cq()/bce_create_sq() are not
++ * checked here and the function cannot report failure - confirm that callers
++ * cope with NULL queues.
++ */
++void bce_vhci_create_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q,
++        struct usb_host_endpoint *endp, bce_vhci_device_t dev_addr, enum dma_data_direction dir)
++{
++    char name[0x21];
++    INIT_LIST_HEAD(&q->evq);
++    INIT_LIST_HEAD(&q->giveback_urb_list);
++    spin_lock_init(&q->urb_lock);
++    mutex_init(&q->pause_lock);
++    q->vhci = vhci;
++    q->endp = endp;
++    q->dev_addr = dev_addr;
++    q->endp_addr = (u8) (endp->desc.bEndpointAddress & 0x8F);
++    q->state = BCE_VHCI_ENDPOINT_ACTIVE;
++    q->active = true;
++    q->stalled = false;
++    /* A fresh queue is active, so no pause source may be recorded (the storage may be reused). */
++    q->paused_by = 0;
++    q->max_active_requests = 1;
++    if (usb_endpoint_type(&endp->desc) == USB_ENDPOINT_XFER_BULK)
++        q->max_active_requests = BCE_VHCI_BULK_MAX_ACTIVE_URBS;
++    q->remaining_active_requests = q->max_active_requests;
++    q->cq = bce_create_cq(vhci->dev, 0x100);
++    INIT_WORK(&q->w_reset, bce_vhci_transfer_queue_reset_w);
++    q->sq_in = NULL;
++    if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) {
++        /* IN queues are named with the direction bit (0x80) set in the endpoint number. */
++        snprintf(name, sizeof(name), "VHC1-%i-%02x", dev_addr, 0x80 | usb_endpoint_num(&endp->desc));
++        q->sq_in = bce_create_sq(vhci->dev, q->cq, name, 0x100, DMA_FROM_DEVICE,
++                                 bce_vhci_transfer_queue_completion, q);
++    }
++    q->sq_out = NULL;
++    if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) {
++        snprintf(name, sizeof(name), "VHC1-%i-%02x", dev_addr, usb_endpoint_num(&endp->desc));
++        q->sq_out = bce_create_sq(vhci->dev, q->cq, name, 0x100, DMA_TO_DEVICE,
++                                  bce_vhci_transfer_queue_completion, q);
++    }
++}
++
++/*
++ * Tear down a transfer queue: give back URBs that are already completed,
++ * drop deferred events, then destroy whichever submission queues exist and
++ * finally the completion queue.
++ */
++void bce_vhci_destroy_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q)
++{
++    bce_vhci_transfer_queue_giveback(q);
++    bce_vhci_transfer_queue_remove_pending(q);
++
++    if (q->sq_in != NULL) {
++        bce_destroy_sq(vhci->dev, q->sq_in);
++    }
++    if (q->sq_out != NULL) {
++        bce_destroy_sq(vhci->dev, q->sq_out);
++    }
++
++    bce_destroy_cq(vhci->dev, q->cq);
++}
++
++/* True while the queue still has budget to start another URB. */
++static inline bool bce_vhci_transfer_queue_can_init_urb(struct bce_vhci_transfer_queue *q)
++{
++    /* The counter is unsigned, so "non-zero" and "greater than zero" coincide. */
++    return q->remaining_active_requests != 0;
++}
++
++/*
++ * Queue a copy of `msg` at the tail of q->evq so it can be replayed later by
++ * bce_vhci_transfer_queue_deliver_pending().
++ *
++ * Callers hold q->urb_lock with interrupts disabled, so the allocation must
++ * not sleep. If it fails the event is dropped with an error log.
++ */
++static void bce_vhci_transfer_queue_defer_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg)
++{
++    struct bce_vhci_list_message *lm;
++
++    lm = kmalloc(sizeof(*lm), GFP_ATOMIC);
++    if (!lm) {
++        pr_err("bce-vhci: [%02x] Failed to allocate a deferred event, dropping it\n", q->endp_addr);
++        return;
++    }
++    INIT_LIST_HEAD(&lm->list);
++    lm->msg = *msg;
++    list_add_tail(&lm->list, &q->evq);
++}
++
++/*
++ * Return every URB on q->giveback_urb_list to the USB core. The lock is
++ * dropped around each usb_hcd_giveback_urb() call and re-taken afterwards,
++ * so the list is re-examined from its head on every iteration.
++ */
++static void bce_vhci_transfer_queue_giveback(struct bce_vhci_transfer_queue *q)
++{
++    struct urb *done;
++    unsigned long flags;
++
++    spin_lock_irqsave(&q->urb_lock, flags);
++    for (;;) {
++        if (list_empty(&q->giveback_urb_list))
++            break;
++
++        done = list_first_entry(&q->giveback_urb_list, struct urb, urb_list);
++        list_del(&done->urb_list);
++
++        spin_unlock_irqrestore(&q->urb_lock, flags);
++        usb_hcd_giveback_urb(q->vhci->hcd, done, done->status);
++        spin_lock_irqsave(&q->urb_lock, flags);
++    }
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++}
++
++static void bce_vhci_transfer_queue_init_pending_urbs(struct bce_vhci_transfer_queue *q);
++
++/*
++ * Replay deferred events against the URB at the head of the endpoint list.
++ * Stops when either list runs empty or the head URB answers -EAGAIN (the
++ * event then stays queued). Afterwards, URBs still waiting to be started
++ * get a chance to be initialised.
++ */
++static void bce_vhci_transfer_queue_deliver_pending(struct bce_vhci_transfer_queue *q)
++{
++    struct bce_vhci_list_message *ev;
++    struct urb *head_urb;
++
++    for (;;) {
++        if (list_empty(&q->endp->urb_list) || list_empty(&q->evq))
++            break;
++
++        head_urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list);
++        ev = list_first_entry(&q->evq, struct bce_vhci_list_message, list);
++
++        if (bce_vhci_urb_update(head_urb->hcpriv, &ev->msg) == -EAGAIN)
++            break;
++
++        /* The event was consumed. */
++        list_del(&ev->list);
++        kfree(ev);
++    }
++
++    /* some of the URBs could have been completed, so initialize more URBs if possible */
++    bce_vhci_transfer_queue_init_pending_urbs(q);
++}
++
++/* Discard every deferred event on q->evq, under q->urb_lock. */
++static void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q)
++{
++    struct bce_vhci_list_message *ev, *next;
++    unsigned long flags;
++
++    spin_lock_irqsave(&q->urb_lock, flags);
++    list_for_each_entry_safe(ev, next, &q->evq, list) {
++        list_del(&ev->list);
++        kfree(ev);
++    }
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++}
++
++/*
++ * Handle a message addressed to this transfer queue.
++ *
++ * Previously deferred events are replayed first. A transfer request is
++ * deferred when older events are still queued or no URB is linked; any other
++ * message without a linked URB is reported as unexpected. Otherwise the
++ * message goes to the head URB and is deferred if that URB answers -EAGAIN.
++ * Completed URBs are given back after the lock is released.
++ */
++void bce_vhci_transfer_queue_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg)
++{
++    struct urb *head_urb;
++    unsigned long flags;
++
++    spin_lock_irqsave(&q->urb_lock, flags);
++    bce_vhci_transfer_queue_deliver_pending(q);
++
++    if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST &&
++        (!list_empty(&q->evq) || list_empty(&q->endp->urb_list))) {
++        /* Keep transfer requests in arrival order behind whatever is already deferred. */
++        bce_vhci_transfer_queue_defer_event(q, msg);
++    } else if (list_empty(&q->endp->urb_list)) {
++        pr_err("bce-vhci: [%02x] Unexpected transfer queue event\n", q->endp_addr);
++    } else {
++        head_urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list);
++        if (bce_vhci_urb_update(head_urb->hcpriv, msg) == -EAGAIN)
++            bce_vhci_transfer_queue_defer_event(q, msg);
++        else
++            bce_vhci_transfer_queue_init_pending_urbs(q);
++    }
++
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++    bce_vhci_transfer_queue_giveback(q);
++}
++
++/*
++ * Submission queue completion callback for a transfer queue (sq->userdata is
++ * the owning bce_vhci_transfer_queue).
++ *
++ * Each completion is routed to the URB at the head of the endpoint list.
++ * Aborted completions (queue flush) and completions that arrive while no URB
++ * is linked are only acknowledged. Every consumed completion is acknowledged
++ * with bce_notify_submission_complete(), so the submission slot is handed
++ * back on all paths. Deferred events are replayed afterwards and finished
++ * URBs are given back once the lock is dropped.
++ */
++static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq)
++{
++    unsigned long flags;
++    struct bce_sq_completion_data *c;
++    struct urb *urb;
++    struct bce_vhci_transfer_queue *q = sq->userdata;
++    spin_lock_irqsave(&q->urb_lock, flags);
++    while ((c = bce_next_completion(sq))) {
++        if (c->status == BCE_COMPLETION_ABORTED) { /* We flushed the queue */
++            pr_debug("bce-vhci: [%02x] Got an abort completion\n", q->endp_addr);
++            bce_notify_submission_complete(sq);
++            continue;
++        }
++        if (list_empty(&q->endp->urb_list)) {
++            pr_err("bce-vhci: [%02x] Got a completion while no requests are pending\n", q->endp_addr);
++            /* Still release the slot, like every other path through this loop. */
++            bce_notify_submission_complete(sq);
++            continue;
++        }
++        pr_debug("bce-vhci: [%02x] Got a transfer queue completion\n", q->endp_addr);
++        urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list);
++        bce_vhci_urb_transfer_completion(urb->hcpriv, c);
++        bce_notify_submission_complete(sq);
++    }
++    bce_vhci_transfer_queue_deliver_pending(q);
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++    bce_vhci_transfer_queue_giveback(q);
++}
++
++/*
++ * Pause the endpoint on the device side.
++ *
++ * Marks the queue inactive, drops deferred events, asks the device to move
++ * the endpoint to BCE_VHCI_ENDPOINT_PAUSED and then flushes the IN/OUT
++ * submission queues. Returns the command error, -EINVAL if the device
++ * reports a state other than PAUSED, or 0 on success.
++ */
++int bce_vhci_transfer_queue_do_pause(struct bce_vhci_transfer_queue *q)
++{
++    const u8 endp_addr = (u8) (q->endp->desc.bEndpointAddress & 0x8F);
++    unsigned long flags;
++    int err;
++
++    spin_lock_irqsave(&q->urb_lock, flags);
++    q->active = false;
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++
++    if (q->sq_out != NULL)
++        pr_err("bce-vhci: Not implemented: wait for pending output requests\n");
++
++    bce_vhci_transfer_queue_remove_pending(q);
++
++    err = bce_vhci_cmd_endpoint_set_state(&q->vhci->cq, q->dev_addr, endp_addr,
++                                          BCE_VHCI_ENDPOINT_PAUSED, &q->state);
++    if (err)
++        return err;
++    if (q->state != BCE_VHCI_ENDPOINT_PAUSED)
++        return -EINVAL;
++
++    if (q->sq_in != NULL)
++        bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_in->qid);
++    if (q->sq_out != NULL)
++        bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_out->qid);
++    return 0;
++}
++
++static void bce_vhci_urb_resume(struct bce_vhci_urb *urb);
++
++/*
++ * Reactivate the endpoint on the device side and restart its URBs.
++ *
++ * After the device confirms BCE_VHCI_ENDPOINT_ACTIVE, every linked URB is
++ * visited in order: URBs that were never started are initialised while the
++ * queue has budget (the walk stops at the first one that does not fit), all
++ * others are resumed. Deferred events are replayed last. Returns the command
++ * error, -EINVAL on an unexpected endpoint state, or 0.
++ */
++int bce_vhci_transfer_queue_do_resume(struct bce_vhci_transfer_queue *q)
++{
++    const u8 endp_addr = (u8) (q->endp->desc.bEndpointAddress & 0x8F);
++    struct bce_vhci_urb *vurb;
++    struct urb *cur, *next;
++    unsigned long flags;
++    int err;
++
++    err = bce_vhci_cmd_endpoint_set_state(&q->vhci->cq, q->dev_addr, endp_addr,
++                                          BCE_VHCI_ENDPOINT_ACTIVE, &q->state);
++    if (err)
++        return err;
++    if (q->state != BCE_VHCI_ENDPOINT_ACTIVE)
++        return -EINVAL;
++
++    spin_lock_irqsave(&q->urb_lock, flags);
++    q->active = true;
++    list_for_each_entry_safe(cur, next, &q->endp->urb_list, urb_list) {
++        vurb = cur->hcpriv;
++        if (vurb->state != BCE_VHCI_URB_INIT_PENDING) {
++            bce_vhci_urb_resume(vurb);
++            continue;
++        }
++        if (!bce_vhci_transfer_queue_can_init_urb(q))
++            break;
++        bce_vhci_urb_init(vurb);
++    }
++    bce_vhci_transfer_queue_deliver_pending(q);
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++    return 0;
++}
++
++/*
++ * Record `src` as a reason for this queue being paused. The endpoint is only
++ * actually paused when no other source had paused it yet; `src` is recorded
++ * only if that succeeded (or was unnecessary). Returns 0 or the error from
++ * bce_vhci_transfer_queue_do_pause().
++ */
++int bce_vhci_transfer_queue_pause(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src)
++{
++    int ret = 0;
++
++    mutex_lock(&q->pause_lock);
++    /* Already paused by every bit in src: nothing to do. */
++    if ((q->paused_by & src) == src)
++        goto out;
++
++    if (q->paused_by == 0)
++        ret = bce_vhci_transfer_queue_do_pause(q);
++    if (ret == 0)
++        q->paused_by |= src;
++out:
++    mutex_unlock(&q->pause_lock);
++    return ret;
++}
++
++/*
++ * Withdraw `src` as a reason for this queue being paused. The endpoint is
++ * only actually resumed when `src` was the last remaining source; the bits
++ * are cleared only if that succeeded (or was unnecessary). Returns 0 or the
++ * error from bce_vhci_transfer_queue_do_resume().
++ */
++int bce_vhci_transfer_queue_resume(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src)
++{
++    int ret = 0;
++
++    mutex_lock(&q->pause_lock);
++    /* Not paused by this source: nothing to undo. */
++    if ((q->paused_by & src) == 0)
++        goto out;
++
++    if ((q->paused_by & ~src) == 0)
++        ret = bce_vhci_transfer_queue_do_resume(q);
++    if (ret == 0)
++        q->paused_by &= ~src;
++out:
++    mutex_unlock(&q->pause_lock);
++    return ret;
++}
++
++/*
++ * Work item that recovers a stalled endpoint.
++ *
++ * If the queue is (still) marked stalled it is deactivated and recorded as
++ * paused by BCE_VHCI_PAUSE_INTERNAL_WQ, deferred events are dropped, both
++ * submission queues are flushed and the endpoint is reset on the device.
++ * The stall flag is then cleared and the internal pause is lifted again via
++ * bce_vhci_transfer_queue_resume().
++ */
++static void bce_vhci_transfer_queue_reset_w(struct work_struct *work)
++{
++    unsigned long flags;
++    struct bce_vhci_transfer_queue *q = container_of(work, struct bce_vhci_transfer_queue, w_reset);
++
++    mutex_lock(&q->pause_lock);
++    spin_lock_irqsave(&q->urb_lock, flags);
++    /* The stall may already have been handled by an earlier run of this work item. */
++    if (!q->stalled) {
++        spin_unlock_irqrestore(&q->urb_lock, flags);
++        mutex_unlock(&q->pause_lock);
++        return;
++    }
++    q->active = false;
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++    /* Set directly (pause_lock is held) instead of going through bce_vhci_transfer_queue_pause(). */
++    q->paused_by |= BCE_VHCI_PAUSE_INTERNAL_WQ;
++    bce_vhci_transfer_queue_remove_pending(q);
++    if (q->sq_in)
++        bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_in->qid);
++    if (q->sq_out)
++        bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_out->qid);
++    /* NOTE(review): the result of the endpoint reset command is ignored - confirm this is intended. */
++    bce_vhci_cmd_endpoint_reset(&q->vhci->cq, q->dev_addr, (u8) (q->endp->desc.bEndpointAddress & 0x8F));
++    spin_lock_irqsave(&q->urb_lock, flags);
++    q->stalled = false;
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++    /* pause_lock must be released first: bce_vhci_transfer_queue_resume() takes it itself. */
++    mutex_unlock(&q->pause_lock);
++    bce_vhci_transfer_queue_resume(q, BCE_VHCI_PAUSE_INTERNAL_WQ);
++}
++
++/* Schedule the endpoint reset work item on the VHCI transfer-queue state workqueue. */
++void bce_vhci_transfer_queue_request_reset(struct bce_vhci_transfer_queue *q)
++{
++    struct bce_vhci *vhci = q->vhci;
++
++    queue_work(vhci->tq_state_wq, &q->w_reset);
++}
++
++/*
++ * Start as many not-yet-initialised URBs as the queue's budget allows,
++ * walking the endpoint's URB list in order and stopping as soon as the
++ * budget is exhausted.
++ */
++static void bce_vhci_transfer_queue_init_pending_urbs(struct bce_vhci_transfer_queue *q)
++{
++    struct bce_vhci_urb *vurb;
++    struct urb *cur, *next;
++
++    list_for_each_entry_safe(cur, next, &q->endp->urb_list, urb_list) {
++        if (!bce_vhci_transfer_queue_can_init_urb(q))
++            return;
++
++        vurb = cur->hcpriv;
++        if (vurb->state != BCE_VHCI_URB_INIT_PENDING)
++            continue;
++        bce_vhci_urb_init(vurb);
++    }
++}
++
++
++
++static int bce_vhci_urb_data_start(struct bce_vhci_urb *urb, unsigned long *timeout);
++
++/*
++ * Attach driver state to `urb` and enqueue it on transfer queue `q`.
++ *
++ * The URB is linked to its endpoint and, if the queue is active and has
++ * budget, started immediately; otherwise it is left in
++ * BCE_VHCI_URB_INIT_PENDING (the zero state produced by kzalloc). Enqueuing
++ * on an inactive, stalled queue also schedules an endpoint reset.
++ *
++ * Returns 0 on success, -ENOMEM if the driver state cannot be allocated, or
++ * the error from usb_hcd_link_urb_to_ep() / bce_vhci_urb_init(); on error
++ * the URB is left unlinked with hcpriv cleared.
++ *
++ * NOTE(review): the allocation uses GFP_KERNEL; the calling context is not
++ * visible here - confirm this is never reached from atomic context.
++ */
++int bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, struct urb *urb)
++{
++    unsigned long flags;
++    int status = 0;
++    struct bce_vhci_urb *vurb;
++    vurb = kzalloc(sizeof(struct bce_vhci_urb), GFP_KERNEL);
++    if (!vurb)
++        return -ENOMEM;
++    urb->hcpriv = vurb;
++
++    vurb->q = q;
++    vurb->urb = urb;
++    vurb->dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
++    vurb->is_control = (usb_endpoint_num(&urb->ep->desc) == 0);
++
++    spin_lock_irqsave(&q->urb_lock, flags);
++    status = usb_hcd_link_urb_to_ep(q->vhci->hcd, urb);
++    if (status) {
++        spin_unlock_irqrestore(&q->urb_lock, flags);
++        urb->hcpriv = NULL;
++        kfree(vurb);
++        return status;
++    }
++
++    if (q->active) {
++        if (bce_vhci_transfer_queue_can_init_urb(vurb->q))
++            status = bce_vhci_urb_init(vurb);
++        else
++            vurb->state = BCE_VHCI_URB_INIT_PENDING;
++    } else {
++        if (q->stalled)
++            bce_vhci_transfer_queue_request_reset(q);
++        vurb->state = BCE_VHCI_URB_INIT_PENDING;
++    }
++    if (status) {
++        /* Starting the URB failed: undo the link so the caller sees a clean failure. */
++        usb_hcd_unlink_urb_from_ep(q->vhci->hcd, urb);
++        urb->hcpriv = NULL;
++        kfree(vurb);
++    } else {
++        bce_vhci_transfer_queue_deliver_pending(q);
++    }
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++    pr_debug("bce-vhci: [%02x] URB enqueued (dir = %s, size = %i)\n", q->endp_addr,
++            usb_urb_dir_in(urb) ? "IN" : "OUT", urb->transfer_buffer_length);
++    return status;
++}
++
++/*
++ * Start a URB and charge it against the queue's active-request budget.
++ * Control URBs simply wait for the device's setup request; all others begin
++ * their data stage right away. Returns -EINVAL when no budget is left, the
++ * data-stage error if starting it fails (budget untouched), or 0.
++ */
++static int bce_vhci_urb_init(struct bce_vhci_urb *vurb)
++{
++    struct bce_vhci_transfer_queue *q = vurb->q;
++    int err;
++
++    if (q->remaining_active_requests == 0) {
++        pr_err("bce-vhci: cannot init request (remaining_active_requests = 0)\n");
++        return -EINVAL;
++    }
++
++    if (!vurb->is_control) {
++        err = bce_vhci_urb_data_start(vurb, NULL);
++        if (err)
++            return err;
++    } else {
++        vurb->state = BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST;
++    }
++
++    --q->remaining_active_requests;
++    return 0;
++}
++
++/*
++ * Finish a URB with `status`: unlink it from its endpoint, release the
++ * driver state, return its slot in the active-request budget (if it had
++ * been started) and park it on the giveback list. The actual giveback
++ * happens later in bce_vhci_transfer_queue_giveback().
++ */
++static void bce_vhci_urb_complete(struct bce_vhci_urb *urb, int status)
++{
++    struct bce_vhci_transfer_queue *q = urb->q;
++    struct urb *real_urb = urb->urb;
++    /* Only URBs that were started hold a budget slot. */
++    bool was_started = urb->state != BCE_VHCI_URB_INIT_PENDING;
++
++    pr_debug("bce-vhci: [%02x] URB complete %i\n", q->endp_addr, status);
++
++    usb_hcd_unlink_urb_from_ep(q->vhci->hcd, real_urb);
++    real_urb->hcpriv = NULL;
++    real_urb->status = status;
++
++    if (was_started)
++        ++q->remaining_active_requests;
++
++    kfree(urb);
++    list_add_tail(&real_urb->urb_list, &q->giveback_urb_list);
++}
++
++/*
++ * Cancel (unlink) a URB with the given completion status.
++ *
++ * A URB that has not been started yet is simply unlinked and given back.
++ * One that has been started requires pausing the transfer queue first; its
++ * slot in the active-request budget is returned and the queue is resumed
++ * after the giveback.
++ *
++ * Returns the error from usb_hcd_check_unlink_urb(), or 0.
++ */
++int bce_vhci_urb_request_cancel(struct bce_vhci_transfer_queue *q, struct urb *urb, int status)
++{
++    struct bce_vhci_urb *vurb;
++    unsigned long flags;
++    int ret;
++
++    spin_lock_irqsave(&q->urb_lock, flags);
++    if ((ret = usb_hcd_check_unlink_urb(q->vhci->hcd, urb, status))) {
++        spin_unlock_irqrestore(&q->urb_lock, flags);
++        return ret;
++    }
++
++    vurb = urb->hcpriv;
++    /* If the URB wasn't posted to the device yet, we can still remove it on the host without pausing the queue. */
++    if (vurb->state != BCE_VHCI_URB_INIT_PENDING) {
++        pr_debug("bce-vhci: [%02x] Cancelling URB\n", q->endp_addr);
++
++        /* Pausing takes a mutex and issues a command, so the spinlock has to be dropped around it. */
++        /* NOTE(review): the URB could complete (freeing vurb) while the lock is dropped - confirm this cannot happen. */
++        spin_unlock_irqrestore(&q->urb_lock, flags);
++        /* NOTE(review): the result of the pause is ignored - confirm this is acceptable. */
++        bce_vhci_transfer_queue_pause(q, BCE_VHCI_PAUSE_INTERNAL_WQ);
++        spin_lock_irqsave(&q->urb_lock, flags);
++
++        ++q->remaining_active_requests;
++    }
++
++    usb_hcd_unlink_urb_from_ep(q->vhci->hcd, urb);
++
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++
++    /* NOTE(review): urb->hcpriv still points at vurb at giveback time, unlike in bce_vhci_urb_complete(). */
++    usb_hcd_giveback_urb(q->vhci->hcd, urb, status);
++
++    if (vurb->state != BCE_VHCI_URB_INIT_PENDING)
++        bce_vhci_transfer_queue_resume(q, BCE_VHCI_PAUSE_INTERNAL_WQ);
++
++    kfree(vurb);
++
++    return 0;
++}
++
++/*
++ * Start (or restart) the IN data stage of a URB.
++ *
++ * Reserves one slot on the asynchronous message queue and one on the IN
++ * submission queue, sends a transfer request for the bytes not yet received
++ * and posts the remaining part of the URB buffer for DMA. On success the
++ * URB moves to BCE_VHCI_URB_WAITING_FOR_COMPLETION. Returns -ENOMEM if
++ * either reservation fails (nothing stays reserved), 0 otherwise.
++ */
++static int bce_vhci_urb_data_transfer_in(struct bce_vhci_urb *urb, unsigned long *timeout)
++{
++    struct bce_vhci_transfer_queue *q = urb->q;
++    struct bce_vhci *vhci = q->vhci;
++    struct bce_qe_submission *entry;
++    struct bce_vhci_message req;
++    u32 len;
++
++    pr_debug("bce-vhci: [%02x] DMA from device %llx %x\n", urb->q->endp_addr,
++             (u64) urb->urb->transfer_dma, urb->urb->transfer_buffer_length);
++
++    /* Reserve both a message and a submission, so we don't run into issues later. */
++    if (bce_reserve_submission(vhci->msg_asynchronous.sq, timeout)) {
++        pr_err("bce-vhci: Failed to reserve a submission for URB data transfer\n");
++        return -ENOMEM;
++    }
++    if (bce_reserve_submission(q->sq_in, timeout)) {
++        pr_err("bce-vhci: Failed to reserve a submission for URB data transfer\n");
++        /* Give the message slot back; it will not be used. */
++        bce_cancel_submission_reservation(vhci->msg_asynchronous.sq);
++        return -ENOMEM;
++    }
++
++    /* Continue from wherever reception stopped. */
++    urb->send_offset = urb->receive_offset;
++    len = urb->urb->transfer_buffer_length - urb->send_offset;
++
++    spin_lock(&vhci->msg_asynchronous_lock);
++    req.cmd = BCE_VHCI_CMD_TRANSFER_REQUEST;
++    req.status = 0;
++    req.param1 = ((urb->urb->ep->desc.bEndpointAddress & 0x8Fu) << 8) | q->dev_addr;
++    req.param2 = len;
++    bce_vhci_message_queue_write(&vhci->msg_asynchronous, &req);
++    spin_unlock(&vhci->msg_asynchronous_lock);
++
++    entry = bce_next_submission(q->sq_in);
++    bce_set_submission_single(entry, urb->urb->transfer_dma + urb->send_offset, len);
++    bce_submit_to_device(q->sq_in);
++
++    urb->state = BCE_VHCI_URB_WAITING_FOR_COMPLETION;
++    return 0;
++}
++
++/*
++ * Begin the data stage of a URB. IN transfers are started immediately.
++ * OUT transfers wait for the device's transfer request, or are considered
++ * complete straight away when there is no data to send.
++ */
++static int bce_vhci_urb_data_start(struct bce_vhci_urb *urb, unsigned long *timeout)
++{
++    if (urb->dir != DMA_TO_DEVICE)
++        return bce_vhci_urb_data_transfer_in(urb, timeout);
++
++    urb->state = (urb->urb->transfer_buffer_length > 0)
++            ? BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST
++            : BCE_VHCI_URB_DATA_TRANSFER_COMPLETE;
++    return 0;
++}
++
++/*
++ * Post `size` bytes at DMA address `addr` on the URB's OUT submission queue.
++ * The slot is reserved with a zero timeout; -EPIPE is returned if none can
++ * be reserved, 0 once the buffer has been submitted to the device.
++ */
++static int bce_vhci_urb_send_out_data(struct bce_vhci_urb *urb, dma_addr_t addr, size_t size)
++{
++    struct bce_queue_sq *out = urb->q->sq_out;
++    struct bce_qe_submission *entry;
++    unsigned long no_wait = 0;
++
++    if (bce_reserve_submission(out, &no_wait)) {
++        pr_err("bce-vhci: Failed to reserve a submission for URB data transfer\n");
++        return -EPIPE;
++    }
++
++    pr_debug("bce-vhci: [%02x] DMA to device %llx %lx\n", urb->q->endp_addr, (u64) addr, size);
++
++    entry = bce_next_submission(out);
++    bce_set_submission_single(entry, addr, size);
++    bce_submit_to_device(out);
++    return 0;
++}
++
++/*
++ * Feed a device message to the data stage of a URB.
++ *
++ * A transfer request that arrives while the URB is waiting for one sends the
++ * next chunk of OUT data (limited to what the device asked for in param2)
++ * and moves the URB to BCE_VHCI_URB_WAITING_FOR_COMPLETION. Anything else
++ * yields -EAGAIN; it is additionally logged unless it is a transfer request
++ * on a queue that has an OUT submission queue.
++ */
++static int bce_vhci_urb_data_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg)
++{
++    const bool is_request = (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST);
++    u32 tr_len;
++    int err;
++
++    if (is_request && urb->state == BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST) {
++        tr_len = min(urb->urb->transfer_buffer_length - urb->send_offset, (u32) msg->param2);
++        err = bce_vhci_urb_send_out_data(urb, urb->urb->transfer_dma + urb->send_offset, tr_len);
++        if (err)
++            return err;
++        urb->send_offset += tr_len;
++        urb->state = BCE_VHCI_URB_WAITING_FOR_COMPLETION;
++        return 0;
++    }
++
++    /* 0x1000 in out queues aren't really unexpected */
++    if (!(is_request && urb->q->sq_out != NULL)) {
++        pr_err("bce-vhci: [%02x] %s URB unexpected message (state = %x, msg: %x %x %x %llx)\n",
++                urb->q->endp_addr, (urb->is_control ? "Control (data update)" : "Data"), urb->state,
++                msg->cmd, msg->status, msg->param1, msg->param2);
++    }
++    return -EAGAIN;
++}
++
++/*
++ * Account for a finished DMA on the data stage of a URB.
++ *
++ * The received byte count is accumulated. An IN transfer is finished by any
++ * completion, an OUT transfer once the whole buffer has been covered.
++ * Non-control URBs are completed right here, signalled by -ENOENT (the URB
++ * no longer exists); in every other case 0 is returned.
++ */
++static int bce_vhci_urb_data_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c)
++{
++    if (urb->state != BCE_VHCI_URB_WAITING_FOR_COMPLETION) {
++        pr_err("bce-vhci: [%02x] Data URB unexpected completion\n", urb->q->endp_addr);
++        return 0;
++    }
++
++    urb->receive_offset += c->data_size;
++
++    /* OUT transfer with data still outstanding: wait for the next transfer request. */
++    if (urb->dir != DMA_FROM_DEVICE && urb->receive_offset < urb->urb->transfer_buffer_length)
++        return 0;
++
++    urb->urb->actual_length = (u32) urb->receive_offset;
++    urb->state = BCE_VHCI_URB_DATA_TRANSFER_COMPLETE;
++
++    /* Control URBs still have to wait for their status message. */
++    if (urb->is_control)
++        return 0;
++
++    bce_vhci_urb_complete(urb, 0);
++    return -ENOENT;
++}
++
++
++/*
++ * Complete a control URB once its status has been received and it is in a
++ * state where that status may be acted upon.
++ *
++ * That is the case when the data stage is complete, or when the status is a
++ * failure and the URB is already past the setup stage. A failure marks the
++ * queue inactive and stalled, completes the URB with -EPIPE and schedules an
++ * endpoint reset if further URBs are linked. Returns -ENOENT when the URB
++ * was completed (it no longer exists), 0 otherwise.
++ */
++static int bce_vhci_urb_control_check_status(struct bce_vhci_urb *urb)
++{
++    struct bce_vhci_transfer_queue *q = urb->q;
++    bool failed, actionable;
++
++    /* No status message seen yet. */
++    if (urb->received_status == 0)
++        return 0;
++
++    failed = (urb->received_status != BCE_VHCI_SUCCESS);
++    actionable = urb->state == BCE_VHCI_URB_DATA_TRANSFER_COMPLETE ||
++                 (failed && urb->state != BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST &&
++                  urb->state != BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION);
++    if (!actionable)
++        return 0;
++
++    urb->state = BCE_VHCI_URB_CONTROL_COMPLETE;
++    if (!failed) {
++        bce_vhci_urb_complete(urb, 0);
++        return -ENOENT;
++    }
++
++    pr_err("bce-vhci: [%02x] URB failed: %x\n", urb->q->endp_addr, urb->received_status);
++    q->active = false;
++    q->stalled = true;
++    bce_vhci_urb_complete(urb, -EPIPE);
++    /* urb is gone from here on; only q may be used. */
++    if (!list_empty(&q->endp->urb_list))
++        bce_vhci_transfer_queue_request_reset(q);
++    return -ENOENT;
++}
++
++/*
++ * Feed a device message to a control URB.
++ *
++ * A control-transfer status message is recorded and evaluated at once.
++ * While waiting for the setup request, a transfer request triggers sending
++ * the setup packet. During the data stage the message is passed to the data
++ * handler and the status is re-evaluated afterwards. Anything else yields
++ * -EAGAIN, logged unless it is a transfer request on a queue with an OUT
++ * submission queue.
++ */
++static int bce_vhci_urb_control_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg)
++{
++    int err;
++
++    if (msg->cmd == BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS) {
++        urb->received_status = msg->status;
++        return bce_vhci_urb_control_check_status(urb);
++    }
++
++    switch (urb->state) {
++    case BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST:
++        if (msg->cmd != BCE_VHCI_CMD_TRANSFER_REQUEST)
++            break;
++        if (bce_vhci_urb_send_out_data(urb, urb->urb->setup_dma, sizeof(struct usb_ctrlrequest))) {
++            pr_err("bce-vhci: [%02x] Failed to start URB setup transfer\n", urb->q->endp_addr);
++            return 0; /* TODO: fail the URB? */
++        }
++        urb->state = BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION;
++        pr_debug("bce-vhci: [%02x] Sent setup %llx\n", urb->q->endp_addr, urb->urb->setup_dma);
++        return 0;
++
++    case BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST:
++    case BCE_VHCI_URB_WAITING_FOR_COMPLETION:
++        err = bce_vhci_urb_data_update(urb, msg);
++        if (err)
++            return err;
++        return bce_vhci_urb_control_check_status(urb);
++
++    default:
++        break;
++    }
++
++    /* 0x1000 in out queues aren't really unexpected */
++    if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST && urb->q->sq_out != NULL)
++        return -EAGAIN;
++    pr_err("bce-vhci: [%02x] Control URB unexpected message (state = %x, msg: %x %x %x %llx)\n", urb->q->endp_addr,
++            urb->state, msg->cmd, msg->status, msg->param1, msg->param2);
++    return -EAGAIN;
++}
++
++/*
++ * Handle a DMA completion for a control URB.
++ *
++ * Completion of the setup packet starts the data stage (a size mismatch is
++ * only logged); if that fails, the URB is completed with the error. During
++ * the data stage the completion is passed to the data handler and the
++ * status is re-evaluated. Returns -ENOENT when the URB was completed here
++ * or by a callee, 0 otherwise.
++ */
++static int bce_vhci_urb_control_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c)
++{
++    unsigned long timeout;
++    int err;
++
++    switch (urb->state) {
++    case BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION:
++        if (c->data_size != sizeof(struct usb_ctrlrequest))
++            pr_err("bce-vhci: [%02x] transfer complete data size mistmatch for usb_ctrlrequest (%llx instead of %lx)\n",
++                   urb->q->endp_addr, c->data_size, sizeof(struct usb_ctrlrequest));
++
++        timeout = 1000;
++        err = bce_vhci_urb_data_start(urb, &timeout);
++        if (err) {
++            bce_vhci_urb_complete(urb, err);
++            return -ENOENT;
++        }
++        return 0;
++
++    case BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST:
++    case BCE_VHCI_URB_WAITING_FOR_COMPLETION:
++        err = bce_vhci_urb_data_transfer_completion(urb, c);
++        if (err)
++            return err;
++        return bce_vhci_urb_control_check_status(urb);
++
++    default:
++        pr_err("bce-vhci: [%02x] Control URB unexpected completion (state = %x)\n", urb->q->endp_addr, urb->state);
++        return 0;
++    }
++}
++
++/*
++ * Dispatch a device message to the control or data handler of a URB.
++ * A URB that has not been started yet cannot take messages and answers
++ * -EAGAIN.
++ */
++static int bce_vhci_urb_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg)
++{
++    if (urb->state == BCE_VHCI_URB_INIT_PENDING)
++        return -EAGAIN;
++
++    return urb->is_control ? bce_vhci_urb_control_update(urb, msg)
++                           : bce_vhci_urb_data_update(urb, msg);
++}
++
++/* Dispatch a DMA completion to the control or data handler of a URB. */
++static int bce_vhci_urb_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c)
++{
++    return urb->is_control ? bce_vhci_urb_control_transfer_completion(urb, c)
++                           : bce_vhci_urb_data_transfer_completion(urb, c);
++}
++
++/*
++ * Resume an URB: one that is in BCE_VHCI_URB_WAITING_FOR_COMPLETION gets its
++ * inbound data transfer issued again; if that fails the URB is completed
++ * with the error. URBs in any other state are left untouched.
++ */
++static void bce_vhci_urb_resume(struct bce_vhci_urb *urb)
++{
++    int err;
++
++    if (urb->state != BCE_VHCI_URB_WAITING_FOR_COMPLETION)
++        return;
++
++    err = bce_vhci_urb_data_transfer_in(urb, NULL);
++    if (err)
++        bce_vhci_urb_complete(urb, err);
++}
+diff --git a/drivers/staging/apple-bce/vhci/transfer.h b/drivers/staging/apple-bce/vhci/transfer.h
+new file mode 100644
+index 000000000000..89ecad6bcf8f
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/transfer.h
+@@ -0,0 +1,73 @@
++#ifndef BCEDRIVER_TRANSFER_H
++#define BCEDRIVER_TRANSFER_H
++
++#include <linux/usb.h>
++#include "queue.h"
++#include "command.h"
++#include "../queue.h"
++
++/* A bce_vhci_message paired with a list_head so that it can be linked into a list. */
++struct bce_vhci_list_message {
++    struct list_head list;
++    struct bce_vhci_message msg;
++};
++/*
++ * Who asked for a transfer queue to be paused; passed to
++ * bce_vhci_transfer_queue_pause()/_resume(). The values are distinct bits.
++ * NOTE(review): presumably they are OR-ed into
++ * bce_vhci_transfer_queue.paused_by so several sources can keep a queue
++ * paused at once - confirm against transfer.c.
++ */
++enum bce_vhci_pause_source {
++    BCE_VHCI_PAUSE_INTERNAL_WQ = 1,
++    BCE_VHCI_PAUSE_FIRMWARE = 2,
++    BCE_VHCI_PAUSE_SUSPEND = 4,
++    BCE_VHCI_PAUSE_SHUTDOWN = 8
++};
++/*
++ * Per-endpoint transfer queue. One instance exists for every endpoint of a
++ * device (see bce_vhci_device.tq[]) and is stored in the endpoint's hcpriv.
++ */
++struct bce_vhci_transfer_queue {
++    struct bce_vhci *vhci;              /* owning controller */
++    struct usb_host_endpoint *endp;     /* USB endpoint this queue serves */
++    enum bce_vhci_endpoint_state state;
++    u32 max_active_requests, remaining_active_requests;
++    bool active, stalled;
++    u32 paused_by;                      /* presumably a mask of enum bce_vhci_pause_source - confirm */
++    bce_vhci_device_t dev_addr;         /* firmware device id the endpoint belongs to */
++    u8 endp_addr;
++    struct bce_queue_cq *cq;
++    struct bce_queue_sq *sq_in;
++    struct bce_queue_sq *sq_out;
++    struct list_head evq;
++    struct spinlock urb_lock;           /* held by vhci.c while setting `stalled` */
++    struct mutex pause_lock;
++    struct list_head giveback_urb_list;
++
++    struct work_struct w_reset;
++};
++/*
++ * State of a bce_vhci_urb. The first group applies to every URB, the
++ * BCE_VHCI_URB_CONTROL_* group only appears in control-transfer handling.
++ */
++enum bce_vhci_urb_state {
++    /* Messages for an URB in this state are answered with -EAGAIN */
++    BCE_VHCI_URB_INIT_PENDING,
++
++    BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST,
++    BCE_VHCI_URB_WAITING_FOR_COMPLETION,
++    BCE_VHCI_URB_DATA_TRANSFER_COMPLETE,
++
++    BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST,
++    /* On completion in this state the data stage of the control URB is started */
++    BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION,
++    BCE_VHCI_URB_CONTROL_COMPLETE
++};
++/* Driver-side bookkeeping for one USB core URB travelling through a transfer queue. */
++struct bce_vhci_urb {
++    struct urb *urb;                    /* the USB core URB being serviced */
++    struct bce_vhci_transfer_queue *q;  /* queue the URB was submitted to */
++    enum dma_data_direction dir;
++    bool is_control;                    /* selects the control vs. data handlers */
++    enum bce_vhci_urb_state state;
++    int received_status;
++    u32 send_offset;
++    u32 receive_offset;
++};
++
++void bce_vhci_create_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q,
++        struct usb_host_endpoint *endp, bce_vhci_device_t dev_addr, enum dma_data_direction dir);
++void bce_vhci_destroy_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q);
++void bce_vhci_transfer_queue_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg);
++int bce_vhci_transfer_queue_do_pause(struct bce_vhci_transfer_queue *q);
++int bce_vhci_transfer_queue_do_resume(struct bce_vhci_transfer_queue *q);
++int bce_vhci_transfer_queue_pause(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src);
++int bce_vhci_transfer_queue_resume(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src);
++void bce_vhci_transfer_queue_request_reset(struct bce_vhci_transfer_queue *q);
++
++int bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, struct urb *urb);
++int bce_vhci_urb_request_cancel(struct bce_vhci_transfer_queue *q, struct urb *urb, int status);
++
++#endif //BCEDRIVER_TRANSFER_H
+diff --git a/drivers/staging/apple-bce/vhci/vhci.c b/drivers/staging/apple-bce/vhci/vhci.c
+new file mode 100644
+index 000000000000..eb26f55000d8
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/vhci.c
+@@ -0,0 +1,759 @@
++#include "vhci.h"
++#include "../apple_bce.h"
++#include "command.h"
++#include <linux/usb.h>
++#include <linux/usb/hcd.h>
++#include <linux/module.h>
++#include <linux/version.h>
++
++static dev_t bce_vhci_chrdev;
++static struct class *bce_vhci_class;
++static const struct hc_driver bce_vhci_driver;
++static u16 bce_vhci_port_mask = U16_MAX;
++
++static int bce_vhci_create_event_queues(struct bce_vhci *vhci);
++static void bce_vhci_destroy_event_queues(struct bce_vhci *vhci);
++static int bce_vhci_create_message_queues(struct bce_vhci *vhci);
++static void bce_vhci_destroy_message_queues(struct bce_vhci *vhci);
++static void bce_vhci_handle_firmware_events_w(struct work_struct *ws);
++static void bce_vhci_firmware_event_completion(struct bce_queue_sq *sq);
++
++/*
++ * Set up the virtual host controller: the "bce-vhci" device node, the
++ * message queues, the ordered workqueue, the event queues and finally the
++ * USB HCD, which is registered with the USB core.
++ *
++ * Returns 0 on success or a negative errno. On failure everything that had
++ * been set up is released again in reverse order.
++ */
++int bce_vhci_create(struct apple_bce_device *dev, struct bce_vhci *vhci)
++{
++    int status;
++
++    spin_lock_init(&vhci->hcd_spinlock);
++
++    vhci->dev = dev;
++
++    vhci->vdevt = bce_vhci_chrdev;
++    vhci->vdev = device_create(bce_vhci_class, dev->dev, vhci->vdevt, NULL, "bce-vhci");
++    if (IS_ERR_OR_NULL(vhci->vdev)) {
++        /* PTR_ERR(NULL) is 0; fail_dev turns that into -EINVAL */
++        status = PTR_ERR(vhci->vdev);
++        goto fail_dev;
++    }
++
++    if ((status = bce_vhci_create_message_queues(vhci)))
++        goto fail_mq;
++
++    /*
++     * Set up the workqueue and the work item before the event queues exist:
++     * the firmware-event completion callback queues w_fw_events onto
++     * tq_state_wq, so both have to be valid by the time it can run.
++     */
++    vhci->tq_state_wq = alloc_ordered_workqueue("bce-vhci-tq-state", 0);
++    if (!vhci->tq_state_wq) {
++        status = -ENOMEM;
++        goto fail_wq;
++    }
++    INIT_WORK(&vhci->w_fw_events, bce_vhci_handle_firmware_events_w);
++
++    if ((status = bce_vhci_create_event_queues(vhci)))
++        goto fail_eq;
++
++    vhci->hcd = usb_create_hcd(&bce_vhci_driver, vhci->vdev, "bce-vhci");
++    if (!vhci->hcd) {
++        status = -ENOMEM;
++        goto fail_hcd;
++    }
++    vhci->hcd->self.sysdev = &dev->pci->dev;
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5,4,0)
++    vhci->hcd->self.uses_dma = 1;
++#endif
++    /* hcd_priv holds a single back pointer, see bce_vhci_from_hcd() */
++    *((struct bce_vhci **) vhci->hcd->hcd_priv) = vhci;
++    vhci->hcd->speed = HCD_USB2;
++
++    if ((status = usb_add_hcd(vhci->hcd, 0, 0)))
++        goto fail_add_hcd;
++
++    return 0;
++
++fail_add_hcd:
++    /* Drop the reference obtained from usb_create_hcd() */
++    usb_put_hcd(vhci->hcd);
++fail_hcd:
++    bce_vhci_destroy_event_queues(vhci);
++fail_eq:
++    destroy_workqueue(vhci->tq_state_wq);
++fail_wq:
++    bce_vhci_destroy_message_queues(vhci);
++fail_mq:
++    device_destroy(bce_vhci_class, vhci->vdevt);
++fail_dev:
++    if (!status)
++        status = -EINVAL;
++    return status;
++}
++
++/*
++ * Tear down everything bce_vhci_create() set up: unregister the HCD, destroy
++ * the event and message queues, release the workqueue and the HCD reference
++ * and finally remove the device node.
++ */
++void bce_vhci_destroy(struct bce_vhci *vhci)
++{
++    usb_remove_hcd(vhci->hcd);
++    bce_vhci_destroy_event_queues(vhci);
++    bce_vhci_destroy_message_queues(vhci);
++    /*
++     * The workqueue was never released before. It is destroyed after the
++     * event queues because their completion callback queues work onto it.
++     * NOTE(review): destroy_workqueue() drains pending work first; confirm
++     * no firmware-event work can still be pending at this point.
++     */
++    if (vhci->tq_state_wq)
++        destroy_workqueue(vhci->tq_state_wq);
++    /* usb_remove_hcd() does not drop the reference taken by usb_create_hcd() */
++    usb_put_hcd(vhci->hcd);
++    device_destroy(bce_vhci_class, vhci->vdevt);
++}
++
++/*
++ * Return the bce_vhci that owns `hcd`. The HCD private area is sized to hold
++ * exactly one pointer (hcd_priv_size in bce_vhci_driver) which
++ * bce_vhci_create() fills in.
++ */
++struct bce_vhci *bce_vhci_from_hcd(struct usb_hcd *hcd)
++{
++    return *((struct bce_vhci **) hcd->hcd_priv);
++}
++
++/*
++ * hc_driver .start callback: enable and start the controller and derive the
++ * port count from the port mask handed back by
++ * bce_vhci_cmd_controller_enable(). Returns 0 or the error of the failing
++ * command.
++ */
++int bce_vhci_start(struct usb_hcd *hcd)
++{
++    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++    u16 mask = 0;
++    bce_vhci_port_t count = 0;
++    int err;
++
++    err = bce_vhci_cmd_controller_enable(&vhci->cq, 1, &mask);
++    if (err)
++        return err;
++    vhci->port_mask = mask;
++    vhci->port_power_mask = 0;
++
++    err = bce_vhci_cmd_controller_start(&vhci->cq);
++    if (err)
++        return err;
++
++    /* The port count is the position of the highest bit set in the mask */
++    for (mask = vhci->port_mask; mask; mask >>= 1)
++        count += 1;
++    vhci->port_count = count;
++    return 0;
++}
++
++/* hc_driver .stop callback: send the controller-disable command; its result is ignored. */
++void bce_vhci_stop(struct usb_hcd *hcd)
++{
++    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++    bce_vhci_cmd_controller_disable(&vhci->cq);
++}
++
++/*
++ * hc_driver .hub_status_data callback. Always returns 0 and never writes to
++ * `buf`.
++ * NOTE(review): presumably this means no port change is ever reported via
++ * this callback - confirm that this is intended.
++ */
++static int bce_vhci_hub_status_data(struct usb_hcd *hcd, char *buf)
++{
++    return 0;
++}
++
++static int bce_vhci_reset_device(struct bce_vhci *vhci, int index, u16 timeout);
++
++/*
++ * hc_driver .hub_control callback: emulates the root hub.
++ *
++ * Handled requests:
++ *  - GetHubDescriptor / GetHubStatus / GetPortStatus (only when wLength is
++ *    large enough for the reply),
++ *  - SetPortFeature:   POWER, RESET, SUSPEND,
++ *  - ClearPortFeature: ENABLE, POWER, C_CONNECTION, C_RESET, SUSPEND.
++ *
++ * Everything else (including the Get* requests with a too small wLength) is
++ * logged together with a stack dump and answered with -EIO.
++ *
++ * Returns 0 on success, -EIO for unhandled requests, or the status of the
++ * firmware command that was issued.
++ */
++static int bce_vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, char *buf, u16 wLength)
++{
++    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++    int status;
++    struct usb_hub_descriptor *hd;
++    struct usb_hub_status *hs;
++    struct usb_port_status *ps;
++    u32 port_status;
++    // pr_info("bce-vhci: bce_vhci_hub_control %x %i %i [bufl=%i]\n", typeReq, wValue, wIndex, wLength);
++    if (typeReq == GetHubDescriptor && wLength >= sizeof(struct usb_hub_descriptor)) {
++        hd = (struct usb_hub_descriptor *) buf;
++        memset(hd, 0, sizeof(*hd));
++        hd->bDescLength = sizeof(struct usb_hub_descriptor);
++        hd->bDescriptorType = USB_DT_HUB;
++        hd->bNbrPorts = (u8) vhci->port_count;
++        hd->wHubCharacteristics = HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_INDV_PORT_OCPM;
++        hd->bPwrOn2PwrGood = 0;
++        hd->bHubContrCurrent = 0;
++        return 0;
++    } else if (typeReq == GetHubStatus && wLength >= sizeof(struct usb_hub_status)) {
++        hs = (struct usb_hub_status *) buf;
++        memset(hs, 0, sizeof(*hs));
++        hs->wHubStatus = 0;
++        hs->wHubChange = 0;
++        return 0;
++    } else if (typeReq == GetPortStatus && wLength >= 4 /* usb 2.0 */) {
++        ps = (struct usb_port_status *) buf;
++        ps->wPortStatus = 0;
++        ps->wPortChange = 0;
++
++        if (vhci->port_power_mask & BIT(wIndex))
++            ps->wPortStatus |= USB_PORT_STAT_POWER;
++
++        /* Ports masked out by the vhci_port_mask module parameter only report their power state */
++        if (!(bce_vhci_port_mask & BIT(wIndex)))
++            return 0;
++
++        if ((status = bce_vhci_cmd_port_status(&vhci->cq, (u8) wIndex, 0, &port_status)))
++            return status;
++
++        /* Translate the firmware status bits into USB port status bits */
++        if (port_status & 16)
++            ps->wPortStatus |= USB_PORT_STAT_ENABLE | USB_PORT_STAT_HIGH_SPEED;
++        if (port_status & 4)
++            ps->wPortStatus |= USB_PORT_STAT_CONNECTION;
++        if (port_status & 2)
++            ps->wPortStatus |= USB_PORT_STAT_OVERCURRENT;
++        if (port_status & 8)
++            ps->wPortStatus |= USB_PORT_STAT_RESET;
++        if (port_status & 0x60)
++            ps->wPortStatus |= USB_PORT_STAT_SUSPEND;
++
++        if (port_status & 0x40000)
++            ps->wPortChange |= USB_PORT_STAT_C_CONNECTION;
++
++        pr_debug("bce-vhci: Translated status %x to %x:%x\n", port_status, ps->wPortStatus, ps->wPortChange);
++        return 0;
++    } else if (typeReq == SetPortFeature) {
++        if (wValue == USB_PORT_FEAT_POWER) {
++            status = bce_vhci_cmd_port_power_on(&vhci->cq, (u8) wIndex);
++            /* As far as I am aware, power status is not part of the port status so store it separately */
++            if (!status)
++                vhci->port_power_mask |= BIT(wIndex);
++            return status;
++        }
++        if (wValue == USB_PORT_FEAT_RESET) {
++            /*
++             * NOTE(review): the third argument of bce_vhci_reset_device() is
++             * its `timeout`, yet wValue (== USB_PORT_FEAT_RESET here) is
++             * passed - confirm that this is intended.
++             */
++            return bce_vhci_reset_device(vhci, wIndex, wValue);
++        }
++        if (wValue == USB_PORT_FEAT_SUSPEND) {
++            /* TODO: Am I supposed to also suspend the endpoints? */
++            pr_debug("bce-vhci: Suspending port %i\n", wIndex);
++            return bce_vhci_cmd_port_suspend(&vhci->cq, (u8) wIndex);
++        }
++    } else if (typeReq == ClearPortFeature) {
++        if (wValue == USB_PORT_FEAT_ENABLE)
++            return bce_vhci_cmd_port_disable(&vhci->cq, (u8) wIndex);
++        if (wValue == USB_PORT_FEAT_POWER) {
++            status = bce_vhci_cmd_port_power_off(&vhci->cq, (u8) wIndex);
++            if (!status)
++                vhci->port_power_mask &= ~BIT(wIndex);
++            return status;
++        }
++        /* 0x40000 is the same bit GetPortStatus reports as C_CONNECTION */
++        if (wValue == USB_PORT_FEAT_C_CONNECTION)
++            return bce_vhci_cmd_port_status(&vhci->cq, (u8) wIndex, 0x40000, &port_status);
++        if (wValue == USB_PORT_FEAT_C_RESET) { /* I don't think I can transfer it in any way */
++            return 0;
++        }
++        if (wValue == USB_PORT_FEAT_SUSPEND) {
++            pr_debug("bce-vhci: Resuming port %i\n", wIndex);
++            return bce_vhci_cmd_port_resume(&vhci->cq, (u8) wIndex);
++        }
++    }
++    pr_err("bce-vhci: bce_vhci_hub_control unhandled request: %x %i %i [bufl=%i]\n", typeReq, wValue, wIndex, wLength);
++    dump_stack();
++    return -EIO;
++}
++
++/*
++ * hc_driver .enable_device callback: create the firmware-side device for the
++ * port of `udev`, allocate the driver-side bce_vhci_device and set up the
++ * transfer queue and firmware endpoint for endpoint 0.
++ *
++ * Returns 0 on success (or when the port already has a device), -EIO when
++ * the firmware refuses to create the device or returns an unusable id, and
++ * -ENOMEM when the allocation fails.
++ */
++static int bce_vhci_enable_device(struct usb_hcd *hcd, struct usb_device *udev)
++{
++    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++    struct bce_vhci_device *vdev;
++    bce_vhci_device_t devid;
++    pr_info("bce_vhci_enable_device\n");
++
++    if (vhci->port_to_device[udev->portnum])
++        return 0;
++
++    /* We need to early address the device */
++    if (bce_vhci_cmd_device_create(&vhci->cq, udev->portnum, &devid))
++        return -EIO;
++
++    pr_info("bce_vhci_cmd_device_create %i -> %i\n", udev->portnum, devid);
++
++    /* devid indexes vhci->devices[]; never trust it beyond the array size */
++    if (devid >= ARRAY_SIZE(vhci->devices)) {
++        bce_vhci_cmd_device_destroy(&vhci->cq, devid);
++        return -EIO;
++    }
++
++    vdev = kzalloc(sizeof(struct bce_vhci_device), GFP_KERNEL);
++    if (!vdev) {
++        /* Undo the firmware-side creation so that the device is not leaked */
++        bce_vhci_cmd_device_destroy(&vhci->cq, devid);
++        return -ENOMEM;
++    }
++    vhci->port_to_device[udev->portnum] = devid;
++    vhci->devices[devid] = vdev;
++
++    bce_vhci_create_transfer_queue(vhci, &vdev->tq[0], &udev->ep0, devid, DMA_BIDIRECTIONAL);
++    udev->ep0.hcpriv = &vdev->tq[0];
++    vdev->tq_mask |= BIT(0);
++
++    /* The result of the endpoint creation is not checked (unchanged behavior) */
++    bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &udev->ep0.desc);
++    return 0;
++}
++
++/*
++ * hc_driver .address_device callback. Simply forwards to
++ * bce_vhci_enable_device(); `timeout_ms` is currently ignored.
++ */
++static int bce_vhci_address_device(struct usb_hcd *hcd, struct usb_device *udev, unsigned int timeout_ms) //TODO: follow timeout
++{
++    /* This is the same as enable_device, but instead in the old scheme */
++    return bce_vhci_enable_device(hcd, udev);
++}
++
++/*
++ * hc_driver .free_dev callback: shut down every transfer queue of the device
++ * on the port of `udev`, destroy the firmware endpoints and device, and free
++ * the driver-side bce_vhci_device. Does nothing if the port has no device.
++ */
++static void bce_vhci_free_device(struct usb_hcd *hcd, struct usb_device *udev)
++{
++    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++    struct bce_vhci_device *vdev;
++    bce_vhci_device_t id;
++    int ep;
++
++    pr_info("bce_vhci_free_device %i\n", udev->portnum);
++
++    id = vhci->port_to_device[udev->portnum];
++    if (!id)
++        return;
++    vdev = vhci->devices[id];
++
++    for (ep = 0; ep < 32; ep++) {
++        if (!(vdev->tq_mask & BIT(ep)))
++            continue;
++        bce_vhci_transfer_queue_pause(&vdev->tq[ep], BCE_VHCI_PAUSE_SHUTDOWN);
++        bce_vhci_cmd_endpoint_destroy(&vhci->cq, id, (u8) ep);
++        bce_vhci_destroy_transfer_queue(vhci, &vdev->tq[ep]);
++    }
++
++    /* Unlink the device from the lookup tables before it is destroyed and freed */
++    vhci->devices[id] = NULL;
++    vhci->port_to_device[udev->portnum] = 0;
++    bce_vhci_cmd_device_destroy(&vhci->cq, id);
++    kfree(vdev);
++}
++
++/*
++ * Reset port `index`. If a device is attached to the port, its transfer
++ * queues, firmware endpoints and firmware device are destroyed before the
++ * reset and created again afterwards (the firmware may hand out a new
++ * device id); the driver-side bce_vhci_device is kept and reused.
++ *
++ * Returns the error of the device re-creation if that fails, otherwise the
++ * status of the port reset command.
++ */
++static int bce_vhci_reset_device(struct bce_vhci *vhci, int index, u16 timeout)
++{
++    struct bce_vhci_device *dev = NULL;
++    bce_vhci_device_t devid;
++    int i;
++    int status;
++    int create_status;
++    enum dma_data_direction dir;
++    pr_info("bce_vhci_reset_device %i\n", index);
++
++    devid = vhci->port_to_device[index];
++    if (devid) {
++        dev = vhci->devices[devid];
++
++        for (i = 0; i < 32; i++) {
++            if (dev->tq_mask & BIT(i)) {
++                bce_vhci_transfer_queue_pause(&dev->tq[i], BCE_VHCI_PAUSE_SHUTDOWN);
++                bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) i);
++                bce_vhci_destroy_transfer_queue(vhci, &dev->tq[i]);
++            }
++        }
++        vhci->devices[devid] = NULL;
++        vhci->port_to_device[index] = 0;
++        bce_vhci_cmd_device_destroy(&vhci->cq, devid);
++    }
++    status = bce_vhci_cmd_port_reset(&vhci->cq, (u8) index, timeout);
++
++    if (dev) {
++        /*
++         * Keep the reset status in `status`: it used to be overwritten
++         * here, which hid a failed port reset whenever a device was attached.
++         */
++        create_status = bce_vhci_cmd_device_create(&vhci->cq, index, &devid);
++        if (create_status)
++            return create_status;
++        vhci->devices[devid] = dev;
++        vhci->port_to_device[index] = devid;
++
++        /* tq_mask was left untouched above, so it still names the endpoints to rebuild */
++        for (i = 0; i < 32; i++) {
++            if (dev->tq_mask & BIT(i)) {
++                dir = usb_endpoint_dir_in(&dev->tq[i].endp->desc) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
++                if (i == 0)
++                    dir = DMA_BIDIRECTIONAL;
++                bce_vhci_create_transfer_queue(vhci, &dev->tq[i], dev->tq[i].endp, devid, dir);
++                bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &dev->tq[i].endp->desc);
++            }
++        }
++    }
++
++    return status;
++}
++
++/* hc_driver .check_bandwidth callback: performs no check and always returns 0. */
++static int bce_vhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev)
++{
++    return 0;
++}
++
++/* hc_driver .get_frame_number callback: always reports frame number 0. */
++static int bce_vhci_get_frame_number(struct usb_hcd *hcd)
++{
++    return 0;
++}
++
++/*
++ * hc_driver .bus_suspend callback. Pauses every transfer queue of every
++ * attached device, suspends the ports that have a device, pauses the
++ * controller and finally pauses the five event queues.
++ *
++ * Returns 0, or the error of the controller pause command (in which case
++ * the event queues are left running).
++ */
++static int bce_vhci_bus_suspend(struct usb_hcd *hcd)
++{
++    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++    struct bce_vhci_device *vdev;
++    int port, ep;
++    int err;
++
++    pr_info("bce_vhci: suspend started\n");
++
++    pr_info("bce_vhci: suspend endpoints\n");
++    for (port = 0; port < 16; port++) {
++        if (!vhci->port_to_device[port])
++            continue;
++        vdev = vhci->devices[vhci->port_to_device[port]];
++        for (ep = 0; ep < 32; ep++) {
++            if (vdev->tq_mask & BIT(ep))
++                bce_vhci_transfer_queue_pause(&vdev->tq[ep], BCE_VHCI_PAUSE_SUSPEND);
++        }
++    }
++
++    pr_info("bce_vhci: suspend ports\n");
++    for (port = 0; port < 16; port++) {
++        if (vhci->port_to_device[port])
++            bce_vhci_cmd_port_suspend(&vhci->cq, port);
++    }
++
++    pr_info("bce_vhci: suspend controller\n");
++    err = bce_vhci_cmd_controller_pause(&vhci->cq);
++    if (err)
++        return err;
++
++    bce_vhci_event_queue_pause(&vhci->ev_commands);
++    bce_vhci_event_queue_pause(&vhci->ev_system);
++    bce_vhci_event_queue_pause(&vhci->ev_isochronous);
++    bce_vhci_event_queue_pause(&vhci->ev_interrupt);
++    bce_vhci_event_queue_pause(&vhci->ev_asynchronous);
++    pr_info("bce_vhci: suspend done\n");
++    return 0;
++}
++
++/*
++ * hc_driver .bus_resume callback; the counterpart of bce_vhci_bus_suspend().
++ * Resumes the event queues, starts the controller again, resumes the ports
++ * that have a device and then resumes their transfer queues.
++ *
++ * Returns 0, or the error of the controller start command.
++ */
++static int bce_vhci_bus_resume(struct usb_hcd *hcd)
++{
++    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++    struct bce_vhci_device *vdev;
++    int port, ep;
++    int err;
++
++    pr_info("bce_vhci: resume started\n");
++
++    bce_vhci_event_queue_resume(&vhci->ev_system);
++    bce_vhci_event_queue_resume(&vhci->ev_isochronous);
++    bce_vhci_event_queue_resume(&vhci->ev_interrupt);
++    bce_vhci_event_queue_resume(&vhci->ev_asynchronous);
++    bce_vhci_event_queue_resume(&vhci->ev_commands);
++
++    pr_info("bce_vhci: resume controller\n");
++    err = bce_vhci_cmd_controller_start(&vhci->cq);
++    if (err)
++        return err;
++
++    pr_info("bce_vhci: resume ports\n");
++    for (port = 0; port < 16; port++) {
++        if (vhci->port_to_device[port])
++            bce_vhci_cmd_port_resume(&vhci->cq, port);
++    }
++
++    pr_info("bce_vhci: resume endpoints\n");
++    for (port = 0; port < 16; port++) {
++        if (!vhci->port_to_device[port])
++            continue;
++        vdev = vhci->devices[vhci->port_to_device[port]];
++        for (ep = 0; ep < 32; ep++) {
++            if (vdev->tq_mask & BIT(ep))
++                bce_vhci_transfer_queue_resume(&vdev->tq[ep], BCE_VHCI_PAUSE_SUSPEND);
++        }
++    }
++
++    pr_info("bce_vhci: resume done\n");
++    return 0;
++}
++
++/*
++ * hc_driver .urb_enqueue callback: hand the URB to the transfer queue stored
++ * in the endpoint's hcpriv. Returns -ENOENT if the endpoint has no queue,
++ * otherwise the result of bce_vhci_urb_create().
++ */
++static int bce_vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
++{
++    struct bce_vhci_transfer_queue *q = urb->ep->hcpriv;
++    /* Check q first: the debug print below dereferences it */
++    if (!q)
++        return -ENOENT;
++    pr_debug("bce_vhci_urb_enqueue %i:%x\n", q->dev_addr, urb->ep->desc.bEndpointAddress);
++    return bce_vhci_urb_create(q, urb);
++}
++
++/*
++ * hc_driver .urb_dequeue callback: ask the endpoint's transfer queue to
++ * cancel the URB with `status`. Returns -ENOENT if the endpoint has no
++ * queue, otherwise the result of bce_vhci_urb_request_cancel().
++ */
++static int bce_vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
++{
++    struct bce_vhci_transfer_queue *q = urb->ep->hcpriv;
++    pr_debug("bce_vhci_urb_dequeue %x\n", urb->ep->desc.bEndpointAddress);
++    /* Same guard as in urb_enqueue/endpoint_reset: hcpriv may be unset */
++    if (!q)
++        return -ENOENT;
++    return bce_vhci_urb_request_cancel(q, urb, status);
++}
++
++/*
++ * hc_driver .endpoint_reset callback: request a reset of the endpoint's
++ * transfer queue; endpoints without a queue are ignored.
++ */
++static void bce_vhci_endpoint_reset(struct usb_hcd *hcd, struct usb_host_endpoint *ep)
++{
++    struct bce_vhci_transfer_queue *tq = ep->hcpriv;
++
++    pr_debug("bce_vhci_endpoint_reset\n");
++    if (!tq)
++        return;
++    bce_vhci_transfer_queue_request_reset(tq);
++}
++
++/*
++ * Map a USB endpoint address to an index into bce_vhci_device.tq[]:
++ * addresses without bit 0x80 map to 0x00-0x0f, addresses with bit 0x80
++ * (IN endpoints) map to 0x10-0x1f.
++ */
++static u8 bce_vhci_endpoint_index(u8 addr)
++{
++    u8 index = (u8) (addr & 0xf);
++
++    if (addr & 0x80)
++        index = (u8) (0x10 + index);
++    return index;
++}
++
++/*
++ * hc_driver .add_endpoint callback: make sure a transfer queue and a
++ * firmware endpoint exist for `endp` and point endp->hcpriv at the queue.
++ *
++ * Returns 0 on success (also for the root hub and for endpoints that already
++ * have a queue) and -ENODEV if the port has no driver-side device.
++ */
++static int bce_vhci_add_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *endp)
++{
++    u8 idx = bce_vhci_endpoint_index(endp->desc.bEndpointAddress);
++    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++    bce_vhci_device_t devid = vhci->port_to_device[udev->portnum];
++    struct bce_vhci_device *vdev = vhci->devices[devid];
++    struct bce_vhci_transfer_queue *tq;
++    enum dma_data_direction dir;
++
++    pr_debug("bce_vhci_add_endpoint %x/%x:%x\n", udev->portnum, devid, idx);
++
++    if (udev->bus->root_hub == udev) /* The USB hub */
++        return 0;
++    if (vdev == NULL)
++        return -ENODEV;
++
++    tq = &vdev->tq[idx];
++    if (!(vdev->tq_mask & BIT(idx))) {
++        /* First use of this endpoint: create the queue and the firmware endpoint */
++        dir = usb_endpoint_dir_in(&endp->desc) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
++        bce_vhci_create_transfer_queue(vhci, tq, endp, devid, dir);
++        endp->hcpriv = tq;
++        vdev->tq_mask |= BIT(idx);
++
++        bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &endp->desc);
++        return 0;
++    }
++
++    /* The queue already exists, only (re)attach it to the endpoint */
++    endp->hcpriv = tq;
++    return 0;
++}
++
++/*
++ * hc_driver .drop_endpoint callback: destroy the firmware endpoint and the
++ * transfer queue belonging to `endp`. If hcpriv has been cleared, the queue
++ * is looked up through the device's tq_mask instead; if there is none,
++ * nothing is done. Always returns 0.
++ */
++static int bce_vhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *endp)
++{
++    u8 endp_index = bce_vhci_endpoint_index(endp->desc.bEndpointAddress);
++    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++    bce_vhci_device_t devid = vhci->port_to_device[udev->portnum];
++    struct bce_vhci_transfer_queue *q = endp->hcpriv;
++    struct bce_vhci_device *vdev = vhci->devices[devid];
++    pr_info("bce_vhci_drop_endpoint %x:%x\n", udev->portnum, endp_index);
++    if (!q) {
++        if (vdev && vdev->tq_mask & BIT(endp_index)) {
++            pr_err("something deleted the hcpriv?\n");
++            q = &vdev->tq[endp_index];
++        } else {
++            return 0;
++        }
++    }
++
++    bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) (endp->desc.bEndpointAddress & 0x8Fu));
++    /*
++     * vdev can be NULL while hcpriv is still set, e.g. when
++     * bce_vhci_reset_device() unlinked the device and could not create it
++     * again, so do not dereference it blindly.
++     */
++    if (vdev)
++        vdev->tq_mask &= ~BIT(endp_index);
++    bce_vhci_destroy_transfer_queue(vhci, q);
++    return 0;
++}
++
++/*
++ * Create the five host message queues and the command queue that sits on
++ * top of msg_commands. Creation stops at the first queue that fails; in that
++ * case all message queues are destroyed again and -EINVAL is returned.
++ */
++static int bce_vhci_create_message_queues(struct bce_vhci *vhci)
++{
++    const struct {
++        struct bce_vhci_message_queue *mq;
++        char *name;
++    } queues[] = {
++        { &vhci->msg_commands,     "VHC1HostCommands" },
++        { &vhci->msg_system,       "VHC1HostSystemEvents" },
++        { &vhci->msg_isochronous,  "VHC1HostIsochronousEvents" },
++        { &vhci->msg_interrupt,    "VHC1HostInterruptEvents" },
++        { &vhci->msg_asynchronous, "VHC1HostAsynchronousEvents" },
++    };
++    size_t i;
++
++    for (i = 0; i < ARRAY_SIZE(queues); i++) {
++        if (bce_vhci_message_queue_create(vhci, queues[i].mq, queues[i].name)) {
++            bce_vhci_destroy_message_queues(vhci);
++            return -EINVAL;
++        }
++    }
++
++    spin_lock_init(&vhci->msg_asynchronous_lock);
++    bce_vhci_command_queue_create(&vhci->cq, &vhci->msg_commands);
++    return 0;
++}
++
++/* Destroy the command queue first and then the five host message queues. */
++static void bce_vhci_destroy_message_queues(struct bce_vhci *vhci)
++{
++    struct bce_vhci_message_queue *queues[] = {
++        &vhci->msg_commands,
++        &vhci->msg_system,
++        &vhci->msg_isochronous,
++        &vhci->msg_interrupt,
++        &vhci->msg_asynchronous,
++    };
++    size_t i;
++
++    bce_vhci_command_queue_destroy(&vhci->cq);
++    for (i = 0; i < ARRAY_SIZE(queues); i++)
++        bce_vhci_message_queue_destroy(vhci, queues[i]);
++}
++
++static void bce_vhci_handle_system_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg);
++static void bce_vhci_handle_usb_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg);
++
++/*
++ * Create the completion queue shared by the firmware event queues and the
++ * five event queues themselves. ev_commands is created through
++ * __bce_vhci_event_queue_create() with bce_vhci_firmware_event_completion()
++ * as its callback; the other four go through bce_vhci_event_queue_create()
++ * with a per-message handler.
++ *
++ * Returns 0 on success and -EINVAL on any failure. If one of the event
++ * queues fails, everything is destroyed again (including the queues that
++ * were never created - NOTE(review): relies on
++ * bce_vhci_event_queue_destroy() coping with that, confirm).
++ */
++static int bce_vhci_create_event_queues(struct bce_vhci *vhci)
++{
++    vhci->ev_cq = bce_create_cq(vhci->dev, 0x100);
++    if (!vhci->ev_cq)
++        return -EINVAL;
++#define CREATE_EVENT_QUEUE(field, name, cb) bce_vhci_event_queue_create(vhci, &vhci->field, name, cb)
++    if (__bce_vhci_event_queue_create(vhci, &vhci->ev_commands, "VHC1FirmwareCommands",
++            bce_vhci_firmware_event_completion) ||
++        CREATE_EVENT_QUEUE(ev_system,       "VHC1FirmwareSystemEvents",       bce_vhci_handle_system_event) ||
++        CREATE_EVENT_QUEUE(ev_isochronous,  "VHC1FirmwareIsochronousEvents",  bce_vhci_handle_usb_event) ||
++        CREATE_EVENT_QUEUE(ev_interrupt,    "VHC1FirmwareInterruptEvents",    bce_vhci_handle_usb_event) ||
++        CREATE_EVENT_QUEUE(ev_asynchronous, "VHC1FirmwareAsynchronousEvents", bce_vhci_handle_usb_event)) {
++        bce_vhci_destroy_event_queues(vhci);
++        return -EINVAL;
++    }
++#undef CREATE_EVENT_QUEUE
++    return 0;
++}
++
++/* Destroy the five firmware event queues and then, if present, their completion queue. */
++static void bce_vhci_destroy_event_queues(struct bce_vhci *vhci)
++{
++    struct bce_vhci_event_queue *queues[] = {
++        &vhci->ev_commands,
++        &vhci->ev_system,
++        &vhci->ev_isochronous,
++        &vhci->ev_interrupt,
++        &vhci->ev_asynchronous,
++    };
++    size_t i;
++
++    for (i = 0; i < ARRAY_SIZE(queues); i++)
++        bce_vhci_event_queue_destroy(vhci, queues[i]);
++
++    if (!vhci->ev_cq)
++        return;
++    bce_destroy_cq(vhci->dev, vhci->ev_cq);
++}
++
++/*
++ * Answer a firmware event on the system message queue. The reply is a copy
++ * of the request with bit 0x8000 set in cmd, `status` filled in and param2
++ * cleared. If no submission slot can be reserved the reply is dropped and an
++ * error is logged.
++ */
++static void bce_vhci_send_fw_event_response(struct bce_vhci *vhci, struct bce_vhci_message *req, u16 status)
++{
++    struct bce_vhci_message reply = *req;
++    unsigned long timeout = 1000;
++
++    /* param1 is carried over unchanged by the struct copy above */
++    reply.cmd = (u16) (req->cmd | 0x8000u);
++    reply.status = status;
++    reply.param2 = 0;
++
++    if (bce_reserve_submission(vhci->msg_system.sq, &timeout)) {
++        pr_err("bce-vhci: Cannot reserve submision for FW event reply\n");
++        return;
++    }
++    bce_vhci_message_queue_write(&vhci->msg_system, &reply);
++}
++
++/*
++ * Process one firmware event and return the BCE_VHCI_* status to be sent
++ * back to the firmware.
++ *
++ * - BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE: pause or resume the addressed
++ *   transfer queue on behalf of the firmware.
++ * - BCE_VHCI_CMD_ENDPOINT_SET_STATE: only BCE_VHCI_ENDPOINT_STALLED is
++ *   accepted; it marks the queue as stalled.
++ *
++ * For both commands param1 carries the device id in bits 0-7 and the
++ * endpoint address in bits 8-15. Anything that cannot be handled yields
++ * BCE_VHCI_BAD_ARGUMENT.
++ */
++static int bce_vhci_handle_firmware_event(struct bce_vhci *vhci, struct bce_vhci_message *msg)
++{
++    unsigned long flags;
++    bce_vhci_device_t devid;
++    u8 endp;
++    struct bce_vhci_device *dev;
++    struct bce_vhci_transfer_queue *tq;
++    if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE || msg->cmd == BCE_VHCI_CMD_ENDPOINT_SET_STATE) {
++        devid = (bce_vhci_device_t) (msg->param1 & 0xff);
++        endp = bce_vhci_endpoint_index((u8) ((msg->param1 >> 8) & 0xff));
++        /* devid comes from the firmware (0..255) and must not index past devices[] */
++        if (devid >= ARRAY_SIZE(vhci->devices))
++            return BCE_VHCI_BAD_ARGUMENT;
++        dev = vhci->devices[devid];
++        if (!dev || !(dev->tq_mask & BIT(endp)))
++            return BCE_VHCI_BAD_ARGUMENT;
++        tq = &dev->tq[endp];
++    }
++
++    if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE) {
++        if (msg->param2 == BCE_VHCI_ENDPOINT_ACTIVE) {
++            bce_vhci_transfer_queue_resume(tq, BCE_VHCI_PAUSE_FIRMWARE);
++            return BCE_VHCI_SUCCESS;
++        } else if (msg->param2 == BCE_VHCI_ENDPOINT_PAUSED) {
++            bce_vhci_transfer_queue_pause(tq, BCE_VHCI_PAUSE_FIRMWARE);
++            return BCE_VHCI_SUCCESS;
++        }
++        return BCE_VHCI_BAD_ARGUMENT;
++    } else if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_SET_STATE) {
++        if (msg->param2 == BCE_VHCI_ENDPOINT_STALLED) {
++            tq->state = msg->param2;
++            spin_lock_irqsave(&tq->urb_lock, flags);
++            tq->stalled = true;
++            spin_unlock_irqrestore(&tq->urb_lock, flags);
++            return BCE_VHCI_SUCCESS;
++        }
++        return BCE_VHCI_BAD_ARGUMENT;
++    }
++    pr_warn("bce-vhci: Unhandled firmware event: %x s=%x p1=%x p2=%llx\n",
++            msg->cmd, msg->status, msg->param1, msg->param2);
++    return BCE_VHCI_BAD_ARGUMENT;
++}
++
++/*
++ * Work function (vhci->w_fw_events) that drains the firmware command event
++ * queue.
++ *
++ * For every completed entry the message is handled and answered. Before
++ * handling, the next completion is peeked at: if it is the cancellation of
++ * the current message (cmd | 0x4000 with the same param1) both entries are
++ * consumed and the request is answered with BCE_VHCI_ABORT instead.
++ * Otherwise the peeked message is kept in msg2 and handled in the next
++ * iteration. Finally the consumed entries are resubmitted and, when the
++ * queue is idle, queue_empty_completion is signalled.
++ */
++static void bce_vhci_handle_firmware_events_w(struct work_struct *ws)
++{
++    size_t cnt = 0;
++    int result;
++    struct bce_vhci *vhci = container_of(ws, struct bce_vhci, w_fw_events);
++    struct bce_queue_sq *sq = vhci->ev_commands.sq;
++    struct bce_sq_completion_data *cq;
++    struct bce_vhci_message *msg, *msg2 = NULL;
++
++    while (true) {
++        if (msg2) {
++            /* Message that was already peeked at during the previous iteration */
++            msg = msg2;
++            msg2 = NULL;
++        } else if ((cq = bce_next_completion(sq))) {
++            if (cq->status == BCE_COMPLETION_ABORTED) {
++                bce_notify_submission_complete(sq);
++                continue;
++            }
++            msg = &vhci->ev_commands.data[sq->head];
++        } else {
++            break;
++        }
++
++        pr_debug("bce-vhci: Got fw event: %x s=%x p1=%x p2=%llx\n", msg->cmd, msg->status, msg->param1, msg->param2);
++        if ((cq = bce_next_completion(sq))) {
++            msg2 = &vhci->ev_commands.data[(sq->head + 1) % sq->el_count];
++            /* Log the second message here; this used to print the first one again */
++            pr_debug("bce-vhci: Got second fw event: %x s=%x p1=%x p2=%llx\n",
++                    msg2->cmd, msg2->status, msg2->param1, msg2->param2);
++            if (cq->status != BCE_COMPLETION_ABORTED &&
++                msg2->cmd == (msg->cmd | 0x4000) && msg2->param1 == msg->param1) {
++                /* Take two elements */
++                pr_debug("bce-vhci: Cancelled\n");
++                bce_vhci_send_fw_event_response(vhci, msg, BCE_VHCI_ABORT);
++
++                bce_notify_submission_complete(sq);
++                bce_notify_submission_complete(sq);
++                msg2 = NULL;
++                cnt += 2;
++                continue;
++            }
++
++            pr_warn("bce-vhci: Handle fw event - unexpected cancellation\n");
++        }
++
++        result = bce_vhci_handle_firmware_event(vhci, msg);
++        bce_vhci_send_fw_event_response(vhci, msg, (u16) result);
++
++
++        bce_notify_submission_complete(sq);
++        ++cnt;
++    }
++    bce_vhci_event_queue_submit_pending(&vhci->ev_commands, cnt);
++    if (atomic_read(&sq->available_commands) == sq->el_count - 1) {
++        pr_debug("bce-vhci: complete\n");
++        complete(&vhci->ev_commands.queue_empty_completion);
++    }
++}
++
++/*
++ * Completion callback of the firmware command event queue: defers the actual
++ * processing to w_fw_events on the ordered tq_state_wq workqueue.
++ */
++static void bce_vhci_firmware_event_completion(struct bce_queue_sq *sq)
++{
++    struct bce_vhci_event_queue *evq = sq->userdata;
++    struct bce_vhci *vhci = evq->vhci;
++
++    queue_work(vhci->tq_state_wq, &vhci->w_fw_events);
++}
++
++/*
++ * Handler for the system event queue. Messages with bit 0x8000 set in cmd
++ * are delivered to the command queue as completions; everything else is
++ * only logged.
++ */
++static void bce_vhci_handle_system_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg)
++{
++    if (!(msg->cmd & 0x8000)) {
++        pr_warn("bce-vhci: Unhandled system event: %x s=%x p1=%x p2=%llx\n",
++                msg->cmd, msg->status, msg->param1, msg->param2);
++        return;
++    }
++    bce_vhci_command_queue_deliver_completion(&q->vhci->cq, msg);
++}
++
++/*
++ * Handler shared by the isochronous, interrupt and asynchronous event
++ * queues.
++ *
++ * - Messages with bit 0x8000 set in cmd are command completions and go to
++ *   the command queue.
++ * - BCE_VHCI_CMD_TRANSFER_REQUEST and BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS
++ *   are forwarded to the transfer queue addressed by param1 (device id in
++ *   bits 0-7, endpoint address in bits 8-15).
++ * - Everything else is only logged.
++ */
++static void bce_vhci_handle_usb_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg)
++{
++    bce_vhci_device_t devid;
++    u8 endp;
++    struct bce_vhci_device *dev;
++    if (msg->cmd & 0x8000) {
++        bce_vhci_command_queue_deliver_completion(&q->vhci->cq, msg);
++    } else if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST || msg->cmd == BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS) {
++        devid = (bce_vhci_device_t) (msg->param1 & 0xff);
++        endp = bce_vhci_endpoint_index((u8) ((msg->param1 >> 8) & 0xff));
++        /* devid comes from the firmware (0..255) and must not index past devices[] */
++        dev = devid < ARRAY_SIZE(q->vhci->devices) ? q->vhci->devices[devid] : NULL;
++        if (!dev || (dev->tq_mask & BIT(endp)) == 0) {
++            pr_err("bce-vhci: Didn't find destination for transfer queue event\n");
++            return;
++        }
++        bce_vhci_transfer_queue_event(&dev->tq[endp], msg);
++    } else {
++        pr_warn("bce-vhci: Unhandled USB event: %x s=%x p1=%x p2=%llx\n",
++                msg->cmd, msg->status, msg->param1, msg->param2);
++    }
++}
++
++
++
++/*
++ * USB host controller driver operations of the virtual controller. The
++ * private area of each HCD only holds a pointer back to its bce_vhci (see
++ * bce_vhci_from_hcd()).
++ */
++static const struct hc_driver bce_vhci_driver = {
++        .description = "bce-vhci",
++        .product_desc = "BCE VHCI Host Controller",
++        .hcd_priv_size = sizeof(struct bce_vhci *),
++
++        /* HCD_DMA is only used on 5.4+; older kernels get self.uses_dma set in bce_vhci_create() */
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5,4,0)
++        .flags = HCD_USB2,
++#else
++        .flags = HCD_USB2 | HCD_DMA,
++#endif
++
++        .start = bce_vhci_start,
++        .stop = bce_vhci_stop,
++        .hub_status_data = bce_vhci_hub_status_data,
++        .hub_control = bce_vhci_hub_control,
++        .urb_enqueue = bce_vhci_urb_enqueue,
++        .urb_dequeue = bce_vhci_urb_dequeue,
++        .enable_device = bce_vhci_enable_device,
++        .free_dev = bce_vhci_free_device,
++        .address_device = bce_vhci_address_device,
++        .add_endpoint = bce_vhci_add_endpoint,
++        .drop_endpoint = bce_vhci_drop_endpoint,
++        .endpoint_reset = bce_vhci_endpoint_reset,
++        .check_bandwidth = bce_vhci_check_bandwidth,
++        .get_frame_number = bce_vhci_get_frame_number,
++        .bus_suspend = bce_vhci_bus_suspend,
++        .bus_resume = bce_vhci_bus_resume
++};
++
++
++/*
++ * Module-wide setup for the VHCI part: reserve one character device number
++ * and create the "bce-vhci" device class used by bce_vhci_create().
++ *
++ * Returns 0 on success or a negative errno; on failure nothing stays
++ * allocated.
++ */
++int __init bce_vhci_module_init(void)
++{
++    int result;
++
++    /* Nothing has been set up yet, so there is nothing to undo on failure */
++    result = alloc_chrdev_region(&bce_vhci_chrdev, 0, 1, "bce-vhci");
++    if (result)
++        return result;
++#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0)
++    bce_vhci_class = class_create(THIS_MODULE, "bce-vhci");
++#else
++    bce_vhci_class = class_create("bce-vhci");
++#endif
++    if (IS_ERR(bce_vhci_class)) {
++        result = PTR_ERR(bce_vhci_class);
++        goto fail_class;
++    }
++    return 0;
++
++fail_class:
++    /* The class was never created: do not destroy it, and do not keep the ERR_PTR */
++    bce_vhci_class = NULL;
++    unregister_chrdev_region(bce_vhci_chrdev, 1);
++    return result;
++}
++/* Undo bce_vhci_module_init(): destroy the device class and release the chrdev number. */
++void __exit bce_vhci_module_exit(void)
++{
++    class_destroy(bce_vhci_class);
++    unregister_chrdev_region(bce_vhci_chrdev, 1);
++}
++
++module_param_named(vhci_port_mask, bce_vhci_port_mask, ushort, 0444);
++MODULE_PARM_DESC(vhci_port_mask, "Specifies which VHCI ports are enabled");
+diff --git a/drivers/staging/apple-bce/vhci/vhci.h b/drivers/staging/apple-bce/vhci/vhci.h
+new file mode 100644
+index 000000000000..6c2e22622f4c
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/vhci.h
+@@ -0,0 +1,52 @@
++#ifndef BCE_VHCI_H
++#define BCE_VHCI_H
++
++#include "queue.h"
++#include "transfer.h"
++
++struct usb_hcd;
++struct bce_queue_cq;
++
++struct bce_vhci_device {
++    struct bce_vhci_transfer_queue tq[32];
++    u32 tq_mask;
++};
++struct bce_vhci {
++    struct apple_bce_device *dev;
++    dev_t vdevt;
++    struct device *vdev;
++    struct usb_hcd *hcd;
++    struct spinlock hcd_spinlock;
++    struct bce_vhci_message_queue msg_commands;
++    struct bce_vhci_message_queue msg_system;
++    struct bce_vhci_message_queue msg_isochronous;
++    struct bce_vhci_message_queue msg_interrupt;
++    struct bce_vhci_message_queue msg_asynchronous;
++    struct spinlock msg_asynchronous_lock;
++    struct bce_vhci_command_queue cq;
++    struct bce_queue_cq *ev_cq;
++    struct bce_vhci_event_queue ev_commands;
++    struct bce_vhci_event_queue ev_system;
++    struct bce_vhci_event_queue ev_isochronous;
++    struct bce_vhci_event_queue ev_interrupt;
++    struct bce_vhci_event_queue ev_asynchronous;
++    u16 port_mask;
++    u8 port_count;
++    u16 port_power_mask;
++    bce_vhci_device_t port_to_device[16];
++    struct bce_vhci_device *devices[16];
++    struct workqueue_struct *tq_state_wq;
++    struct work_struct w_fw_events;
++};
++
++int __init bce_vhci_module_init(void);
++void __exit bce_vhci_module_exit(void);
++
++int bce_vhci_create(struct apple_bce_device *dev, struct bce_vhci *vhci);
++void bce_vhci_destroy(struct bce_vhci *vhci);
++int bce_vhci_start(struct usb_hcd *hcd);
++void bce_vhci_stop(struct usb_hcd *hcd);
++
++struct bce_vhci *bce_vhci_from_hcd(struct usb_hcd *hcd);
++
++#endif //BCE_VHCI_H
+diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
+index 428d81afe215..aa1604d92c1a 100644
+--- a/include/drm/drm_format_helper.h
++++ b/include/drm/drm_format_helper.h
+@@ -96,6 +96,9 @@ void drm_fb_xrgb8888_to_rgba5551(struct iosys_map *dst, const unsigned int *dst_
+ void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pitch,
+ 			       const struct iosys_map *src, const struct drm_framebuffer *fb,
+ 			       const struct drm_rect *clip, struct drm_format_conv_state *state);
++void drm_fb_xrgb8888_to_bgr888(struct iosys_map *dst, const unsigned int *dst_pitch,
++			       const struct iosys_map *src, const struct drm_framebuffer *fb,
++			       const struct drm_rect *clip, struct drm_format_conv_state *state);
+ void drm_fb_xrgb8888_to_argb8888(struct iosys_map *dst, const unsigned int *dst_pitch,
+ 				 const struct iosys_map *src, const struct drm_framebuffer *fb,
+ 				 const struct drm_rect *clip, struct drm_format_conv_state *state);
+diff --git a/include/linux/hid.h b/include/linux/hid.h
+index 1533c9dcd3a6..2deff79f39a1 100644
+--- a/include/linux/hid.h
++++ b/include/linux/hid.h
+@@ -940,6 +940,8 @@ extern void hidinput_report_event(struct hid_device *hid, struct hid_report *rep
+ extern int hidinput_connect(struct hid_device *hid, unsigned int force);
+ extern void hidinput_disconnect(struct hid_device *);
+ 
++struct hid_field *hid_find_field(struct hid_device *hdev, unsigned int report_type,
++				 unsigned int application, unsigned int usage);
+ int hid_set_field(struct hid_field *, unsigned, __s32);
+ int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size,
+ 		     int interrupt);
+diff --git a/lib/test_printf.c b/lib/test_printf.c
+index 965cb6f28527..db99014b8c13 100644
+--- a/lib/test_printf.c
++++ b/lib/test_printf.c
+@@ -745,18 +745,26 @@ static void __init fwnode_pointer(void)
+ static void __init fourcc_pointer(void)
+ {
+ 	struct {
++		char type;
+ 		u32 code;
+ 		char *str;
+ 	} const try[] = {
+-		{ 0x3231564e, "NV12 little-endian (0x3231564e)", },
+-		{ 0xb231564e, "NV12 big-endian (0xb231564e)", },
+-		{ 0x10111213, ".... little-endian (0x10111213)", },
+-		{ 0x20303159, "Y10  little-endian (0x20303159)", },
++		{ 'c', 0x3231564e, "NV12 little-endian (0x3231564e)", },
++		{ 'c', 0xb231564e, "NV12 big-endian (0xb231564e)", },
++		{ 'c', 0x10111213, ".... little-endian (0x10111213)", },
++		{ 'c', 0x20303159, "Y10  little-endian (0x20303159)", },
++		{ 'h', 0x67503030, "gP00 (0x67503030)", },
++		{ 'r', 0x30305067, "gP00 (0x67503030)", },
++		{ 'l', cpu_to_le32(0x67503030), "gP00 (0x67503030)", },
++		{ 'b', cpu_to_be32(0x67503030), "gP00 (0x67503030)", },
+ 	};
+ 	unsigned int i;
+ 
+-	for (i = 0; i < ARRAY_SIZE(try); i++)
+-		test(try[i].str, "%p4cc", &try[i].code);
++	for (i = 0; i < ARRAY_SIZE(try); i++) {
++		char fmt[] = { '%', 'p', '4', 'c', try[i].type, '\0' };
++
++		test(try[i].str, fmt, &try[i].code);
++	}
+ }
+ 
+ static void __init
+diff --git a/lib/vsprintf.c b/lib/vsprintf.c
+index 2d71b1115916..5274e3c881de 100644
+--- a/lib/vsprintf.c
++++ b/lib/vsprintf.c
+@@ -1760,27 +1760,50 @@ char *fourcc_string(char *buf, char *end, const u32 *fourcc,
+ 	char output[sizeof("0123 little-endian (0x01234567)")];
+ 	char *p = output;
+ 	unsigned int i;
++	bool pix_fmt = false;
+ 	u32 orig, val;
+ 
+-	if (fmt[1] != 'c' || fmt[2] != 'c')
++	if (fmt[1] != 'c')
+ 		return error_string(buf, end, "(%p4?)", spec);
+ 
+ 	if (check_pointer(&buf, end, fourcc, spec))
+ 		return buf;
+ 
+ 	orig = get_unaligned(fourcc);
+-	val = orig & ~BIT(31);
++	switch (fmt[2]) {
++	case 'h':
++		val = orig;
++		break;
++	case 'r':
++		val = orig = swab32(orig);
++		break;
++	case 'l':
++		val = orig = le32_to_cpu(orig);
++		break;
++	case 'b':
++		val = orig = be32_to_cpu(orig);
++		break;
++	case 'c':
++		/* Pixel formats are printed LSB-first */
++		val = swab32(orig & ~BIT(31));
++		pix_fmt = true;
++		break;
++	default:
++		return error_string(buf, end, "(%p4?)", spec);
++	}
+ 
+ 	for (i = 0; i < sizeof(u32); i++) {
+-		unsigned char c = val >> (i * 8);
++		unsigned char c = val >> ((3 - i) * 8);
+ 
+ 		/* Print non-control ASCII characters as-is, dot otherwise */
+ 		*p++ = isascii(c) && isprint(c) ? c : '.';
+ 	}
+ 
+-	*p++ = ' ';
+-	strcpy(p, orig & BIT(31) ? "big-endian" : "little-endian");
+-	p += strlen(p);
++	if (pix_fmt) {
++		*p++ = ' ';
++		strcpy(p, orig & BIT(31) ? "big-endian" : "little-endian");
++		p += strlen(p);
++	}
+ 
+ 	*p++ = ' ';
+ 	*p++ = '(';
+@@ -2355,6 +2378,7 @@ char *rust_fmt_argument(char *buf, char *end, void *ptr);
+  *       read the documentation (path below) first.
+  * - 'NF' For a netdev_features_t
+  * - '4cc' V4L2 or DRM FourCC code, with endianness and raw numerical value.
++ * - '4c[hlbr]' Generic FourCC code.
+  * - 'h[CDN]' For a variable-length buffer, it prints it as a hex string with
+  *            a certain separator (' ' by default):
+  *              C colon
+diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
+index 4427572b2477..b60c99d61882 100755
+--- a/scripts/checkpatch.pl
++++ b/scripts/checkpatch.pl
+@@ -6917,7 +6917,7 @@ sub process {
+ 					    ($extension eq "f" &&
+ 					     defined $qualifier && $qualifier !~ /^w/) ||
+ 					    ($extension eq "4" &&
+-					     defined $qualifier && $qualifier !~ /^cc/)) {
++					     defined $qualifier && $qualifier !~ /^c[chlbr]/)) {
+ 						$bad_specifier = $specifier;
+ 						last;
+ 					}
+-- 
+2.47.0.rc0
+
+From 9c5f8134d6095a520bca8870f2477115b595ca07 Mon Sep 17 00:00:00 2001
+From: Eric Naim <dnaim@cachyos.org>
+Date: Tue, 22 Oct 2024 22:53:44 +0800
+Subject: [PATCH 12/13] thp-shrinker
+
+Signed-off-by: Peter Jung <admin@ptr1337.dev>
+---
+ Documentation/admin-guide/mm/transhuge.rst    |  16 ++
+ include/linux/huge_mm.h                       |   4 +-
+ include/linux/khugepaged.h                    |   1 +
+ include/linux/page-flags.h                    |  13 +-
+ include/linux/rmap.h                          |   7 +-
+ include/linux/vm_event_item.h                 |   1 +
+ mm/huge_memory.c                              | 143 ++++++++++++++++--
+ mm/khugepaged.c                               |   3 +-
+ mm/migrate.c                                  |  75 +++++++--
+ mm/migrate_device.c                           |   4 +-
+ mm/rmap.c                                     |   5 +-
+ mm/vmscan.c                                   |   3 +-
+ mm/vmstat.c                                   |   1 +
+ .../selftests/mm/split_huge_page_test.c       |  71 +++++++++
+ tools/testing/selftests/mm/vm_util.c          |  22 +++
+ tools/testing/selftests/mm/vm_util.h          |   1 +
+ 16 files changed, 334 insertions(+), 36 deletions(-)
+
+diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
+index 058485daf186..02ae7bc9efbd 100644
+--- a/Documentation/admin-guide/mm/transhuge.rst
++++ b/Documentation/admin-guide/mm/transhuge.rst
+@@ -202,6 +202,16 @@ PMD-mappable transparent hugepage::
+ 
+ 	cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
+ 
++All THPs at fault and collapse time will be added to _deferred_list,
++and will therefore be split under memory pressure if they are considered
++"underused". A THP is underused if the number of zero-filled pages in
++the THP is above max_ptes_none (see below). It is possible to disable
++this behaviour by writing 0 to shrink_underused, and enable it by writing
++1 to it::
++
++	echo 0 > /sys/kernel/mm/transparent_hugepage/shrink_underused
++	echo 1 > /sys/kernel/mm/transparent_hugepage/shrink_underused
++
+ khugepaged will be automatically started when PMD-sized THP is enabled
+ (either of the per-size anon control or the top-level control are set
+ to "always" or "madvise"), and it'll be automatically shutdown when
+@@ -447,6 +457,12 @@ thp_deferred_split_page
+ 	splitting it would free up some memory. Pages on split queue are
+ 	going to be split under memory pressure.
+ 
++thp_underused_split_page
++	is incremented when a huge page on the split queue was split
++	because it was underused. A THP is underused if the number of
++	zero pages in the THP is above a certain threshold
++	(/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none).
++
+ thp_split_pmd
+ 	is incremented every time a PMD split into table of PTEs.
+ 	This can happen, for instance, when application calls mprotect() or
+diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
+index e25d9ebfdf89..00af84aa88ea 100644
+--- a/include/linux/huge_mm.h
++++ b/include/linux/huge_mm.h
+@@ -321,7 +321,7 @@ static inline int split_huge_page(struct page *page)
+ {
+ 	return split_huge_page_to_list_to_order(page, NULL, 0);
+ }
+-void deferred_split_folio(struct folio *folio);
++void deferred_split_folio(struct folio *folio, bool partially_mapped);
+ 
+ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+ 		unsigned long address, bool freeze, struct folio *folio);
+@@ -484,7 +484,7 @@ static inline int split_huge_page(struct page *page)
+ {
+ 	return 0;
+ }
+-static inline void deferred_split_folio(struct folio *folio) {}
++static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {}
+ #define split_huge_pmd(__vma, __pmd, __address)	\
+ 	do { } while (0)
+ 
+diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
+index f68865e19b0b..30baae91b225 100644
+--- a/include/linux/khugepaged.h
++++ b/include/linux/khugepaged.h
+@@ -4,6 +4,7 @@
+ 
+ #include <linux/sched/coredump.h> /* MMF_VM_HUGEPAGE */
+ 
++extern unsigned int khugepaged_max_ptes_none __read_mostly;
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ extern struct attribute_group khugepaged_attr_group;
+ 
+diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
+index 5769fe6e4950..5e7bc8522e91 100644
+--- a/include/linux/page-flags.h
++++ b/include/linux/page-flags.h
+@@ -185,6 +185,7 @@ enum pageflags {
+ 	/* At least one page in this folio has the hwpoison flag set */
+ 	PG_has_hwpoisoned = PG_error,
+ 	PG_large_rmappable = PG_workingset, /* anon or file-backed */
++	PG_partially_mapped = PG_reclaim, /* was identified to be partially mapped */
+ };
+ 
+ #define PAGEFLAGS_MASK		((1UL << NR_PAGEFLAGS) - 1)
+@@ -865,8 +866,18 @@ static inline void ClearPageCompound(struct page *page)
+ 	ClearPageHead(page);
+ }
+ FOLIO_FLAG(large_rmappable, FOLIO_SECOND_PAGE)
++FOLIO_TEST_FLAG(partially_mapped, FOLIO_SECOND_PAGE)
++/*
++ * PG_partially_mapped is protected by deferred_split split_queue_lock,
++ * so it's safe to use non-atomic set/clear.
++ */
++__FOLIO_SET_FLAG(partially_mapped, FOLIO_SECOND_PAGE)
++__FOLIO_CLEAR_FLAG(partially_mapped, FOLIO_SECOND_PAGE)
+ #else
+ FOLIO_FLAG_FALSE(large_rmappable)
++FOLIO_TEST_FLAG_FALSE(partially_mapped)
++__FOLIO_SET_FLAG_NOOP(partially_mapped)
++__FOLIO_CLEAR_FLAG_NOOP(partially_mapped)
+ #endif
+ 
+ #define PG_head_mask ((1UL << PG_head))
+@@ -1175,7 +1186,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
+  */
+ #define PAGE_FLAGS_SECOND						\
+ 	(0xffUL /* order */		| 1UL << PG_has_hwpoisoned |	\
+-	 1UL << PG_large_rmappable)
++	 1UL << PG_large_rmappable	| 1UL << PG_partially_mapped)
+ 
+ #define PAGE_FLAGS_PRIVATE				\
+ 	(1UL << PG_private | 1UL << PG_private_2)
+diff --git a/include/linux/rmap.h b/include/linux/rmap.h
+index 0978c64f49d8..07854d1f9ad6 100644
+--- a/include/linux/rmap.h
++++ b/include/linux/rmap.h
+@@ -745,7 +745,12 @@ int folio_mkclean(struct folio *);
+ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
+ 		      struct vm_area_struct *vma);
+ 
+-void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked);
++enum rmp_flags {
++	RMP_LOCKED		= 1 << 0,
++	RMP_USE_SHARED_ZEROPAGE	= 1 << 1,
++};
++
++void remove_migration_ptes(struct folio *src, struct folio *dst, int flags);
+ 
+ /*
+  * rmap_walk_control: To control rmap traversing for specific needs
+diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
+index 747943bc8cc2..d35e588e0ece 100644
+--- a/include/linux/vm_event_item.h
++++ b/include/linux/vm_event_item.h
+@@ -104,6 +104,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
+ 		THP_SPLIT_PAGE,
+ 		THP_SPLIT_PAGE_FAILED,
+ 		THP_DEFERRED_SPLIT_PAGE,
++		THP_UNDERUSED_SPLIT_PAGE,
+ 		THP_SPLIT_PMD,
+ 		THP_SCAN_EXCEED_NONE_PTE,
+ 		THP_SCAN_EXCEED_SWAP_PTE,
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 4d2839fcf688..eb2e5c305547 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -77,6 +77,7 @@ static unsigned long deferred_split_count(struct shrinker *shrink,
+ 					  struct shrink_control *sc);
+ static unsigned long deferred_split_scan(struct shrinker *shrink,
+ 					 struct shrink_control *sc);
++static bool split_underused_thp = true;
+ 
+ static atomic_t huge_zero_refcount;
+ struct folio *huge_zero_folio __read_mostly;
+@@ -449,6 +450,27 @@ static ssize_t hpage_pmd_size_show(struct kobject *kobj,
+ static struct kobj_attribute hpage_pmd_size_attr =
+ 	__ATTR_RO(hpage_pmd_size);
+ 
++static ssize_t split_underused_thp_show(struct kobject *kobj,
++			    struct kobj_attribute *attr, char *buf)
++{
++	return sysfs_emit(buf, "%d\n", split_underused_thp);
++}
++
++static ssize_t split_underused_thp_store(struct kobject *kobj,
++			     struct kobj_attribute *attr,
++			     const char *buf, size_t count)
++{
++	int err = kstrtobool(buf, &split_underused_thp);
++
++	if (err < 0)
++		return err;
++
++	return count;
++}
++
++static struct kobj_attribute split_underused_thp_attr = __ATTR(
++	shrink_underused, 0644, split_underused_thp_show, split_underused_thp_store);
++
+ static struct attribute *hugepage_attr[] = {
+ 	&enabled_attr.attr,
+ 	&defrag_attr.attr,
+@@ -457,6 +479,7 @@ static struct attribute *hugepage_attr[] = {
+ #ifdef CONFIG_SHMEM
+ 	&shmem_enabled_attr.attr,
+ #endif
++	&split_underused_thp_attr.attr,
+ 	NULL,
+ };
+ 
+@@ -1013,6 +1036,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
+ 		update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
+ 		add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+ 		mm_inc_nr_ptes(vma->vm_mm);
++		deferred_split_folio(folio, false);
+ 		spin_unlock(vmf->ptl);
+ 		count_vm_event(THP_FAULT_ALLOC);
+ 		count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_ALLOC);
+@@ -2784,7 +2808,7 @@ bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr,
+ 	return false;
+ }
+ 
+-static void remap_page(struct folio *folio, unsigned long nr)
++static void remap_page(struct folio *folio, unsigned long nr, int flags)
+ {
+ 	int i = 0;
+ 
+@@ -2792,7 +2816,7 @@ static void remap_page(struct folio *folio, unsigned long nr)
+ 	if (!folio_test_anon(folio))
+ 		return;
+ 	for (;;) {
+-		remove_migration_ptes(folio, folio, true);
++		remove_migration_ptes(folio, folio, RMP_LOCKED | flags);
+ 		i += folio_nr_pages(folio);
+ 		if (i >= nr)
+ 			break;
+@@ -3000,7 +3024,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
+ 
+ 	if (nr_dropped)
+ 		shmem_uncharge(folio->mapping->host, nr_dropped);
+-	remap_page(folio, nr);
++	remap_page(folio, nr, PageAnon(head) ? RMP_USE_SHARED_ZEROPAGE : 0);
+ 
+ 	/*
+ 	 * set page to its compound_head when split to non order-0 pages, so
+@@ -3235,6 +3259,9 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+ 		if (folio_order(folio) > 1 &&
+ 		    !list_empty(&folio->_deferred_list)) {
+ 			ds_queue->split_queue_len--;
++			if (folio_test_partially_mapped(folio)) {
++				__folio_clear_partially_mapped(folio);
++			}
+ 			/*
+ 			 * Reinitialize page_deferred_list after removing the
+ 			 * page from the split_queue, otherwise a subsequent
+@@ -3269,7 +3296,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+ 		if (mapping)
+ 			xas_unlock(&xas);
+ 		local_irq_enable();
+-		remap_page(folio, folio_nr_pages(folio));
++		remap_page(folio, folio_nr_pages(folio), 0);
+ 		ret = -EAGAIN;
+ 	}
+ 
+@@ -3297,12 +3324,16 @@ void __folio_undo_large_rmappable(struct folio *folio)
+ 	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ 	if (!list_empty(&folio->_deferred_list)) {
+ 		ds_queue->split_queue_len--;
++		if (folio_test_partially_mapped(folio)) {
++			__folio_clear_partially_mapped(folio);
++		}
+ 		list_del_init(&folio->_deferred_list);
+ 	}
+ 	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ }
+ 
+-void deferred_split_folio(struct folio *folio)
++/* partially_mapped=false won't clear PG_partially_mapped folio flag */
++void deferred_split_folio(struct folio *folio, bool partially_mapped)
+ {
+ 	struct deferred_split *ds_queue = get_deferred_split_queue(folio);
+ #ifdef CONFIG_MEMCG
+@@ -3317,6 +3348,9 @@ void deferred_split_folio(struct folio *folio)
+ 	if (folio_order(folio) <= 1)
+ 		return;
+ 
++	if (!partially_mapped && !split_underused_thp)
++		return;
++
+ 	/*
+ 	 * The try_to_unmap() in page reclaim path might reach here too,
+ 	 * this may cause a race condition to corrupt deferred split queue.
+@@ -3330,14 +3364,20 @@ void deferred_split_folio(struct folio *folio)
+ 	if (folio_test_swapcache(folio))
+ 		return;
+ 
+-	if (!list_empty(&folio->_deferred_list))
+-		return;
+-
+ 	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
++	if (partially_mapped) {
++		if (!folio_test_partially_mapped(folio)) {
++			__folio_set_partially_mapped(folio);
++			if (folio_test_pmd_mappable(folio))
++				count_vm_event(THP_DEFERRED_SPLIT_PAGE);
++			count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED);
++
++		}
++	} else {
++		/* partially mapped folios cannot become non-partially mapped */
++		VM_WARN_ON_FOLIO(folio_test_partially_mapped(folio), folio);
++	}
+ 	if (list_empty(&folio->_deferred_list)) {
+-		if (folio_test_pmd_mappable(folio))
+-			count_vm_event(THP_DEFERRED_SPLIT_PAGE);
+-		count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED);
+ 		list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
+ 		ds_queue->split_queue_len++;
+ #ifdef CONFIG_MEMCG
+@@ -3362,6 +3402,39 @@ static unsigned long deferred_split_count(struct shrinker *shrink,
+ 	return READ_ONCE(ds_queue->split_queue_len);
+ }
+ 
++static bool thp_underused(struct folio *folio)
++{
++	int num_zero_pages = 0, num_filled_pages = 0;
++	void *kaddr;
++	int i;
++
++	if (khugepaged_max_ptes_none == HPAGE_PMD_NR - 1)
++		return false;
++
++	for (i = 0; i < folio_nr_pages(folio); i++) {
++		kaddr = kmap_local_folio(folio, i * PAGE_SIZE);
++		if (!memchr_inv(kaddr, 0, PAGE_SIZE)) {
++			num_zero_pages++;
++			if (num_zero_pages > khugepaged_max_ptes_none) {
++				kunmap_local(kaddr);
++				return true;
++			}
++		} else {
++			/*
++			 * Another path for early exit once the number
++			 * of non-zero filled pages exceeds threshold.
++			 */
++			num_filled_pages++;
++			if (num_filled_pages >= HPAGE_PMD_NR - khugepaged_max_ptes_none) {
++				kunmap_local(kaddr);
++				return false;
++			}
++		}
++		kunmap_local(kaddr);
++	}
++	return false;
++}
++
+ static unsigned long deferred_split_scan(struct shrinker *shrink,
+ 		struct shrink_control *sc)
+ {
+@@ -3369,8 +3442,8 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
+ 	struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+ 	unsigned long flags;
+ 	LIST_HEAD(list);
+-	struct folio *folio, *next;
+-	int split = 0;
++	struct folio *folio, *next, *prev = NULL;
++	int split = 0, removed = 0;
+ 
+ #ifdef CONFIG_MEMCG
+ 	if (sc->memcg)
+@@ -3385,6 +3458,9 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
+ 			list_move(&folio->_deferred_list, &list);
+ 		} else {
+ 			/* We lost race with folio_put() */
++			if (folio_test_partially_mapped(folio)) {
++				__folio_clear_partially_mapped(folio);
++			}
+ 			list_del_init(&folio->_deferred_list);
+ 			ds_queue->split_queue_len--;
+ 		}
+@@ -3394,20 +3470,55 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
+ 	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ 
+ 	list_for_each_entry_safe(folio, next, &list, _deferred_list) {
++		bool did_split = false;
++		bool underused = false;
++
++		if (!folio_test_partially_mapped(folio)) {
++			underused = thp_underused(folio);
++			if (!underused)
++				goto next;
++		}
+ 		if (!folio_trylock(folio))
+ 			goto next;
+-		/* split_huge_page() removes page from list on success */
+-		if (!split_folio(folio))
++		if (!split_folio(folio)) {
++			did_split = true;
++			if (underused)
++				count_vm_event(THP_UNDERUSED_SPLIT_PAGE);
+ 			split++;
++		}
+ 		folio_unlock(folio);
+ next:
+-		folio_put(folio);
++		/*
++		 * split_folio() removes folio from list on success.
++		 * Only add back to the queue if folio is partially mapped.
++		 * If thp_underused returns false, or if split_folio fails
++		 * in the case it was underused, then consider it used and
++		 * don't add it back to split_queue.
++		 */
++		if (!did_split && !folio_test_partially_mapped(folio)) {
++			list_del_init(&folio->_deferred_list);
++			removed++;
++		} else {
++			/*
++			 * That unlocked list_del_init() above would be unsafe,
++			 * unless its folio is separated from any earlier folios
++			 * left on the list (which may be concurrently unqueued)
++			 * by one safe folio with refcount still raised.
++			 */
++			swap(folio, prev);
++		}
++		if (folio)
++			folio_put(folio);
+ 	}
+ 
+ 	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ 	list_splice_tail(&list, &ds_queue->split_queue);
++	ds_queue->split_queue_len -= removed;
+ 	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ 
++	if (prev)
++		folio_put(prev);
++
+ 	/*
+ 	 * Stop shrinker if we didn't split any page, but the queue is empty.
+ 	 * This can happen if pages were freed under us.
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index 4cba91ecf74b..ee490f1e7de2 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -85,7 +85,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
+  *
+  * Note that these are only respected if collapse was initiated by khugepaged.
+  */
+-static unsigned int khugepaged_max_ptes_none __read_mostly;
++unsigned int khugepaged_max_ptes_none __read_mostly;
+ static unsigned int khugepaged_max_ptes_swap __read_mostly;
+ static unsigned int khugepaged_max_ptes_shared __read_mostly;
+ 
+@@ -1235,6 +1235,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
+ 	pgtable_trans_huge_deposit(mm, pmd, pgtable);
+ 	set_pmd_at(mm, address, pmd, _pmd);
+ 	update_mmu_cache_pmd(vma, address, pmd);
++	deferred_split_folio(folio, false);
+ 	spin_unlock(pmd_ptl);
+ 
+ 	folio = NULL;
+diff --git a/mm/migrate.c b/mm/migrate.c
+index 368ab3878fa6..d3a66f1a621b 100644
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -177,13 +177,57 @@ void putback_movable_pages(struct list_head *l)
+ 	}
+ }
+ 
++static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
++					  struct folio *folio,
++					  unsigned long idx)
++{
++	struct page *page = folio_page(folio, idx);
++	bool contains_data;
++	pte_t newpte;
++	void *addr;
++
++	VM_BUG_ON_PAGE(PageCompound(page), page);
++	VM_BUG_ON_PAGE(!PageAnon(page), page);
++	VM_BUG_ON_PAGE(!PageLocked(page), page);
++	VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
++
++	if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags & VM_LOCKED) ||
++	    mm_forbids_zeropage(pvmw->vma->vm_mm))
++		return false;
++
++	/*
++	 * The pmd entry mapping the old thp was flushed and the pte mapping
++	 * this subpage has been non present. If the subpage is only zero-filled
++	 * then map it to the shared zeropage.
++	 */
++	addr = kmap_local_page(page);
++	contains_data = memchr_inv(addr, 0, PAGE_SIZE);
++	kunmap_local(addr);
++
++	if (contains_data)
++		return false;
++
++	newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address),
++					pvmw->vma->vm_page_prot));
++	set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);
++
++	dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio));
++	return true;
++}
++
++struct rmap_walk_arg {
++	struct folio *folio;
++	bool map_unused_to_zeropage;
++};
++
+ /*
+  * Restore a potential migration pte to a working pte entry
+  */
+ static bool remove_migration_pte(struct folio *folio,
+-		struct vm_area_struct *vma, unsigned long addr, void *old)
++		struct vm_area_struct *vma, unsigned long addr, void *arg)
+ {
+-	DEFINE_FOLIO_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
++	struct rmap_walk_arg *rmap_walk_arg = arg;
++	DEFINE_FOLIO_VMA_WALK(pvmw, rmap_walk_arg->folio, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
+ 
+ 	while (page_vma_mapped_walk(&pvmw)) {
+ 		rmap_t rmap_flags = RMAP_NONE;
+@@ -207,6 +251,9 @@ static bool remove_migration_pte(struct folio *folio,
+ 			continue;
+ 		}
+ #endif
++		if (rmap_walk_arg->map_unused_to_zeropage &&
++		    try_to_map_unused_to_zeropage(&pvmw, folio, idx))
++			continue;
+ 
+ 		folio_get(folio);
+ 		pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
+@@ -285,14 +332,21 @@ static bool remove_migration_pte(struct folio *folio,
+  * Get rid of all migration entries and replace them by
+  * references to the indicated page.
+  */
+-void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked)
++void remove_migration_ptes(struct folio *src, struct folio *dst, int flags)
+ {
++	struct rmap_walk_arg rmap_walk_arg = {
++		.folio = src,
++		.map_unused_to_zeropage = flags & RMP_USE_SHARED_ZEROPAGE,
++	};
++
+ 	struct rmap_walk_control rwc = {
+ 		.rmap_one = remove_migration_pte,
+-		.arg = src,
++		.arg = &rmap_walk_arg,
+ 	};
+ 
+-	if (locked)
++	VM_BUG_ON_FOLIO((flags & RMP_USE_SHARED_ZEROPAGE) && (src != dst), src);
++
++	if (flags & RMP_LOCKED)
+ 		rmap_walk_locked(dst, &rwc);
+ 	else
+ 		rmap_walk(dst, &rwc);
+@@ -904,7 +958,7 @@ static int writeout(struct address_space *mapping, struct folio *folio)
+ 	 * At this point we know that the migration attempt cannot
+ 	 * be successful.
+ 	 */
+-	remove_migration_ptes(folio, folio, false);
++	remove_migration_ptes(folio, folio, 0);
+ 
+ 	rc = mapping->a_ops->writepage(&folio->page, &wbc);
+ 
+@@ -1068,7 +1122,7 @@ static void migrate_folio_undo_src(struct folio *src,
+ 				   struct list_head *ret)
+ {
+ 	if (page_was_mapped)
+-		remove_migration_ptes(src, src, false);
++		remove_migration_ptes(src, src, 0);
+ 	/* Drop an anon_vma reference if we took one */
+ 	if (anon_vma)
+ 		put_anon_vma(anon_vma);
+@@ -1306,7 +1360,7 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
+ 		lru_add_drain();
+ 
+ 	if (old_page_state & PAGE_WAS_MAPPED)
+-		remove_migration_ptes(src, dst, false);
++		remove_migration_ptes(src, dst, 0);
+ 
+ out_unlock_both:
+ 	folio_unlock(dst);
+@@ -1444,7 +1498,7 @@ static int unmap_and_move_huge_page(new_folio_t get_new_folio,
+ 
+ 	if (page_was_mapped)
+ 		remove_migration_ptes(src,
+-			rc == MIGRATEPAGE_SUCCESS ? dst : src, false);
++			rc == MIGRATEPAGE_SUCCESS ? dst : src, 0);
+ 
+ unlock_put_anon:
+ 	folio_unlock(dst);
+@@ -1682,7 +1736,8 @@ static int migrate_pages_batch(struct list_head *from,
+ 			 * use _deferred_list.
+ 			 */
+ 			if (nr_pages > 2 &&
+-			   !list_empty(&folio->_deferred_list)) {
++			   !list_empty(&folio->_deferred_list) &&
++			   folio_test_partially_mapped(folio)) {
+ 				if (!try_split_folio(folio, split_folios, mode)) {
+ 					nr_failed++;
+ 					stats->nr_thp_failed += is_thp;
+diff --git a/mm/migrate_device.c b/mm/migrate_device.c
+index 6d66dc1c6ffa..8f875636b35b 100644
+--- a/mm/migrate_device.c
++++ b/mm/migrate_device.c
+@@ -424,7 +424,7 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns,
+ 			continue;
+ 
+ 		folio = page_folio(page);
+-		remove_migration_ptes(folio, folio, false);
++		remove_migration_ptes(folio, folio, 0);
+ 
+ 		src_pfns[i] = 0;
+ 		folio_unlock(folio);
+@@ -837,7 +837,7 @@ void migrate_device_finalize(unsigned long *src_pfns,
+ 
+ 		src = page_folio(page);
+ 		dst = page_folio(newpage);
+-		remove_migration_ptes(src, dst, false);
++		remove_migration_ptes(src, dst, 0);
+ 		folio_unlock(src);
+ 
+ 		if (is_zone_device_page(page))
+diff --git a/mm/rmap.c b/mm/rmap.c
+index 2490e727e2dc..77b5185058b4 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1566,8 +1566,9 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
+ 		 * Check partially_mapped first to ensure it is a large folio.
+ 		 */
+ 		if (folio_test_anon(folio) && partially_mapped &&
+-		    list_empty(&folio->_deferred_list))
+-			deferred_split_folio(folio);
++	    !folio_test_partially_mapped(folio))
++		deferred_split_folio(folio, true);
++
+ 	}
+ 	__folio_mod_stat(folio, -nr, -nr_pmdmapped);
+ 
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 35b67785907b..ca76f7df2d54 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1232,7 +1232,8 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
+ 					 * Split partially mapped folios right away.
+ 					 * We can free the unmapped pages without IO.
+ 					 */
+-					if (data_race(!list_empty(&folio->_deferred_list)) &&
++					if (data_race(!list_empty(&folio->_deferred_list) &&
++					    folio_test_partially_mapped(folio)) &&
+ 					    split_folio_to_list(folio, folio_list))
+ 						goto activate_locked;
+ 				}
+diff --git a/mm/vmstat.c b/mm/vmstat.c
+index e875f2a4915f..6c48f75eefa9 100644
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -1384,6 +1384,7 @@ const char * const vmstat_text[] = {
+ 	"thp_split_page",
+ 	"thp_split_page_failed",
+ 	"thp_deferred_split_page",
++	"thp_underused_split_page",
+ 	"thp_split_pmd",
+ 	"thp_scan_exceed_none_pte",
+ 	"thp_scan_exceed_swap_pte",
+diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
+index e5e8dafc9d94..eb6d1b9fc362 100644
+--- a/tools/testing/selftests/mm/split_huge_page_test.c
++++ b/tools/testing/selftests/mm/split_huge_page_test.c
+@@ -84,6 +84,76 @@ static void write_debugfs(const char *fmt, ...)
+ 	write_file(SPLIT_DEBUGFS, input, ret + 1);
+ }
+ 
++/* Return len bytes of pmd_pagesize-aligned, MADV_HUGEPAGE-advised memory set to zero. */
++static char *allocate_zero_filled_hugepage(size_t len)
++{
++	char *buf = memalign(pmd_pagesize, len);
++	size_t off;
++
++	if (buf == NULL) {
++		printf("Fail to allocate memory\n");
++		exit(EXIT_FAILURE);
++	}
++
++	madvise(buf, len, MADV_HUGEPAGE);
++
++	for (off = 0; off != len; off++)
++		buf[off] = (char)0;
++
++	return buf;
++}
++
++static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hpages, size_t len)
++{
++	unsigned long rss_anon_before, rss_anon_after;
++	size_t i;
++
++	if (!check_huge_anon(one_page, nr_hpages, pmd_pagesize)) {	/* expect nr_hpages THPs before the split */
++		printf("No THP is allocated\n");
++		exit(EXIT_FAILURE);
++	}
++
++	rss_anon_before = rss_anon();
++	if (!rss_anon_before) {
++		printf("No RssAnon is allocated before split\n");
++		exit(EXIT_FAILURE);
++	}
++
++	/* split all THPs in [one_page, one_page + len) through the split debugfs file */
++	write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
++		      (uint64_t)one_page + len, 0);
++
++	for (i = 0; i < len; i++)	/* contents must still read back as zero after the split */
++		if (one_page[i] != (char)0) {
++			printf("%zu byte corrupted\n", i);
++			exit(EXIT_FAILURE);
++		}
++
++	if (!check_huge_anon(one_page, 0, pmd_pagesize)) {	/* expect no THPs left in the range */
++		printf("Still AnonHugePages not split\n");
++		exit(EXIT_FAILURE);
++	}
++
++	rss_anon_after = rss_anon();
++	if (rss_anon_after >= rss_anon_before) {	/* the test requires RssAnon to shrink across the split */
++		printf("Incorrect RssAnon value. Before: %lu After: %lu\n",
++		       rss_anon_before, rss_anon_after);
++		exit(EXIT_FAILURE);
++	}
++}
++
++void split_pmd_zero_pages(void)
++{
++	char *one_page;
++	int nr_hpages = 4;	/* buffer size, in units of pmd_pagesize */
++	size_t len = nr_hpages * pmd_pagesize;
++
++	one_page = allocate_zero_filled_hugepage(len);
++	verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len);	/* exits on any failed check */
++	printf("Split zero filled huge pages successful\n");
++	free(one_page);
++}
++
+ void split_pmd_thp(void)
+ {
+ 	char *one_page;
+@@ -431,6 +501,7 @@ int main(int argc, char **argv)
+ 
+ 	fd_size = 2 * pmd_pagesize;
+ 
++	split_pmd_zero_pages();
+ 	split_pmd_thp();
+ 	split_pte_mapped_thp();
+ 	split_file_backed_thp();
+diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
+index 5a62530da3b5..d8d0cf04bb57 100644
+--- a/tools/testing/selftests/mm/vm_util.c
++++ b/tools/testing/selftests/mm/vm_util.c
+@@ -12,6 +12,7 @@
+ 
+ #define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
+ #define SMAP_FILE_PATH "/proc/self/smaps"
++#define STATUS_FILE_PATH "/proc/self/status"
+ #define MAX_LINE_LENGTH 500
+ 
+ unsigned int __page_size;
+@@ -171,6 +172,27 @@ uint64_t read_pmd_pagesize(void)
+ 	return strtoul(buf, NULL, 10);
+ }
+ 
++unsigned long rss_anon(void)
++{
++	unsigned long rss_anon = 0;	/* stays 0 when check_for_pattern() returns false */
++	FILE *fp;
++	char buffer[MAX_LINE_LENGTH];
++
++	fp = fopen(STATUS_FILE_PATH, "r");	/* /proc/self/status */
++	if (!fp)
++		ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, STATUS_FILE_PATH);
++
++	if (!check_for_pattern(fp, "RssAnon:", buffer, sizeof(buffer)))
++		goto err_out;
++
++	if (sscanf(buffer, "RssAnon:%10lu kB", &rss_anon) != 1)	/* parse the number that follows "RssAnon:" */
++		ksft_exit_fail_msg("Reading status error\n");
++
++err_out:
++	fclose(fp);
++	return rss_anon;	/* value as printed in the status file (the "kB" field) */
++}
++
+ bool __check_huge(void *addr, char *pattern, int nr_hpages,
+ 		  uint64_t hpage_size)
+ {
+diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
+index 9007c420d52c..2eaed8209925 100644
+--- a/tools/testing/selftests/mm/vm_util.h
++++ b/tools/testing/selftests/mm/vm_util.h
+@@ -39,6 +39,7 @@ unsigned long pagemap_get_pfn(int fd, char *start);
+ void clear_softdirty(void);
+ bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len);
+ uint64_t read_pmd_pagesize(void);
++unsigned long rss_anon(void);
+ bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size);
+ bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size);
+ bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size);
+-- 
+2.47.0.rc0
+
+From 6401d4b492055092a6ef1946c026e64ba06cc0ec Mon Sep 17 00:00:00 2001
+From: Eric Naim <dnaim@cachyos.org>
+Date: Tue, 22 Oct 2024 22:53:58 +0800
+Subject: [PATCH 13/13] zstd
+
+Signed-off-by: Eric Naim <dnaim@cachyos.org>
+---
+ include/linux/zstd.h                          |    2 +-
+ include/linux/zstd_errors.h                   |   23 +-
+ include/linux/zstd_lib.h                      |  850 +++++--
+ lib/zstd/Makefile                             |    2 +-
+ lib/zstd/common/allocations.h                 |   56 +
+ lib/zstd/common/bits.h                        |  149 ++
+ lib/zstd/common/bitstream.h                   |  127 +-
+ lib/zstd/common/compiler.h                    |  134 +-
+ lib/zstd/common/cpu.h                         |    3 +-
+ lib/zstd/common/debug.c                       |    9 +-
+ lib/zstd/common/debug.h                       |   34 +-
+ lib/zstd/common/entropy_common.c              |   42 +-
+ lib/zstd/common/error_private.c               |   12 +-
+ lib/zstd/common/error_private.h               |   84 +-
+ lib/zstd/common/fse.h                         |   94 +-
+ lib/zstd/common/fse_decompress.c              |  130 +-
+ lib/zstd/common/huf.h                         |  237 +-
+ lib/zstd/common/mem.h                         |    3 +-
+ lib/zstd/common/portability_macros.h          |   28 +-
+ lib/zstd/common/zstd_common.c                 |   38 +-
+ lib/zstd/common/zstd_deps.h                   |   16 +-
+ lib/zstd/common/zstd_internal.h               |  109 +-
+ lib/zstd/compress/clevels.h                   |    3 +-
+ lib/zstd/compress/fse_compress.c              |   74 +-
+ lib/zstd/compress/hist.c                      |    3 +-
+ lib/zstd/compress/hist.h                      |    3 +-
+ lib/zstd/compress/huf_compress.c              |  441 ++--
+ lib/zstd/compress/zstd_compress.c             | 2111 ++++++++++++-----
+ lib/zstd/compress/zstd_compress_internal.h    |  359 ++-
+ lib/zstd/compress/zstd_compress_literals.c    |  155 +-
+ lib/zstd/compress/zstd_compress_literals.h    |   25 +-
+ lib/zstd/compress/zstd_compress_sequences.c   |    7 +-
+ lib/zstd/compress/zstd_compress_sequences.h   |    3 +-
+ lib/zstd/compress/zstd_compress_superblock.c  |  376 ++-
+ lib/zstd/compress/zstd_compress_superblock.h  |    3 +-
+ lib/zstd/compress/zstd_cwksp.h                |  169 +-
+ lib/zstd/compress/zstd_double_fast.c          |  143 +-
+ lib/zstd/compress/zstd_double_fast.h          |   17 +-
+ lib/zstd/compress/zstd_fast.c                 |  596 +++--
+ lib/zstd/compress/zstd_fast.h                 |    6 +-
+ lib/zstd/compress/zstd_lazy.c                 |  732 +++---
+ lib/zstd/compress/zstd_lazy.h                 |  138 +-
+ lib/zstd/compress/zstd_ldm.c                  |   21 +-
+ lib/zstd/compress/zstd_ldm.h                  |    3 +-
+ lib/zstd/compress/zstd_ldm_geartab.h          |    3 +-
+ lib/zstd/compress/zstd_opt.c                  |  497 ++--
+ lib/zstd/compress/zstd_opt.h                  |   41 +-
+ lib/zstd/decompress/huf_decompress.c          |  887 ++++---
+ lib/zstd/decompress/zstd_ddict.c              |    9 +-
+ lib/zstd/decompress/zstd_ddict.h              |    3 +-
+ lib/zstd/decompress/zstd_decompress.c         |  358 ++-
+ lib/zstd/decompress/zstd_decompress_block.c   |  708 +++---
+ lib/zstd/decompress/zstd_decompress_block.h   |   10 +-
+ .../decompress/zstd_decompress_internal.h     |    9 +-
+ lib/zstd/decompress_sources.h                 |    2 +-
+ lib/zstd/zstd_common_module.c                 |    5 +-
+ lib/zstd/zstd_compress_module.c               |    2 +-
+ lib/zstd/zstd_decompress_module.c             |    4 +-
+ 58 files changed, 6577 insertions(+), 3531 deletions(-)
+ create mode 100644 lib/zstd/common/allocations.h
+ create mode 100644 lib/zstd/common/bits.h
+
+diff --git a/include/linux/zstd.h b/include/linux/zstd.h
+index 113408eef6ec..f109d49f43f8 100644
+--- a/include/linux/zstd.h
++++ b/include/linux/zstd.h
+@@ -1,6 +1,6 @@
+ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+diff --git a/include/linux/zstd_errors.h b/include/linux/zstd_errors.h
+index 58b6dd45a969..6d5cf55f0bf3 100644
+--- a/include/linux/zstd_errors.h
++++ b/include/linux/zstd_errors.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -17,8 +18,17 @@
+ 
+ 
+ /* =====   ZSTDERRORLIB_API : control library symbols visibility   ===== */
+-#define ZSTDERRORLIB_VISIBILITY 
+-#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
++#define ZSTDERRORLIB_VISIBLE 
++
++#ifndef ZSTDERRORLIB_HIDDEN
++#  if (__GNUC__ >= 4) && !defined(__MINGW32__)
++#    define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden")))
++#  else
++#    define ZSTDERRORLIB_HIDDEN
++#  endif
++#endif
++
++#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE
+ 
+ /*-*********************************************
+  *  Error codes list
+@@ -43,14 +53,17 @@ typedef enum {
+   ZSTD_error_frameParameter_windowTooLarge = 16,
+   ZSTD_error_corruption_detected = 20,
+   ZSTD_error_checksum_wrong      = 22,
++  ZSTD_error_literals_headerWrong = 24,
+   ZSTD_error_dictionary_corrupted      = 30,
+   ZSTD_error_dictionary_wrong          = 32,
+   ZSTD_error_dictionaryCreation_failed = 34,
+   ZSTD_error_parameter_unsupported   = 40,
++  ZSTD_error_parameter_combination_unsupported = 41,
+   ZSTD_error_parameter_outOfBound    = 42,
+   ZSTD_error_tableLog_tooLarge       = 44,
+   ZSTD_error_maxSymbolValue_tooLarge = 46,
+   ZSTD_error_maxSymbolValue_tooSmall = 48,
++  ZSTD_error_stabilityCondition_notRespected = 50,
+   ZSTD_error_stage_wrong       = 60,
+   ZSTD_error_init_missing      = 62,
+   ZSTD_error_memory_allocation = 64,
+@@ -58,11 +71,15 @@ typedef enum {
+   ZSTD_error_dstSize_tooSmall = 70,
+   ZSTD_error_srcSize_wrong    = 72,
+   ZSTD_error_dstBuffer_null   = 74,
++  ZSTD_error_noForwardProgress_destFull = 80,
++  ZSTD_error_noForwardProgress_inputEmpty = 82,
+   /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
+   ZSTD_error_frameIndex_tooLarge = 100,
+   ZSTD_error_seekableIO          = 102,
+   ZSTD_error_dstBuffer_wrong     = 104,
+   ZSTD_error_srcBuffer_wrong     = 105,
++  ZSTD_error_sequenceProducer_failed = 106,
++  ZSTD_error_externalSequences_invalid = 107,
+   ZSTD_error_maxCode = 120  /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
+ } ZSTD_ErrorCode;
+ 
+diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h
+index 79d55465d5c1..6320fedcf8a4 100644
+--- a/include/linux/zstd_lib.h
++++ b/include/linux/zstd_lib.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -11,23 +12,42 @@
+ #ifndef ZSTD_H_235446
+ #define ZSTD_H_235446
+ 
+-/* ======   Dependency   ======*/
++/* ======   Dependencies   ======*/
+ #include <linux/limits.h>   /* INT_MAX */
+ #include <linux/types.h>   /* size_t */
+ 
+ 
+ /* =====   ZSTDLIB_API : control library symbols visibility   ===== */
+-#ifndef ZSTDLIB_VISIBLE
++#define ZSTDLIB_VISIBLE 
++
++#ifndef ZSTDLIB_HIDDEN
+ #  if (__GNUC__ >= 4) && !defined(__MINGW32__)
+-#    define ZSTDLIB_VISIBLE __attribute__ ((visibility ("default")))
+ #    define ZSTDLIB_HIDDEN __attribute__ ((visibility ("hidden")))
+ #  else
+-#    define ZSTDLIB_VISIBLE
+ #    define ZSTDLIB_HIDDEN
+ #  endif
+ #endif
++
+ #define ZSTDLIB_API ZSTDLIB_VISIBLE
+ 
++/* Deprecation warnings :
++ * Should these warnings be a problem, it is generally possible to disable them,
++ * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual.
++ * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS.
++ */
++#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS
++#  define ZSTD_DEPRECATED(message) /* disable deprecation warnings */
++#else
++#  if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__)
++#    define ZSTD_DEPRECATED(message) __attribute__((deprecated(message)))
++#  elif (__GNUC__ >= 3)   /* older GCC: use the attribute without a message */
++#    define ZSTD_DEPRECATED(message) __attribute__((deprecated))
++#  else
++#    pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler")
++#    define ZSTD_DEPRECATED(message)
++#  endif
++#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */
++
+ 
+ /* *****************************************************************************
+   Introduction
+@@ -65,7 +85,7 @@
+ /*------   Version   ------*/
+ #define ZSTD_VERSION_MAJOR    1
+ #define ZSTD_VERSION_MINOR    5
+-#define ZSTD_VERSION_RELEASE  2
++#define ZSTD_VERSION_RELEASE  6
+ #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+ 
+ /*! ZSTD_versionNumber() :
+@@ -107,7 +127,8 @@ ZSTDLIB_API const char* ZSTD_versionString(void);
+ ***************************************/
+ /*! ZSTD_compress() :
+  *  Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
+- *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
++ *  NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have
++ *        enough space to successfully compress the data.
+  *  @return : compressed size written into `dst` (<= `dstCapacity),
+  *            or an error code if it fails (which can be tested using ZSTD_isError()). */
+ ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
+@@ -156,7 +177,9 @@ ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t
+  *  "empty", "unknown" and "error" results to the same return value (0),
+  *  while ZSTD_getFrameContentSize() gives them separate return values.
+  * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
+-ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
++ZSTD_DEPRECATED("Replaced by ZSTD_getFrameContentSize")
++ZSTDLIB_API
++unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
+ 
+ /*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+
+  * `src` should point to the start of a ZSTD frame or skippable frame.
+@@ -168,8 +191,30 @@ ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
+ 
+ 
+ /*======  Helper functions  ======*/
+-#define ZSTD_COMPRESSBOUND(srcSize)   ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
+-ZSTDLIB_API size_t      ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
++/* ZSTD_compressBound() :
++ * maximum compressed size in worst case single-pass scenario.
++ * When invoking `ZSTD_compress()` or any other one-pass compression function,
++ * it's recommended to provide @dstCapacity >= ZSTD_compressBound(srcSize)
++ * as it eliminates one potential failure scenario,
++ * aka not enough room in dst buffer to write the compressed frame.
++ * Note : ZSTD_compressBound() itself can fail, if @srcSize > ZSTD_MAX_INPUT_SIZE .
++ *        In which case, ZSTD_compressBound() will return an error code
++ *        which can be tested using ZSTD_isError().
++ *
++ * ZSTD_COMPRESSBOUND() :
++ * same as ZSTD_compressBound(), but as a macro.
++ * It can be used to produce constants, which can be useful for static allocation,
++ * for example to size a static array on stack.
++ * Will produce constant value 0 if srcSize too large.
++ */
++#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U)
++#define ZSTD_COMPRESSBOUND(srcSize)   (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
++ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
++/* ZSTD_isError() :
++ * Most ZSTD_* functions returning a size_t value can be tested for error,
++ * using ZSTD_isError().
++ * @return 1 if error, 0 otherwise
++ */
+ ZSTDLIB_API unsigned    ZSTD_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
+ ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
+ ZSTDLIB_API int         ZSTD_minCLevel(void);               /*!< minimum negative compression level allowed, requires v1.4.0+ */
+@@ -183,7 +228,7 @@ ZSTDLIB_API int         ZSTD_defaultCLevel(void);           /*!< default compres
+ /*= Compression context
+  *  When compressing many times,
+  *  it is recommended to allocate a context just once,
+- *  and re-use it for each successive compression operation.
++ *  and reuse it for each successive compression operation.
+  *  This will make workload friendlier for system's memory.
+  *  Note : re-using context is just a speed / resource optimization.
+  *         It doesn't change the compression ratio, which remains identical.
+@@ -196,9 +241,9 @@ ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);  /* accept NULL pointer *
+ 
+ /*! ZSTD_compressCCtx() :
+  *  Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
+- *  Important : in order to behave similarly to `ZSTD_compress()`,
+- *  this function compresses at requested compression level,
+- *  __ignoring any other parameter__ .
++ *  Important : in order to mirror `ZSTD_compress()` behavior,
++ *  this function compresses at the requested compression level,
++ *  __ignoring any other advanced parameter__ .
+  *  If any advanced parameter was set using the advanced API,
+  *  they will all be reset. Only `compressionLevel` remains.
+  */
+@@ -210,7 +255,7 @@ ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
+ /*= Decompression context
+  *  When decompressing many times,
+  *  it is recommended to allocate a context only once,
+- *  and re-use it for each successive compression operation.
++ *  and reuse it for each successive decompression operation.
+  *  This will make workload friendlier for system's memory.
+  *  Use one context per thread for parallel execution. */
+ typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+@@ -220,7 +265,7 @@ ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);  /* accept NULL pointer *
+ /*! ZSTD_decompressDCtx() :
+  *  Same as ZSTD_decompress(),
+  *  requires an allocated ZSTD_DCtx.
+- *  Compatible with sticky parameters.
++ *  Compatible with sticky parameters (see below).
+  */
+ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
+                                        void* dst, size_t dstCapacity,
+@@ -236,12 +281,12 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
+  *   using ZSTD_CCtx_set*() functions.
+  *   Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
+  *   "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
+- *   __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
++ *   __They do not apply to one-shot variants such as ZSTD_compressCCtx()__ .
+  *
+  *   It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
+  *
+  *   This API supersedes all other "advanced" API entry points in the experimental section.
+- *   In the future, we expect to remove from experimental API entry points which are redundant with this API.
++ *   In the future, we expect to remove API entry points from experimental which are redundant with this API.
+  */
+ 
+ 
+@@ -324,6 +369,19 @@ typedef enum {
+                               * The higher the value of selected strategy, the more complex it is,
+                               * resulting in stronger and slower compression.
+                               * Special: value 0 means "use default strategy". */
++
++    ZSTD_c_targetCBlockSize=130, /* v1.5.6+
++                                  * Attempts to fit compressed block size into approximately targetCBlockSize.
++                                  * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX.
++                                  * Note that it's not a guarantee, just a convergence target (default:0).
++                                  * No target when targetCBlockSize == 0.
++                                  * This is helpful in low bandwidth streaming environments to improve end-to-end latency,
++                                  * when a client can make use of partial documents (a prominent example being Chrome).
++                                  * Note: this parameter is stable since v1.5.6.
++                                  * It was present as an experimental parameter in earlier versions,
++                                  * but it's not recommended using it with earlier library versions
++                                  * due to massive performance regressions.
++                                  */
+     /* LDM mode parameters */
+     ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
+                                      * This parameter is designed to improve compression ratio
+@@ -403,7 +461,6 @@ typedef enum {
+      * ZSTD_c_forceMaxWindow
+      * ZSTD_c_forceAttachDict
+      * ZSTD_c_literalCompressionMode
+-     * ZSTD_c_targetCBlockSize
+      * ZSTD_c_srcSizeHint
+      * ZSTD_c_enableDedicatedDictSearch
+      * ZSTD_c_stableInBuffer
+@@ -412,6 +469,9 @@ typedef enum {
+      * ZSTD_c_validateSequences
+      * ZSTD_c_useBlockSplitter
+      * ZSTD_c_useRowMatchFinder
++     * ZSTD_c_prefetchCDictTables
++     * ZSTD_c_enableSeqProducerFallback
++     * ZSTD_c_maxBlockSize
+      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
+      * note : never ever use experimentalParam? names directly;
+      *        also, the enums values themselves are unstable and can still change.
+@@ -421,7 +481,7 @@ typedef enum {
+      ZSTD_c_experimentalParam3=1000,
+      ZSTD_c_experimentalParam4=1001,
+      ZSTD_c_experimentalParam5=1002,
+-     ZSTD_c_experimentalParam6=1003,
++     /* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */
+      ZSTD_c_experimentalParam7=1004,
+      ZSTD_c_experimentalParam8=1005,
+      ZSTD_c_experimentalParam9=1006,
+@@ -430,7 +490,11 @@ typedef enum {
+      ZSTD_c_experimentalParam12=1009,
+      ZSTD_c_experimentalParam13=1010,
+      ZSTD_c_experimentalParam14=1011,
+-     ZSTD_c_experimentalParam15=1012
++     ZSTD_c_experimentalParam15=1012,
++     ZSTD_c_experimentalParam16=1013,
++     ZSTD_c_experimentalParam17=1014,
++     ZSTD_c_experimentalParam18=1015,
++     ZSTD_c_experimentalParam19=1016
+ } ZSTD_cParameter;
+ 
+ typedef struct {
+@@ -493,7 +557,7 @@ typedef enum {
+  *                  They will be used to compress next frame.
+  *                  Resetting session never fails.
+  *  - The parameters : changes all parameters back to "default".
+- *                  This removes any reference to any dictionary too.
++ *                  This also removes any reference to any dictionary or external sequence producer.
+  *                  Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
+  *                  otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
+  *  - Both : similar to resetting the session, followed by resetting parameters.
+@@ -502,11 +566,13 @@ ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
+ 
+ /*! ZSTD_compress2() :
+  *  Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
++ *  (note that this entry point doesn't even expose a compression level parameter).
+  *  ZSTD_compress2() always starts a new frame.
+  *  Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
+  *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+  *  - The function is always blocking, returns when compression is completed.
+- *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
++ *  NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have
++ *        enough space to successfully compress the data, though it is possible it fails for other reasons.
+  * @return : compressed size written into `dst` (<= `dstCapacity),
+  *           or an error code if it fails (which can be tested using ZSTD_isError()).
+  */
+@@ -543,13 +609,17 @@ typedef enum {
+      * ZSTD_d_stableOutBuffer
+      * ZSTD_d_forceIgnoreChecksum
+      * ZSTD_d_refMultipleDDicts
++     * ZSTD_d_disableHuffmanAssembly
++     * ZSTD_d_maxBlockSize
+      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
+      * note : never ever use experimentalParam? names directly
+      */
+      ZSTD_d_experimentalParam1=1000,
+      ZSTD_d_experimentalParam2=1001,
+      ZSTD_d_experimentalParam3=1002,
+-     ZSTD_d_experimentalParam4=1003
++     ZSTD_d_experimentalParam4=1003,
++     ZSTD_d_experimentalParam5=1004,
++     ZSTD_d_experimentalParam6=1005
+ 
+ } ZSTD_dParameter;
+ 
+@@ -604,14 +674,14 @@ typedef struct ZSTD_outBuffer_s {
+ *  A ZSTD_CStream object is required to track streaming operation.
+ *  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
+ *  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
+-*  It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
++*  It is recommended to reuse ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
+ *
+ *  For parallel execution, use one separate ZSTD_CStream per thread.
+ *
+ *  note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
+ *
+ *  Parameters are sticky : when starting a new compression on the same context,
+-*  it will re-use the same sticky parameters as previous compression session.
++*  it will reuse the same sticky parameters as previous compression session.
+ *  When in doubt, it's recommended to fully initialize the context before usage.
+ *  Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
+ *  ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
+@@ -700,6 +770,11 @@ typedef enum {
+  *            only ZSTD_e_end or ZSTD_e_flush operations are allowed.
+  *            Before starting a new compression job, or changing compression parameters,
+  *            it is required to fully flush internal buffers.
++ *  - note: if an operation ends with an error, it may leave @cctx in an undefined state.
++ *          Therefore, it's UB to invoke ZSTD_compressStream2() or ZSTD_compressStream() on such a state.
++ *          In order to be re-employed after an error, a state must be reset,
++ *          which can be done explicitly (ZSTD_CCtx_reset()),
++ *          or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx())
+  */
+ ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
+                                          ZSTD_outBuffer* output,
+@@ -728,8 +803,6 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /*< recommended size for output
+  * This following is a legacy streaming API, available since v1.0+ .
+  * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
+  * It is redundant, but remains fully supported.
+- * Streaming in combination with advanced parameters and dictionary compression
+- * can only be used through the new API.
+  ******************************************************************************/
+ 
+ /*!
+@@ -738,6 +811,9 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /*< recommended size for output
+  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+  *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+  *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
++ *
++ * Note that ZSTD_initCStream() clears any previously set dictionary. Use the new API
++ * to compress with a dictionary.
+  */
+ ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
+ /*!
+@@ -758,7 +834,7 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+ *
+ *  A ZSTD_DStream object is required to track streaming operations.
+ *  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
+-*  ZSTD_DStream objects can be re-used multiple times.
++*  ZSTD_DStream objects can be reused multiple times.
+ *
+ *  Use ZSTD_initDStream() to start a new decompression operation.
+ * @return : recommended first input size
+@@ -788,13 +864,37 @@ ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);  /* accept NULL pointer
+ 
+ /*===== Streaming decompression functions =====*/
+ 
+-/* This function is redundant with the advanced API and equivalent to:
++/*! ZSTD_initDStream() :
++ * Initialize/reset DStream state for new decompression operation.
++ * Call before new decompression operation using same DStream.
+  *
++ * Note : This function is redundant with the advanced API and equivalent to:
+  *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+  *     ZSTD_DCtx_refDDict(zds, NULL);
+  */
+ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
+ 
++/*! ZSTD_decompressStream() :
++ * Streaming decompression function.
++ * Call repetitively to consume full input updating it as necessary.
++ * Function will update both input and output `pos` fields exposing current state via these fields:
++ * - `input.pos < input.size`, some input remaining and caller should provide remaining input
++ *   on the next call.
++ * - `output.pos < output.size`, decoder finished and flushed all remaining buffers.
++ * - `output.pos == output.size`, potentially unflushed data present in the internal buffers,
++ *   call ZSTD_decompressStream() again to flush remaining data to output.
++ * Note : with no additional input, amount of data flushed <= ZSTD_BLOCKSIZE_MAX.
++ *
++ * @return : 0 when a frame is completely decoded and fully flushed,
++ *           or an error code, which can be tested using ZSTD_isError(),
++ *           or any other value > 0, which means there is some decoding or flushing to do to complete current frame.
++ *
++ * Note: when an operation returns with an error code, the @zds state may be left in undefined state.
++ *       It's UB to invoke `ZSTD_decompressStream()` on such a state.
++ *       In order to re-use such a state, it must be first reset,
++ *       which can be done explicitly (`ZSTD_DCtx_reset()`),
++ *       or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`)
++ */
+ ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+ 
+ ZSTDLIB_API size_t ZSTD_DStreamInSize(void);    /*!< recommended size for input buffer */
+@@ -913,7 +1013,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+  *  If @return == 0, the dictID could not be decoded.
+  *  This could for one of the following reasons :
+  *  - The frame does not require a dictionary to be decoded (most common case).
+- *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
++ *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden piece of information.
+  *    Note : this use case also happens when using a non-conformant dictionary.
+  *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+  *  - This is not a Zstandard frame.
+@@ -925,9 +1025,11 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
+  * Advanced dictionary and prefix API (Requires v1.4.0+)
+  *
+  * This API allows dictionaries to be used with ZSTD_compress2(),
+- * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). Dictionaries are sticky, and
+- * only reset with the context is reset with ZSTD_reset_parameters or
+- * ZSTD_reset_session_and_parameters. Prefixes are single-use.
++ * ZSTD_compressStream2(), and ZSTD_decompressDCtx().
++ * Dictionaries are sticky: they remain valid when the same context is reused,
++ * and are only reset when the context is reset
++ * with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters.
++ * In contrast, prefixes are single-use.
+  ******************************************************************************/
+ 
+ 
+@@ -937,8 +1039,9 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
+  * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+  *  Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
+  *           meaning "return to no-dictionary mode".
+- *  Note 1 : Dictionary is sticky, it will be used for all future compressed frames.
+- *           To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters).
++ *  Note 1 : Dictionary is sticky, it will be used for all future compressed frames,
++ *           until parameters are reset, a new dictionary is loaded, or the dictionary
++ *           is explicitly invalidated by loading a NULL dictionary.
+  *  Note 2 : Loading a dictionary involves building tables.
+  *           It's also a CPU consuming operation, with non-negligible impact on latency.
+  *           Tables are dependent on compression parameters, and for this reason,
+@@ -947,11 +1050,15 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
+  *           Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
+  *           In such a case, dictionary buffer must outlive its users.
+  *  Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
+- *           to precisely select how dictionary content must be interpreted. */
++ *           to precisely select how dictionary content must be interpreted.
++ *  Note 5 : This method does not benefit from LDM (long distance mode).
++ *           If you want to employ LDM on some large dictionary content,
++ *           prefer employing ZSTD_CCtx_refPrefix() described below.
++ */
+ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+ 
+ /*! ZSTD_CCtx_refCDict() : Requires v1.4.0+
+- *  Reference a prepared dictionary, to be used for all next compressed frames.
++ *  Reference a prepared dictionary, to be used for all future compressed frames.
+  *  Note that compression parameters are enforced from within CDict,
+  *  and supersede any compression parameter previously set within CCtx.
+  *  The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
+@@ -970,6 +1077,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
+  *  Decompression will need same prefix to properly regenerate data.
+  *  Compressing with a prefix is similar in outcome as performing a diff and compressing it,
+  *  but performs much faster, especially during decompression (compression speed is tunable with compression level).
++ *  This method is compatible with LDM (long distance mode).
+  * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+  *  Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
+  *  Note 1 : Prefix buffer is referenced. It **must** outlive compression.
+@@ -986,9 +1094,9 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
+                                  const void* prefix, size_t prefixSize);
+ 
+ /*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+
+- *  Create an internal DDict from dict buffer,
+- *  to be used to decompress next frames.
+- *  The dictionary remains valid for all future frames, until explicitly invalidated.
++ *  Create an internal DDict from dict buffer, to be used to decompress all future frames.
++ *  The dictionary remains valid for all future frames, until explicitly invalidated, or
++ *  a new dictionary is loaded.
+  * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+  *  Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
+  *            meaning "return to no-dictionary mode".
+@@ -1012,9 +1120,10 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, s
+  *  The memory for the table is allocated on the first call to refDDict, and can be
+  *  freed with ZSTD_freeDCtx().
+  *
++ *  If called with ZSTD_d_refMultipleDDicts disabled (the default), only one dictionary
++ *  will be managed, and referencing a dictionary effectively "discards" any previous one.
++ *
+  * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+- *  Note 1 : Currently, only one dictionary can be managed.
+- *           Referencing a new dictionary effectively "discards" any previous one.
+  *  Special: referencing a NULL DDict means "return to no-dictionary mode".
+  *  Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
+  */
+@@ -1071,24 +1180,6 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+ #define ZSTDLIB_STATIC_API ZSTDLIB_VISIBLE
+ #endif
+ 
+-/* Deprecation warnings :
+- * Should these warnings be a problem, it is generally possible to disable them,
+- * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual.
+- * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS.
+- */
+-#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS
+-#  define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API  /* disable deprecation warnings */
+-#else
+-#  if (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__)
+-#    define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated(message)))
+-#  elif (__GNUC__ >= 3)
+-#    define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated))
+-#  else
+-#    pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler")
+-#    define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API
+-#  endif
+-#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */
+-
+ /* **************************************************************************************
+  *   experimental API (static linking only)
+  ****************************************************************************************
+@@ -1123,6 +1214,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+ #define ZSTD_TARGETLENGTH_MIN     0   /* note : comparing this constant to an unsigned results in a tautological test */
+ #define ZSTD_STRATEGY_MIN        ZSTD_fast
+ #define ZSTD_STRATEGY_MAX        ZSTD_btultra2
++#define ZSTD_BLOCKSIZE_MAX_MIN (1 << 10) /* The minimum valid max blocksize. Maximum blocksizes smaller than this make compressBound() inaccurate. */
+ 
+ 
+ #define ZSTD_OVERLAPLOG_MIN       0
+@@ -1146,7 +1238,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+ #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
+ 
+ /* Advanced parameter bounds */
+-#define ZSTD_TARGETCBLOCKSIZE_MIN   64
++#define ZSTD_TARGETCBLOCKSIZE_MIN   1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */
+ #define ZSTD_TARGETCBLOCKSIZE_MAX   ZSTD_BLOCKSIZE_MAX
+ #define ZSTD_SRCSIZEHINT_MIN        0
+ #define ZSTD_SRCSIZEHINT_MAX        INT_MAX
+@@ -1303,7 +1395,7 @@ typedef enum {
+ } ZSTD_paramSwitch_e;
+ 
+ /* *************************************
+-*  Frame size functions
++*  Frame header and size functions
+ ***************************************/
+ 
+ /*! ZSTD_findDecompressedSize() :
+@@ -1350,29 +1442,122 @@ ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size
+  *           or an error code (if srcSize is too small) */
+ ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+ 
++typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
++typedef struct {
++    unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
++    unsigned long long windowSize;       /* can be very large, up to frameContentSize */
++    unsigned blockSizeMax;
++    ZSTD_frameType_e frameType;          /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
++    unsigned headerSize;
++    unsigned dictID;
++    unsigned checksumFlag;
++    unsigned _reserved1;
++    unsigned _reserved2;
++} ZSTD_frameHeader;
++
++/*! ZSTD_getFrameHeader() :
++ *  Decode the frame header found at `src`, or report that a larger `srcSize` is required.
++ * @return : 0, `zfhPtr` is correctly filled,
++ *          >0, `srcSize` is too small, the value is the `srcSize` amount wanted,
++ *           or an error code, which can be tested using ZSTD_isError() */
++ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   /*!< doesn't consume input */
++/*! ZSTD_getFrameHeader_advanced() :
++ *  same as ZSTD_getFrameHeader(),
++ *  with added capability to select a format (like ZSTD_f_zstd1_magicless) */
++ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
++
++/*! ZSTD_decompressionMargin() :
++ * Zstd supports in-place decompression, where the input and output buffers overlap.
++ * In this case, the output buffer must be at least (Margin + Output_Size) bytes large,
++ * and the input buffer must be at the end of the output buffer.
++ *
++ *  _______________________ Output Buffer ________________________
++ * |                                                              |
++ * |                                        ____ Input Buffer ____|
++ * |                                       |                      |
++ * v                                       v                      v
++ * |---------------------------------------|-----------|----------|
++ * ^                                                   ^          ^
++ * |___________________ Output_Size ___________________|_ Margin _|
++ *
++ * NOTE: See also ZSTD_DECOMPRESSION_MARGIN().
++ * NOTE: This applies only to single-pass decompression through ZSTD_decompress() or
++ * ZSTD_decompressDCtx().
++ * NOTE: This function supports multi-frame input.
++ *
++ * @param src The compressed frame(s)
++ * @param srcSize The size of the compressed frame(s)
++ * @returns The decompression margin or an error that can be checked with ZSTD_isError().
++ */
++ZSTDLIB_STATIC_API size_t ZSTD_decompressionMargin(const void* src, size_t srcSize);
++
++/*! ZSTD_DECOMPRESSION_MARGIN() :
++ * Similar to ZSTD_decompressionMargin(), but instead of computing the margin from
++ * the compressed frame, compute it from the original size and the block size.
++ * See ZSTD_decompressionMargin() for details.
++ *
++ * WARNING: This macro does not support multi-frame input, the input must be a single
++ * zstd frame. If you need that support use the function, or implement it yourself.
++ * NOTE: Both arguments are evaluated more than once; avoid expressions with side effects.
++ * @param originalSize The original uncompressed size of the data.
++ * @param blockSize    The block size == MIN(windowSize, ZSTD_BLOCKSIZE_MAX).
++ *                     Unless you explicitly set the windowLog smaller than
++ *                     ZSTD_BLOCKSIZELOG_MAX you can just use ZSTD_BLOCKSIZE_MAX.
++ */
++#define ZSTD_DECOMPRESSION_MARGIN(originalSize, blockSize) ((size_t)(                                                \
++        ZSTD_FRAMEHEADERSIZE_MAX                                                                /* Frame header */ + \
++        4                                                                                           /* checksum */ + \
++        ((originalSize) == 0 ? 0 : 3 * (((originalSize) + (blockSize) - 1) / (blockSize))) /* 3 bytes per block */ + \
++        (blockSize)                                                                      /* One block of margin */   \
++    ))
++
+ typedef enum {
+   ZSTD_sf_noBlockDelimiters = 0,         /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
+   ZSTD_sf_explicitBlockDelimiters = 1    /* Representation of ZSTD_Sequence contains explicit block delimiters */
+ } ZSTD_sequenceFormat_e;
+ 
++/*! ZSTD_sequenceBound() :
++ * `srcSize` : size of the input buffer
++ *  @return : upper-bound for the number of sequences that can be generated
++ *            from a buffer of srcSize bytes
++ *
++ *  note : returns number of sequences - to get bytes, multiply by sizeof(ZSTD_Sequence).
++ */
++ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize);
++
+ /*! ZSTD_generateSequences() :
+- * Generate sequences using ZSTD_compress2, given a source buffer.
++ * WARNING: This function is meant for debugging and informational purposes ONLY!
++ * Its implementation is flawed, and it will be deleted in a future version.
++ * It is not guaranteed to succeed, as there are several cases where it will give
++ * up and fail. You should NOT use this function in production code.
++ *
++ * This function is deprecated, and will be removed in a future version.
++ *
++ * Generate sequences using ZSTD_compress2(), given a source buffer.
++ *
++ * @param zc The compression context to be used for ZSTD_compress2(). Set any
++ *           compression parameters you need on this context.
++ * @param outSeqs The output sequences buffer of size @p outSeqsSize
++ * @param outSeqsSize The size of the output sequences buffer.
++ *                    ZSTD_sequenceBound(srcSize) is an upper bound on the number
++ *                    of sequences that can be generated.
++ * @param src The source buffer to generate sequences from of size @p srcSize.
++ * @param srcSize The size of the source buffer.
+  *
+  * Each block will end with a dummy sequence
+  * with offset == 0, matchLength == 0, and litLength == length of last literals.
+  * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0)
+  * simply acts as a block delimiter.
+  *
+- * zc can be used to insert custom compression params.
+- * This function invokes ZSTD_compress2
+- *
+- * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+- * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
+- * @return : number of sequences generated
++ * @returns The number of sequences generated, necessarily less than
++ *          ZSTD_sequenceBound(srcSize), or an error code that can be checked
++ *          with ZSTD_isError().
+  */
+-
+-ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+-                                          size_t outSeqsSize, const void* src, size_t srcSize);
++ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()")
++ZSTDLIB_STATIC_API size_t
++ZSTD_generateSequences(ZSTD_CCtx* zc,
++                       ZSTD_Sequence* outSeqs, size_t outSeqsSize,
++                       const void* src, size_t srcSize);
+ 
+ /*! ZSTD_mergeBlockDelimiters() :
+  * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
+@@ -1388,7 +1573,9 @@ ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* o
+ ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
+ 
+ /*! ZSTD_compressSequences() :
+- * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst.
++ * Compress an array of ZSTD_Sequence, associated with @src buffer, into dst.
++ * @src contains the entire input (not just the literals).
++ * If @srcSize > sum(sequence.length), the remaining bytes are considered all literals.
+  * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.)
+  * The entire source is compressed into a single frame.
+  *
+@@ -1413,11 +1600,12 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, si
+  * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
+  * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
+  *         and cannot emit an RLE block that disagrees with the repcode history
+- * @return : final compressed size or a ZSTD error.
++ * @return : final compressed size, or a ZSTD error code.
+  */
+-ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
+-                                  const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+-                                  const void* src, size_t srcSize);
++ZSTDLIB_STATIC_API size_t
++ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize,
++                        const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
++                        const void* src, size_t srcSize);
+ 
+ 
+ /*! ZSTD_writeSkippableFrame() :
+@@ -1464,48 +1652,59 @@ ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size);
+ /*! ZSTD_estimate*() :
+  *  These functions make it possible to estimate memory usage
+  *  of a future {D,C}Ctx, before its creation.
++ *  This is useful in combination with ZSTD_initStatic(),
++ *  which makes it possible to employ a static buffer for ZSTD_CCtx* state.
+  *
+  *  ZSTD_estimateCCtxSize() will provide a memory budget large enough
+- *  for any compression level up to selected one.
+- *  Note : Unlike ZSTD_estimateCStreamSize*(), this estimate
+- *         does not include space for a window buffer.
+- *         Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
++ *  to compress data of any size using one-shot compression ZSTD_compressCCtx() or ZSTD_compress2()
++ *  associated with any compression level up to max specified one.
+  *  The estimate will assume the input may be arbitrarily large,
+  *  which is the worst case.
+  *
++ *  Note that the size estimation is specific for one-shot compression,
++ *  it is not valid for streaming (see ZSTD_estimateCStreamSize*())
++ *  nor other potential ways of using a ZSTD_CCtx* state.
++ *
+  *  When srcSize can be bound by a known and rather "small" value,
+- *  this fact can be used to provide a tighter estimation
+- *  because the CCtx compression context will need less memory.
+- *  This tighter estimation can be provided by more advanced functions
++ *  this knowledge can be used to provide a tighter budget estimation
++ *  because the ZSTD_CCtx* state will need less memory for small inputs.
++ *  This tighter estimation can be provided by employing more advanced functions
+  *  ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
+  *  and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
+  *  Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
+  *
+- *  Note 2 : only single-threaded compression is supported.
++ *  Note : only single-threaded compression is supported.
+  *  ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
+  */
+-ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
++ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel);
+ ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
+ ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void);
+ 
+ /*! ZSTD_estimateCStreamSize() :
+- *  ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one.
+- *  It will also consider src size to be arbitrarily "large", which is worst case.
++ *  ZSTD_estimateCStreamSize() will provide a memory budget large enough for streaming compression
++ *  using any compression level up to the max specified one.
++ *  It will also consider src size to be arbitrarily "large", which is a worst case scenario.
+  *  If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
+  *  ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
+  *  ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
+  *  Note : CStream size estimation is only correct for single-threaded compression.
+- *  ZSTD_DStream memory budget depends on window Size.
++ *  ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
++ *  Note 2 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
++ *  Size estimates assume that no external sequence producer is registered.
++ *
++ *  ZSTD_DStream memory budget depends on frame's window Size.
+  *  This information can be passed manually, using ZSTD_estimateDStreamSize,
+  *  or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
++ *  Any frame requesting a window size larger than max specified one will be rejected.
+  *  Note : if streaming is init with function ZSTD_init?Stream_usingDict(),
+  *         an internal ?Dict will be created, which additional size is not estimated here.
+- *         In this case, get total size by adding ZSTD_estimate?DictSize */
+-ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
++ *         In this case, get total size by adding ZSTD_estimate?DictSize
++ */
++ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int maxCompressionLevel);
+ ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
+ ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+-ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
++ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t maxWindowSize);
+ ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
+ 
+ /*! ZSTD_estimate?DictSize() :
+@@ -1649,22 +1848,45 @@ ZSTDLIB_STATIC_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
+  *  This function never fails (wide contract) */
+ ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
+ 
++/*! ZSTD_CCtx_setCParams() :
++ *  Set all parameters provided within @p cparams into the working @p cctx.
++ *  Note : if modifying parameters during compression (MT mode only),
++ *         note that changes to the .windowLog parameter will be ignored.
++ * @return 0 on success, or an error code (can be checked with ZSTD_isError()).
++ *         On failure, no parameters are updated.
++ */
++ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams);
++
++/*! ZSTD_CCtx_setFParams() :
++ *  Set all parameters provided within @p fparams into the working @p cctx.
++ * @return 0 on success, or an error code (can be checked with ZSTD_isError()).
++ */
++ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams);
++
++/*! ZSTD_CCtx_setParams() :
++ *  Set all parameters provided within @p params into the working @p cctx.
++ * @return 0 on success, or an error code (can be checked with ZSTD_isError()).
++ */
++ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params);
++
+ /*! ZSTD_compress_advanced() :
+  *  Note : this function is now DEPRECATED.
+  *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
+  *  This prototype will generate compilation warnings. */
+ ZSTD_DEPRECATED("use ZSTD_compress2")
++ZSTDLIB_STATIC_API
+ size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
+-                                          void* dst, size_t dstCapacity,
+-                                    const void* src, size_t srcSize,
+-                                    const void* dict,size_t dictSize,
+-                                          ZSTD_parameters params);
++                              void* dst, size_t dstCapacity,
++                        const void* src, size_t srcSize,
++                        const void* dict,size_t dictSize,
++                              ZSTD_parameters params);
+ 
+ /*! ZSTD_compress_usingCDict_advanced() :
+  *  Note : this function is now DEPRECATED.
+  *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
+  *  This prototype will generate compilation warnings. */
+ ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary")
++ZSTDLIB_STATIC_API
+ size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                               void* dst, size_t dstCapacity,
+                                         const void* src, size_t srcSize,
+@@ -1737,11 +1959,6 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
+  */
+ #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
+ 
+-/* Tries to fit compressed block size to be around targetCBlockSize.
+- * No target when targetCBlockSize == 0.
+- * There is no guarantee on compressed block size (default:0) */
+-#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
+-
+ /* User's best guess of source size.
+  * Hint is not valid when srcSizeHint == 0.
+  * There is no guarantee that hint is close to actual source size,
+@@ -1808,13 +2025,16 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
+  * Experimental parameter.
+  * Default is 0 == disabled. Set to 1 to enable.
+  *
+- * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same
+- * between calls, except for the modifications that zstd makes to pos (the
+- * caller must not modify pos). This is checked by the compressor, and
+- * compression will fail if it ever changes. This means the only flush
+- * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end
+- * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos)
+- * MUST not be modified during compression or you will get data corruption.
++ * Tells the compressor that input data presented with ZSTD_inBuffer
++ * will ALWAYS be the same between calls.
++ * Technically, the @src pointer must never be changed,
++ * and the @pos field can only be updated by zstd.
++ * However, it's possible to increase the @size field,
++ * allowing scenarios where more data can be appended after compression starts.
++ * These conditions are checked by the compressor,
++ * and compression will fail if they are not respected.
++ * Also, data in the ZSTD_inBuffer within the range [src, src + pos)
++ * MUST not be modified during compression or it will result in data corruption.
+  *
+  * When this flag is enabled zstd won't allocate an input window buffer,
+  * because the user guarantees it can reference the ZSTD_inBuffer until
+@@ -1822,18 +2042,15 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
+  * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
+  * avoid the memcpy() from the input buffer to the input window buffer.
+  *
+- * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used.
+- * That means this flag cannot be used with ZSTD_compressStream().
+- *
+  * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
+  * this flag is ALWAYS memory safe, and will never access out-of-bounds
+- * memory. However, compression WILL fail if you violate the preconditions.
++ * memory. However, compression WILL fail if conditions are not respected.
+  *
+- * WARNING: The data in the ZSTD_inBuffer in the range [dst, dst + pos) MUST
+- * not be modified during compression or you will get data corruption. This
+- * is because zstd needs to reference data in the ZSTD_inBuffer to find
++ * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST
++ * not be modified during compression or it will result in data corruption.
++ * This is because zstd needs to reference data in the ZSTD_inBuffer to find
+  * matches. Normally zstd maintains its own window buffer for this purpose,
+- * but passing this flag tells zstd to use the user provided buffer.
++ * but passing this flag tells zstd to rely on user provided buffer instead.
+  */
+ #define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
+ 
+@@ -1878,7 +2095,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
+  * Without validation, providing a sequence that does not conform to the zstd spec will cause
+  * undefined behavior, and may produce a corrupted block.
+  *
+- * With validation enabled, a if sequence is invalid (see doc/zstd_compression_format.md for
++ * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
+  * specifics regarding offset/matchlength requirements) then the function will bail out and
+  * return an error.
+  *
+@@ -1928,6 +2145,79 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
+  */
+ #define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15
+ 
++/* ZSTD_c_prefetchCDictTables
++ * Controlled with ZSTD_paramSwitch_e enum. Default is ZSTD_ps_auto.
++ *
++ * In some situations, zstd uses CDict tables in-place rather than copying them
++ * into the working context. (See docs on ZSTD_dictAttachPref_e above for details).
++ * In such situations, compression speed is seriously impacted when CDict tables are
++ * "cold" (outside CPU cache). This parameter instructs zstd to prefetch CDict tables
++ * when they are used in-place.
++ *
++ * For sufficiently small inputs, the cost of the prefetch will outweigh the benefit.
++ * For sufficiently large inputs, zstd will by default memcpy() CDict tables
++ * into the working context, so there is no need to prefetch. This parameter is
++ * targeted at a middle range of input sizes, where a prefetch is cheap enough to be
++ * useful but memcpy() is too expensive. The exact range of input sizes where this
++ * makes sense is best determined by careful experimentation.
++ *
++ * Note: for this parameter, ZSTD_ps_auto is currently equivalent to ZSTD_ps_disable,
++ * but in the future zstd may conditionally enable this feature via an auto-detection
++ * heuristic for cold CDicts.
++ * Use ZSTD_ps_disable to opt out of prefetching under any circumstances.
++ */
++#define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16
++
++/* ZSTD_c_enableSeqProducerFallback
++ * Allowed values are 0 (disable) and 1 (enable). The default setting is 0.
++ *
++ * Controls whether zstd will fall back to an internal sequence producer if an
++ * external sequence producer is registered and returns an error code. This fallback
++ * is block-by-block: the internal sequence producer will only be called for blocks
++ * where the external sequence producer returns an error code. Fallback parsing will
++ * follow any other cParam settings, such as compression level, the same as in a
++ * normal (fully-internal) compression operation.
++ *
++ * The user is strongly encouraged to read the full Block-Level Sequence Producer API
++ * documentation (below) before setting this parameter. */
++#define ZSTD_c_enableSeqProducerFallback ZSTD_c_experimentalParam17
++
++/* ZSTD_c_maxBlockSize
++ * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
++ * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default.
++ *
++ * This parameter can be used to set an upper bound on the blocksize
++ * that overrides the default ZSTD_BLOCKSIZE_MAX. It cannot be used to set upper
++ * bounds greater than ZSTD_BLOCKSIZE_MAX or lower than 1KB (smaller bounds would
++ * make compressBound() inaccurate). Currently only meant to be used for testing.
++ *
++ */
++#define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18
++
++/* ZSTD_c_searchForExternalRepcodes
++ * This parameter affects how zstd parses external sequences, such as sequences
++ * provided through the compressSequences() API or from an external block-level
++ * sequence producer.
++ *
++ * If set to ZSTD_ps_enable, the library will check for repeated offsets in
++ * external sequences, even if those repcodes are not explicitly indicated in
++ * the "rep" field. Note that this is the only way to exploit repcode matches
++ * while using compressSequences() or an external sequence producer, since zstd
++ * currently ignores the "rep" field of external sequences.
++ *
++ * If set to ZSTD_ps_disable, the library will not exploit repeated offsets in
++ * external sequences, regardless of whether the "rep" field has been set. This
++ * reduces sequence compression overhead by about 25% while sacrificing some
++ * compression ratio.
++ *
++ * The default value is ZSTD_ps_auto, for which the library will enable/disable
++ * based on compression level.
++ *
++ * Note: for now, this param only has an effect if ZSTD_c_blockDelimiters is
++ * set to ZSTD_sf_explicitBlockDelimiters. That may change in the future.
++ */
++#define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19
++
+ /*! ZSTD_CCtx_getParameter() :
+  *  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
+  *  and store it into int* value.
+@@ -2084,7 +2374,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete
+  * in the range [dst, dst + pos) MUST not be modified during decompression
+  * or you will get data corruption.
+  *
+- * When this flags is enabled zstd won't allocate an output buffer, because
++ * When this flag is enabled zstd won't allocate an output buffer, because
+  * it can write directly to the ZSTD_outBuffer, but it will still allocate
+  * an input buffer large enough to fit any compressed block. This will also
+  * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
+@@ -2137,6 +2427,33 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete
+  */
+ #define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4
+ 
++/* ZSTD_d_disableHuffmanAssembly
++ * Set to 1 to disable the Huffman assembly implementation.
++ * The default value is 0, which allows zstd to use the Huffman assembly
++ * implementation if available.
++ *
++ * This parameter can be used to disable Huffman assembly at runtime.
++ * If you want to disable it at compile time you can define the macro
++ * ZSTD_DISABLE_ASM.
++ */
++#define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5
++
++/* ZSTD_d_maxBlockSize
++ * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
++ * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default.
++ *
++ * Forces the decompressor to reject blocks whose content size is
++ * larger than the configured maxBlockSize. When maxBlockSize is
++ * larger than the windowSize, the windowSize is used instead.
++ * This saves memory on the decoder when you know all blocks are small.
++ *
++ * This option is typically used in conjunction with ZSTD_c_maxBlockSize.
++ *
++ * WARNING: This causes the decoder to reject otherwise valid frames
++ * that have block sizes larger than the configured maxBlockSize.
++ */
++#define ZSTD_d_maxBlockSize ZSTD_d_experimentalParam6
++
+ 
+ /*! ZSTD_DCtx_setFormat() :
+  *  This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().
+@@ -2145,6 +2462,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete
+  *  such ZSTD_f_zstd1_magicless for example.
+  * @return : 0, or an error code (which can be tested using ZSTD_isError()). */
+ ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead")
++ZSTDLIB_STATIC_API
+ size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
+ 
+ /*! ZSTD_decompressStream_simpleArgs() :
+@@ -2181,6 +2499,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressStream_simpleArgs (
+  * This prototype will generate compilation warnings.
+  */
+ ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
++ZSTDLIB_STATIC_API
+ size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+                          int compressionLevel,
+                          unsigned long long pledgedSrcSize);
+@@ -2198,17 +2517,15 @@ size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+  * This prototype will generate compilation warnings.
+  */
+ ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
++ZSTDLIB_STATIC_API
+ size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
+                      const void* dict, size_t dictSize,
+                            int compressionLevel);
+ 
+ /*! ZSTD_initCStream_advanced() :
+- * This function is DEPRECATED, and is approximately equivalent to:
++ * This function is DEPRECATED, and is equivalent to:
+  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+- *     // Pseudocode: Set each zstd parameter and leave the rest as-is.
+- *     for ((param, value) : params) {
+- *         ZSTD_CCtx_setParameter(zcs, param, value);
+- *     }
++ *     ZSTD_CCtx_setParams(zcs, params);
+  *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+  *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+  *
+@@ -2218,6 +2535,7 @@ size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
+  * This prototype will generate compilation warnings.
+  */
+ ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
++ZSTDLIB_STATIC_API
+ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+                     const void* dict, size_t dictSize,
+                           ZSTD_parameters params,
+@@ -2232,15 +2550,13 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+  * This prototype will generate compilation warnings.
+  */
+ ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
++ZSTDLIB_STATIC_API
+ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
+ 
+ /*! ZSTD_initCStream_usingCDict_advanced() :
+- *   This function is DEPRECATED, and is approximately equivalent to:
++ *   This function is DEPRECATED, and is equivalent to:
+  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+- *     // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
+- *     for ((fParam, value) : fParams) {
+- *         ZSTD_CCtx_setParameter(zcs, fParam, value);
+- *     }
++ *     ZSTD_CCtx_setFParams(zcs, fParams);
+  *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+  *     ZSTD_CCtx_refCDict(zcs, cdict);
+  *
+@@ -2250,6 +2566,7 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
+  * This prototype will generate compilation warnings.
+  */
+ ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
++ZSTDLIB_STATIC_API
+ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+                                const ZSTD_CDict* cdict,
+                                      ZSTD_frameParameters fParams,
+@@ -2264,7 +2581,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+  *       explicitly specified.
+  *
+  *  start a new frame, using same parameters from previous frame.
+- *  This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
++ *  This is typically useful to skip dictionary loading stage, since it will reuse it in-place.
+  *  Note that zcs must be init at least once before using ZSTD_resetCStream().
+  *  If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
+  *  If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
+@@ -2274,6 +2591,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+  *  This prototype will generate compilation warnings.
+  */
+ ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
++ZSTDLIB_STATIC_API
+ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
+ 
+ 
+@@ -2319,8 +2637,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
+  *     ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
+  *
+  * note: no dictionary will be used if dict == NULL or dictSize < 8
+- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+  */
++ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_loadDictionary, see zstd.h for detailed instructions")
+ ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+ 
+ /*!
+@@ -2330,8 +2648,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const vo
+  *     ZSTD_DCtx_refDDict(zds, ddict);
+  *
+  * note : ddict is referenced, it must outlive decompression session
+- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+  */
++ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_refDDict, see zstd.h for detailed instructions")
+ ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
+ 
+ /*!
+@@ -2339,18 +2657,202 @@ ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const Z
+  *
+  *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+  *
+- * re-use decompression parameters from previous init; saves dictionary loading
+- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
++ * reuse decompression parameters from previous init; saves dictionary loading
+  */
++ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions")
+ ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
+ 
+ 
++/* ********************* BLOCK-LEVEL SEQUENCE PRODUCER API *********************
++ *
++ * *** OVERVIEW ***
++ * The Block-Level Sequence Producer API allows users to provide their own custom
++ * sequence producer which libzstd invokes to process each block. The produced list
++ * of sequences (literals and matches) is then post-processed by libzstd to produce
++ * valid compressed blocks.
++ *
++ * This block-level offload API is a more granular complement of the existing
++ * frame-level offload API compressSequences() (introduced in v1.5.1). It offers
++ * an easier migration story for applications already integrated with libzstd: the
++ * user application continues to invoke the same compression functions
++ * ZSTD_compress2() or ZSTD_compressStream2() as usual, and transparently benefits
++ * from the specific advantages of the external sequence producer. For example,
++ * the sequence producer could be tuned to take advantage of known characteristics
++ * of the input, to offer better speed / ratio, or could leverage hardware
++ * acceleration not available within libzstd itself.
++ *
++ * See contrib/externalSequenceProducer for an example program employing the
++ * Block-Level Sequence Producer API.
++ *
++ * *** USAGE ***
++ * The user is responsible for implementing a function of type
++ * ZSTD_sequenceProducer_F. For each block, zstd will pass the following
++ * arguments to the user-provided function:
++ *
++ *   - sequenceProducerState: a pointer to a user-managed state for the sequence
++ *     producer.
++ *
++ *   - outSeqs, outSeqsCapacity: an output buffer for the sequence producer.
++ *     outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). The memory
++ *     backing outSeqs is managed by the CCtx.
++ *
++ *   - src, srcSize: an input buffer for the sequence producer to parse.
++ *     srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX.
++ *
++ *   - dict, dictSize: a history buffer, which may be empty, which the sequence
++ *     producer may reference as it parses the src buffer. Currently, zstd will
++ *     always pass dictSize == 0 into external sequence producers, but this will
++ *     change in the future.
++ *
++ *   - compressionLevel: a signed integer representing the zstd compression level
++ *     set by the user for the current operation. The sequence producer may choose
++ *     to use this information to change its compression strategy and speed/ratio
++ *     tradeoff. Note: the compression level does not reflect zstd parameters set
++ *     through the advanced API.
++ *
++ *   - windowSize: a size_t representing the maximum allowed offset for external
++ *     sequences. Note that sequence offsets are sometimes allowed to exceed the
++ *     windowSize if a dictionary is present, see doc/zstd_compression_format.md
++ *     for details.
++ *
++ * The user-provided function shall return a size_t representing the number of
++ * sequences written to outSeqs. This return value will be treated as an error
++ * code if it is greater than outSeqsCapacity. The return value must be non-zero
++ * if srcSize is non-zero. The ZSTD_SEQUENCE_PRODUCER_ERROR macro is provided
++ * for convenience, but any value greater than outSeqsCapacity will be treated as
++ * an error code.
++ *
++ * If the user-provided function does not return an error code, the sequences
++ * written to outSeqs must be a valid parse of the src buffer. Data corruption may
++ * occur if the parse is not valid. A parse is defined to be valid if the
++ * following conditions hold:
++ *   - The sum of matchLengths and literalLengths must equal srcSize.
++ *   - All sequences in the parse, except for the final sequence, must have
++ *     matchLength >= ZSTD_MINMATCH_MIN. The final sequence must have
++ *     matchLength >= ZSTD_MINMATCH_MIN or matchLength == 0.
++ *   - All offsets must respect the windowSize parameter as specified in
++ *     doc/zstd_compression_format.md.
++ *   - If the final sequence has matchLength == 0, it must also have offset == 0.
++ *
++ * zstd will only validate these conditions (and fail compression if they do not
++ * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence
++ * validation has a performance cost.
++ *
++ * If the user-provided function returns an error, zstd will either fall back
++ * to an internal sequence producer or fail the compression operation. The user can
++ * choose between the two behaviors by setting the ZSTD_c_enableSeqProducerFallback
++ * cParam. Fallback compression will follow any other cParam settings, such as
++ * compression level, the same as in a normal compression operation.
++ *
++ * The user shall instruct zstd to use a particular ZSTD_sequenceProducer_F
++ * function by calling
++ *         ZSTD_registerSequenceProducer(cctx,
++ *                                       sequenceProducerState,
++ *                                       sequenceProducer)
++ * This setting will persist until the next parameter reset of the CCtx.
++ *
++ * The sequenceProducerState must be initialized by the user before calling
++ * ZSTD_registerSequenceProducer(). The user is responsible for destroying the
++ * sequenceProducerState.
++ *
++ * *** LIMITATIONS ***
++ * This API is compatible with all zstd compression APIs which respect advanced parameters.
++ * However, there are three limitations:
++ *
++ * First, the ZSTD_c_enableLongDistanceMatching cParam is not currently supported.
++ * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with a block-level
++ * external sequence producer.
++ *   - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in some
++ *     cases (see its documentation for details). Users must explicitly set
++ *     ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an external
++ *     sequence producer is registered.
++ *   - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default
++ *     whenever the window size (1 << ZSTD_c_windowLog) is < 128MB, but that's subject to change. Users should
++ *     check the docs on ZSTD_c_enableLongDistanceMatching whenever the Block-Level Sequence
++ *     Producer API is used in conjunction with advanced settings (like ZSTD_c_windowLog).
++ *
++ * Second, history buffers are not currently supported. Concretely, zstd will always pass
++ * dictSize == 0 to the external sequence producer (for now). This has two implications:
++ *   - Dictionaries are not currently supported. Compression will *not* fail if the user
++ *     references a dictionary, but the dictionary won't have any effect.
++ *   - Stream history is not currently supported. All advanced compression APIs, including
++ *     streaming APIs, work with external sequence producers, but each block is treated as
++ *     an independent chunk without history from previous blocks.
++ *
++ * Third, multi-threading within a single compression is not currently supported. In other words,
++ * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered.
++ * Multi-threading across compressions is fine: simply create one CCtx per thread.
++ *
++ * Long-term, we plan to overcome all three limitations. There is no technical blocker to
++ * overcoming them. It is purely a question of engineering effort.
++ */
++
++#define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1))
++
++typedef size_t (*ZSTD_sequenceProducer_F) (
++  void* sequenceProducerState,
++  ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
++  const void* src, size_t srcSize,
++  const void* dict, size_t dictSize,
++  int compressionLevel,
++  size_t windowSize
++);
++
++/*! ZSTD_registerSequenceProducer() :
++ * Instruct zstd to use a block-level external sequence producer function.
++ *
++ * The sequenceProducerState must be initialized by the caller, and the caller is
++ * responsible for managing its lifetime. This parameter is sticky across
++ * compressions. It will remain set until the user explicitly resets compression
++ * parameters.
++ *
++ * Sequence producer registration is considered to be an "advanced parameter",
++ * part of the "advanced API". This means it will only have an effect on compression
++ * APIs which respect advanced parameters, such as compress2() and compressStream2().
++ * Older compression APIs such as compressCCtx(), which predate the introduction of
++ * "advanced parameters", will ignore any external sequence producer setting.
++ *
++ * The sequence producer can be "cleared" by registering a NULL function pointer. This
++ * removes all limitations described above in the "LIMITATIONS" section of the API docs.
++ *
++ * The user is strongly encouraged to read the full API documentation (above) before
++ * calling this function. */
++ZSTDLIB_STATIC_API void
++ZSTD_registerSequenceProducer(
++  ZSTD_CCtx* cctx,
++  void* sequenceProducerState,
++  ZSTD_sequenceProducer_F sequenceProducer
++);
++
++/*! ZSTD_CCtxParams_registerSequenceProducer() :
++ * Same as ZSTD_registerSequenceProducer(), but operates on ZSTD_CCtx_params.
++ * This is used for accurate size estimation with ZSTD_estimateCCtxSize_usingCCtxParams(),
++ * which is needed when creating a ZSTD_CCtx with ZSTD_initStaticCCtx().
++ *
++ * If you are using the external sequence producer API in a scenario where ZSTD_initStaticCCtx()
++ * is required, then this function is for you. Otherwise, you probably don't need it.
++ *
++ * See tests/zstreamtest.c for example usage. */
++ZSTDLIB_STATIC_API void
++ZSTD_CCtxParams_registerSequenceProducer(
++  ZSTD_CCtx_params* params,
++  void* sequenceProducerState,
++  ZSTD_sequenceProducer_F sequenceProducer
++);
++
++
+ /* *******************************************************************
+-*  Buffer-less and synchronous inner streaming functions
++*  Buffer-less and synchronous inner streaming functions (DEPRECATED)
++*
++*  This API is deprecated, and will be removed in a future version.
++*  It allows streaming (de)compression with user allocated buffers.
++*  However, it is hard to use, and not as well tested as the rest of
++*  our API.
+ *
+-*  This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
+-*  But it's also a complex one, with several restrictions, documented below.
+-*  Prefer normal streaming API for an easier experience.
++*  Please use the normal streaming API instead: ZSTD_compressStream2()
++*  and ZSTD_decompressStream().
++*  If you need functionality that the normal streaming API doesn't provide,
++*  please open an issue on our GitHub.
+ ********************************************************************* */
+ 
+ /*
+@@ -2358,11 +2860,10 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
+ 
+   A ZSTD_CCtx object is required to track streaming operations.
+   Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
+-  ZSTD_CCtx object can be re-used multiple times within successive compression operations.
++  ZSTD_CCtx object can be reused multiple times within successive compression operations.
+ 
+   Start by initializing a context.
+   Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
+-  It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
+ 
+   Then, consume your input using ZSTD_compressContinue().
+   There are some important considerations to keep in mind when using this advanced function :
+@@ -2380,36 +2881,46 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
+   It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
+   Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
+ 
+-  `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
++  `ZSTD_CCtx` object can be reused (ZSTD_compressBegin()) to compress again.
+ */
+ 
+ /*=====   Buffer-less streaming compression functions  =====*/
++ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
+ ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
++ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
+ ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
++ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
+ ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */
+-ZSTDLIB_STATIC_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
+ 
++ZSTD_DEPRECATED("This function will likely be removed in a future release. It is misleading and has very limited utility.")
++ZSTDLIB_STATIC_API
++size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
++
++ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
+ ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
++ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
+ ZSTDLIB_STATIC_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ 
+ /* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */
+ ZSTD_DEPRECATED("use advanced API to access custom parameters")
++ZSTDLIB_STATIC_API
+ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
+ ZSTD_DEPRECATED("use advanced API to access custom parameters")
++ZSTDLIB_STATIC_API
+ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
+ /*
+   Buffer-less streaming decompression (synchronous mode)
+ 
+   A ZSTD_DCtx object is required to track streaming operations.
+   Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
+-  A ZSTD_DCtx object can be re-used multiple times.
++  A ZSTD_DCtx object can be reused multiple times.
+ 
+   First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
+   Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
+   Data fragment must be large enough to ensure successful decoding.
+  `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough.
+-  @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
+-           >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
++  result  : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
++           >0 : `srcSize` is too small, please provide at least result bytes on next attempt.
+            errorCode, which can be tested using ZSTD_isError().
+ 
+   It fills a ZSTD_frameHeader structure with important information to correctly decode the frame,
+@@ -2428,7 +2939,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_
+ 
+   The most memory efficient way is to use a round buffer of sufficient size.
+   Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(),
+-  which can @return an error code if required value is too large for current system (in 32-bits mode).
++  which can return an error code if the required value is too large for the current system (in 32-bit mode).
+   In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one,
+   up to the moment there is not enough room left in the buffer to guarantee decoding another full block,
+   which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`.
+@@ -2448,7 +2959,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_
+   ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+   ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail.
+ 
+- @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
++  result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
+   It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item.
+   It can also be an error code, which can be tested with ZSTD_isError().
+ 
+@@ -2471,27 +2982,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_
+ */
+ 
+ /*=====   Buffer-less streaming decompression functions  =====*/
+-typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
+-typedef struct {
+-    unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
+-    unsigned long long windowSize;       /* can be very large, up to <= frameContentSize */
+-    unsigned blockSizeMax;
+-    ZSTD_frameType_e frameType;          /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
+-    unsigned headerSize;
+-    unsigned dictID;
+-    unsigned checksumFlag;
+-} ZSTD_frameHeader;
+ 
+-/*! ZSTD_getFrameHeader() :
+- *  decode Frame Header, or requires larger `srcSize`.
+- * @return : 0, `zfhPtr` is correctly filled,
+- *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
+- *           or an error code, which can be tested using ZSTD_isError() */
+-ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   /*< doesn't consume input */
+-/*! ZSTD_getFrameHeader_advanced() :
+- *  same as ZSTD_getFrameHeader(),
+- *  with added capability to select a format (like ZSTD_f_zstd1_magicless) */
+-ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
+ ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);  /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
+ 
+ ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
+@@ -2502,6 +2993,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+ ZSTDLIB_STATIC_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ 
+ /* misc */
++ZSTD_DEPRECATED("This function will likely be removed in the next minor release. It is misleading and has very limited utility.")
+ ZSTDLIB_STATIC_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
+ typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
+ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+@@ -2509,11 +3001,23 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+ 
+ 
+ 
+-/* ============================ */
+-/*       Block level API       */
+-/* ============================ */
++/* ========================================= */
++/*       Block level API (DEPRECATED)       */
++/* ========================================= */
+ 
+ /*!
++
++    This API is deprecated in favor of the regular compression API.
++    You can get the frame header down to 2 bytes by setting:
++      - ZSTD_c_format = ZSTD_f_zstd1_magicless
++      - ZSTD_c_contentSizeFlag = 0
++      - ZSTD_c_checksumFlag = 0
++      - ZSTD_c_dictIDFlag = 0
++
++    This API is not as well tested as our normal API, so we recommend not using it.
++    We will be removing it in a future version. If the normal API doesn't provide
++    the functionality you need, please open a GitHub issue.
++
+     Block functions produce and decode raw zstd blocks, without frame metadata.
+     Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
+     But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes.
+@@ -2524,7 +3028,6 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+     - It is necessary to init context before starting
+       + compression : any ZSTD_compressBegin*() variant, including with dictionary
+       + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
+-      + copyCCtx() and copyDCtx() can be used too
+     - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
+       + If input is larger than a block size, it's necessary to split input data into multiple blocks
+       + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
+@@ -2541,11 +3044,14 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+ */
+ 
+ /*=====   Raw zstd block functions  =====*/
++ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
+ ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
++ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
+ ZSTDLIB_STATIC_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
++ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
+ ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
++ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
+ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
+ 
+-
+ #endif   /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
+ 
+diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile
+index 20f08c644b71..464c410b2768 100644
+--- a/lib/zstd/Makefile
++++ b/lib/zstd/Makefile
+@@ -1,6 +1,6 @@
+ # SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ # ################################################################
+-# Copyright (c) Facebook, Inc.
++# Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under both the BSD-style license (found in the
+diff --git a/lib/zstd/common/allocations.h b/lib/zstd/common/allocations.h
+new file mode 100644
+index 000000000000..16c3d08e8d1a
+--- /dev/null
++++ b/lib/zstd/common/allocations.h
+@@ -0,0 +1,56 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
++/*
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
++ * All rights reserved.
++ *
++ * This source code is licensed under both the BSD-style license (found in the
++ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
++ * in the COPYING file in the root directory of this source tree).
++ * You may select, at your option, one of the above-listed licenses.
++ */
++
++/* This file provides custom allocation primitives
++ */
++
++#define ZSTD_DEPS_NEED_MALLOC
++#include "zstd_deps.h"   /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
++
++#include "compiler.h" /* MEM_STATIC */
++#define ZSTD_STATIC_LINKING_ONLY
++#include <linux/zstd.h> /* ZSTD_customMem */
++
++#ifndef ZSTD_ALLOCATIONS_H
++#define ZSTD_ALLOCATIONS_H
++
++/* custom memory allocation functions */
++
++MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
++{
++    if (customMem.customAlloc)
++        return customMem.customAlloc(customMem.opaque, size);
++    return ZSTD_malloc(size);
++}
++
++MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
++{
++    if (customMem.customAlloc) {
++        /* calloc implemented as malloc+memset (not as efficient as calloc, but next best
++         * guess for custom malloc); a failed allocation is returned as NULL, never zeroed */
++        void* const ptr = customMem.customAlloc(customMem.opaque, size);
++        if (ptr != NULL) ZSTD_memset(ptr, 0, size);
++        return ptr;
++    }
++    return ZSTD_calloc(1, size);
++}
++
++MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
++{
++    if (ptr!=NULL) {
++        if (customMem.customFree)
++            customMem.customFree(customMem.opaque, ptr);
++        else
++            ZSTD_free(ptr);
++    }
++}
++
++#endif /* ZSTD_ALLOCATIONS_H */
+diff --git a/lib/zstd/common/bits.h b/lib/zstd/common/bits.h
+new file mode 100644
+index 000000000000..aa3487ec4b6a
+--- /dev/null
++++ b/lib/zstd/common/bits.h
+@@ -0,0 +1,149 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
++/*
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
++ * All rights reserved.
++ *
++ * This source code is licensed under both the BSD-style license (found in the
++ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
++ * in the COPYING file in the root directory of this source tree).
++ * You may select, at your option, one of the above-listed licenses.
++ */
++
++#ifndef ZSTD_BITS_H
++#define ZSTD_BITS_H
++
++#include "mem.h"
++
++MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
++{
++    assert(val != 0);
++    {
++        static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
++                                                30, 22, 20, 15, 25, 17, 4, 8,
++                                                31, 27, 13, 23, 21, 19, 16, 7,
++                                                26, 12, 18, 6, 11, 5, 10, 9};
++        return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
++    }
++}
++
++MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
++{
++    assert(val != 0);
++#   if (__GNUC__ >= 4)
++        return (unsigned)__builtin_ctz(val);
++#   else
++        return ZSTD_countTrailingZeros32_fallback(val);
++#   endif
++}
++
++MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
++    assert(val != 0);
++    {
++        static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
++                                            11, 14, 16, 18, 22, 25, 3, 30,
++                                            8, 12, 20, 28, 15, 17, 24, 7,
++                                            19, 27, 23, 6, 26, 5, 4, 31};
++        val |= val >> 1;
++        val |= val >> 2;
++        val |= val >> 4;
++        val |= val >> 8;
++        val |= val >> 16;
++        return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
++    }
++}
++
++MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
++{
++    assert(val != 0);
++#   if (__GNUC__ >= 4)
++        return (unsigned)__builtin_clz(val);
++#   else
++        return ZSTD_countLeadingZeros32_fallback(val);
++#   endif
++}
++
++MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
++{
++    assert(val != 0);
++#   if (__GNUC__ >= 4) && defined(__LP64__)
++        return (unsigned)__builtin_ctzll(val);
++#   else
++        {
++            U32 mostSignificantWord = (U32)(val >> 32);
++            U32 leastSignificantWord = (U32)val;
++            if (leastSignificantWord == 0) {
++                return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
++            } else {
++                return ZSTD_countTrailingZeros32(leastSignificantWord);
++            }
++        }
++#   endif
++}
++
++MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
++{
++    assert(val != 0);
++#   if (__GNUC__ >= 4)
++        return (unsigned)(__builtin_clzll(val));
++#   else
++        {
++            U32 mostSignificantWord = (U32)(val >> 32);
++            U32 leastSignificantWord = (U32)val;
++            if (mostSignificantWord == 0) {
++                return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
++            } else {
++                return ZSTD_countLeadingZeros32(mostSignificantWord);
++            }
++        }
++#   endif
++}
++
++MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
++{
++    if (MEM_isLittleEndian()) {
++        if (MEM_64bits()) {
++            return ZSTD_countTrailingZeros64((U64)val) >> 3;
++        } else {
++            return ZSTD_countTrailingZeros32((U32)val) >> 3;
++        }
++    } else {  /* Big Endian CPU */
++        if (MEM_64bits()) {
++            return ZSTD_countLeadingZeros64((U64)val) >> 3;
++        } else {
++            return ZSTD_countLeadingZeros32((U32)val) >> 3;
++        }
++    }
++}
++
++MEM_STATIC unsigned ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
++{
++    assert(val != 0);
++    return 31 - ZSTD_countLeadingZeros32(val);
++}
++
++/* ZSTD_rotateRight_*():
++ * Rotates a bitfield to the right by "count" bits.
++ * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
++ */
++MEM_STATIC
++U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
++    assert(count < 64);
++    count &= 0x3F; /* for fickle pattern recognition */
++    return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
++}
++
++MEM_STATIC
++U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
++    assert(count < 32);
++    count &= 0x1F; /* for fickle pattern recognition */
++    return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
++}
++
++MEM_STATIC
++U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
++    assert(count < 16);
++    count &= 0x0F; /* for fickle pattern recognition */
++    return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
++}
++
++#endif /* ZSTD_BITS_H */
+diff --git a/lib/zstd/common/bitstream.h b/lib/zstd/common/bitstream.h
+index feef3a1b1d60..6a13f1f0f1e8 100644
+--- a/lib/zstd/common/bitstream.h
++++ b/lib/zstd/common/bitstream.h
+@@ -1,7 +1,8 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /* ******************************************************************
+  * bitstream
+  * Part of FSE library
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  *
+  * You can contact the author at :
+  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+@@ -27,6 +28,7 @@
+ #include "compiler.h"       /* UNLIKELY() */
+ #include "debug.h"          /* assert(), DEBUGLOG(), RAWLOG() */
+ #include "error_private.h"  /* error codes and messages */
++#include "bits.h"           /* ZSTD_highbit32 */
+ 
+ 
+ /*=========================================
+@@ -79,19 +81,20 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
+ /*-********************************************
+ *  bitStream decoding API (read backward)
+ **********************************************/
++typedef size_t BitContainerType;
+ typedef struct {
+-    size_t   bitContainer;
++    BitContainerType bitContainer;
+     unsigned bitsConsumed;
+     const char* ptr;
+     const char* start;
+     const char* limitPtr;
+ } BIT_DStream_t;
+ 
+-typedef enum { BIT_DStream_unfinished = 0,
+-               BIT_DStream_endOfBuffer = 1,
+-               BIT_DStream_completed = 2,
+-               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
+-               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
++typedef enum { BIT_DStream_unfinished = 0,  /* fully refilled */
++               BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
++               BIT_DStream_completed = 2,   /* bitstream entirely consumed, bit-exact */
++               BIT_DStream_overflow = 3     /* user requested more bits than present in bitstream */
++    } BIT_DStream_status;  /* result of BIT_reloadDStream() */
+ 
+ MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+ MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+@@ -101,7 +104,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+ 
+ /* Start by invoking BIT_initDStream().
+ *  A chunk of the bitStream is then stored into a local register.
+-*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
++*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
+ *  You can then retrieve bitFields stored into the local register, **in reverse order**.
+ *  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
+ *  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
+@@ -122,33 +125,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
+ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+ /* faster, but works only if nbBits >= 1 */
+ 
+-
+-
+-/*-**************************************************************
+-*  Internal functions
+-****************************************************************/
+-MEM_STATIC unsigned BIT_highbit32 (U32 val)
+-{
+-    assert(val != 0);
+-    {
+-#   if (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+-        return __builtin_clz (val) ^ 31;
+-#   else   /* Software version */
+-        static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
+-                                                 11, 14, 16, 18, 22, 25,  3, 30,
+-                                                  8, 12, 20, 28, 15, 17, 24,  7,
+-                                                 19, 27, 23,  6, 26,  5,  4, 31 };
+-        U32 v = val;
+-        v |= v >> 1;
+-        v |= v >> 2;
+-        v |= v >> 4;
+-        v |= v >> 8;
+-        v |= v >> 16;
+-        return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+-#   endif
+-    }
+-}
+-
+ /*=====    Local Constants   =====*/
+ static const unsigned BIT_mask[] = {
+     0,          1,         3,         7,         0xF,       0x1F,
+@@ -178,6 +154,12 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
+     return 0;
+ }
+ 
++FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
++{
++    assert(nbBits < BIT_MASK_SIZE);
++    return bitContainer & BIT_mask[nbBits];
++}
++
+ /*! BIT_addBits() :
+  *  can add up to 31 bits into `bitC`.
+  *  Note : does not check for register overflow ! */
+@@ -187,7 +169,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
+     DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
+     assert(nbBits < BIT_MASK_SIZE);
+     assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+-    bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
++    bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
+     bitC->bitPos += nbBits;
+ }
+ 
+@@ -266,35 +248,35 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
+         bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
+         bitD->bitContainer = MEM_readLEST(bitD->ptr);
+         { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+-          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
++          bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
+           if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
+     } else {
+         bitD->ptr   = bitD->start;
+         bitD->bitContainer = *(const BYTE*)(bitD->start);
+         switch(srcSize)
+         {
+-        case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
++        case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
+                 ZSTD_FALLTHROUGH;
+ 
+-        case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
++        case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
+                 ZSTD_FALLTHROUGH;
+ 
+-        case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
++        case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
+                 ZSTD_FALLTHROUGH;
+ 
+-        case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
++        case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
+                 ZSTD_FALLTHROUGH;
+ 
+-        case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
++        case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
+                 ZSTD_FALLTHROUGH;
+ 
+-        case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
++        case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) <<  8;
+                 ZSTD_FALLTHROUGH;
+ 
+         default: break;
+         }
+         {   BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+-            bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
++            bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
+             if (lastByte == 0) return ERROR(corruption_detected);  /* endMark not present */
+         }
+         bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
+@@ -303,12 +285,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
+     return srcSize;
+ }
+ 
+-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
++FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
+ {
+     return bitContainer >> start;
+ }
+ 
+-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
++FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
+ {
+     U32 const regMask = sizeof(bitContainer)*8 - 1;
+     /* if start > regMask, bitstream is corrupted, and result is undefined */
+@@ -325,19 +307,13 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
+ #endif
+ }
+ 
+-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
+-{
+-    assert(nbBits < BIT_MASK_SIZE);
+-    return bitContainer & BIT_mask[nbBits];
+-}
+-
+ /*! BIT_lookBits() :
+  *  Provides next n bits from local register.
+  *  local register is not modified.
+  *  On 32-bits, maxNbBits==24.
+  *  On 64-bits, maxNbBits==56.
+  * @return : value extracted */
+-MEM_STATIC  FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t*  bitD, U32 nbBits)
++FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t*  bitD, U32 nbBits)
+ {
+     /* arbitrate between double-shift and shift+mask */
+ #if 1
+@@ -360,7 +336,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
+     return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
+ }
+ 
+-MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
++FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+ {
+     bitD->bitsConsumed += nbBits;
+ }
+@@ -369,7 +345,7 @@ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+  *  Read (consume) next n bits from local register and update.
+  *  Pay attention to not read more than nbBits contained into local register.
+  * @return : extracted value. */
+-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
++FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
+ {
+     size_t const value = BIT_lookBits(bitD, nbBits);
+     BIT_skipBits(bitD, nbBits);
+@@ -377,7 +353,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned n
+ }
+ 
+ /*! BIT_readBitsFast() :
+- *  unsafe version; only works only if nbBits >= 1 */
++ *  unsafe version; only works if nbBits >= 1 */
+ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
+ {
+     size_t const value = BIT_lookBitsFast(bitD, nbBits);
+@@ -386,6 +362,21 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
+     return value;
+ }
+ 
++/*! BIT_reloadDStream_internal() :
++ *  Simple variant of BIT_reloadDStream(), with two conditions:
++ *  1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
++ *  2. look window is valid after shifted down : bitD->ptr >= bitD->start
++ */
++MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
++{
++    assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
++    bitD->ptr -= bitD->bitsConsumed >> 3;
++    assert(bitD->ptr >= bitD->start);
++    bitD->bitsConsumed &= 7;
++    bitD->bitContainer = MEM_readLEST(bitD->ptr);
++    return BIT_DStream_unfinished;
++}
++
+ /*! BIT_reloadDStreamFast() :
+  *  Similar to BIT_reloadDStream(), but with two differences:
+  *  1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
+@@ -396,31 +387,35 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
+ {
+     if (UNLIKELY(bitD->ptr < bitD->limitPtr))
+         return BIT_DStream_overflow;
+-    assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
+-    bitD->ptr -= bitD->bitsConsumed >> 3;
+-    bitD->bitsConsumed &= 7;
+-    bitD->bitContainer = MEM_readLEST(bitD->ptr);
+-    return BIT_DStream_unfinished;
++    return BIT_reloadDStream_internal(bitD);
+ }
+ 
+ /*! BIT_reloadDStream() :
+  *  Refill `bitD` from buffer previously set in BIT_initDStream() .
+- *  This function is safe, it guarantees it will not read beyond src buffer.
++ *  This function is safe, it guarantees it will never read beyond src buffer.
+  * @return : status of `BIT_DStream_t` internal register.
+  *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
+-MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
++FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+ {
+-    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
++    /* note : once in overflow mode, a bitstream remains in this mode until it's reset */
++    if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
++        static const BitContainerType zeroFilled = 0;
++        bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
++        /* overflow detected, erroneous scenario or end of stream: no update */
+         return BIT_DStream_overflow;
++    }
++
++    assert(bitD->ptr >= bitD->start);
+ 
+     if (bitD->ptr >= bitD->limitPtr) {
+-        return BIT_reloadDStreamFast(bitD);
++        return BIT_reloadDStream_internal(bitD);
+     }
+     if (bitD->ptr == bitD->start) {
++        /* reached end of bitStream => no update */
+         if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+         return BIT_DStream_completed;
+     }
+-    /* start < ptr < limitPtr */
++    /* start < ptr < limitPtr => cautious update */
+     {   U32 nbBytes = bitD->bitsConsumed >> 3;
+         BIT_DStream_status result = BIT_DStream_unfinished;
+         if (bitD->ptr - nbBytes < bitD->start) {
+diff --git a/lib/zstd/common/compiler.h b/lib/zstd/common/compiler.h
+index c42d39faf9bd..508ee25537bb 100644
+--- a/lib/zstd/common/compiler.h
++++ b/lib/zstd/common/compiler.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -11,6 +12,8 @@
+ #ifndef ZSTD_COMPILER_H
+ #define ZSTD_COMPILER_H
+ 
++#include <linux/types.h>
++
+ #include "portability_macros.h"
+ 
+ /*-*******************************************************
+@@ -41,12 +44,15 @@
+ */
+ #define WIN_CDECL
+ 
++/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
++#define UNUSED_ATTR __attribute__((unused))
++
+ /*
+  * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
+  * parameters. They must be inlined for the compiler to eliminate the constant
+  * branches.
+  */
+-#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
++#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR
+ /*
+  * HINT_INLINE is used to help the compiler generate better code. It is *not*
+  * used for "templates", so it can be tweaked based on the compilers
+@@ -61,11 +67,21 @@
+ #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
+ #  define HINT_INLINE static INLINE_KEYWORD
+ #else
+-#  define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
++#  define HINT_INLINE FORCE_INLINE_TEMPLATE
+ #endif
+ 
+-/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+-#define UNUSED_ATTR __attribute__((unused))
++/* "soft" inline :
++ * The compiler is free to select if it's a good idea to inline or not.
++ * The main objective is to silence compiler warnings
++ * when a defined function is included but not used.
++ *
++ * Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit.
++ * Updating the prefix is probably preferable, but requires a fairly large codemod,
++ * since this name is used everywhere.
++ */
++#ifndef MEM_STATIC  /* already defined in Linux Kernel mem.h */
++#define MEM_STATIC static __inline UNUSED_ATTR
++#endif
+ 
+ /* force no inlining */
+ #define FORCE_NOINLINE static __attribute__((__noinline__))
+@@ -86,23 +102,24 @@
+ #  define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+ #  define PREFETCH_L2(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
+ #elif defined(__aarch64__)
+-#  define PREFETCH_L1(ptr)  __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
+-#  define PREFETCH_L2(ptr)  __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
++#  define PREFETCH_L1(ptr)  do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
++#  define PREFETCH_L2(ptr)  do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
+ #else
+-#  define PREFETCH_L1(ptr) (void)(ptr)  /* disabled */
+-#  define PREFETCH_L2(ptr) (void)(ptr)  /* disabled */
++#  define PREFETCH_L1(ptr) do { (void)(ptr); } while (0)  /* disabled */
++#  define PREFETCH_L2(ptr) do { (void)(ptr); } while (0)  /* disabled */
+ #endif  /* NO_PREFETCH */
+ 
+ #define CACHELINE_SIZE 64
+ 
+-#define PREFETCH_AREA(p, s)  {            \
+-    const char* const _ptr = (const char*)(p);  \
+-    size_t const _size = (size_t)(s);     \
+-    size_t _pos;                          \
+-    for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) {  \
+-        PREFETCH_L2(_ptr + _pos);         \
+-    }                                     \
+-}
++#define PREFETCH_AREA(p, s)                              \
++    do {                                                 \
++        const char* const _ptr = (const char*)(p);       \
++        size_t const _size = (size_t)(s);                \
++        size_t _pos;                                     \
++        for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
++            PREFETCH_L2(_ptr + _pos);                    \
++        }                                                \
++    } while (0)
+ 
+ /* vectorization
+  * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
+@@ -126,9 +143,9 @@
+ #define UNLIKELY(x) (__builtin_expect((x), 0))
+ 
+ #if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
+-#  define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); }
++#  define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0)
+ #else
+-#  define ZSTD_UNREACHABLE { assert(0); }
++#  define ZSTD_UNREACHABLE do { assert(0); } while (0)
+ #endif
+ 
+ /* disable warnings */
+@@ -179,6 +196,85 @@
+ *  Sanitizer
+ *****************************************************************/
+ 
++/*
++ * Zstd relies on pointer overflow in its decompressor.
++ * We add this attribute to functions that rely on pointer overflow.
++ */
++#ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++#  if __has_attribute(no_sanitize)
++#    if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8
++       /* gcc < 8 only has signed-integer-overflow which triggers on pointer overflow */
++#      define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("signed-integer-overflow")))
++#    else
++       /* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */
++#      define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("pointer-overflow")))
++#    endif
++#  else
++#    define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++#  endif
++#endif
++
++/*
++ * Helper function to perform a wrapped pointer difference without triggering
++ * UBSAN.
++ *
++ * @returns lhs - rhs with wrapping
++ */
++MEM_STATIC
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs)
++{
++    return lhs - rhs;
++}
++
++/*
++ * Helper function to perform a wrapped pointer add without triggering UBSAN.
++ *
++ * @return ptr + add with wrapping
++ */
++MEM_STATIC
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add)
++{
++    return ptr + add;
++}
++
++/*
++ * Helper function to perform a wrapped pointer subtraction without triggering
++ * UBSAN.
++ *
++ * @return ptr - sub with wrapping
++ */
++MEM_STATIC
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub)
++{
++    return ptr - sub;
++}
++
++/*
++ * Helper function to add to a pointer that works around C's undefined behavior
++ * of adding 0 to NULL.
++ *
++ * @returns `ptr + add` except it defines `NULL + 0 == NULL`.
++ */
++MEM_STATIC
++unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add)
++{
++    return add > 0 ? ptr + add : ptr;  /* add <= 0 returns ptr untouched, so NULL + 0 never performs pointer arithmetic */
++}
++
++/* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
++ * abundance of caution, disable our custom poisoning on mingw. */
++#ifdef __MINGW32__
++#ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE
++#define ZSTD_ASAN_DONT_POISON_WORKSPACE 1
++#endif
++#ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE
++#define ZSTD_MSAN_DONT_POISON_WORKSPACE 1
++#endif
++#endif
++
+ 
+ 
+ #endif /* ZSTD_COMPILER_H */
+diff --git a/lib/zstd/common/cpu.h b/lib/zstd/common/cpu.h
+index 0db7b42407ee..d8319a2bef4c 100644
+--- a/lib/zstd/common/cpu.h
++++ b/lib/zstd/common/cpu.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+diff --git a/lib/zstd/common/debug.c b/lib/zstd/common/debug.c
+index bb863c9ea616..8eb6aa9a3b20 100644
+--- a/lib/zstd/common/debug.c
++++ b/lib/zstd/common/debug.c
+@@ -1,7 +1,8 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /* ******************************************************************
+  * debug
+  * Part of FSE library
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  *
+  * You can contact the author at :
+  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+@@ -21,4 +22,10 @@
+ 
+ #include "debug.h"
+ 
++#if (DEBUGLEVEL>=2)
++/* We only use this when DEBUGLEVEL>=2, but we get -Werror=pedantic errors if a
++ * translation unit is empty. So remove this from Linux kernel builds, but
++ * otherwise just leave it in.
++ */
+ int g_debuglevel = DEBUGLEVEL;
++#endif
+diff --git a/lib/zstd/common/debug.h b/lib/zstd/common/debug.h
+index 6dd88d1fbd02..226ba3c57ec3 100644
+--- a/lib/zstd/common/debug.h
++++ b/lib/zstd/common/debug.h
+@@ -1,7 +1,8 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /* ******************************************************************
+  * debug
+  * Part of FSE library
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  *
+  * You can contact the author at :
+  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+@@ -82,18 +83,27 @@ extern int g_debuglevel; /* the variable is only declared,
+                             It's useful when enabling very verbose levels
+                             on selective conditions (such as position in src) */
+ 
+-#  define RAWLOG(l, ...) {                                       \
+-                if (l<=g_debuglevel) {                           \
+-                    ZSTD_DEBUG_PRINT(__VA_ARGS__);               \
+-            }   }
+-#  define DEBUGLOG(l, ...) {                                     \
+-                if (l<=g_debuglevel) {                           \
+-                    ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
+-                    ZSTD_DEBUG_PRINT(" \n");                     \
+-            }   }
++#  define RAWLOG(l, ...)                   \
++    do {                                   \
++        if (l<=g_debuglevel) {             \
++            ZSTD_DEBUG_PRINT(__VA_ARGS__); \
++        }                                  \
++    } while (0)
++
++#define STRINGIFY(x) #x
++#define TOSTRING(x) STRINGIFY(x)
++#define LINE_AS_STRING TOSTRING(__LINE__)
++
++#  define DEBUGLOG(l, ...)                               \
++    do {                                                 \
++        if (l<=g_debuglevel) {                           \
++            ZSTD_DEBUG_PRINT(__FILE__ ":" LINE_AS_STRING ": " __VA_ARGS__); \
++            ZSTD_DEBUG_PRINT(" \n");                     \
++        }                                                \
++    } while (0)
+ #else
+-#  define RAWLOG(l, ...)      {}    /* disabled */
+-#  define DEBUGLOG(l, ...)    {}    /* disabled */
++#  define RAWLOG(l, ...)   do { } while (0)    /* disabled */
++#  define DEBUGLOG(l, ...) do { } while (0)    /* disabled */
+ #endif
+ 
+ 
+diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c
+index fef67056f052..6cdd82233fb5 100644
+--- a/lib/zstd/common/entropy_common.c
++++ b/lib/zstd/common/entropy_common.c
+@@ -1,6 +1,7 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /* ******************************************************************
+  * Common functions of New Generation Entropy library
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  *
+  *  You can contact the author at :
+  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+@@ -19,8 +20,8 @@
+ #include "error_private.h"       /* ERR_*, ERROR */
+ #define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
+ #include "fse.h"
+-#define HUF_STATIC_LINKING_ONLY  /* HUF_TABLELOG_ABSOLUTEMAX */
+ #include "huf.h"
++#include "bits.h"                /* ZSTD_highbit32, ZSTD_countTrailingZeros32 */
+ 
+ 
+ /*===   Version   ===*/
+@@ -38,23 +39,6 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
+ /*-**************************************************************
+ *  FSE NCount encoding-decoding
+ ****************************************************************/
+-static U32 FSE_ctz(U32 val)
+-{
+-    assert(val != 0);
+-    {
+-#   if (__GNUC__ >= 3)   /* GCC Intrinsic */
+-        return __builtin_ctz(val);
+-#   else   /* Software version */
+-        U32 count = 0;
+-        while ((val & 1) == 0) {
+-            val >>= 1;
+-            ++count;
+-        }
+-        return count;
+-#   endif
+-    }
+-}
+-
+ FORCE_INLINE_TEMPLATE
+ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                            const void* headerBuffer, size_t hbSize)
+@@ -102,7 +86,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
+              * repeat.
+              * Avoid UB by setting the high bit to 1.
+              */
+-            int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
++            int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
+             while (repeats >= 12) {
+                 charnum += 3 * 12;
+                 if (LIKELY(ip <= iend-7)) {
+@@ -113,7 +97,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
+                     ip = iend - 4;
+                 }
+                 bitStream = MEM_readLE32(ip) >> bitCount;
+-                repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
++                repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
+             }
+             charnum += 3 * repeats;
+             bitStream >>= 2 * repeats;
+@@ -178,7 +162,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
+                  * know that threshold > 1.
+                  */
+                 if (remaining <= 1) break;
+-                nbBits = BIT_highbit32(remaining) + 1;
++                nbBits = ZSTD_highbit32(remaining) + 1;
+                 threshold = 1 << (nbBits - 1);
+             }
+             if (charnum >= maxSV1) break;
+@@ -253,7 +237,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                      const void* src, size_t srcSize)
+ {
+     U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+-    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
++    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0);
+ }
+ 
+ FORCE_INLINE_TEMPLATE size_t
+@@ -301,14 +285,14 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+     if (weightTotal == 0) return ERROR(corruption_detected);
+ 
+     /* get last non-null symbol weight (implied, total must be 2^n) */
+-    {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
++    {   U32 const tableLog = ZSTD_highbit32(weightTotal) + 1;
+         if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+         *tableLogPtr = tableLog;
+         /* determine last weight */
+         {   U32 const total = 1 << tableLog;
+             U32 const rest = total - weightTotal;
+-            U32 const verif = 1 << BIT_highbit32(rest);
+-            U32 const lastWeight = BIT_highbit32(rest) + 1;
++            U32 const verif = 1 << ZSTD_highbit32(rest);
++            U32 const lastWeight = ZSTD_highbit32(rest) + 1;
+             if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+             huffWeight[oSize] = (BYTE)lastWeight;
+             rankStats[lastWeight]++;
+@@ -345,13 +329,13 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                      U32* nbSymbolsPtr, U32* tableLogPtr,
+                      const void* src, size_t srcSize,
+                      void* workSpace, size_t wkspSize,
+-                     int bmi2)
++                     int flags)
+ {
+ #if DYNAMIC_BMI2
+-    if (bmi2) {
++    if (flags & HUF_flags_bmi2) {
+         return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
+     }
+ #endif
+-    (void)bmi2;
++    (void)flags;
+     return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
+ }
+diff --git a/lib/zstd/common/error_private.c b/lib/zstd/common/error_private.c
+index 6d1135f8c373..a4062d30d170 100644
+--- a/lib/zstd/common/error_private.c
++++ b/lib/zstd/common/error_private.c
+@@ -1,5 +1,6 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -27,9 +28,11 @@ const char* ERR_getErrorString(ERR_enum code)
+     case PREFIX(version_unsupported): return "Version not supported";
+     case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
+     case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
+-    case PREFIX(corruption_detected): return "Corrupted block detected";
++    case PREFIX(corruption_detected): return "Data corruption detected";
+     case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
++    case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification";
+     case PREFIX(parameter_unsupported): return "Unsupported parameter";
++    case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters";
+     case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
+     case PREFIX(init_missing): return "Context should be init first";
+     case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+@@ -38,17 +41,22 @@ const char* ERR_getErrorString(ERR_enum code)
+     case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
+     case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
+     case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
++    case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected";
+     case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
+     case PREFIX(dictionary_wrong): return "Dictionary mismatch";
+     case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
+     case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
+     case PREFIX(srcSize_wrong): return "Src size is incorrect";
+     case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
++    case PREFIX(noForwardProgress_destFull): return "Operation made no progress over multiple calls, due to output buffer being full";
++    case PREFIX(noForwardProgress_inputEmpty): return "Operation made no progress over multiple calls, due to input being empty";
+         /* following error codes are not stable and may be removed or changed in a future version */
+     case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
+     case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
+     case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
+     case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
++    case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code";
++    case PREFIX(externalSequences_invalid): return "External sequences are not valid";
+     case PREFIX(maxCode):
+     default: return notErrorCode;
+     }
+diff --git a/lib/zstd/common/error_private.h b/lib/zstd/common/error_private.h
+index ca5101e542fa..0410ca415b54 100644
+--- a/lib/zstd/common/error_private.h
++++ b/lib/zstd/common/error_private.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -49,8 +50,13 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+ ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
+ 
+ /* check and forward error code */
+-#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
+-#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
++#define CHECK_V_F(e, f)     \
++    size_t const e = f;     \
++    do {                    \
++        if (ERR_isError(e)) \
++            return e;       \
++    } while (0)
++#define CHECK_F(f)   do { CHECK_V_F(_var_err__, f); } while (0)
+ 
+ 
+ /*-****************************************
+@@ -84,10 +90,12 @@ void _force_has_format_string(const char *format, ...) {
+  * We want to force this function invocation to be syntactically correct, but
+  * we don't want to force runtime evaluation of its arguments.
+  */
+-#define _FORCE_HAS_FORMAT_STRING(...) \
+-  if (0) { \
+-    _force_has_format_string(__VA_ARGS__); \
+-  }
++#define _FORCE_HAS_FORMAT_STRING(...)              \
++    do {                                           \
++        if (0) {                                   \
++            _force_has_format_string(__VA_ARGS__); \
++        }                                          \
++    } while (0)
+ 
+ #define ERR_QUOTE(str) #str
+ 
+@@ -98,48 +106,50 @@ void _force_has_format_string(const char *format, ...) {
+  * In order to do that (particularly, printing the conditional that failed),
+  * this can't just wrap RETURN_ERROR().
+  */
+-#define RETURN_ERROR_IF(cond, err, ...) \
+-  if (cond) { \
+-    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
+-           __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
+-    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+-    RAWLOG(3, ": " __VA_ARGS__); \
+-    RAWLOG(3, "\n"); \
+-    return ERROR(err); \
+-  }
++#define RETURN_ERROR_IF(cond, err, ...)                                        \
++    do {                                                                       \
++        if (cond) {                                                            \
++            RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s",          \
++                  __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
++            _FORCE_HAS_FORMAT_STRING(__VA_ARGS__);                             \
++            RAWLOG(3, ": " __VA_ARGS__);                                       \
++            RAWLOG(3, "\n");                                                   \
++            return ERROR(err);                                                 \
++        }                                                                      \
++    } while (0)
+ 
+ /*
+  * Unconditionally return the specified error.
+  *
+  * In debug modes, prints additional information.
+  */
+-#define RETURN_ERROR(err, ...) \
+-  do { \
+-    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
+-           __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
+-    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+-    RAWLOG(3, ": " __VA_ARGS__); \
+-    RAWLOG(3, "\n"); \
+-    return ERROR(err); \
+-  } while(0);
++#define RETURN_ERROR(err, ...)                                               \
++    do {                                                                     \
++        RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
++              __FILE__, __LINE__, ERR_QUOTE(ERROR(err)));                    \
++        _FORCE_HAS_FORMAT_STRING(__VA_ARGS__);                               \
++        RAWLOG(3, ": " __VA_ARGS__);                                         \
++        RAWLOG(3, "\n");                                                     \
++        return ERROR(err);                                                   \
++    } while(0)
+ 
+ /*
+  * If the provided expression evaluates to an error code, returns that error code.
+  *
+  * In debug modes, prints additional information.
+  */
+-#define FORWARD_IF_ERROR(err, ...) \
+-  do { \
+-    size_t const err_code = (err); \
+-    if (ERR_isError(err_code)) { \
+-      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
+-             __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
+-      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+-      RAWLOG(3, ": " __VA_ARGS__); \
+-      RAWLOG(3, "\n"); \
+-      return err_code; \
+-    } \
+-  } while(0);
++#define FORWARD_IF_ERROR(err, ...)                                                 \
++    do {                                                                           \
++        size_t const err_code = (err);                                             \
++        if (ERR_isError(err_code)) {                                               \
++            RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s",                 \
++                  __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
++            _FORCE_HAS_FORMAT_STRING(__VA_ARGS__);                                 \
++            RAWLOG(3, ": " __VA_ARGS__);                                           \
++            RAWLOG(3, "\n");                                                       \
++            return err_code;                                                       \
++        }                                                                          \
++    } while(0)
+ 
+ 
+ #endif /* ERROR_H_MODULE */
+diff --git a/lib/zstd/common/fse.h b/lib/zstd/common/fse.h
+index 4507043b2287..2185a578617d 100644
+--- a/lib/zstd/common/fse.h
++++ b/lib/zstd/common/fse.h
+@@ -1,7 +1,8 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /* ******************************************************************
+  * FSE : Finite State Entropy codec
+  * Public Prototypes declaration
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  *
+  * You can contact the author at :
+  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+@@ -50,34 +51,6 @@
+ FSE_PUBLIC_API unsigned FSE_versionNumber(void);   /*< library version number; to be used when checking dll version */
+ 
+ 
+-/*-****************************************
+-*  FSE simple functions
+-******************************************/
+-/*! FSE_compress() :
+-    Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
+-    'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
+-    @return : size of compressed data (<= dstCapacity).
+-    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+-                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
+-                     if FSE_isError(return), compression failed (more details using FSE_getErrorName())
+-*/
+-FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
+-                             const void* src, size_t srcSize);
+-
+-/*! FSE_decompress():
+-    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+-    into already allocated destination buffer 'dst', of size 'dstCapacity'.
+-    @return : size of regenerated data (<= maxDstSize),
+-              or an error code, which can be tested using FSE_isError() .
+-
+-    ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
+-    Why ? : making this distinction requires a header.
+-    Header management is intentionally delegated to the user layer, which can better manage special cases.
+-*/
+-FSE_PUBLIC_API size_t FSE_decompress(void* dst,  size_t dstCapacity,
+-                               const void* cSrc, size_t cSrcSize);
+-
+-
+ /*-*****************************************
+ *  Tool functions
+ ******************************************/
+@@ -88,20 +61,6 @@ FSE_PUBLIC_API unsigned    FSE_isError(size_t code);        /* tells if a return
+ FSE_PUBLIC_API const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+ 
+ 
+-/*-*****************************************
+-*  FSE advanced functions
+-******************************************/
+-/*! FSE_compress2() :
+-    Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
+-    Both parameters can be defined as '0' to mean : use default value
+-    @return : size of compressed data
+-    Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
+-                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
+-                     if FSE_isError(return), it's an error code.
+-*/
+-FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+-
+-
+ /*-*****************************************
+ *  FSE detailed API
+ ******************************************/
+@@ -161,8 +120,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
+ /*! Constructor and Destructor of FSE_CTable.
+     Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
+ typedef unsigned FSE_CTable;   /* don't allocate that. It's only meant to be more restrictive than void* */
+-FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
+-FSE_PUBLIC_API void        FSE_freeCTable (FSE_CTable* ct);
+ 
+ /*! FSE_buildCTable():
+     Builds `ct`, which must be already allocated, using FSE_createCTable().
+@@ -238,23 +195,7 @@ FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
+                            unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
+                            const void* rBuffer, size_t rBuffSize, int bmi2);
+ 
+-/*! Constructor and Destructor of FSE_DTable.
+-    Note that its size depends on 'tableLog' */
+ typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+-FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
+-FSE_PUBLIC_API void        FSE_freeDTable(FSE_DTable* dt);
+-
+-/*! FSE_buildDTable():
+-    Builds 'dt', which must be already allocated, using FSE_createDTable().
+-    return : 0, or an errorCode, which can be tested using FSE_isError() */
+-FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+-
+-/*! FSE_decompress_usingDTable():
+-    Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
+-    into `dst` which must be already allocated.
+-    @return : size of regenerated data (necessarily <= `dstCapacity`),
+-              or an errorCode, which can be tested using FSE_isError() */
+-FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+ 
+ /*!
+ Tutorial :
+@@ -286,6 +227,7 @@ If there is an error, the function will return an error code, which can be teste
+ 
+ #endif  /* FSE_H */
+ 
++
+ #if !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
+ #define FSE_H_FSE_STATIC_LINKING_ONLY
+ 
+@@ -317,16 +259,6 @@ If there is an error, the function will return an error code, which can be teste
+ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
+ /*< same as FSE_optimalTableLog(), which used `minus==2` */
+ 
+-/* FSE_compress_wksp() :
+- * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
+- * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
+- */
+-#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
+-size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+-
+-size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
+-/*< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
+-
+ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
+ /*< build a fake FSE_CTable, designed to compress always the same symbolValue */
+ 
+@@ -344,19 +276,11 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
+ FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+ /*< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
+ 
+-size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+-/*< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
+-
+-size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+-/*< build a fake FSE_DTable, designed to always generate the same symbolValue */
+-
+-#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
++#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
+ #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
+-size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
+-/*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
+-
+ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
+-/*< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
++/*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`.
++ * Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */
+ 
+ typedef enum {
+    FSE_repeat_none,  /*< Cannot use the previous table */
+@@ -539,20 +463,20 @@ MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, un
+     FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+     const U16* const stateTable = (const U16*)(statePtr->stateTable);
+     U32 const nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
+-    BIT_addBits(bitC, statePtr->value, nbBitsOut);
++    BIT_addBits(bitC,  (size_t)statePtr->value, nbBitsOut);
+     statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
+ }
+ 
+ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
+ {
+-    BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
++    BIT_addBits(bitC, (size_t)statePtr->value, statePtr->stateLog);
+     BIT_flushBits(bitC);
+ }
+ 
+ 
+ /* FSE_getMaxNbBits() :
+  * Approximate maximum cost of a symbol, in bits.
+- * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
++ * Fractional get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
+  * note 1 : assume symbolValue is valid (<= maxSymbolValue)
+  * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
+ MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
+diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c
+index 8dcb8ca39767..3a17e84f27bf 100644
+--- a/lib/zstd/common/fse_decompress.c
++++ b/lib/zstd/common/fse_decompress.c
+@@ -1,6 +1,7 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /* ******************************************************************
+  * FSE : Finite State Entropy decoder
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  *
+  *  You can contact the author at :
+  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+@@ -22,8 +23,8 @@
+ #define FSE_STATIC_LINKING_ONLY
+ #include "fse.h"
+ #include "error_private.h"
+-#define ZSTD_DEPS_NEED_MALLOC
+-#include "zstd_deps.h"
++#include "zstd_deps.h"  /* ZSTD_memcpy */
++#include "bits.h"       /* ZSTD_highbit32 */
+ 
+ 
+ /* **************************************************************
+@@ -55,19 +56,6 @@
+ #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+ #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+ 
+-
+-/* Function templates */
+-FSE_DTable* FSE_createDTable (unsigned tableLog)
+-{
+-    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
+-    return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
+-}
+-
+-void FSE_freeDTable (FSE_DTable* dt)
+-{
+-    ZSTD_free(dt);
+-}
+-
+ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
+ {
+     void* const tdPtr = dt+1;   /* because *dt is unsigned, 32-bits aligned on 32-bits */
+@@ -96,7 +84,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
+                     symbolNext[s] = 1;
+                 } else {
+                     if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
+-                    symbolNext[s] = normalizedCounter[s];
++                    symbolNext[s] = (U16)normalizedCounter[s];
+         }   }   }
+         ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
+     }
+@@ -111,8 +99,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
+          * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
+          * our buffer to handle the over-write.
+          */
+-        {
+-            U64 const add = 0x0101010101010101ull;
++        {   U64 const add = 0x0101010101010101ull;
+             size_t pos = 0;
+             U64 sv = 0;
+             U32 s;
+@@ -123,14 +110,13 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
+                 for (i = 8; i < n; i += 8) {
+                     MEM_write64(spread + pos + i, sv);
+                 }
+-                pos += n;
+-            }
+-        }
++                pos += (size_t)n;
++        }   }
+         /* Now we spread those positions across the table.
+-         * The benefit of doing it in two stages is that we avoid the the
++         * The benefit of doing it in two stages is that we avoid the
+          * variable size inner loop, which caused lots of branch misses.
+          * Now we can run through all the positions without any branch misses.
+-         * We unroll the loop twice, since that is what emperically worked best.
++         * We unroll the loop twice, since that is what empirically worked best.
+          */
+         {
+             size_t position = 0;
+@@ -166,7 +152,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
+         for (u=0; u<tableSize; u++) {
+             FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
+             U32 const nextState = symbolNext[symbol]++;
+-            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
++            tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
+             tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+     }   }
+ 
+@@ -184,49 +170,6 @@ size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsi
+ /*-*******************************************************
+ *  Decompression (Byte symbols)
+ *********************************************************/
+-size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+-{
+-    void* ptr = dt;
+-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+-    void* dPtr = dt + 1;
+-    FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
+-
+-    DTableH->tableLog = 0;
+-    DTableH->fastMode = 0;
+-
+-    cell->newState = 0;
+-    cell->symbol = symbolValue;
+-    cell->nbBits = 0;
+-
+-    return 0;
+-}
+-
+-
+-size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+-{
+-    void* ptr = dt;
+-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+-    void* dPtr = dt + 1;
+-    FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
+-    const unsigned tableSize = 1 << nbBits;
+-    const unsigned tableMask = tableSize - 1;
+-    const unsigned maxSV1 = tableMask+1;
+-    unsigned s;
+-
+-    /* Sanity checks */
+-    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+-
+-    /* Build Decoding Table */
+-    DTableH->tableLog = (U16)nbBits;
+-    DTableH->fastMode = 1;
+-    for (s=0; s<maxSV1; s++) {
+-        dinfo[s].newState = 0;
+-        dinfo[s].symbol = (BYTE)s;
+-        dinfo[s].nbBits = (BYTE)nbBits;
+-    }
+-
+-    return 0;
+-}
+ 
+ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
+           void* dst, size_t maxDstSize,
+@@ -287,32 +230,12 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
+             break;
+     }   }
+ 
+-    return op-ostart;
+-}
+-
+-
+-size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+-                            const void* cSrc, size_t cSrcSize,
+-                            const FSE_DTable* dt)
+-{
+-    const void* ptr = dt;
+-    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
+-    const U32 fastMode = DTableH->fastMode;
+-
+-    /* select fast mode (static) */
+-    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+-    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+-}
+-
+-
+-size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+-{
+-    return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
++    assert(op >= ostart);
++    return (size_t)(op-ostart);
+ }
+ 
+ typedef struct {
+     short ncount[FSE_MAX_SYMBOL_VALUE + 1];
+-    FSE_DTable dtable[]; /* Dynamically sized */
+ } FSE_DecompressWksp;
+ 
+ 
+@@ -327,13 +250,18 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
+     unsigned tableLog;
+     unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+     FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
++    size_t const dtablePos = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable);
++    FSE_DTable* const dtable = (FSE_DTable*)workSpace + dtablePos;
+ 
+-    DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
++    FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
+     if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
+ 
++    /* correct offset to dtable depends on this property */
++    FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0);
++
+     /* normal FSE decoding mode */
+-    {
+-        size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
++    {   size_t const NCountLength =
++            FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
+         if (FSE_isError(NCountLength)) return NCountLength;
+         if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
+         assert(NCountLength <= cSrcSize);
+@@ -342,19 +270,20 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
+     }
+ 
+     if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
+-    workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
++    assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
++    workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
+     wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
+ 
+-    CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
++    CHECK_F( FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
+ 
+     {
+-        const void* ptr = wksp->dtable;
++        const void* ptr = dtable;
+         const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
+         const U32 fastMode = DTableH->fastMode;
+ 
+         /* select fast mode (static) */
+-        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
+-        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
++        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
++        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
+     }
+ }
+ 
+@@ -382,9 +311,4 @@ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc,
+     return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
+ }
+ 
+-
+-typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+-
+-
+-
+ #endif   /* FSE_COMMONDEFS_ONLY */
+diff --git a/lib/zstd/common/huf.h b/lib/zstd/common/huf.h
+index 5042ff870308..57462466e188 100644
+--- a/lib/zstd/common/huf.h
++++ b/lib/zstd/common/huf.h
+@@ -1,7 +1,8 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /* ******************************************************************
+  * huff0 huffman codec,
+  * part of Finite State Entropy library
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  *
+  * You can contact the author at :
+  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+@@ -18,99 +19,22 @@
+ 
+ /* *** Dependencies *** */
+ #include "zstd_deps.h"    /* size_t */
+-
+-
+-/* *** library symbols visibility *** */
+-/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
+- *        HUF symbols remain "private" (internal symbols for library only).
+- *        Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
+-#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+-#  define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
+-#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+-#  define HUF_PUBLIC_API __declspec(dllexport)
+-#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+-#  define HUF_PUBLIC_API __declspec(dllimport)  /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
+-#else
+-#  define HUF_PUBLIC_API
+-#endif
+-
+-
+-/* ========================== */
+-/* ***  simple functions  *** */
+-/* ========================== */
+-
+-/* HUF_compress() :
+- *  Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
+- * 'dst' buffer must be already allocated.
+- *  Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
+- * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
+- * @return : size of compressed data (<= `dstCapacity`).
+- *  Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+- *                   if HUF_isError(return), compression failed (more details using HUF_getErrorName())
+- */
+-HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
+-                             const void* src, size_t srcSize);
+-
+-/* HUF_decompress() :
+- *  Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
+- *  into already allocated buffer 'dst', of minimum size 'dstSize'.
+- * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
+- *  Note : in contrast with FSE, HUF_decompress can regenerate
+- *         RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
+- *         because it knows size to regenerate (originalSize).
+- * @return : size of regenerated data (== originalSize),
+- *           or an error code, which can be tested using HUF_isError()
+- */
+-HUF_PUBLIC_API size_t HUF_decompress(void* dst,  size_t originalSize,
+-                               const void* cSrc, size_t cSrcSize);
++#include "mem.h"          /* U32 */
++#define FSE_STATIC_LINKING_ONLY
++#include "fse.h"
+ 
+ 
+ /* ***   Tool functions *** */
+-#define HUF_BLOCKSIZE_MAX (128 * 1024)                  /*< maximum input size for a single block compressed with HUF_compress */
+-HUF_PUBLIC_API size_t HUF_compressBound(size_t size);   /*< maximum compressed size (worst case) */
++#define HUF_BLOCKSIZE_MAX (128 * 1024)   /*< maximum input size for a single block compressed with HUF_compress */
++size_t HUF_compressBound(size_t size);   /*< maximum compressed size (worst case) */
+ 
+ /* Error Management */
+-HUF_PUBLIC_API unsigned    HUF_isError(size_t code);       /*< tells if a return value is an error code */
+-HUF_PUBLIC_API const char* HUF_getErrorName(size_t code);  /*< provides error code string (useful for debugging) */
+-
++unsigned    HUF_isError(size_t code);       /*< tells if a return value is an error code */
++const char* HUF_getErrorName(size_t code);  /*< provides error code string (useful for debugging) */
+ 
+-/* ***   Advanced function   *** */
+ 
+-/* HUF_compress2() :
+- *  Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
+- * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
+- * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
+-HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
+-                               const void* src, size_t srcSize,
+-                               unsigned maxSymbolValue, unsigned tableLog);
+-
+-/* HUF_compress4X_wksp() :
+- *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
+- * `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
+ #define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
+ #define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
+-HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
+-                                     const void* src, size_t srcSize,
+-                                     unsigned maxSymbolValue, unsigned tableLog,
+-                                     void* workSpace, size_t wkspSize);
+-
+-#endif   /* HUF_H_298734234 */
+-
+-/* ******************************************************************
+- *  WARNING !!
+- *  The following section contains advanced and experimental definitions
+- *  which shall never be used in the context of a dynamic library,
+- *  because they are not guaranteed to remain stable in the future.
+- *  Only consider them in association with static linking.
+- * *****************************************************************/
+-#if !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
+-#define HUF_H_HUF_STATIC_LINKING_ONLY
+-
+-/* *** Dependencies *** */
+-#include "mem.h"   /* U32 */
+-#define FSE_STATIC_LINKING_ONLY
+-#include "fse.h"
+-
+ 
+ /* *** Constants *** */
+ #define HUF_TABLELOG_MAX      12      /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
+@@ -151,25 +75,49 @@ typedef U32 HUF_DTable;
+ /* ****************************************
+ *  Advanced decompression functions
+ ******************************************/
+-size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< single-symbol decoder */
+-#ifndef HUF_FORCE_DECOMPRESS_X1
+-size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< double-symbols decoder */
+-#endif
+ 
+-size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< decodes RLE and uncompressed */
+-size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< considers RLE and uncompressed as errors */
+-size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< considers RLE and uncompressed as errors */
+-size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< single-symbol decoder */
+-size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< single-symbol decoder */
+-#ifndef HUF_FORCE_DECOMPRESS_X1
+-size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< double-symbols decoder */
+-size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< double-symbols decoder */
+-#endif
++/*
++ * Huffman flags bitset.
++ * For all flags, 0 is the default value.
++ */
++typedef enum {
++    /*
++     * If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime.
++     * Otherwise: Ignored.
++     */
++    HUF_flags_bmi2 = (1 << 0),
++    /*
++     * If set: Test possible table depths to find the one that produces the smallest header + encoded size.
++     * If unset: Use heuristic to find the table depth.
++     */
++    HUF_flags_optimalDepth = (1 << 1),
++    /*
++     * If set: If the previous table can encode the input, always reuse the previous table.
++     * If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output.
++     */
++    HUF_flags_preferRepeat = (1 << 2),
++    /*
++     * If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress.
++     * If unset: Always histogram the entire input.
++     */
++    HUF_flags_suspectUncompressible = (1 << 3),
++    /*
++     * If set: Don't use assembly implementations
++     * If unset: Allow using assembly implementations
++     */
++    HUF_flags_disableAsm = (1 << 4),
++    /*
++     * If set: Don't use the fast decoding loop, always use the fallback decoding loop.
++     * If unset: Use the fast decoding loop when possible.
++     */
++    HUF_flags_disableFast = (1 << 5)
++} HUF_flags_e;
+ 
+ 
+ /* ****************************************
+  *  HUF detailed API
+  * ****************************************/
++#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra
+ 
+ /*! HUF_compress() does the following:
+  *  1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
+@@ -182,12 +130,12 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+  *  For example, it's possible to compress several blocks using the same 'CTable',
+  *  or to save and regenerate 'CTable' using external methods.
+  */
+-unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+-size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);   /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
+-size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
++unsigned HUF_minTableLog(unsigned symbolCardinality);
++unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue);
++unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace,
++ size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */
+ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
+-size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+-size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
++size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
+ size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
+ int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
+ 
+@@ -196,6 +144,7 @@ typedef enum {
+    HUF_repeat_check, /*< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
+    HUF_repeat_valid  /*< Can use the previous table and it is assumed to be valid */
+  } HUF_repeat;
++
+ /* HUF_compress4X_repeat() :
+  *  Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+  *  If it uses hufTable it does not modify hufTable or repeat.
+@@ -206,13 +155,13 @@ size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
+                        const void* src, size_t srcSize,
+                        unsigned maxSymbolValue, unsigned tableLog,
+                        void* workSpace, size_t wkspSize,    /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+-                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
++                       HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
+ 
+ /* HUF_buildCTable_wksp() :
+  *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+  * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
+  */
+-#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
++#define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192)
+ #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
+ size_t HUF_buildCTable_wksp (HUF_CElt* tree,
+                        const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
+@@ -238,7 +187,7 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
+                           U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
+                           const void* src, size_t srcSize,
+                           void* workspace, size_t wkspSize,
+-                          int bmi2);
++                          int flags);
+ 
+ /* HUF_readCTable() :
+  *  Loading a CTable saved with HUF_writeCTable() */
+@@ -246,9 +195,22 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
+ 
+ /* HUF_getNbBitsFromCTable() :
+  *  Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
+- *  Note 1 : is not inlined, as HUF_CElt definition is private */
++ *  Note 1 : If symbolValue > HUF_readCTableHeader(symbolTable).maxSymbolValue, returns 0
++ *  Note 2 : is not inlined, as HUF_CElt definition is private
++ */
+ U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
+ 
++typedef struct {
++    BYTE tableLog;
++    BYTE maxSymbolValue;
++    BYTE unused[sizeof(size_t) - 2];
++} HUF_CTableHeader;
++
++/* HUF_readCTableHeader() :
++ *  @returns The header from the CTable specifying the tableLog and the maxSymbolValue.
++ */
++HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable);
++
+ /*
+  * HUF_decompress() does the following:
+  * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
+@@ -276,32 +238,12 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
+ #define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
+ #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
+ 
+-#ifndef HUF_FORCE_DECOMPRESS_X2
+-size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
+-size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
+-#endif
+-#ifndef HUF_FORCE_DECOMPRESS_X1
+-size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
+-size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
+-#endif
+-
+-size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+-#ifndef HUF_FORCE_DECOMPRESS_X2
+-size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+-#endif
+-#ifndef HUF_FORCE_DECOMPRESS_X1
+-size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+-#endif
+-
+ 
+ /* ====================== */
+ /* single stream variants */
+ /* ====================== */
+ 
+-size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+-size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
+-size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+-size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
++size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
+ /* HUF_compress1X_repeat() :
+  *  Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+  *  If it uses hufTable it does not modify hufTable or repeat.
+@@ -312,47 +254,28 @@ size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
+                        const void* src, size_t srcSize,
+                        unsigned maxSymbolValue, unsigned tableLog,
+                        void* workSpace, size_t wkspSize,   /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+-                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
++                       HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
+ 
+-size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+-#ifndef HUF_FORCE_DECOMPRESS_X1
+-size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
+-#endif
+-
+-size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+-size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
+-#ifndef HUF_FORCE_DECOMPRESS_X2
+-size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< single-symbol decoder */
+-size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< single-symbol decoder */
+-#endif
++size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
+ #ifndef HUF_FORCE_DECOMPRESS_X1
+-size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< double-symbols decoder */
+-size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< double-symbols decoder */
+-#endif
+-
+-size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);   /*< automatic selection of sing or double symbol decoder, based on DTable */
+-#ifndef HUF_FORCE_DECOMPRESS_X2
+-size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+-#endif
+-#ifndef HUF_FORCE_DECOMPRESS_X1
+-size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
++size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);   /*< double-symbols decoder */
+ #endif
+ 
+ /* BMI2 variants.
+  * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
+  */
+-size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
++size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
+ #ifndef HUF_FORCE_DECOMPRESS_X2
+-size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
++size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
+ #endif
+-size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+-size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
++size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
++size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
+ #ifndef HUF_FORCE_DECOMPRESS_X2
+-size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
++size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
+ #endif
+ #ifndef HUF_FORCE_DECOMPRESS_X1
+-size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
++size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
+ #endif
+ 
+-#endif /* HUF_STATIC_LINKING_ONLY */
++#endif   /* HUF_H_298734234 */
+ 
+diff --git a/lib/zstd/common/mem.h b/lib/zstd/common/mem.h
+index 1d9cc03924ca..2e91e7780c1f 100644
+--- a/lib/zstd/common/mem.h
++++ b/lib/zstd/common/mem.h
+@@ -1,6 +1,6 @@
+ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -24,6 +24,7 @@
+ /*-****************************************
+ *  Compiler specifics
+ ******************************************/
++#undef MEM_STATIC /* may be already defined from common/compiler.h */
+ #define MEM_STATIC static inline
+ 
+ /*-**************************************************************
+diff --git a/lib/zstd/common/portability_macros.h b/lib/zstd/common/portability_macros.h
+index 0e3b2c0a527d..f08638cced6c 100644
+--- a/lib/zstd/common/portability_macros.h
++++ b/lib/zstd/common/portability_macros.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -12,7 +13,7 @@
+ #define ZSTD_PORTABILITY_MACROS_H
+ 
+ /*
+- * This header file contains macro defintions to support portability.
++ * This header file contains macro definitions to support portability.
+  * This header is shared between C and ASM code, so it MUST only
+  * contain macro definitions. It MUST not contain any C code.
+  *
+@@ -45,6 +46,8 @@
+ /* Mark the internal assembly functions as hidden  */
+ #ifdef __ELF__
+ # define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
++#elif defined(__APPLE__)
++# define ZSTD_HIDE_ASM_FUNCTION(func) .private_extern func
+ #else
+ # define ZSTD_HIDE_ASM_FUNCTION(func)
+ #endif
+@@ -65,7 +68,7 @@
+ #endif
+ 
+ /*
+- * Only enable assembly for GNUC comptabile compilers,
++ * Only enable assembly for GNUC compatible compilers,
+  * because other platforms may not support GAS assembly syntax.
+  *
+  * Only enable assembly for Linux / MacOS, other platforms may
+@@ -90,4 +93,23 @@
+  */
+ #define ZSTD_ENABLE_ASM_X86_64_BMI2 0
+ 
++/*
++ * For x86 ELF targets, add .note.gnu.property section for Intel CET in
++ * assembly sources when CET is enabled.
++ *
++ * Additionally, any function that may be called indirectly must begin
++ * with ZSTD_CET_ENDBRANCH.
++ */
++#if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \
++    && defined(__has_include)
++# if __has_include(<cet.h>)
++#  include <cet.h>
++#  define ZSTD_CET_ENDBRANCH _CET_ENDBR
++# endif
++#endif
++
++#ifndef ZSTD_CET_ENDBRANCH
++# define ZSTD_CET_ENDBRANCH
++#endif
++
+ #endif /* ZSTD_PORTABILITY_MACROS_H */
+diff --git a/lib/zstd/common/zstd_common.c b/lib/zstd/common/zstd_common.c
+index 3d7e35b309b5..44b95b25344a 100644
+--- a/lib/zstd/common/zstd_common.c
++++ b/lib/zstd/common/zstd_common.c
+@@ -1,5 +1,6 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -14,7 +15,6 @@
+ *  Dependencies
+ ***************************************/
+ #define ZSTD_DEPS_NEED_MALLOC
+-#include "zstd_deps.h"   /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
+ #include "error_private.h"
+ #include "zstd_internal.h"
+ 
+@@ -47,37 +47,3 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
+ /*! ZSTD_getErrorString() :
+  *  provides error code string from enum */
+ const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
+-
+-
+-
+-/*=**************************************************************
+-*  Custom allocator
+-****************************************************************/
+-void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
+-{
+-    if (customMem.customAlloc)
+-        return customMem.customAlloc(customMem.opaque, size);
+-    return ZSTD_malloc(size);
+-}
+-
+-void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
+-{
+-    if (customMem.customAlloc) {
+-        /* calloc implemented as malloc+memset;
+-         * not as efficient as calloc, but next best guess for custom malloc */
+-        void* const ptr = customMem.customAlloc(customMem.opaque, size);
+-        ZSTD_memset(ptr, 0, size);
+-        return ptr;
+-    }
+-    return ZSTD_calloc(1, size);
+-}
+-
+-void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
+-{
+-    if (ptr!=NULL) {
+-        if (customMem.customFree)
+-            customMem.customFree(customMem.opaque, ptr);
+-        else
+-            ZSTD_free(ptr);
+-    }
+-}
+diff --git a/lib/zstd/common/zstd_deps.h b/lib/zstd/common/zstd_deps.h
+index 2c34e8a33a1c..f931f7d0e294 100644
+--- a/lib/zstd/common/zstd_deps.h
++++ b/lib/zstd/common/zstd_deps.h
+@@ -1,6 +1,6 @@
+ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -105,3 +105,17 @@ static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) {
+ 
+ #endif /* ZSTD_DEPS_IO */
+ #endif /* ZSTD_DEPS_NEED_IO */
++
++/*
++ * Only requested when MSAN is enabled.
++ * Need:
++ * intptr_t
++ */
++#ifdef ZSTD_DEPS_NEED_STDINT
++#ifndef ZSTD_DEPS_STDINT
++#define ZSTD_DEPS_STDINT
++
++/* intptr_t already provided by ZSTD_DEPS_COMMON */
++
++#endif /* ZSTD_DEPS_STDINT */
++#endif /* ZSTD_DEPS_NEED_STDINT */
+diff --git a/lib/zstd/common/zstd_internal.h b/lib/zstd/common/zstd_internal.h
+index 93305d9b41bb..11da1233e890 100644
+--- a/lib/zstd/common/zstd_internal.h
++++ b/lib/zstd/common/zstd_internal.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -28,7 +29,6 @@
+ #include <linux/zstd.h>
+ #define FSE_STATIC_LINKING_ONLY
+ #include "fse.h"
+-#define HUF_STATIC_LINKING_ONLY
+ #include "huf.h"
+ #include <linux/xxhash.h>                /* XXH_reset, update, digest */
+ #define ZSTD_TRACE 0
+@@ -83,9 +83,9 @@ typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
+ #define ZSTD_FRAMECHECKSUMSIZE 4
+ 
+ #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
+-#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
++#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */)   /* for a non-null block */
++#define MIN_LITERALS_FOR_4_STREAMS 6
+ 
+-#define HufLog 12
+ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
+ 
+ #define LONGNBSEQ 0x7F00
+@@ -93,6 +93,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
+ #define MINMATCH 3
+ 
+ #define Litbits  8
++#define LitHufLog 11
+ #define MaxLit ((1<<Litbits) - 1)
+ #define MaxML   52
+ #define MaxLL   35
+@@ -103,6 +104,8 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
+ #define LLFSELog    9
+ #define OffFSELog   8
+ #define MaxFSELog  MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
++#define MaxMLBits 16
++#define MaxLLBits 16
+ 
+ #define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
+ /* Each table cannot take more than #symbols * FSELog bits */
+@@ -166,7 +169,7 @@ static void ZSTD_copy8(void* dst, const void* src) {
+     ZSTD_memcpy(dst, src, 8);
+ #endif
+ }
+-#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
++#define COPY8(d,s) do { ZSTD_copy8(d,s); d+=8; s+=8; } while (0)
+ 
+ /* Need to use memmove here since the literal buffer can now be located within
+    the dst buffer. In circumstances where the op "catches up" to where the
+@@ -186,7 +189,7 @@ static void ZSTD_copy16(void* dst, const void* src) {
+     ZSTD_memcpy(dst, copy16_buf, 16);
+ #endif
+ }
+-#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
++#define COPY16(d,s) do { ZSTD_copy16(d,s); d+=16; s+=16; } while (0)
+ 
+ #define WILDCOPY_OVERLENGTH 32
+ #define WILDCOPY_VECLEN 16
+@@ -215,7 +218,7 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
+     if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
+         /* Handle short offset copies. */
+         do {
+-            COPY8(op, ip)
++            COPY8(op, ip);
+         } while (op < oend);
+     } else {
+         assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
+@@ -225,12 +228,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
+          * one COPY16() in the first call. Then, do two calls per loop since
+          * at that point it is more likely to have a high trip count.
+          */
+-#ifdef __aarch64__
+-        do {
+-            COPY16(op, ip);
+-        }
+-        while (op < oend);
+-#else
+         ZSTD_copy16(op, ip);
+         if (16 >= length) return;
+         op += 16;
+@@ -240,7 +237,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
+             COPY16(op, ip);
+         }
+         while (op < oend);
+-#endif
+     }
+ }
+ 
+@@ -289,11 +285,11 @@ typedef enum {
+ typedef struct {
+     seqDef* sequencesStart;
+     seqDef* sequences;      /* ptr to end of sequences */
+-    BYTE* litStart;
+-    BYTE* lit;              /* ptr to end of literals */
+-    BYTE* llCode;
+-    BYTE* mlCode;
+-    BYTE* ofCode;
++    BYTE*  litStart;
++    BYTE*  lit;             /* ptr to end of literals */
++    BYTE*  llCode;
++    BYTE*  mlCode;
++    BYTE*  ofCode;
+     size_t maxNbSeq;
+     size_t maxNbLit;
+ 
+@@ -301,8 +297,8 @@ typedef struct {
+      * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
+      * the existing value of the litLength or matchLength by 0x10000.
+      */
+-    ZSTD_longLengthType_e   longLengthType;
+-    U32                     longLengthPos;  /* Index of the sequence to apply long length modification to */
++    ZSTD_longLengthType_e longLengthType;
++    U32                   longLengthPos;  /* Index of the sequence to apply long length modification to */
+ } seqStore_t;
+ 
+ typedef struct {
+@@ -321,10 +317,10 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
+     seqLen.matchLength = seq->mlBase + MINMATCH;
+     if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
+         if (seqStore->longLengthType == ZSTD_llt_literalLength) {
+-            seqLen.litLength += 0xFFFF;
++            seqLen.litLength += 0x10000;
+         }
+         if (seqStore->longLengthType == ZSTD_llt_matchLength) {
+-            seqLen.matchLength += 0xFFFF;
++            seqLen.matchLength += 0x10000;
+         }
+     }
+     return seqLen;
+@@ -337,72 +333,13 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
+  *          `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
+  */
+ typedef struct {
++    size_t nbBlocks;
+     size_t compressedSize;
+     unsigned long long decompressedBound;
+ } ZSTD_frameSizeInfo;   /* decompress & legacy */
+ 
+ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
+-void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
+-
+-/* custom memory allocation functions */
+-void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
+-void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
+-void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
+-
+-
+-MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
+-{
+-    assert(val != 0);
+-    {
+-#   if (__GNUC__ >= 3)   /* GCC Intrinsic */
+-        return __builtin_clz (val) ^ 31;
+-#   else   /* Software version */
+-        static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+-        U32 v = val;
+-        v |= v >> 1;
+-        v |= v >> 2;
+-        v |= v >> 4;
+-        v |= v >> 8;
+-        v |= v >> 16;
+-        return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
+-#   endif
+-    }
+-}
+-
+-/*
+- * Counts the number of trailing zeros of a `size_t`.
+- * Most compilers should support CTZ as a builtin. A backup
+- * implementation is provided if the builtin isn't supported, but
+- * it may not be terribly efficient.
+- */
+-MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
+-{
+-    if (MEM_64bits()) {
+-#       if (__GNUC__ >= 4)
+-            return __builtin_ctzll((U64)val);
+-#       else
+-            static const int DeBruijnBytePos[64] = {  0,  1,  2,  7,  3, 13,  8, 19,
+-                                                      4, 25, 14, 28,  9, 34, 20, 56,
+-                                                      5, 17, 26, 54, 15, 41, 29, 43,
+-                                                      10, 31, 38, 35, 21, 45, 49, 57,
+-                                                      63,  6, 12, 18, 24, 27, 33, 55,
+-                                                      16, 53, 40, 42, 30, 37, 44, 48,
+-                                                      62, 11, 23, 32, 52, 39, 36, 47,
+-                                                      61, 22, 51, 46, 60, 50, 59, 58 };
+-            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+-#       endif
+-    } else { /* 32 bits */
+-#       if (__GNUC__ >= 3)
+-            return __builtin_ctz((U32)val);
+-#       else
+-            static const int DeBruijnBytePos[32] = {  0,  1, 28,  2, 29, 14, 24,  3,
+-                                                     30, 22, 20, 15, 25, 17,  4,  8,
+-                                                     31, 27, 13, 23, 21, 19, 16,  7,
+-                                                     26, 12, 18,  6, 11,  5, 10,  9 };
+-            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+-#       endif
+-    }
+-}
++int ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
+ 
+ 
+ /* ZSTD_invalidateRepCodes() :
+@@ -420,13 +357,13 @@ typedef struct {
+ 
+ /*! ZSTD_getcBlockSize() :
+  *  Provides the size of compressed block from block header `src` */
+-/* Used by: decompress, fullbench (does not get its definition from here) */
++/*  Used by: decompress, fullbench */
+ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+                           blockProperties_t* bpPtr);
+ 
+ /*! ZSTD_decodeSeqHeaders() :
+  *  decode sequence header from src */
+-/* Used by: decompress, fullbench (does not get its definition from here) */
++/*  Used by: zstd_decompress_block, fullbench */
+ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
+                        const void* src, size_t srcSize);
+ 
+diff --git a/lib/zstd/compress/clevels.h b/lib/zstd/compress/clevels.h
+index d9a76112ec3a..6ab8be6532ef 100644
+--- a/lib/zstd/compress/clevels.h
++++ b/lib/zstd/compress/clevels.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+diff --git a/lib/zstd/compress/fse_compress.c b/lib/zstd/compress/fse_compress.c
+index ec5b1ca6d71a..44a3c10becf2 100644
+--- a/lib/zstd/compress/fse_compress.c
++++ b/lib/zstd/compress/fse_compress.c
+@@ -1,6 +1,7 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /* ******************************************************************
+  * FSE : Finite State Entropy encoder
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  *
+  *  You can contact the author at :
+  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+@@ -25,7 +26,8 @@
+ #include "../common/error_private.h"
+ #define ZSTD_DEPS_NEED_MALLOC
+ #define ZSTD_DEPS_NEED_MATH64
+-#include "../common/zstd_deps.h"  /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
++#include "../common/zstd_deps.h"  /* ZSTD_memset */
++#include "../common/bits.h" /* ZSTD_highbit32 */
+ 
+ 
+ /* **************************************************************
+@@ -90,7 +92,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
+     assert(tableLog < 16);   /* required for threshold strategy to work */
+ 
+     /* For explanations on how to distribute symbol values over the table :
+-     * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
++     * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
+ 
+      #ifdef __clang_analyzer__
+      ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
+@@ -191,7 +193,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
+                 break;
+             default :
+                 assert(normalizedCounter[s] > 1);
+-                {   U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
++                {   U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
+                     U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
+                     symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
+                     symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
+@@ -224,8 +226,8 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
+     size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
+                                    + 4 /* bitCount initialized at 4 */
+                                    + 2 /* first two symbols may use one additional bit each */) / 8)
+-                                    + 1 /* round up to whole nb bytes */
+-                                    + 2 /* additional two bytes for bitstream flush */;
++                                   + 1 /* round up to whole nb bytes */
++                                   + 2 /* additional two bytes for bitstream flush */;
+     return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
+ }
+ 
+@@ -254,7 +256,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
+     /* Init */
+     remaining = tableSize+1;   /* +1 for extra accuracy */
+     threshold = tableSize;
+-    nbBits = tableLog+1;
++    nbBits = (int)tableLog+1;
+ 
+     while ((symbol < alphabetSize) && (remaining>1)) {  /* stops at 1 */
+         if (previousIs0) {
+@@ -273,7 +275,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
+             }
+             while (symbol >= start+3) {
+                 start+=3;
+-                bitStream += 3 << bitCount;
++                bitStream += 3U << bitCount;
+                 bitCount += 2;
+             }
+             bitStream += (symbol-start) << bitCount;
+@@ -293,7 +295,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
+             count++;   /* +1 for extra accuracy */
+             if (count>=threshold)
+                 count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
+-            bitStream += count << bitCount;
++            bitStream += (U32)count << bitCount;
+             bitCount  += nbBits;
+             bitCount  -= (count<max);
+             previousIs0  = (count==1);
+@@ -321,7 +323,8 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
+     out[1] = (BYTE)(bitStream>>8);
+     out+= (bitCount+7) /8;
+ 
+-    return (out-ostart);
++    assert(out >= ostart);
++    return (size_t)(out-ostart);
+ }
+ 
+ 
+@@ -342,21 +345,11 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize,
+ *  FSE Compression Code
+ ****************************************************************/
+ 
+-FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
+-{
+-    size_t size;
+-    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
+-    size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
+-    return (FSE_CTable*)ZSTD_malloc(size);
+-}
+-
+-void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
+-
+ /* provides the minimum logSize to safely represent a distribution */
+ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
+ {
+-    U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
+-    U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
++    U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
++    U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
+     U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
+     assert(srcSize > 1); /* Not supported, RLE should be used instead */
+     return minBits;
+@@ -364,7 +357,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
+ 
+ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
+ {
+-    U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
++    U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
+     U32 tableLog = maxTableLog;
+     U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
+     assert(srcSize > 1); /* Not supported, RLE should be used instead */
+@@ -532,40 +525,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
+     return tableLog;
+ }
+ 
+-
+-/* fake FSE_CTable, for raw (uncompressed) input */
+-size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
+-{
+-    const unsigned tableSize = 1 << nbBits;
+-    const unsigned tableMask = tableSize - 1;
+-    const unsigned maxSymbolValue = tableMask;
+-    void* const ptr = ct;
+-    U16* const tableU16 = ( (U16*) ptr) + 2;
+-    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1);   /* assumption : tableLog >= 1 */
+-    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
+-    unsigned s;
+-
+-    /* Sanity checks */
+-    if (nbBits < 1) return ERROR(GENERIC);             /* min size */
+-
+-    /* header */
+-    tableU16[-2] = (U16) nbBits;
+-    tableU16[-1] = (U16) maxSymbolValue;
+-
+-    /* Build table */
+-    for (s=0; s<tableSize; s++)
+-        tableU16[s] = (U16)(tableSize + s);
+-
+-    /* Build Symbol Transformation Table */
+-    {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
+-        for (s=0; s<=maxSymbolValue; s++) {
+-            symbolTT[s].deltaNbBits = deltaNbBits;
+-            symbolTT[s].deltaFindState = s-1;
+-    }   }
+-
+-    return 0;
+-}
+-
+ /* fake FSE_CTable, for rle input (always same symbol) */
+ size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
+ {
+@@ -664,5 +623,4 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
+ 
+ size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
+ 
+-
+ #endif   /* FSE_COMMONDEFS_ONLY */
+diff --git a/lib/zstd/compress/hist.c b/lib/zstd/compress/hist.c
+index 3ddc6dfb6894..0b12587cc14b 100644
+--- a/lib/zstd/compress/hist.c
++++ b/lib/zstd/compress/hist.c
+@@ -1,7 +1,8 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /* ******************************************************************
+  * hist : Histogram functions
+  * part of Finite State Entropy project
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  *
+  *  You can contact the author at :
+  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+diff --git a/lib/zstd/compress/hist.h b/lib/zstd/compress/hist.h
+index fc1830abc9c6..f7687b0fc20a 100644
+--- a/lib/zstd/compress/hist.h
++++ b/lib/zstd/compress/hist.h
+@@ -1,7 +1,8 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /* ******************************************************************
+  * hist : Histogram functions
+  * part of Finite State Entropy project
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  *
+  *  You can contact the author at :
+  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+diff --git a/lib/zstd/compress/huf_compress.c b/lib/zstd/compress/huf_compress.c
+index 74ef0db47621..0b229f5d2ae2 100644
+--- a/lib/zstd/compress/huf_compress.c
++++ b/lib/zstd/compress/huf_compress.c
+@@ -1,6 +1,7 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /* ******************************************************************
+  * Huffman encoder, part of New Generation Entropy library
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  *
+  *  You can contact the author at :
+  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+@@ -26,9 +27,9 @@
+ #include "hist.h"
+ #define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
+ #include "../common/fse.h"        /* header compression */
+-#define HUF_STATIC_LINKING_ONLY
+ #include "../common/huf.h"
+ #include "../common/error_private.h"
++#include "../common/bits.h"       /* ZSTD_highbit32 */
+ 
+ 
+ /* **************************************************************
+@@ -39,13 +40,67 @@
+ 
+ 
+ /* **************************************************************
+-*  Utils
++*  Required declarations
+ ****************************************************************/
+-unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
++typedef struct nodeElt_s {
++    U32 count;
++    U16 parent;
++    BYTE byte;
++    BYTE nbBits;
++} nodeElt;
++
++
++/* **************************************************************
++*  Debug Traces
++****************************************************************/
++
++#if DEBUGLEVEL >= 2
++
++static size_t showU32(const U32* arr, size_t size)
+ {
+-    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
++    size_t u;
++    for (u=0; u<size; u++) {
++        RAWLOG(6, " %u", arr[u]); (void)arr;
++    }
++    RAWLOG(6, " \n");
++    return size;
+ }
+ 
++static size_t HUF_getNbBits(HUF_CElt elt);
++
++static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
++{
++    size_t u;
++    for (u=0; u<size; u++) {
++        RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
++    }
++    RAWLOG(6, " \n");
++    return size;
++
++}
++
++static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
++{
++    size_t u;
++    for (u=0; u<size; u++) {
++        RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
++    }
++    RAWLOG(6, " \n");
++    return size;
++}
++
++static size_t showHNodeBits(const nodeElt* hnode, size_t size)
++{
++    size_t u;
++    for (u=0; u<size; u++) {
++        RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
++    }
++    RAWLOG(6, " \n");
++    return size;
++}
++
++#endif
++
+ 
+ /* *******************************************************
+ *  HUF : Huffman block compression
+@@ -86,7 +141,10 @@ typedef struct {
+     S16 norm[HUF_TABLELOG_MAX+1];
+ } HUF_CompressWeightsWksp;
+ 
+-static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
++static size_t
++HUF_compressWeights(void* dst, size_t dstSize,
++              const void* weightTable, size_t wtSize,
++                    void* workspace, size_t workspaceSize)
+ {
+     BYTE* const ostart = (BYTE*) dst;
+     BYTE* op = ostart;
+@@ -137,7 +195,7 @@ static size_t HUF_getNbBitsFast(HUF_CElt elt)
+ 
+ static size_t HUF_getValue(HUF_CElt elt)
+ {
+-    return elt & ~0xFF;
++    return elt & ~(size_t)0xFF;
+ }
+ 
+ static size_t HUF_getValueFast(HUF_CElt elt)
+@@ -160,6 +218,25 @@ static void HUF_setValue(HUF_CElt* elt, size_t value)
+     }
+ }
+ 
++HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable)
++{
++    HUF_CTableHeader header;
++    ZSTD_memcpy(&header, ctable, sizeof(header));
++    return header;
++}
++
++static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue)
++{
++    HUF_CTableHeader header;
++    HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header));
++    ZSTD_memset(&header, 0, sizeof(header));
++    assert(tableLog < 256);
++    header.tableLog = (BYTE)tableLog;
++    assert(maxSymbolValue < 256);
++    header.maxSymbolValue = (BYTE)maxSymbolValue;
++    ZSTD_memcpy(ctable, &header, sizeof(header));
++}
++
+ typedef struct {
+     HUF_CompressWeightsWksp wksp;
+     BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
+@@ -175,6 +252,11 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
+     U32 n;
+     HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
+ 
++    HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
++
++    assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue);
++    assert(HUF_readCTableHeader(CTable).tableLog == huffLog);
++
+     /* check conditions */
+     if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
+     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
+@@ -204,16 +286,6 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
+     return ((maxSymbolValue+1)/2) + 1;
+ }
+ 
+-/*! HUF_writeCTable() :
+-    `CTable` : Huffman tree to save, using huf representation.
+-    @return : size of saved CTable */
+-size_t HUF_writeCTable (void* dst, size_t maxDstSize,
+-                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
+-{
+-    HUF_WriteCTableWksp wksp;
+-    return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
+-}
+-
+ 
+ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
+ {
+@@ -231,7 +303,9 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
+     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+     if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
+ 
+-    CTable[0] = tableLog;
++    *maxSymbolValuePtr = nbSymbols - 1;
++
++    HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr);
+ 
+     /* Prepare base value per rank */
+     {   U32 n, nextRankStart = 0;
+@@ -263,74 +337,71 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
+         { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
+     }
+ 
+-    *maxSymbolValuePtr = nbSymbols - 1;
+     return readSize;
+ }
+ 
+ U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
+ {
+-    const HUF_CElt* ct = CTable + 1;
++    const HUF_CElt* const ct = CTable + 1;
+     assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
++    if (symbolValue > HUF_readCTableHeader(CTable).maxSymbolValue)
++        return 0;
+     return (U32)HUF_getNbBits(ct[symbolValue]);
+ }
+ 
+ 
+-typedef struct nodeElt_s {
+-    U32 count;
+-    U16 parent;
+-    BYTE byte;
+-    BYTE nbBits;
+-} nodeElt;
+-
+ /*
+  * HUF_setMaxHeight():
+- * Enforces maxNbBits on the Huffman tree described in huffNode.
++ * Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
+  *
+- * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
+- * the tree to so that it is a valid canonical Huffman tree.
++ * It attempts to convert all nodes with nbBits > @targetNbBits
++ * to employ @targetNbBits instead. Then it adjusts the tree
++ * so that it remains a valid canonical Huffman tree.
+  *
+  * @pre               The sum of the ranks of each symbol == 2^largestBits,
+  *                    where largestBits == huffNode[lastNonNull].nbBits.
+  * @post              The sum of the ranks of each symbol == 2^largestBits,
+- *                    where largestBits is the return value <= maxNbBits.
++ *                    where largestBits is the return value (expected <= targetNbBits).
+  *
+- * @param huffNode    The Huffman tree modified in place to enforce maxNbBits.
++ * @param huffNode    The Huffman tree modified in place to enforce targetNbBits.
++ *                    It's presumed sorted, from most frequent to rarest symbol.
+  * @param lastNonNull The symbol with the lowest count in the Huffman tree.
+- * @param maxNbBits   The maximum allowed number of bits, which the Huffman tree
++ * @param targetNbBits  The allowed number of bits, which the Huffman tree
+  *                    may not respect. After this function the Huffman tree will
+- *                    respect maxNbBits.
+- * @return            The maximum number of bits of the Huffman tree after adjustment,
+- *                    necessarily no more than maxNbBits.
++ *                    respect targetNbBits.
++ * @return            The maximum number of bits of the Huffman tree after adjustment.
+  */
+-static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
++static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
+ {
+     const U32 largestBits = huffNode[lastNonNull].nbBits;
+-    /* early exit : no elt > maxNbBits, so the tree is already valid. */
+-    if (largestBits <= maxNbBits) return largestBits;
++    /* early exit : no elt > targetNbBits, so the tree is already valid. */
++    if (largestBits <= targetNbBits) return largestBits;
++
++    DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
+ 
+     /* there are several too large elements (at least >= 2) */
+     {   int totalCost = 0;
+-        const U32 baseCost = 1 << (largestBits - maxNbBits);
++        const U32 baseCost = 1 << (largestBits - targetNbBits);
+         int n = (int)lastNonNull;
+ 
+-        /* Adjust any ranks > maxNbBits to maxNbBits.
++        /* Adjust any ranks > targetNbBits to targetNbBits.
+          * Compute totalCost, which is how far the sum of the ranks is
+          * we are over 2^largestBits after adjust the offending ranks.
+          */
+-        while (huffNode[n].nbBits > maxNbBits) {
++        while (huffNode[n].nbBits > targetNbBits) {
+             totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
+-            huffNode[n].nbBits = (BYTE)maxNbBits;
++            huffNode[n].nbBits = (BYTE)targetNbBits;
+             n--;
+         }
+-        /* n stops at huffNode[n].nbBits <= maxNbBits */
+-        assert(huffNode[n].nbBits <= maxNbBits);
+-        /* n end at index of smallest symbol using < maxNbBits */
+-        while (huffNode[n].nbBits == maxNbBits) --n;
++        /* n stops at huffNode[n].nbBits <= targetNbBits */
++        assert(huffNode[n].nbBits <= targetNbBits);
++        /* n end at index of smallest symbol using < targetNbBits */
++        while (huffNode[n].nbBits == targetNbBits) --n;
+ 
+-        /* renorm totalCost from 2^largestBits to 2^maxNbBits
++        /* renorm totalCost from 2^largestBits to 2^targetNbBits
+          * note : totalCost is necessarily a multiple of baseCost */
+-        assert((totalCost & (baseCost - 1)) == 0);
+-        totalCost >>= (largestBits - maxNbBits);
++        assert(((U32)totalCost & (baseCost - 1)) == 0);
++        totalCost >>= (largestBits - targetNbBits);
+         assert(totalCost > 0);
+ 
+         /* repay normalized cost */
+@@ -339,19 +410,19 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
+ 
+             /* Get pos of last (smallest = lowest cum. count) symbol per rank */
+             ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
+-            {   U32 currentNbBits = maxNbBits;
++            {   U32 currentNbBits = targetNbBits;
+                 int pos;
+                 for (pos=n ; pos >= 0; pos--) {
+                     if (huffNode[pos].nbBits >= currentNbBits) continue;
+-                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
+-                    rankLast[maxNbBits-currentNbBits] = (U32)pos;
++                    currentNbBits = huffNode[pos].nbBits;   /* < targetNbBits */
++                    rankLast[targetNbBits-currentNbBits] = (U32)pos;
+             }   }
+ 
+             while (totalCost > 0) {
+                 /* Try to reduce the next power of 2 above totalCost because we
+                  * gain back half the rank.
+                  */
+-                U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
++                U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
+                 for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
+                     U32 const highPos = rankLast[nBitsToDecrease];
+                     U32 const lowPos = rankLast[nBitsToDecrease-1];
+@@ -391,7 +462,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
+                     rankLast[nBitsToDecrease] = noSymbol;
+                 else {
+                     rankLast[nBitsToDecrease]--;
+-                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
++                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
+                         rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
+                 }
+             }   /* while (totalCost > 0) */
+@@ -403,11 +474,11 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
+              * TODO.
+              */
+             while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
+-                /* special case : no rank 1 symbol (using maxNbBits-1);
+-                 * let's create one from largest rank 0 (using maxNbBits).
++                /* special case : no rank 1 symbol (using targetNbBits-1);
++                 * let's create one from largest rank 0 (using targetNbBits).
+                  */
+                 if (rankLast[1] == noSymbol) {
+-                    while (huffNode[n].nbBits == maxNbBits) n--;
++                    while (huffNode[n].nbBits == targetNbBits) n--;
+                     huffNode[n+1].nbBits--;
+                     assert(n >= 0);
+                     rankLast[1] = (U32)(n+1);
+@@ -421,7 +492,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
+         }   /* repay normalized cost */
+     }   /* there are several too large elements (at least >= 2) */
+ 
+-    return maxNbBits;
++    return targetNbBits;
+ }
+ 
+ typedef struct {
+@@ -429,7 +500,7 @@ typedef struct {
+     U16 curr;
+ } rankPos;
+ 
+-typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
++typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];
+ 
+ /* Number of buckets available for HUF_sort() */
+ #define RANK_POSITION_TABLE_SIZE 192
+@@ -448,8 +519,8 @@ typedef struct {
+  * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
+  */
+ #define RANK_POSITION_MAX_COUNT_LOG 32
+-#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */
+-#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */
++#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
++#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
+ 
+ /* Return the appropriate bucket index for a given count. See definition of
+  * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
+@@ -457,7 +528,7 @@ typedef struct {
+ static U32 HUF_getIndex(U32 const count) {
+     return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
+         ? count
+-        : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
++        : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
+ }
+ 
+ /* Helper swap function for HUF_quickSortPartition() */
+@@ -580,7 +651,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy
+ 
+     /* Sort each bucket. */
+     for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
+-        U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
++        int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
+         U32 const bucketStartIdx = rankPosition[n].base;
+         if (bucketSize > 1) {
+             assert(bucketStartIdx < maxSymbolValue1);
+@@ -591,6 +662,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy
+     assert(HUF_isSorted(huffNode, maxSymbolValue1));
+ }
+ 
++
+ /* HUF_buildCTable_wksp() :
+  *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+  *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
+@@ -611,6 +683,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
+     int lowS, lowN;
+     int nodeNb = STARTNODE;
+     int n, nodeRoot;
++    DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
+     /* init for parents */
+     nonNullRank = (int)maxSymbolValue;
+     while(huffNode[nonNullRank].count == 0) nonNullRank--;
+@@ -637,6 +710,8 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
+     for (n=0; n<=nonNullRank; n++)
+         huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+ 
++    DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
++
+     return nonNullRank;
+ }
+ 
+@@ -671,31 +746,40 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
+         HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
+     for (n=0; n<alphabetSize; n++)
+         HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */
+-    CTable[0] = maxNbBits;
++
++    HUF_writeCTableHeader(CTable, maxNbBits, maxSymbolValue);
+ }
+ 
+-size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
++size_t
++HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
++                     void* workSpace, size_t wkspSize)
+ {
+-    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
++    HUF_buildCTable_wksp_tables* const wksp_tables =
++        (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
+     nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
+     nodeElt* const huffNode = huffNode0+1;
+     int nonNullRank;
+ 
++    HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));
++
++    DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
++
+     /* safety checks */
+     if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
+-      return ERROR(workSpace_tooSmall);
++        return ERROR(workSpace_tooSmall);
+     if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
+     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
+-      return ERROR(maxSymbolValue_tooLarge);
++        return ERROR(maxSymbolValue_tooLarge);
+     ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
+ 
+     /* sort, decreasing order */
+     HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
++    DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
+ 
+     /* build tree */
+     nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
+ 
+-    /* enforce maxTableLog */
++    /* determine and enforce maxTableLog */
+     maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
+     if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
+ 
+@@ -716,13 +800,20 @@ size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count,
+ }
+ 
+ int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+-  HUF_CElt const* ct = CTable + 1;
+-  int bad = 0;
+-  int s;
+-  for (s = 0; s <= (int)maxSymbolValue; ++s) {
+-    bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
+-  }
+-  return !bad;
++    HUF_CTableHeader header = HUF_readCTableHeader(CTable);
++    HUF_CElt const* ct = CTable + 1;
++    int bad = 0;
++    int s;
++
++    assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX);
++
++    if (header.maxSymbolValue < maxSymbolValue)
++        return 0;
++
++    for (s = 0; s <= (int)maxSymbolValue; ++s) {
++        bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
++    }
++    return !bad;
+ }
+ 
+ size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
+@@ -804,7 +895,7 @@ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int id
+ #if DEBUGLEVEL >= 1
+     {
+         size_t const nbBits = HUF_getNbBits(elt);
+-        size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
++        size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
+         (void)dirtyBits;
+         /* Middle bits are 0. */
+         assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
+@@ -884,7 +975,7 @@ static size_t HUF_closeCStream(HUF_CStream_t* bitC)
+     {
+         size_t const nbBits = bitC->bitPos[0] & 0xFF;
+         if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
+-        return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
++        return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
+     }
+ }
+ 
+@@ -964,17 +1055,17 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
+                                    const void* src, size_t srcSize,
+                                    const HUF_CElt* CTable)
+ {
+-    U32 const tableLog = (U32)CTable[0];
++    U32 const tableLog = HUF_readCTableHeader(CTable).tableLog;
+     HUF_CElt const* ct = CTable + 1;
+     const BYTE* ip = (const BYTE*) src;
+     BYTE* const ostart = (BYTE*)dst;
+     BYTE* const oend = ostart + dstSize;
+-    BYTE* op = ostart;
+     HUF_CStream_t bitC;
+ 
+     /* init */
+     if (dstSize < 8) return 0;   /* not enough space to compress */
+-    { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
++    { BYTE* op = ostart;
++      size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
+       if (HUF_isError(initErr)) return 0; }
+ 
+     if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
+@@ -1045,9 +1136,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
+ static size_t
+ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+                               const void* src, size_t srcSize,
+-                              const HUF_CElt* CTable, const int bmi2)
++                              const HUF_CElt* CTable, const int flags)
+ {
+-    if (bmi2) {
++    if (flags & HUF_flags_bmi2) {
+         return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
+     }
+     return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
+@@ -1058,28 +1149,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+ static size_t
+ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+                               const void* src, size_t srcSize,
+-                              const HUF_CElt* CTable, const int bmi2)
++                              const HUF_CElt* CTable, const int flags)
+ {
+-    (void)bmi2;
++    (void)flags;
+     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+ }
+ 
+ #endif
+ 
+-size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
++size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
+ {
+-    return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+-}
+-
+-size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
+-{
+-    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
++    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
+ }
+ 
+ static size_t
+ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
+                               const void* src, size_t srcSize,
+-                              const HUF_CElt* CTable, int bmi2)
++                              const HUF_CElt* CTable, int flags)
+ {
+     size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
+     const BYTE* ip = (const BYTE*) src;
+@@ -1093,7 +1179,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
+     op += 6;   /* jumpTable */
+ 
+     assert(op <= oend);
+-    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
++    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
+         if (cSize == 0 || cSize > 65535) return 0;
+         MEM_writeLE16(ostart, (U16)cSize);
+         op += cSize;
+@@ -1101,7 +1187,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
+ 
+     ip += segmentSize;
+     assert(op <= oend);
+-    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
++    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
+         if (cSize == 0 || cSize > 65535) return 0;
+         MEM_writeLE16(ostart+2, (U16)cSize);
+         op += cSize;
+@@ -1109,7 +1195,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
+ 
+     ip += segmentSize;
+     assert(op <= oend);
+-    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
++    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
+         if (cSize == 0 || cSize > 65535) return 0;
+         MEM_writeLE16(ostart+4, (U16)cSize);
+         op += cSize;
+@@ -1118,7 +1204,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
+     ip += segmentSize;
+     assert(op <= oend);
+     assert(ip <= iend);
+-    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
++    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
+         if (cSize == 0 || cSize > 65535) return 0;
+         op += cSize;
+     }
+@@ -1126,14 +1212,9 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
+     return (size_t)(op-ostart);
+ }
+ 
+-size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+-{
+-    return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+-}
+-
+-size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
++size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
+ {
+-    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
++    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
+ }
+ 
+ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
+@@ -1141,11 +1222,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
+ static size_t HUF_compressCTable_internal(
+                 BYTE* const ostart, BYTE* op, BYTE* const oend,
+                 const void* src, size_t srcSize,
+-                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
++                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
+ {
+     size_t const cSize = (nbStreams==HUF_singleStream) ?
+-                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
+-                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
++                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
++                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
+     if (HUF_isError(cSize)) { return cSize; }
+     if (cSize==0) { return 0; }   /* uncompressible */
+     op += cSize;
+@@ -1168,6 +1249,81 @@ typedef struct {
+ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
+ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10  /* Must be >= 2 */
+ 
++unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
++{
++    unsigned cardinality = 0;   /* running tally of symbols whose count is non-zero */
++    unsigned i;
++    /* Scan count[0..maxSymbolValue] (inclusive upper bound) and tally the non-zero entries. */
++    for (i = 0; i < maxSymbolValue + 1; i++) {
++        if (count[i] != 0) cardinality += 1;
++    }
++    /* Result lies in [0, maxSymbolValue + 1]. */
++    return cardinality;
++}
++
++unsigned HUF_minTableLog(unsigned symbolCardinality)   /* first depth probed by HUF_optimalTableLog() */
++{
++    U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;   /* NOTE(review): presumably floor(log2(n)) + 1; a value of 0 is forwarded to ZSTD_highbit32 unchecked - confirm callers never pass 0 */
++    return minBitsSymbols;
++}
++
++unsigned HUF_optimalTableLog(
++            unsigned maxTableLog,
++            size_t srcSize,
++            unsigned maxSymbolValue,
++            void* workSpace, size_t wkspSize,
++            HUF_CElt* table,
++      const unsigned* count,
++            int flags)
++{   /* Picks the table log to build with: FSE heuristic by default, exhaustive probing when HUF_flags_optimalDepth is set. */
++    assert(srcSize > 1); /* Not supported, RLE should be used instead */
++    assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));
++    /* Fast path: without HUF_flags_optimalDepth nothing below runs and `table` is left untouched. */
++    if (!(flags & HUF_flags_optimalDepth)) {
++        /* cheap evaluation, based on FSE */
++        return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
++    }
++    /* Probing path: `table` is rebuilt for every candidate and ends up holding the last candidate tried, not necessarily the best. */
++    {   BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);   /* header scratch output, placed past a HUF_WriteCTableWksp-sized prefix of workSpace */
++        size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
++        size_t hSize, newSize;
++        const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
++        const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
++        size_t optSize = ((size_t) ~0) - 1;   /* best size so far; the -1 keeps `optSize + 1` below from wrapping to 0 */
++        unsigned optLog = maxTableLog, optLogGuess;   /* optLog stays at maxTableLog if no candidate is ever accepted */
++
++        DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
++
++        /* Search until size increases */
++        for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
++            DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
++
++            {   size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
++                if (ERR_isError(maxBits)) continue;
++                /* Tree came out shallower than the limit just tried; presumably a larger limit changes nothing, so stop. */
++                if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
++
++                hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
++            }
++
++            if (ERR_isError(hSize)) continue;
++            /* Candidate cost = estimated compressed payload + size of the serialized table. */
++            newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;
++            /* Stop only when the candidate is worse than the best by more than 1. */
++            if (newSize > optSize + 1) {
++                break;
++            }
++
++            if (newSize < optSize) {
++                optSize = newSize;
++                optLog = optLogGuess;
++            }
++        }
++        assert(optLog <= HUF_TABLELOG_MAX);
++        return optLog;
++    }
++}
++
+ /* HUF_compress_internal() :
+  * `workSpace_align4` must be aligned on 4-bytes boundaries,
+  * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
+@@ -1177,14 +1333,14 @@ HUF_compress_internal (void* dst, size_t dstSize,
+                        unsigned maxSymbolValue, unsigned huffLog,
+                        HUF_nbStreams_e nbStreams,
+                        void* workSpace, size_t wkspSize,
+-                       HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
+-                 const int bmi2, unsigned suspectUncompressible)
++                       HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
+ {
+     HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
+     BYTE* const ostart = (BYTE*)dst;
+     BYTE* const oend = ostart + dstSize;
+     BYTE* op = ostart;
+ 
++    DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
+     HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
+ 
+     /* checks & inits */
+@@ -1198,16 +1354,17 @@ HUF_compress_internal (void* dst, size_t dstSize,
+     if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
+ 
+     /* Heuristic : If old table is valid, use it for small inputs */
+-    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
++    if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
+         return HUF_compressCTable_internal(ostart, op, oend,
+                                            src, srcSize,
+-                                           nbStreams, oldHufTable, bmi2);
++                                           nbStreams, oldHufTable, flags);
+     }
+ 
+     /* If uncompressible data is suspected, do a smaller sampling first */
+     DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
+-    if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
++    if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
+         size_t largestTotal = 0;
++        DEBUGLOG(5, "input suspected incompressible : sampling to check");
+         {   unsigned maxSymbolValueBegin = maxSymbolValue;
+             CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
+             largestTotal += largestBegin;
+@@ -1224,6 +1381,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
+         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
+         if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
+     }
++    DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
+ 
+     /* Check validity of previous table */
+     if ( repeat
+@@ -1232,25 +1390,20 @@ HUF_compress_internal (void* dst, size_t dstSize,
+         *repeat = HUF_repeat_none;
+     }
+     /* Heuristic : use existing table for small inputs */
+-    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
++    if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
+         return HUF_compressCTable_internal(ostart, op, oend,
+                                            src, srcSize,
+-                                           nbStreams, oldHufTable, bmi2);
++                                           nbStreams, oldHufTable, flags);
+     }
+ 
+     /* Build Huffman Tree */
+-    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
++    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
+     {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
+                                             maxSymbolValue, huffLog,
+                                             &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
+         CHECK_F(maxBits);
+         huffLog = (U32)maxBits;
+-    }
+-    /* Zero unused symbols in CTable, so we can check it for validity */
+-    {
+-        size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
+-        size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
+-        ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
++        DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
+     }
+ 
+     /* Write table description header */
+@@ -1263,7 +1416,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
+             if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
+                 return HUF_compressCTable_internal(ostart, op, oend,
+                                                    src, srcSize,
+-                                                   nbStreams, oldHufTable, bmi2);
++                                                   nbStreams, oldHufTable, flags);
+         }   }
+ 
+         /* Use the new huffman table */
+@@ -1275,61 +1428,35 @@ HUF_compress_internal (void* dst, size_t dstSize,
+     }
+     return HUF_compressCTable_internal(ostart, op, oend,
+                                        src, srcSize,
+-                                       nbStreams, table->CTable, bmi2);
+-}
+-
+-
+-size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
+-                      const void* src, size_t srcSize,
+-                      unsigned maxSymbolValue, unsigned huffLog,
+-                      void* workSpace, size_t wkspSize)
+-{
+-    return HUF_compress_internal(dst, dstSize, src, srcSize,
+-                                 maxSymbolValue, huffLog, HUF_singleStream,
+-                                 workSpace, wkspSize,
+-                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
++                                       nbStreams, table->CTable, flags);
+ }
+ 
+ size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
+                       const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned huffLog,
+                       void* workSpace, size_t wkspSize,
+-                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
+-                      int bmi2, unsigned suspectUncompressible)
++                      HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
+ {
++    DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
+     return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                  maxSymbolValue, huffLog, HUF_singleStream,
+                                  workSpace, wkspSize, hufTable,
+-                                 repeat, preferRepeat, bmi2, suspectUncompressible);
+-}
+-
+-/* HUF_compress4X_repeat():
+- * compress input using 4 streams.
+- * provide workspace to generate compression tables */
+-size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
+-                      const void* src, size_t srcSize,
+-                      unsigned maxSymbolValue, unsigned huffLog,
+-                      void* workSpace, size_t wkspSize)
+-{
+-    return HUF_compress_internal(dst, dstSize, src, srcSize,
+-                                 maxSymbolValue, huffLog, HUF_fourStreams,
+-                                 workSpace, wkspSize,
+-                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
++                                 repeat, flags);
+ }
+ 
+ /* HUF_compress4X_repeat():
+  * compress input using 4 streams.
+  * consider skipping quickly
+- * re-use an existing huffman compression table */
++ * reuse an existing huffman compression table */
+ size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
+                       const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned huffLog,
+                       void* workSpace, size_t wkspSize,
+-                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
++                      HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
+ {
++    DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
+     return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                  maxSymbolValue, huffLog, HUF_fourStreams,
+                                  workSpace, wkspSize,
+-                                 hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
++                                 hufTable, repeat, flags);
+ }
+-
+diff --git a/lib/zstd/compress/zstd_compress.c b/lib/zstd/compress/zstd_compress.c
+index f620cafca633..0d139727cd39 100644
+--- a/lib/zstd/compress/zstd_compress.c
++++ b/lib/zstd/compress/zstd_compress.c
+@@ -1,5 +1,6 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -11,12 +12,12 @@
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
++#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
+ #include "../common/zstd_deps.h"  /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
+ #include "../common/mem.h"
+ #include "hist.h"           /* HIST_countFast_wksp */
+ #define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
+ #include "../common/fse.h"
+-#define HUF_STATIC_LINKING_ONLY
+ #include "../common/huf.h"
+ #include "zstd_compress_internal.h"
+ #include "zstd_compress_sequences.h"
+@@ -27,6 +28,7 @@
+ #include "zstd_opt.h"
+ #include "zstd_ldm.h"
+ #include "zstd_compress_superblock.h"
++#include  "../common/bits.h"      /* ZSTD_highbit32, ZSTD_rotateRight_U64 */
+ 
+ /* ***************************************************************
+ *  Tuning parameters
+@@ -55,14 +57,17 @@
+ *  Helper functions
+ ***************************************/
+ /* ZSTD_compressBound()
+- * Note that the result from this function is only compatible with the "normal"
+- * full-block strategy.
+- * When there are a lot of small blocks due to frequent flush in streaming mode
+- * the overhead of headers can make the compressed data to be larger than the
+- * return value of ZSTD_compressBound().
++ * Note that the result from this function is only valid for
++ * the one-pass compression functions.
++ * When employing the streaming mode,
++ * if flushes are frequently altering the size of blocks,
++ * the overhead from block headers can make the compressed data larger
++ * than the return value of ZSTD_compressBound().
+  */
+ size_t ZSTD_compressBound(size_t srcSize) {
+-    return ZSTD_COMPRESSBOUND(srcSize);
++    size_t const r = ZSTD_COMPRESSBOUND(srcSize);
++    if (r==0) return ERROR(srcSize_wrong);
++    return r;
+ }
+ 
+ 
+@@ -168,15 +173,13 @@ static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
+ 
+ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
+ {
++    DEBUGLOG(3, "ZSTD_freeCCtx (address: %p)", (void*)cctx);
+     if (cctx==NULL) return 0;   /* support free on NULL */
+     RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
+                     "not compatible with static CCtx");
+-    {
+-        int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
++    {   int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
+         ZSTD_freeCCtxContent(cctx);
+-        if (!cctxInWorkspace) {
+-            ZSTD_customFree(cctx, cctx->customMem);
+-        }
++        if (!cctxInWorkspace) ZSTD_customFree(cctx, cctx->customMem);
+     }
+     return 0;
+ }
+@@ -257,9 +260,9 @@ static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
+     return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));
+ }
+ 
+-/* Returns 1 if compression parameters are such that we should
++/* Returns ZSTD_ps_enable if compression parameters are such that we should
+  * enable long distance matching (wlog >= 27, strategy >= btopt).
+- * Returns 0 otherwise.
++ * Returns ZSTD_ps_disable otherwise.
+  */
+ static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
+                                  const ZSTD_compressionParameters* const cParams) {
+@@ -267,6 +270,34 @@ static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
+     return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
+ }
+ 
++static int ZSTD_resolveExternalSequenceValidation(int mode) {
++    return mode;   /* identity: the requested mode is passed through unchanged */
++}
++
++/* Resolves maxBlockSize: 0 means "not set" and maps to ZSTD_BLOCKSIZE_MAX; any other value is returned unchanged. */
++static size_t ZSTD_resolveMaxBlockSize(size_t maxBlockSize) {
++    if (maxBlockSize == 0) {
++        return ZSTD_BLOCKSIZE_MAX;
++    } else {
++        return maxBlockSize;
++    }
++}
++
++static ZSTD_paramSwitch_e ZSTD_resolveExternalRepcodeSearch(ZSTD_paramSwitch_e value, int cLevel) {
++    if (value != ZSTD_ps_auto) return value;
++    if (cLevel < 10) {
++        return ZSTD_ps_disable;
++    } else {
++        return ZSTD_ps_enable;
++    }
++}
++
++/* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged.
++ * If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. */
++static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) {
++    return cParams->strategy == ZSTD_fast || cParams->strategy == ZSTD_dfast;
++}
++
+ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
+         ZSTD_compressionParameters cParams)
+ {
+@@ -284,6 +315,10 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
+     }
+     cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);
+     cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
++    cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences);
++    cctxParams.maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams.maxBlockSize);
++    cctxParams.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams.searchForExternalRepcodes,
++                                                                             cctxParams.compressionLevel);
+     assert(!ZSTD_checkCParams(cParams));
+     return cctxParams;
+ }
+@@ -329,10 +364,13 @@ size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel)
+ #define ZSTD_NO_CLEVEL 0
+ 
+ /*
+- * Initializes the cctxParams from params and compressionLevel.
++ * Initializes `cctxParams` from `params` and `compressionLevel`.
+  * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
+  */
+-static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel)
++static void
++ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams,
++                        const ZSTD_parameters* params,
++                              int compressionLevel)
+ {
+     assert(!ZSTD_checkCParams(params->cParams));
+     ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
+@@ -345,6 +383,9 @@ static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_par
+     cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);
+     cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams);
+     cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams);
++    cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences);
++    cctxParams->maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams->maxBlockSize);
++    cctxParams->searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams->searchForExternalRepcodes, compressionLevel);
+     DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
+                 cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
+ }
+@@ -359,7 +400,7 @@ size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_paramete
+ 
+ /*
+  * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
+- * @param param Validated zstd parameters.
++ * @param params Validated zstd parameters.
+  */
+ static void ZSTD_CCtxParams_setZstdParams(
+         ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
+@@ -455,8 +496,8 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
+         return bounds;
+ 
+     case ZSTD_c_enableLongDistanceMatching:
+-        bounds.lowerBound = 0;
+-        bounds.upperBound = 1;
++        bounds.lowerBound = (int)ZSTD_ps_auto;
++        bounds.upperBound = (int)ZSTD_ps_disable;
+         return bounds;
+ 
+     case ZSTD_c_ldmHashLog:
+@@ -549,6 +590,26 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
+         bounds.upperBound = 1;
+         return bounds;
+ 
++    case ZSTD_c_prefetchCDictTables:
++        bounds.lowerBound = (int)ZSTD_ps_auto;
++        bounds.upperBound = (int)ZSTD_ps_disable;
++        return bounds;
++
++    case ZSTD_c_enableSeqProducerFallback:
++        bounds.lowerBound = 0;
++        bounds.upperBound = 1;
++        return bounds;
++
++    case ZSTD_c_maxBlockSize:
++        bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN;
++        bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
++        return bounds;
++
++    case ZSTD_c_searchForExternalRepcodes:
++        bounds.lowerBound = (int)ZSTD_ps_auto;
++        bounds.upperBound = (int)ZSTD_ps_disable;
++        return bounds;
++
+     default:
+         bounds.error = ERROR(parameter_unsupported);
+         return bounds;
+@@ -567,10 +628,11 @@ static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
+     return 0;
+ }
+ 
+-#define BOUNDCHECK(cParam, val) { \
+-    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
+-                    parameter_outOfBound, "Param out of bounds"); \
+-}
++#define BOUNDCHECK(cParam, val)                                       \
++    do {                                                              \
++        RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val),        \
++                        parameter_outOfBound, "Param out of bounds"); \
++    } while (0)
+ 
+ 
+ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
+@@ -613,6 +675,10 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
+     case ZSTD_c_useBlockSplitter:
+     case ZSTD_c_useRowMatchFinder:
+     case ZSTD_c_deterministicRefPrefix:
++    case ZSTD_c_prefetchCDictTables:
++    case ZSTD_c_enableSeqProducerFallback:
++    case ZSTD_c_maxBlockSize:
++    case ZSTD_c_searchForExternalRepcodes:
+     default:
+         return 0;
+     }
+@@ -625,7 +691,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
+         if (ZSTD_isUpdateAuthorized(param)) {
+             cctx->cParamsChanged = 1;
+         } else {
+-            RETURN_ERROR(stage_wrong, "can only set params in ctx init stage");
++            RETURN_ERROR(stage_wrong, "can only set params in cctx init stage");
+     }   }
+ 
+     switch(param)
+@@ -668,6 +734,10 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
+     case ZSTD_c_useBlockSplitter:
+     case ZSTD_c_useRowMatchFinder:
+     case ZSTD_c_deterministicRefPrefix:
++    case ZSTD_c_prefetchCDictTables:
++    case ZSTD_c_enableSeqProducerFallback:
++    case ZSTD_c_maxBlockSize:
++    case ZSTD_c_searchForExternalRepcodes:
+         break;
+ 
+     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
+@@ -723,12 +793,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
+     case ZSTD_c_minMatch :
+         if (value!=0)   /* 0 => use default */
+             BOUNDCHECK(ZSTD_c_minMatch, value);
+-        CCtxParams->cParams.minMatch = value;
++        CCtxParams->cParams.minMatch = (U32)value;
+         return CCtxParams->cParams.minMatch;
+ 
+     case ZSTD_c_targetLength :
+         BOUNDCHECK(ZSTD_c_targetLength, value);
+-        CCtxParams->cParams.targetLength = value;
++        CCtxParams->cParams.targetLength = (U32)value;
+         return CCtxParams->cParams.targetLength;
+ 
+     case ZSTD_c_strategy :
+@@ -741,12 +811,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
+         /* Content size written in frame header _when known_ (default:1) */
+         DEBUGLOG(4, "set content size flag = %u", (value!=0));
+         CCtxParams->fParams.contentSizeFlag = value != 0;
+-        return CCtxParams->fParams.contentSizeFlag;
++        return (size_t)CCtxParams->fParams.contentSizeFlag;
+ 
+     case ZSTD_c_checksumFlag :
+         /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
+         CCtxParams->fParams.checksumFlag = value != 0;
+-        return CCtxParams->fParams.checksumFlag;
++        return (size_t)CCtxParams->fParams.checksumFlag;
+ 
+     case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
+         DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
+@@ -755,18 +825,18 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
+ 
+     case ZSTD_c_forceMaxWindow :
+         CCtxParams->forceWindow = (value != 0);
+-        return CCtxParams->forceWindow;
++        return (size_t)CCtxParams->forceWindow;
+ 
+     case ZSTD_c_forceAttachDict : {
+         const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
+-        BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
++        BOUNDCHECK(ZSTD_c_forceAttachDict, (int)pref);
+         CCtxParams->attachDictPref = pref;
+         return CCtxParams->attachDictPref;
+     }
+ 
+     case ZSTD_c_literalCompressionMode : {
+         const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value;
+-        BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
++        BOUNDCHECK(ZSTD_c_literalCompressionMode, (int)lcm);
+         CCtxParams->literalCompressionMode = lcm;
+         return CCtxParams->literalCompressionMode;
+     }
+@@ -789,47 +859,50 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
+ 
+     case ZSTD_c_enableDedicatedDictSearch :
+         CCtxParams->enableDedicatedDictSearch = (value!=0);
+-        return CCtxParams->enableDedicatedDictSearch;
++        return (size_t)CCtxParams->enableDedicatedDictSearch;
+ 
+     case ZSTD_c_enableLongDistanceMatching :
++        BOUNDCHECK(ZSTD_c_enableLongDistanceMatching, value);
+         CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value;
+         return CCtxParams->ldmParams.enableLdm;
+ 
+     case ZSTD_c_ldmHashLog :
+         if (value!=0)   /* 0 ==> auto */
+             BOUNDCHECK(ZSTD_c_ldmHashLog, value);
+-        CCtxParams->ldmParams.hashLog = value;
++        CCtxParams->ldmParams.hashLog = (U32)value;
+         return CCtxParams->ldmParams.hashLog;
+ 
+     case ZSTD_c_ldmMinMatch :
+         if (value!=0)   /* 0 ==> default */
+             BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
+-        CCtxParams->ldmParams.minMatchLength = value;
++        CCtxParams->ldmParams.minMatchLength = (U32)value;
+         return CCtxParams->ldmParams.minMatchLength;
+ 
+     case ZSTD_c_ldmBucketSizeLog :
+         if (value!=0)   /* 0 ==> default */
+             BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
+-        CCtxParams->ldmParams.bucketSizeLog = value;
++        CCtxParams->ldmParams.bucketSizeLog = (U32)value;
+         return CCtxParams->ldmParams.bucketSizeLog;
+ 
+     case ZSTD_c_ldmHashRateLog :
+         if (value!=0)   /* 0 ==> default */
+             BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);
+-        CCtxParams->ldmParams.hashRateLog = value;
++        CCtxParams->ldmParams.hashRateLog = (U32)value;
+         return CCtxParams->ldmParams.hashRateLog;
+ 
+     case ZSTD_c_targetCBlockSize :
+-        if (value!=0)   /* 0 ==> default */
++        if (value!=0) {  /* 0 ==> default */
++            value = MAX(value, ZSTD_TARGETCBLOCKSIZE_MIN);
+             BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
+-        CCtxParams->targetCBlockSize = value;
++        }
++        CCtxParams->targetCBlockSize = (U32)value;
+         return CCtxParams->targetCBlockSize;
+ 
+     case ZSTD_c_srcSizeHint :
+         if (value!=0)    /* 0 ==> default */
+             BOUNDCHECK(ZSTD_c_srcSizeHint, value);
+         CCtxParams->srcSizeHint = value;
+-        return CCtxParams->srcSizeHint;
++        return (size_t)CCtxParams->srcSizeHint;
+ 
+     case ZSTD_c_stableInBuffer:
+         BOUNDCHECK(ZSTD_c_stableInBuffer, value);
+@@ -849,7 +922,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
+     case ZSTD_c_validateSequences:
+         BOUNDCHECK(ZSTD_c_validateSequences, value);
+         CCtxParams->validateSequences = value;
+-        return CCtxParams->validateSequences;
++        return (size_t)CCtxParams->validateSequences;
+ 
+     case ZSTD_c_useBlockSplitter:
+         BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
+@@ -864,7 +937,28 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
+     case ZSTD_c_deterministicRefPrefix:
+         BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
+         CCtxParams->deterministicRefPrefix = !!value;
+-        return CCtxParams->deterministicRefPrefix;
++        return (size_t)CCtxParams->deterministicRefPrefix;
++
++    case ZSTD_c_prefetchCDictTables:
++        BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);
++        CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value;
++        return CCtxParams->prefetchCDictTables;
++
++    case ZSTD_c_enableSeqProducerFallback:
++        BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value);
++        CCtxParams->enableMatchFinderFallback = value;
++        return (size_t)CCtxParams->enableMatchFinderFallback;
++
++    case ZSTD_c_maxBlockSize:
++        if (value!=0)    /* 0 ==> default */
++            BOUNDCHECK(ZSTD_c_maxBlockSize, value);
++        CCtxParams->maxBlockSize = value;
++        return CCtxParams->maxBlockSize;
++
++    case ZSTD_c_searchForExternalRepcodes:
++        BOUNDCHECK(ZSTD_c_searchForExternalRepcodes, value);
++        CCtxParams->searchForExternalRepcodes = (ZSTD_paramSwitch_e)value;
++        return CCtxParams->searchForExternalRepcodes;
+ 
+     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
+     }
+@@ -980,6 +1074,18 @@ size_t ZSTD_CCtxParams_getParameter(
+     case ZSTD_c_deterministicRefPrefix:
+         *value = (int)CCtxParams->deterministicRefPrefix;
+         break;
++    case ZSTD_c_prefetchCDictTables:
++        *value = (int)CCtxParams->prefetchCDictTables;
++        break;
++    case ZSTD_c_enableSeqProducerFallback:
++        *value = CCtxParams->enableMatchFinderFallback;
++        break;
++    case ZSTD_c_maxBlockSize:
++        *value = (int)CCtxParams->maxBlockSize;
++        break;
++    case ZSTD_c_searchForExternalRepcodes:
++        *value = (int)CCtxParams->searchForExternalRepcodes;
++        break;
+     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
+     }
+     return 0;
+@@ -1006,9 +1112,47 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
+     return 0;
+ }
+ 
++size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams)
++{
++    ZSTD_STATIC_ASSERT(sizeof(cparams) == 7 * 4 /* all params are listed below */);
++    DEBUGLOG(4, "ZSTD_CCtx_setCParams");
++    /* only update if all parameters are valid */
++    FORWARD_IF_ERROR(ZSTD_checkCParams(cparams), "");
++    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, cparams.windowLog), "");
++    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, cparams.chainLog), "");
++    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, cparams.hashLog), "");
++    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, cparams.searchLog), "");
++    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, cparams.minMatch), "");
++    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, cparams.targetLength), "");
++    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, cparams.strategy), "");
++    return 0;
++}
++
++size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams)
++{
++    ZSTD_STATIC_ASSERT(sizeof(fparams) == 3 * 4 /* all params are listed below */);
++    DEBUGLOG(4, "ZSTD_CCtx_setFParams");
++    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, fparams.contentSizeFlag != 0), "");
++    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, fparams.checksumFlag != 0), "");
++    FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_dictIDFlag, fparams.noDictIDFlag == 0), "");
++    return 0;
++}
++
++size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params)
++{
++    DEBUGLOG(4, "ZSTD_CCtx_setParams");
++    /* First check cParams, because we want to update all or none. */
++    FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
++    /* Next set fParams, because this could fail if the cctx isn't in init stage. */
++    FORWARD_IF_ERROR(ZSTD_CCtx_setFParams(cctx, params.fParams), "");
++    /* Finally set cParams, which should succeed. */
++    FORWARD_IF_ERROR(ZSTD_CCtx_setCParams(cctx, params.cParams), "");
++    return 0;
++}
++
+ size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
+ {
+-    DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
++    DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %llu bytes", pledgedSrcSize);
+     RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                     "Can't set pledgedSrcSize when not in init stage.");
+     cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+@@ -1024,9 +1168,9 @@ static void ZSTD_dedicatedDictSearch_revertCParams(
+         ZSTD_compressionParameters* cParams);
+ 
+ /*
+- * Initializes the local dict using the requested parameters.
+- * NOTE: This does not use the pledged src size, because it may be used for more
+- * than one compression.
++ * Initializes the local dictionary using requested parameters.
++ * NOTE: Initialization does not employ the pledged src size,
++ * because the dictionary may be used for multiple compressions.
+  */
+ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
+ {
+@@ -1039,8 +1183,8 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
+         return 0;
+     }
+     if (dl->cdict != NULL) {
+-        assert(cctx->cdict == dl->cdict);
+         /* Local dictionary already initialized. */
++        assert(cctx->cdict == dl->cdict);
+         return 0;
+     }
+     assert(dl->dictSize > 0);
+@@ -1060,26 +1204,30 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
+ }
+ 
+ size_t ZSTD_CCtx_loadDictionary_advanced(
+-        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
+-        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
++        ZSTD_CCtx* cctx,
++        const void* dict, size_t dictSize,
++        ZSTD_dictLoadMethod_e dictLoadMethod,
++        ZSTD_dictContentType_e dictContentType)
+ {
+-    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+-                    "Can't load a dictionary when ctx is not in init stage.");
+     DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
+-    ZSTD_clearAllDicts(cctx);  /* in case one already exists */
+-    if (dict == NULL || dictSize == 0)  /* no dictionary mode */
++    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
++                    "Can't load a dictionary when cctx is not in init stage.");
++    ZSTD_clearAllDicts(cctx);  /* erase any previously set dictionary */
++    if (dict == NULL || dictSize == 0)  /* no dictionary */
+         return 0;
+     if (dictLoadMethod == ZSTD_dlm_byRef) {
+         cctx->localDict.dict = dict;
+     } else {
++        /* copy dictionary content inside CCtx to own its lifetime */
+         void* dictBuffer;
+         RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
+-                        "no malloc for static CCtx");
++                        "static CCtx can't allocate for an internal copy of dictionary");
+         dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);
+-        RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
++        RETURN_ERROR_IF(dictBuffer==NULL, memory_allocation,
++                        "allocation failed for dictionary content");
+         ZSTD_memcpy(dictBuffer, dict, dictSize);
+-        cctx->localDict.dictBuffer = dictBuffer;
+-        cctx->localDict.dict = dictBuffer;
++        cctx->localDict.dictBuffer = dictBuffer;  /* owned ptr to free */
++        cctx->localDict.dict = dictBuffer;        /* read-only reference */
+     }
+     cctx->localDict.dictSize = dictSize;
+     cctx->localDict.dictContentType = dictContentType;
+@@ -1149,7 +1297,7 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
+     if ( (reset == ZSTD_reset_parameters)
+       || (reset == ZSTD_reset_session_and_parameters) ) {
+         RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+-                        "Can't reset parameters only when not in init stage.");
++                        "Reset parameters is only possible during init stage.");
+         ZSTD_clearAllDicts(cctx);
+         return ZSTD_CCtxParams_reset(&cctx->requestedParams);
+     }
+@@ -1178,11 +1326,12 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
+ static ZSTD_compressionParameters
+ ZSTD_clampCParams(ZSTD_compressionParameters cParams)
+ {
+-#   define CLAMP_TYPE(cParam, val, type) {                                \
+-        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
+-        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
+-        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
+-    }
++#   define CLAMP_TYPE(cParam, val, type)                                      \
++        do {                                                                  \
++            ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
++            if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
++            else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
++        } while (0)
+ #   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
+     CLAMP(ZSTD_c_windowLog, cParams.windowLog);
+     CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
+@@ -1247,12 +1396,55 @@ static ZSTD_compressionParameters
+ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
+                             unsigned long long srcSize,
+                             size_t dictSize,
+-                            ZSTD_cParamMode_e mode)
++                            ZSTD_cParamMode_e mode,
++                            ZSTD_paramSwitch_e useRowMatchFinder)
+ {
+     const U64 minSrcSize = 513; /* (1<<9) + 1 */
+     const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
+     assert(ZSTD_checkCParams(cPar)==0);
+ 
++    /* Cascade the selected strategy down to the next-highest one built into
++     * this binary. */
++#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
++    if (cPar.strategy == ZSTD_btultra2) {
++        cPar.strategy = ZSTD_btultra;
++    }
++    if (cPar.strategy == ZSTD_btultra) {
++        cPar.strategy = ZSTD_btopt;
++    }
++#endif
++#ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
++    if (cPar.strategy == ZSTD_btopt) {
++        cPar.strategy = ZSTD_btlazy2;
++    }
++#endif
++#ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
++    if (cPar.strategy == ZSTD_btlazy2) {
++        cPar.strategy = ZSTD_lazy2;
++    }
++#endif
++#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
++    if (cPar.strategy == ZSTD_lazy2) {
++        cPar.strategy = ZSTD_lazy;
++    }
++#endif
++#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
++    if (cPar.strategy == ZSTD_lazy) {
++        cPar.strategy = ZSTD_greedy;
++    }
++#endif
++#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
++    if (cPar.strategy == ZSTD_greedy) {
++        cPar.strategy = ZSTD_dfast;
++    }
++#endif
++#ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
++    if (cPar.strategy == ZSTD_dfast) {
++        cPar.strategy = ZSTD_fast;
++        cPar.targetLength = 0;
++    }
++#endif
++
+     switch (mode) {
+     case ZSTD_cpm_unknown:
+     case ZSTD_cpm_noAttachDict:
+@@ -1281,8 +1473,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
+     }
+ 
+     /* resize windowLog if input is small enough, to use less memory */
+-    if ( (srcSize < maxWindowResize)
+-      && (dictSize < maxWindowResize) )  {
++    if ( (srcSize <= maxWindowResize)
++      && (dictSize <= maxWindowResize) )  {
+         U32 const tSize = (U32)(srcSize + dictSize);
+         static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
+         U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
+@@ -1300,6 +1492,42 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
+     if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
+         cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* minimum wlog required for valid frame header */
+ 
++    /* We can't use more than 32 bits of hash in total, so that means that we require:
++     * (hashLog + 8) <= 32 && (chainLog + 8) <= 32
++     */
++    if (mode == ZSTD_cpm_createCDict && ZSTD_CDictIndicesAreTagged(&cPar)) {
++        U32 const maxShortCacheHashLog = 32 - ZSTD_SHORT_CACHE_TAG_BITS;
++        if (cPar.hashLog > maxShortCacheHashLog) {
++            cPar.hashLog = maxShortCacheHashLog;
++        }
++        if (cPar.chainLog > maxShortCacheHashLog) {
++            cPar.chainLog = maxShortCacheHashLog;
++        }
++    }
++
++
++    /* At this point, we aren't 100% sure if we are using the row match finder.
++     * Unless it is explicitly disabled, conservatively assume that it is enabled.
++     * In this case it will only be disabled for small sources, so shrinking the
++     * hash log a little bit shouldn't result in any ratio loss.
++     */
++    if (useRowMatchFinder == ZSTD_ps_auto)
++        useRowMatchFinder = ZSTD_ps_enable;
++
++    /* We can't hash more than 32-bits in total. So that means that we require:
++     * (hashLog - rowLog + 8) <= 32
++     */
++    if (ZSTD_rowMatchFinderUsed(cPar.strategy, useRowMatchFinder)) {
++        /* rowLog is searchLog clamped to [4, 6]; e.g. 32-entry rows when searchLog is 5 */
++        U32 const rowLog = BOUNDED(4, cPar.searchLog, 6);
++        U32 const maxRowHashLog = 32 - ZSTD_ROW_HASH_TAG_BITS;
++        U32 const maxHashLog = maxRowHashLog + rowLog;
++        assert(cPar.hashLog >= rowLog);
++        if (cPar.hashLog > maxHashLog) {
++            cPar.hashLog = maxHashLog;
++        }
++    }
++
+     return cPar;
+ }
+ 
+@@ -1310,7 +1538,7 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
+ {
+     cPar = ZSTD_clampCParams(cPar);   /* resulting cPar is necessarily valid (all parameters within range) */
+     if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
+-    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
++    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto);
+ }
+ 
+ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
+@@ -1341,7 +1569,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
+     ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
+     assert(!ZSTD_checkCParams(cParams));
+     /* srcSizeHint == 0 means 0 */
+-    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
++    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder);
+ }
+ 
+ static size_t
+@@ -1367,10 +1595,10 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+       + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
+       + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
+       + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
+-      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+-      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
++      + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_match_t))
++      + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
+     size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
+-                                            ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
++                                            ? ZSTD_cwksp_aligned_alloc_size(hSize)
+                                             : 0;
+     size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
+                                 ? optPotentialSpace
+@@ -1386,6 +1614,13 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+     return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
+ }
+ 
++/* Helper function for calculating memory requirements.
++ * Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account. */
++static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useSequenceProducer) {
++    U32 const divider = (minMatch==3 || useSequenceProducer) ? 3 : 4;
++    return blockSize / divider;
++}
++
+ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+         const ZSTD_compressionParameters* cParams,
+         const ldmParams_t* ldmParams,
+@@ -1393,12 +1628,13 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+         const ZSTD_paramSwitch_e useRowMatchFinder,
+         const size_t buffInSize,
+         const size_t buffOutSize,
+-        const U64 pledgedSrcSize)
++        const U64 pledgedSrcSize,
++        int useSequenceProducer,
++        size_t maxBlockSize)
+ {
+     size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
+-    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
+-    U32    const divider = (cParams->minMatch==3) ? 3 : 4;
+-    size_t const maxNbSeq = blockSize / divider;
++    size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize);
++    size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer);
+     size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+                             + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
+                             + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
+@@ -1417,6 +1653,11 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+ 
+     size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
+ 
++    size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
++    size_t const externalSeqSpace = useSequenceProducer
++        ? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
++        : 0;
++
+     size_t const neededSpace =
+         cctxSpace +
+         entropySpace +
+@@ -1425,7 +1666,8 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+         ldmSeqSpace +
+         matchStateSize +
+         tokenSpace +
+-        bufferSpace;
++        bufferSpace +
++        externalSeqSpace;
+ 
+     DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
+     return neededSpace;
+@@ -1443,7 +1685,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
+      * be needed. However, we still allocate two 0-sized buffers, which can
+      * take space under ASAN. */
+     return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+-        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
++        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
+ }
+ 
+ size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
+@@ -1493,7 +1735,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
+     RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
+     {   ZSTD_compressionParameters const cParams =
+                 ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+-        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
++        size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(params->maxBlockSize), (size_t)1 << cParams.windowLog);
+         size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
+                 ? ((size_t)1 << cParams.windowLog) + blockSize
+                 : 0;
+@@ -1504,7 +1746,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
+ 
+         return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+             &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
+-            ZSTD_CONTENTSIZE_UNKNOWN);
++            ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
+     }
+ }
+ 
+@@ -1637,6 +1879,19 @@ typedef enum {
+     ZSTD_resetTarget_CCtx
+ } ZSTD_resetTarget_e;
+ 
++/* Mixes the bits of a 64-bit value, based on XXH3_rrmxmx */
++static U64 ZSTD_bitmix(U64 val, U64 len) {
++    val ^= ZSTD_rotateRight_U64(val, 49) ^ ZSTD_rotateRight_U64(val, 24);
++    val *= 0x9FB21C651E98DF25ULL;
++    val ^= (val >> 35) + len ;
++    val *= 0x9FB21C651E98DF25ULL;
++    return val ^ (val >> 28);
++}
++
++/* Mixes in the hashSalt and hashSaltEntropy to create a new hashSalt */
++static void ZSTD_advanceHashSalt(ZSTD_matchState_t* ms) {
++    ms->hashSalt = ZSTD_bitmix(ms->hashSalt, 8) ^ ZSTD_bitmix((U64) ms->hashSaltEntropy, 4);
++}
+ 
+ static size_t
+ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
+@@ -1664,6 +1919,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
+     }
+ 
+     ms->hashLog3 = hashLog3;
++    ms->lazySkipping = 0;
+ 
+     ZSTD_invalidateMatchState(ms);
+ 
+@@ -1685,22 +1941,19 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
+         ZSTD_cwksp_clean_tables(ws);
+     }
+ 
+-    /* opt parser space */
+-    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
+-        DEBUGLOG(4, "reserving optimal parser space");
+-        ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
+-        ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
+-        ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
+-        ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
+-        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
+-        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+-    }
+-
+     if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
+-        {   /* Row match finder needs an additional table of hashes ("tags") */
+-            size_t const tagTableSize = hSize*sizeof(U16);
+-            ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
+-            if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
++        /* Row match finder needs an additional table of hashes ("tags") */
++        size_t const tagTableSize = hSize;
++        /* We want to generate a new salt in case we reset a Cctx, but we always want to use
++         * 0 when we reset a Cdict */
++        if(forWho == ZSTD_resetTarget_CCtx) {
++            ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned_init_once(ws, tagTableSize);
++            ZSTD_advanceHashSalt(ms);
++        } else {
++            /* When we are not salting we want to always memset the memory */
++            ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
++            ZSTD_memset(ms->tagTable, 0, tagTableSize);
++            ms->hashSalt = 0;
+         }
+         {   /* Switch to 32-entry rows if searchLog is 5 (or more) */
+             U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
+@@ -1709,6 +1962,17 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
+         }
+     }
+ 
++    /* opt parser space */
++    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
++        DEBUGLOG(4, "reserving optimal parser space");
++        ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
++        ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
++        ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
++        ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
++        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_match_t));
++        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
++    }
++
+     ms->cParams = *cParams;
+ 
+     RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+@@ -1768,6 +2032,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
+     assert(params->useRowMatchFinder != ZSTD_ps_auto);
+     assert(params->useBlockSplitter != ZSTD_ps_auto);
+     assert(params->ldmParams.enableLdm != ZSTD_ps_auto);
++    assert(params->maxBlockSize != 0);
+     if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
+         /* Adjust long distance matching parameters */
+         ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
+@@ -1776,9 +2041,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
+     }
+ 
+     {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
+-        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
+-        U32    const divider = (params->cParams.minMatch==3) ? 3 : 4;
+-        size_t const maxNbSeq = blockSize / divider;
++        size_t const blockSize = MIN(params->maxBlockSize, windowSize);
++        size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, ZSTD_hasExtSeqProd(params));
+         size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
+                 ? ZSTD_compressBound(blockSize) + 1
+                 : 0;
+@@ -1795,8 +2059,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
+         size_t const neededSpace =
+             ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+                 &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
+-                buffInSize, buffOutSize, pledgedSrcSize);
+-        int resizeWorkspace;
++                buffInSize, buffOutSize, pledgedSrcSize, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
+ 
+         FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
+ 
+@@ -1805,7 +2068,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
+         {   /* Check if workspace is large enough, alloc a new one if needed */
+             int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
+             int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
+-            resizeWorkspace = workspaceTooSmall || workspaceWasteful;
++            int resizeWorkspace = workspaceTooSmall || workspaceWasteful;
+             DEBUGLOG(4, "Need %zu B workspace", neededSpace);
+             DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
+ 
+@@ -1838,6 +2101,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
+ 
+         /* init params */
+         zc->blockState.matchState.cParams = params->cParams;
++        zc->blockState.matchState.prefetchCDictTables = params->prefetchCDictTables == ZSTD_ps_enable;
+         zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+         zc->consumedSrcSize = 0;
+         zc->producedCSize = 0;
+@@ -1854,13 +2118,46 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
+ 
+         ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
+ 
++        FORWARD_IF_ERROR(ZSTD_reset_matchState(
++                &zc->blockState.matchState,
++                ws,
++                &params->cParams,
++                params->useRowMatchFinder,
++                crp,
++                needsIndexReset,
++                ZSTD_resetTarget_CCtx), "");
++
++        zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
++
++        /* ldm hash table */
++        if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
++            /* TODO: avoid memset? */
++            size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
++            zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
++            ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
++            zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
++            zc->maxNbLdmSequences = maxNbLdmSeq;
++
++            ZSTD_window_init(&zc->ldmState.window);
++            zc->ldmState.loadedDictEnd = 0;
++        }
++
++        /* reserve space for block-level external sequences */
++        if (ZSTD_hasExtSeqProd(params)) {
++            size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
++            zc->extSeqBufCapacity = maxNbExternalSeq;
++            zc->extSeqBuf =
++                (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
++        }
++
++        /* buffers */
++
+         /* ZSTD_wildcopy() is used to copy into the literals buffer,
+          * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
+          */
+         zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
+         zc->seqStore.maxNbLit = blockSize;
+ 
+-        /* buffers */
+         zc->bufferedPolicy = zbuff;
+         zc->inBuffSize = buffInSize;
+         zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
+@@ -1883,32 +2180,9 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
+         zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+         zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+         zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+-        zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
+-
+-        FORWARD_IF_ERROR(ZSTD_reset_matchState(
+-            &zc->blockState.matchState,
+-            ws,
+-            &params->cParams,
+-            params->useRowMatchFinder,
+-            crp,
+-            needsIndexReset,
+-            ZSTD_resetTarget_CCtx), "");
+-
+-        /* ldm hash table */
+-        if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
+-            /* TODO: avoid memset? */
+-            size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
+-            zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
+-            ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
+-            zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
+-            zc->maxNbLdmSequences = maxNbLdmSeq;
+-
+-            ZSTD_window_init(&zc->ldmState.window);
+-            zc->ldmState.loadedDictEnd = 0;
+-        }
+ 
+         DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
+-        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
++        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace));
+ 
+         zc->initialized = 1;
+ 
+@@ -1980,7 +2254,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
+         }
+ 
+         params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
+-                                                     cdict->dictContentSize, ZSTD_cpm_attachDict);
++                                                     cdict->dictContentSize, ZSTD_cpm_attachDict,
++                                                     params.useRowMatchFinder);
+         params.cParams.windowLog = windowLog;
+         params.useRowMatchFinder = cdict->useRowMatchFinder;    /* cdict overrides */
+         FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
+@@ -2019,6 +2294,22 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
+     return 0;
+ }
+ 
++static void ZSTD_copyCDictTableIntoCCtx(U32* dst, U32 const* src, size_t tableSize,
++                                        ZSTD_compressionParameters const* cParams) {
++    if (ZSTD_CDictIndicesAreTagged(cParams)){
++        /* Remove tags from the CDict table if they are present.
++         * See docs on "short cache" in zstd_compress_internal.h for context. */
++        size_t i;
++        for (i = 0; i < tableSize; i++) {
++            U32 const taggedIndex = src[i];
++            U32 const index = taggedIndex >> ZSTD_SHORT_CACHE_TAG_BITS;
++            dst[i] = index;
++        }
++    } else {
++        ZSTD_memcpy(dst, src, tableSize * sizeof(U32));
++    }
++}
++
+ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
+                             const ZSTD_CDict* cdict,
+                             ZSTD_CCtx_params params,
+@@ -2054,21 +2345,23 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
+                                                             : 0;
+         size_t const hSize =  (size_t)1 << cdict_cParams->hashLog;
+ 
+-        ZSTD_memcpy(cctx->blockState.matchState.hashTable,
+-               cdict->matchState.hashTable,
+-               hSize * sizeof(U32));
++        ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.hashTable,
++                                cdict->matchState.hashTable,
++                                hSize, cdict_cParams);
++
+         /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
+         if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
+-            ZSTD_memcpy(cctx->blockState.matchState.chainTable,
+-               cdict->matchState.chainTable,
+-               chainSize * sizeof(U32));
++            ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.chainTable,
++                                    cdict->matchState.chainTable,
++                                    chainSize, cdict_cParams);
+         }
+         /* copy tag table */
+         if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
+-            size_t const tagTableSize = hSize*sizeof(U16);
++            size_t const tagTableSize = hSize;
+             ZSTD_memcpy(cctx->blockState.matchState.tagTable,
+-                cdict->matchState.tagTable,
+-                tagTableSize);
++                        cdict->matchState.tagTable,
++                        tagTableSize);
++            cctx->blockState.matchState.hashSalt = cdict->matchState.hashSalt;
+         }
+     }
+ 
+@@ -2147,6 +2440,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
+         params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter;
+         params.ldmParams = srcCCtx->appliedParams.ldmParams;
+         params.fParams = fParams;
++        params.maxBlockSize = srcCCtx->appliedParams.maxBlockSize;
+         ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
+                                 /* loadedDictSize */ 0,
+                                 ZSTDcrp_leaveDirty, zbuff);
+@@ -2294,7 +2588,7 @@ static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* par
+ 
+ /* See doc/zstd_compression_format.md for detailed format description */
+ 
+-void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
++int ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
+ {
+     const seqDef* const sequences = seqStorePtr->sequencesStart;
+     BYTE* const llCodeTable = seqStorePtr->llCode;
+@@ -2302,18 +2596,24 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
+     BYTE* const mlCodeTable = seqStorePtr->mlCode;
+     U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+     U32 u;
++    int longOffsets = 0;
+     assert(nbSeq <= seqStorePtr->maxNbSeq);
+     for (u=0; u<nbSeq; u++) {
+         U32 const llv = sequences[u].litLength;
++        U32 const ofCode = ZSTD_highbit32(sequences[u].offBase);
+         U32 const mlv = sequences[u].mlBase;
+         llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
+-        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offBase);
++        ofCodeTable[u] = (BYTE)ofCode;
+         mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
++        assert(!(MEM_64bits() && ofCode >= STREAM_ACCUMULATOR_MIN));
++        if (MEM_32bits() && ofCode >= STREAM_ACCUMULATOR_MIN)
++            longOffsets = 1;
+     }
+     if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
+         llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
+     if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
+         mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
++    return longOffsets;
+ }
+ 
+ /* ZSTD_useTargetCBlockSize():
+@@ -2347,6 +2647,7 @@ typedef struct {
+     U32 MLtype;
+     size_t size;
+     size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
++    int longOffsets;
+ } ZSTD_symbolEncodingTypeStats_t;
+ 
+ /* ZSTD_buildSequencesStatistics():
+@@ -2357,11 +2658,13 @@ typedef struct {
+  * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
+  */
+ static ZSTD_symbolEncodingTypeStats_t
+-ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
+-                        const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
+-                              BYTE* dst, const BYTE* const dstEnd,
+-                              ZSTD_strategy strategy, unsigned* countWorkspace,
+-                              void* entropyWorkspace, size_t entropyWkspSize) {
++ZSTD_buildSequencesStatistics(
++                const seqStore_t* seqStorePtr, size_t nbSeq,
++                const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
++                      BYTE* dst, const BYTE* const dstEnd,
++                      ZSTD_strategy strategy, unsigned* countWorkspace,
++                      void* entropyWorkspace, size_t entropyWkspSize)
++{
+     BYTE* const ostart = dst;
+     const BYTE* const oend = dstEnd;
+     BYTE* op = ostart;
+@@ -2375,7 +2678,7 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
+ 
+     stats.lastCountSize = 0;
+     /* convert length/distances into codes */
+-    ZSTD_seqToCodes(seqStorePtr);
++    stats.longOffsets = ZSTD_seqToCodes(seqStorePtr);
+     assert(op <= oend);
+     assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
+     /* build CTable for Literal Lengths */
+@@ -2480,22 +2783,22 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
+  */
+ #define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20
+ MEM_STATIC size_t
+-ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
+-                          const ZSTD_entropyCTables_t* prevEntropy,
+-                                ZSTD_entropyCTables_t* nextEntropy,
+-                          const ZSTD_CCtx_params* cctxParams,
+-                                void* dst, size_t dstCapacity,
+-                                void* entropyWorkspace, size_t entropyWkspSize,
+-                          const int bmi2)
++ZSTD_entropyCompressSeqStore_internal(
++                        const seqStore_t* seqStorePtr,
++                        const ZSTD_entropyCTables_t* prevEntropy,
++                              ZSTD_entropyCTables_t* nextEntropy,
++                        const ZSTD_CCtx_params* cctxParams,
++                              void* dst, size_t dstCapacity,
++                              void* entropyWorkspace, size_t entropyWkspSize,
++                        const int bmi2)
+ {
+-    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
+     ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+     unsigned* count = (unsigned*)entropyWorkspace;
+     FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
+     FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
+     FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
+     const seqDef* const sequences = seqStorePtr->sequencesStart;
+-    const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
++    const size_t nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+     const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+     const BYTE* const llCodeTable = seqStorePtr->llCode;
+     const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+@@ -2503,29 +2806,31 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
+     BYTE* const oend = ostart + dstCapacity;
+     BYTE* op = ostart;
+     size_t lastCountSize;
++    int longOffsets = 0;
+ 
+     entropyWorkspace = count + (MaxSeq + 1);
+     entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
+ 
+-    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq);
++    DEBUGLOG(5, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu, dstCapacity=%zu)", nbSeq, dstCapacity);
+     ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
+     assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
+ 
+     /* Compress literals */
+     {   const BYTE* const literals = seqStorePtr->litStart;
+-        size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+-        size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart;
++        size_t const numSequences = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
++        size_t const numLiterals = (size_t)(seqStorePtr->lit - seqStorePtr->litStart);
+         /* Base suspicion of uncompressibility on ratio of literals to sequences */
+         unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO);
+         size_t const litSize = (size_t)(seqStorePtr->lit - literals);
++
+         size_t const cSize = ZSTD_compressLiterals(
+-                                    &prevEntropy->huf, &nextEntropy->huf,
+-                                    cctxParams->cParams.strategy,
+-                                    ZSTD_literalsCompressionIsDisabled(cctxParams),
+                                     op, dstCapacity,
+                                     literals, litSize,
+                                     entropyWorkspace, entropyWkspSize,
+-                                    bmi2, suspectUncompressible);
++                                    &prevEntropy->huf, &nextEntropy->huf,
++                                    cctxParams->cParams.strategy,
++                                    ZSTD_literalsCompressionIsDisabled(cctxParams),
++                                    suspectUncompressible, bmi2);
+         FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
+         assert(cSize <= dstCapacity);
+         op += cSize;
+@@ -2551,11 +2856,10 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
+         ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
+         return (size_t)(op - ostart);
+     }
+-    {
+-        ZSTD_symbolEncodingTypeStats_t stats;
+-        BYTE* seqHead = op++;
++    {   BYTE* const seqHead = op++;
+         /* build stats for sequences */
+-        stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
++        const ZSTD_symbolEncodingTypeStats_t stats =
++                ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
+                                              &prevEntropy->fse, &nextEntropy->fse,
+                                               op, oend,
+                                               strategy, count,
+@@ -2564,6 +2868,7 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
+         *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
+         lastCountSize = stats.lastCountSize;
+         op += stats.size;
++        longOffsets = stats.longOffsets;
+     }
+ 
+     {   size_t const bitstreamSize = ZSTD_encodeSequences(
+@@ -2598,14 +2903,15 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
+ }
+ 
+ MEM_STATIC size_t
+-ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
+-                       const ZSTD_entropyCTables_t* prevEntropy,
+-                             ZSTD_entropyCTables_t* nextEntropy,
+-                       const ZSTD_CCtx_params* cctxParams,
+-                             void* dst, size_t dstCapacity,
+-                             size_t srcSize,
+-                             void* entropyWorkspace, size_t entropyWkspSize,
+-                             int bmi2)
++ZSTD_entropyCompressSeqStore(
++                    const seqStore_t* seqStorePtr,
++                    const ZSTD_entropyCTables_t* prevEntropy,
++                          ZSTD_entropyCTables_t* nextEntropy,
++                    const ZSTD_CCtx_params* cctxParams,
++                          void* dst, size_t dstCapacity,
++                          size_t srcSize,
++                          void* entropyWorkspace, size_t entropyWkspSize,
++                          int bmi2)
+ {
+     size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
+                             seqStorePtr, prevEntropy, nextEntropy, cctxParams,
+@@ -2615,15 +2921,21 @@ ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
+     /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
+      * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
+      */
+-    if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
++    if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) {
++        DEBUGLOG(4, "not enough dstCapacity (%zu) for ZSTD_entropyCompressSeqStore_internal()=> do not compress block", dstCapacity);
+         return 0;  /* block not compressed */
++    }
+     FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");
+ 
+     /* Check compressibility */
+     {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
+         if (cSize >= maxCSize) return 0;  /* block not compressed */
+     }
+-    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
++    DEBUGLOG(5, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
++    /* libzstd decoder before  > v1.5.4 is not compatible with compressed blocks of size ZSTD_BLOCKSIZE_MAX exactly.
++     * This restriction is indirectly already fulfilled by respecting ZSTD_minGain() condition above.
++     */
++    assert(cSize < ZSTD_BLOCKSIZE_MAX);
+     return cSize;
+ }
+ 
+@@ -2635,40 +2947,43 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS
+     static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
+         { ZSTD_compressBlock_fast  /* default for 0 */,
+           ZSTD_compressBlock_fast,
+-          ZSTD_compressBlock_doubleFast,
+-          ZSTD_compressBlock_greedy,
+-          ZSTD_compressBlock_lazy,
+-          ZSTD_compressBlock_lazy2,
+-          ZSTD_compressBlock_btlazy2,
+-          ZSTD_compressBlock_btopt,
+-          ZSTD_compressBlock_btultra,
+-          ZSTD_compressBlock_btultra2 },
++          ZSTD_COMPRESSBLOCK_DOUBLEFAST,
++          ZSTD_COMPRESSBLOCK_GREEDY,
++          ZSTD_COMPRESSBLOCK_LAZY,
++          ZSTD_COMPRESSBLOCK_LAZY2,
++          ZSTD_COMPRESSBLOCK_BTLAZY2,
++          ZSTD_COMPRESSBLOCK_BTOPT,
++          ZSTD_COMPRESSBLOCK_BTULTRA,
++          ZSTD_COMPRESSBLOCK_BTULTRA2
++        },
+         { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
+           ZSTD_compressBlock_fast_extDict,
+-          ZSTD_compressBlock_doubleFast_extDict,
+-          ZSTD_compressBlock_greedy_extDict,
+-          ZSTD_compressBlock_lazy_extDict,
+-          ZSTD_compressBlock_lazy2_extDict,
+-          ZSTD_compressBlock_btlazy2_extDict,
+-          ZSTD_compressBlock_btopt_extDict,
+-          ZSTD_compressBlock_btultra_extDict,
+-          ZSTD_compressBlock_btultra_extDict },
++          ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT,
++          ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT,
++          ZSTD_COMPRESSBLOCK_LAZY_EXTDICT,
++          ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT,
++          ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT,
++          ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT,
++          ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT,
++          ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT
++        },
+         { ZSTD_compressBlock_fast_dictMatchState  /* default for 0 */,
+           ZSTD_compressBlock_fast_dictMatchState,
+-          ZSTD_compressBlock_doubleFast_dictMatchState,
+-          ZSTD_compressBlock_greedy_dictMatchState,
+-          ZSTD_compressBlock_lazy_dictMatchState,
+-          ZSTD_compressBlock_lazy2_dictMatchState,
+-          ZSTD_compressBlock_btlazy2_dictMatchState,
+-          ZSTD_compressBlock_btopt_dictMatchState,
+-          ZSTD_compressBlock_btultra_dictMatchState,
+-          ZSTD_compressBlock_btultra_dictMatchState },
++          ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE,
++          ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE,
++          ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE,
++          ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE,
++          ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE,
++          ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE,
++          ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE,
++          ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE
++        },
+         { NULL  /* default for 0 */,
+           NULL,
+           NULL,
+-          ZSTD_compressBlock_greedy_dedicatedDictSearch,
+-          ZSTD_compressBlock_lazy_dedicatedDictSearch,
+-          ZSTD_compressBlock_lazy2_dedicatedDictSearch,
++          ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH,
++          ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH,
++          ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH,
+           NULL,
+           NULL,
+           NULL,
+@@ -2681,18 +2996,26 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS
+     DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);
+     if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {
+         static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {
+-            { ZSTD_compressBlock_greedy_row,
+-            ZSTD_compressBlock_lazy_row,
+-            ZSTD_compressBlock_lazy2_row },
+-            { ZSTD_compressBlock_greedy_extDict_row,
+-            ZSTD_compressBlock_lazy_extDict_row,
+-            ZSTD_compressBlock_lazy2_extDict_row },
+-            { ZSTD_compressBlock_greedy_dictMatchState_row,
+-            ZSTD_compressBlock_lazy_dictMatchState_row,
+-            ZSTD_compressBlock_lazy2_dictMatchState_row },
+-            { ZSTD_compressBlock_greedy_dedicatedDictSearch_row,
+-            ZSTD_compressBlock_lazy_dedicatedDictSearch_row,
+-            ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }
++            {
++                ZSTD_COMPRESSBLOCK_GREEDY_ROW,
++                ZSTD_COMPRESSBLOCK_LAZY_ROW,
++                ZSTD_COMPRESSBLOCK_LAZY2_ROW
++            },
++            {
++                ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW,
++                ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW,
++                ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW
++            },
++            {
++                ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW,
++                ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW,
++                ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW
++            },
++            {
++                ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW,
++                ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW,
++                ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW
++            }
+         };
+         DEBUGLOG(4, "Selecting a row-based matchfinder");
+         assert(useRowMatchFinder != ZSTD_ps_auto);
+@@ -2718,6 +3041,72 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
+     ssPtr->longLengthType = ZSTD_llt_none;
+ }
+ 
++/* ZSTD_postProcessSequenceProducerResult() :
++ * Validates the sequence count reported by the external sequence producer and makes sure
++ * the sequences in outSeqs end with a block delimiter (a sequence with offset == 0 and
++ * matchLength == 0; see zstd.h). A delimiter is appended when the producer did not emit one.
++ * outSeqsCapacity bounds the entries usable in outSeqs; srcSize is the size of the parsed source.
++ * Returns the number of sequences after post-processing, or an error code. */
++static size_t ZSTD_postProcessSequenceProducerResult(
++    ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize
++) {
++    RETURN_ERROR_IF(
++        nbExternalSeqs > outSeqsCapacity,   /* a count above capacity is reported as a producer error code */
++        sequenceProducer_failed,
++        "External sequence producer returned error code %lu",
++        (unsigned long)nbExternalSeqs
++    );
++
++    RETURN_ERROR_IF(
++        nbExternalSeqs == 0 && srcSize > 0,
++        sequenceProducer_failed,
++        "Got zero sequences from external sequence producer for a non-empty src buffer!"
++    );
++
++    if (srcSize == 0) {   /* empty source: the output is a single zeroed sequence, i.e. a lone block delimiter */
++        ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence));
++        return 1;
++    }
++
++    {
++        ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1];   /* nbExternalSeqs >= 1 here: the zero cases were handled above */
++
++        /* We can return early if lastSeq is already a block delimiter. */
++        if (lastSeq.offset == 0 && lastSeq.matchLength == 0) {
++            return nbExternalSeqs;
++        }
++
++        /* This error condition is only possible if the external matchfinder
++         * produced an invalid parse, by definition of ZSTD_sequenceBound(). */
++        RETURN_ERROR_IF(
++            nbExternalSeqs == outSeqsCapacity,
++            sequenceProducer_failed,
++            "nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!"
++        );
++
++        /* lastSeq is not a block delimiter and one slot is still free, so append a zeroed delimiter. */
++        ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence));
++        return nbExternalSeqs + 1;
++    }
++}
++
++/* ZSTD_fastSequenceLengthSum() :
++ * Returns the sum of litLength and matchLength over all seqBufSize entries of seqBuf
++ * (last literals are counted when they are carried as the litLength of a delimiter entry).
++ * Similar to another function in zstd_compress.c (determine_blockSize), except that it
++ * does not stop at a block delimiter; dropping that early exit allows the compiler to auto-vectorize (https://godbolt.org/z/cY1cajz9P).
++ * This function can be deleted and replaced by determine_blockSize after we resolve issue #3456. */
++static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seqBufSize) {
++    size_t matchLenSum, litLenSum, i;
++    matchLenSum = 0;
++    litLenSum = 0;
++    for (i = 0; i < seqBufSize; i++) {   /* every entry is summed, delimiters included */
++        litLenSum += seqBuf[i].litLength;
++        matchLenSum += seqBuf[i].matchLength;
++    }
++    return litLenSum + matchLenSum;
++}
++
+ typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
+ 
+ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
+@@ -2727,7 +3116,9 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
+     assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
+     /* Assert that we have correctly flushed the ctx params into the ms's copy */
+     ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
+-    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
++    /* TODO: See 3090. MIN_CBLOCK_SIZE was reduced from 3 to 2, so an additional 1 is added
++     * here to compensate. This logic needs to be revisited and made more consistent. */
++    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) {
+         if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
+             ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
+         } else {
+@@ -2763,6 +3154,15 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
+         }
+         if (zc->externSeqStore.pos < zc->externSeqStore.size) {
+             assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable);
++
++            /* External matchfinder + LDM is technically possible, just not implemented yet.
++             * We need to revisit soon and implement it. */
++            RETURN_ERROR_IF(
++                ZSTD_hasExtSeqProd(&zc->appliedParams),
++                parameter_combination_unsupported,
++                "Long-distance matching with external sequence producer enabled is not currently supported."
++            );
++
+             /* Updates ldmSeqStore.pos */
+             lastLLSize =
+                 ZSTD_ldm_blockCompress(&zc->externSeqStore,
+@@ -2774,6 +3174,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
+         } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
+             rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
+ 
++            /* External matchfinder + LDM is technically possible, just not implemented yet.
++             * We need to revisit soon and implement it. */
++            RETURN_ERROR_IF(
++                ZSTD_hasExtSeqProd(&zc->appliedParams),
++                parameter_combination_unsupported,
++                "Long-distance matching with external sequence producer enabled is not currently supported."
++            );
++
+             ldmSeqStore.seq = zc->ldmSequences;
+             ldmSeqStore.capacity = zc->maxNbLdmSequences;
+             /* Updates ldmSeqStore.size */
+@@ -2788,10 +3196,74 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
+                                        zc->appliedParams.useRowMatchFinder,
+                                        src, srcSize);
+             assert(ldmSeqStore.pos == ldmSeqStore.size);
+-        } else {   /* not long range mode */
+-            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
+-                                                                                    zc->appliedParams.useRowMatchFinder,
+-                                                                                    dictMode);
++        } else if (ZSTD_hasExtSeqProd(&zc->appliedParams)) {
++            assert(
++                zc->extSeqBufCapacity >= ZSTD_sequenceBound(srcSize)
++            );
++            assert(zc->appliedParams.extSeqProdFunc != NULL);
++
++            {   U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog;
++
++                size_t const nbExternalSeqs = (zc->appliedParams.extSeqProdFunc)(
++                    zc->appliedParams.extSeqProdState,
++                    zc->extSeqBuf,
++                    zc->extSeqBufCapacity,
++                    src, srcSize,
++                    NULL, 0,  /* dict and dictSize, currently not supported */
++                    zc->appliedParams.compressionLevel,
++                    windowSize
++                );
++
++                size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult(
++                    zc->extSeqBuf,
++                    nbExternalSeqs,
++                    zc->extSeqBufCapacity,
++                    srcSize
++                );
++
++                /* Return early if there is no error, since we don't need to worry about last literals */
++                if (!ZSTD_isError(nbPostProcessedSeqs)) {
++                    ZSTD_sequencePosition seqPos = {0,0,0};
++                    size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->extSeqBuf, nbPostProcessedSeqs);
++                    RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!");
++                    FORWARD_IF_ERROR(
++                        ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
++                            zc, &seqPos,
++                            zc->extSeqBuf, nbPostProcessedSeqs,
++                            src, srcSize,
++                            zc->appliedParams.searchForExternalRepcodes
++                        ),
++                        "Failed to copy external sequences to seqStore!"
++                    );
++                    ms->ldmSeqStore = NULL;
++                    DEBUGLOG(5, "Copied %lu sequences from external sequence producer to internal seqStore.", (unsigned long)nbExternalSeqs);
++                    return ZSTDbss_compress;
++                }
++
++                /* Propagate the error if fallback is disabled */
++                if (!zc->appliedParams.enableMatchFinderFallback) {
++                    return nbPostProcessedSeqs;
++                }
++
++                /* Fallback to software matchfinder */
++                {   ZSTD_blockCompressor const blockCompressor =
++                        ZSTD_selectBlockCompressor(
++                            zc->appliedParams.cParams.strategy,
++                            zc->appliedParams.useRowMatchFinder,
++                            dictMode);
++                    ms->ldmSeqStore = NULL;
++                    DEBUGLOG(
++                        5,
++                        "External sequence producer returned error code %lu. Falling back to internal parser.",
++                        (unsigned long)nbExternalSeqs
++                    );
++                    lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
++            }   }
++        } else {   /* not long range mode and no external matchfinder */
++            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(
++                    zc->appliedParams.cParams.strategy,
++                    zc->appliedParams.useRowMatchFinder,
++                    dictMode);
+             ms->ldmSeqStore = NULL;
+             lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
+         }
+@@ -2801,29 +3273,38 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
+     return ZSTDbss_compress;
+ }
+ 
+-static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
++static size_t ZSTD_copyBlockSequences(SeqCollector* seqCollector, const seqStore_t* seqStore, const U32 prevRepcodes[ZSTD_REP_NUM])
+ {
+-    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
+-    const seqDef* seqStoreSeqs = seqStore->sequencesStart;
+-    size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
+-    size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
+-    size_t literalsRead = 0;
+-    size_t lastLLSize;
++    const seqDef* inSeqs = seqStore->sequencesStart;
++    const size_t nbInSequences = seqStore->sequences - inSeqs;
++    const size_t nbInLiterals = (size_t)(seqStore->lit - seqStore->litStart);
+ 
+-    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
++    ZSTD_Sequence* outSeqs = seqCollector->seqIndex == 0 ? seqCollector->seqStart : seqCollector->seqStart + seqCollector->seqIndex;
++    const size_t nbOutSequences = nbInSequences + 1;
++    size_t nbOutLiterals = 0;
++    repcodes_t repcodes;
+     size_t i;
+-    repcodes_t updatedRepcodes;
+ 
+-    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
+-    /* Ensure we have enough space for last literals "sequence" */
+-    assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
+-    ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
+-    for (i = 0; i < seqStoreSeqSize; ++i) {
+-        U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;
+-        outSeqs[i].litLength = seqStoreSeqs[i].litLength;
+-        outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;
++    /* Bounds check that we have enough space for every input sequence
++     * and the block delimiter
++     */
++    assert(seqCollector->seqIndex <= seqCollector->maxSequences);
++    RETURN_ERROR_IF(
++        nbOutSequences > (size_t)(seqCollector->maxSequences - seqCollector->seqIndex),
++        dstSize_tooSmall,
++        "Not enough space to copy sequences");
++
++    ZSTD_memcpy(&repcodes, prevRepcodes, sizeof(repcodes));
++    for (i = 0; i < nbInSequences; ++i) {
++        U32 rawOffset;
++        outSeqs[i].litLength = inSeqs[i].litLength;
++        outSeqs[i].matchLength = inSeqs[i].mlBase + MINMATCH;
+         outSeqs[i].rep = 0;
+ 
++        /* Handle the possible single length >= 64K
++         * There can only be one because we add MINMATCH to every match length,
++         * and blocks are at most 128K.
++         */
+         if (i == seqStore->longLengthPos) {
+             if (seqStore->longLengthType == ZSTD_llt_literalLength) {
+                 outSeqs[i].litLength += 0x10000;
+@@ -2832,37 +3313,55 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
+             }
+         }
+ 
+-        if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {
+-            /* Derive the correct offset corresponding to a repcode */
+-            outSeqs[i].rep = seqStoreSeqs[i].offBase;
++        /* Determine the raw offset given the offBase, which may be a repcode. */
++        if (OFFBASE_IS_REPCODE(inSeqs[i].offBase)) {
++            const U32 repcode = OFFBASE_TO_REPCODE(inSeqs[i].offBase);
++            assert(repcode > 0);
++            outSeqs[i].rep = repcode;
+             if (outSeqs[i].litLength != 0) {
+-                rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
++                rawOffset = repcodes.rep[repcode - 1];
+             } else {
+-                if (outSeqs[i].rep == 3) {
+-                    rawOffset = updatedRepcodes.rep[0] - 1;
++                if (repcode == 3) {
++                    assert(repcodes.rep[0] > 1);
++                    rawOffset = repcodes.rep[0] - 1;
+                 } else {
+-                    rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
++                    rawOffset = repcodes.rep[repcode];
+                 }
+             }
++        } else {
++            rawOffset = OFFBASE_TO_OFFSET(inSeqs[i].offBase);
+         }
+         outSeqs[i].offset = rawOffset;
+-        /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
+-           so we provide seqStoreSeqs[i].offset - 1 */
+-        ZSTD_updateRep(updatedRepcodes.rep,
+-                       seqStoreSeqs[i].offBase - 1,
+-                       seqStoreSeqs[i].litLength == 0);
+-        literalsRead += outSeqs[i].litLength;
++
++        /* Update repcode history for the sequence */
++        ZSTD_updateRep(repcodes.rep,
++                       inSeqs[i].offBase,
++                       inSeqs[i].litLength == 0);
++
++        nbOutLiterals += outSeqs[i].litLength;
+     }
+     /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
+      * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
+      * for the block boundary, according to the API.
+      */
+-    assert(seqStoreLiteralsSize >= literalsRead);
+-    lastLLSize = seqStoreLiteralsSize - literalsRead;
+-    outSeqs[i].litLength = (U32)lastLLSize;
+-    outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
+-    seqStoreSeqSize++;
+-    zc->seqCollector.seqIndex += seqStoreSeqSize;
++    assert(nbInLiterals >= nbOutLiterals);
++    {
++        const size_t lastLLSize = nbInLiterals - nbOutLiterals;
++        outSeqs[nbInSequences].litLength = (U32)lastLLSize;
++        outSeqs[nbInSequences].matchLength = 0;
++        outSeqs[nbInSequences].offset = 0;
++        assert(nbOutSequences == nbInSequences + 1);
++    }
++    seqCollector->seqIndex += nbOutSequences;
++    assert(seqCollector->seqIndex <= seqCollector->maxSequences);
++
++    return 0;
++}
++
++size_t ZSTD_sequenceBound(size_t srcSize) {   /* sequence-count bound for srcSize bytes, block delimiters included */
++    const size_t maxNbSeq = (srcSize / ZSTD_MINMATCH_MIN) + 1;   /* one sequence per ZSTD_MINMATCH_MIN bytes, plus one */
++    const size_t maxNbDelims = (srcSize / ZSTD_BLOCKSIZE_MAX_MIN) + 1;   /* one delimiter per ZSTD_BLOCKSIZE_MAX_MIN bytes, plus one */
++    return maxNbSeq + maxNbDelims;
+ }
+ 
+ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+@@ -2871,6 +3370,16 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+     const size_t dstCapacity = ZSTD_compressBound(srcSize);
+     void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
+     SeqCollector seqCollector;
++    {
++        int targetCBlockSize;
++        FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_targetCBlockSize, &targetCBlockSize), "");
++        RETURN_ERROR_IF(targetCBlockSize != 0, parameter_unsupported, "targetCBlockSize != 0");
++    }
++    {
++        int nbWorkers;
++        FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_nbWorkers, &nbWorkers), "");
++        RETURN_ERROR_IF(nbWorkers != 0, parameter_unsupported, "nbWorkers != 0");
++    }
+ 
+     RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
+ 
+@@ -2880,8 +3389,12 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+     seqCollector.maxSequences = outSeqsSize;
+     zc->seqCollector = seqCollector;
+ 
+-    ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
+-    ZSTD_customFree(dst, ZSTD_defaultCMem);
++    {
++        const size_t ret = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
++        ZSTD_customFree(dst, ZSTD_defaultCMem);
++        FORWARD_IF_ERROR(ret, "ZSTD_compress2 failed");
++    }
++    assert(zc->seqCollector.seqIndex <= ZSTD_sequenceBound(srcSize));
+     return zc->seqCollector.seqIndex;
+ }
+ 
+@@ -2910,19 +3423,17 @@ static int ZSTD_isRLE(const BYTE* src, size_t length) {
+     const size_t unrollMask = unrollSize - 1;
+     const size_t prefixLength = length & unrollMask;
+     size_t i;
+-    size_t u;
+     if (length == 1) return 1;
+     /* Check if prefix is RLE first before using unrolled loop */
+     if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
+         return 0;
+     }
+     for (i = prefixLength; i != length; i += unrollSize) {
++        size_t u;
+         for (u = 0; u < unrollSize; u += sizeof(size_t)) {
+             if (MEM_readST(ip + i + u) != valueST) {
+                 return 0;
+-            }
+-        }
+-    }
++    }   }   }
+     return 1;
+ }
+ 
+@@ -2938,7 +3449,8 @@ static int ZSTD_maybeRLE(seqStore_t const* seqStore)
+     return nbSeqs < 4 && nbLits < 10;
+ }
+ 
+-static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
++static void
++ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
+ {
+     ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;
+     bs->prevCBlock = bs->nextCBlock;
+@@ -2946,7 +3458,9 @@ static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* c
+ }
+ 
+ /* Writes the block header */
+-static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) {
++static void
++writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock)
++{
+     U32 const cBlockHeader = cSize == 1 ?
+                         lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
+                         lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+@@ -2959,13 +3473,16 @@ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastB
+  *  Stores literals block type (raw, rle, compressed, repeat) and
+  *  huffman description table to hufMetadata.
+  *  Requires ENTROPY_WORKSPACE_SIZE workspace
+- *  @return : size of huffman description table or error code */
+-static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
+-                                            const ZSTD_hufCTables_t* prevHuf,
+-                                                  ZSTD_hufCTables_t* nextHuf,
+-                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
+-                                                  const int literalsCompressionIsDisabled,
+-                                                  void* workspace, size_t wkspSize)
++ * @return : size of huffman description table, or an error code
++ */
++static size_t
++ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
++                               const ZSTD_hufCTables_t* prevHuf,
++                                     ZSTD_hufCTables_t* nextHuf,
++                                     ZSTD_hufCTablesMetadata_t* hufMetadata,
++                               const int literalsCompressionIsDisabled,
++                                     void* workspace, size_t wkspSize,
++                                     int hufFlags)
+ {
+     BYTE* const wkspStart = (BYTE*)workspace;
+     BYTE* const wkspEnd = wkspStart + wkspSize;
+@@ -2973,9 +3490,9 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
+     unsigned* const countWksp = (unsigned*)workspace;
+     const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
+     BYTE* const nodeWksp = countWkspStart + countWkspSize;
+-    const size_t nodeWkspSize = wkspEnd-nodeWksp;
++    const size_t nodeWkspSize = (size_t)(wkspEnd - nodeWksp);
+     unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+-    unsigned huffLog = HUF_TABLELOG_DEFAULT;
++    unsigned huffLog = LitHufLog;
+     HUF_repeat repeat = prevHuf->repeatMode;
+     DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);
+ 
+@@ -2990,73 +3507,77 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
+ 
+     /* small ? don't even attempt compression (speed opt) */
+ #ifndef COMPRESS_LITERALS_SIZE_MIN
+-#define COMPRESS_LITERALS_SIZE_MIN 63
++# define COMPRESS_LITERALS_SIZE_MIN 63  /* heuristic */
+ #endif
+     {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+         if (srcSize <= minLitSize) {
+             DEBUGLOG(5, "set_basic - too small");
+             hufMetadata->hType = set_basic;
+             return 0;
+-        }
+-    }
++    }   }
+ 
+     /* Scan input and build symbol stats */
+-    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
++    {   size_t const largest =
++            HIST_count_wksp (countWksp, &maxSymbolValue,
++                            (const BYTE*)src, srcSize,
++                            workspace, wkspSize);
+         FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
+         if (largest == srcSize) {
++            /* only one literal symbol */
+             DEBUGLOG(5, "set_rle");
+             hufMetadata->hType = set_rle;
+             return 0;
+         }
+         if (largest <= (srcSize >> 7)+4) {
++            /* heuristic: likely not compressible */
+             DEBUGLOG(5, "set_basic - no gain");
+             hufMetadata->hType = set_basic;
+             return 0;
+-        }
+-    }
++    }   }
+ 
+     /* Validate the previous Huffman table */
+-    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
++    if (repeat == HUF_repeat_check
++      && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
+         repeat = HUF_repeat_none;
+     }
+ 
+     /* Build Huffman Tree */
+     ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
+-    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
++    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, nodeWksp, nodeWkspSize, nextHuf->CTable, countWksp, hufFlags);
++    assert(huffLog <= LitHufLog);
+     {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
+                                                     maxSymbolValue, huffLog,
+                                                     nodeWksp, nodeWkspSize);
+         FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
+         huffLog = (U32)maxBits;
+-        {   /* Build and write the CTable */
+-            size_t const newCSize = HUF_estimateCompressedSize(
+-                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
+-            size_t const hSize = HUF_writeCTable_wksp(
+-                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
+-                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
+-                    nodeWksp, nodeWkspSize);
+-            /* Check against repeating the previous CTable */
+-            if (repeat != HUF_repeat_none) {
+-                size_t const oldCSize = HUF_estimateCompressedSize(
+-                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
+-                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
+-                    DEBUGLOG(5, "set_repeat - smaller");
+-                    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+-                    hufMetadata->hType = set_repeat;
+-                    return 0;
+-                }
+-            }
+-            if (newCSize + hSize >= srcSize) {
+-                DEBUGLOG(5, "set_basic - no gains");
++    }
++    {   /* Build and write the CTable */
++        size_t const newCSize = HUF_estimateCompressedSize(
++                (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
++        size_t const hSize = HUF_writeCTable_wksp(
++                hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
++                (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
++                nodeWksp, nodeWkspSize);
++        /* Check against repeating the previous CTable */
++        if (repeat != HUF_repeat_none) {
++            size_t const oldCSize = HUF_estimateCompressedSize(
++                    (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
++            if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
++                DEBUGLOG(5, "set_repeat - smaller");
+                 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+-                hufMetadata->hType = set_basic;
++                hufMetadata->hType = set_repeat;
+                 return 0;
+-            }
+-            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
+-            hufMetadata->hType = set_compressed;
+-            nextHuf->repeatMode = HUF_repeat_check;
+-            return hSize;
++        }   }
++        if (newCSize + hSize >= srcSize) {
++            DEBUGLOG(5, "set_basic - no gains");
++            ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
++            hufMetadata->hType = set_basic;
++            return 0;
+         }
++        DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
++        hufMetadata->hType = set_compressed;
++        nextHuf->repeatMode = HUF_repeat_check;
++        return hSize;
+     }
+ }
+ 
+@@ -3066,8 +3587,9 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
+  * and updates nextEntropy to the appropriate repeatMode.
+  */
+ static ZSTD_symbolEncodingTypeStats_t
+-ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {
+-    ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0};
++ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy)
++{
++    ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0, 0};
+     nextEntropy->litlength_repeatMode = FSE_repeat_none;
+     nextEntropy->offcode_repeatMode = FSE_repeat_none;
+     nextEntropy->matchlength_repeatMode = FSE_repeat_none;
+@@ -3078,16 +3600,18 @@ ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {
+  *  Builds entropy for the sequences.
+  *  Stores symbol compression modes and fse table to fseMetadata.
+  *  Requires ENTROPY_WORKSPACE_SIZE wksp.
+- *  @return : size of fse tables or error code */
+-static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
+-                                              const ZSTD_fseCTables_t* prevEntropy,
+-                                                    ZSTD_fseCTables_t* nextEntropy,
+-                                              const ZSTD_CCtx_params* cctxParams,
+-                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
+-                                                    void* workspace, size_t wkspSize)
++ * @return : size of fse tables or error code */
++static size_t
++ZSTD_buildBlockEntropyStats_sequences(
++                const seqStore_t* seqStorePtr,
++                const ZSTD_fseCTables_t* prevEntropy,
++                      ZSTD_fseCTables_t* nextEntropy,
++                const ZSTD_CCtx_params* cctxParams,
++                      ZSTD_fseCTablesMetadata_t* fseMetadata,
++                      void* workspace, size_t wkspSize)
+ {
+     ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+-    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
++    size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+     BYTE* const ostart = fseMetadata->fseTablesBuffer;
+     BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
+     BYTE* op = ostart;
+@@ -3114,23 +3638,28 @@ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
+ /* ZSTD_buildBlockEntropyStats() :
+  *  Builds entropy for the block.
+  *  Requires workspace size ENTROPY_WORKSPACE_SIZE
+- *
+- *  @return : 0 on success or error code
++ * @return : 0 on success, or an error code
++ *  Note : also employed in superblock
+  */
+-size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
+-                             const ZSTD_entropyCTables_t* prevEntropy,
+-                                   ZSTD_entropyCTables_t* nextEntropy,
+-                             const ZSTD_CCtx_params* cctxParams,
+-                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+-                                   void* workspace, size_t wkspSize)
+-{
+-    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
++size_t ZSTD_buildBlockEntropyStats(
++            const seqStore_t* seqStorePtr,
++            const ZSTD_entropyCTables_t* prevEntropy,
++                  ZSTD_entropyCTables_t* nextEntropy,
++            const ZSTD_CCtx_params* cctxParams,
++                  ZSTD_entropyCTablesMetadata_t* entropyMetadata,
++                  void* workspace, size_t wkspSize)
++{
++    size_t const litSize = (size_t)(seqStorePtr->lit - seqStorePtr->litStart);
++    int const huf_useOptDepth = (cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD);
++    int const hufFlags = huf_useOptDepth ? HUF_flags_optimalDepth : 0;
++
+     entropyMetadata->hufMetadata.hufDesSize =
+         ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
+                                             &prevEntropy->huf, &nextEntropy->huf,
+                                             &entropyMetadata->hufMetadata,
+                                             ZSTD_literalsCompressionIsDisabled(cctxParams),
+-                                            workspace, wkspSize);
++                                            workspace, wkspSize, hufFlags);
++
+     FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
+     entropyMetadata->fseMetadata.fseTablesSize =
+         ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
+@@ -3143,11 +3672,12 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
+ }
+ 
+ /* Returns the size estimate for the literals section (header + content) of a block */
+-static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
+-                                                const ZSTD_hufCTables_t* huf,
+-                                                const ZSTD_hufCTablesMetadata_t* hufMetadata,
+-                                                void* workspace, size_t wkspSize,
+-                                                int writeEntropy)
++static size_t
++ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
++                               const ZSTD_hufCTables_t* huf,
++                               const ZSTD_hufCTablesMetadata_t* hufMetadata,
++                               void* workspace, size_t wkspSize,
++                               int writeEntropy)
+ {
+     unsigned* const countWksp = (unsigned*)workspace;
+     unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+@@ -3169,12 +3699,13 @@ static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSiz
+ }
+ 
+ /* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
+-static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
+-                        const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
+-                        const FSE_CTable* fseCTable,
+-                        const U8* additionalBits,
+-                        short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+-                        void* workspace, size_t wkspSize)
++static size_t
++ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
++                    const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
++                    const FSE_CTable* fseCTable,
++                    const U8* additionalBits,
++                    short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
++                    void* workspace, size_t wkspSize)
+ {
+     unsigned* const countWksp = (unsigned*)workspace;
+     const BYTE* ctp = codeTable;
+@@ -3206,99 +3737,107 @@ static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
+ }
+ 
+ /* Returns the size estimate for the sequences section (header + content) of a block */
+-static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
+-                                                  const BYTE* llCodeTable,
+-                                                  const BYTE* mlCodeTable,
+-                                                  size_t nbSeq,
+-                                                  const ZSTD_fseCTables_t* fseTables,
+-                                                  const ZSTD_fseCTablesMetadata_t* fseMetadata,
+-                                                  void* workspace, size_t wkspSize,
+-                                                  int writeEntropy)
++static size_t
++ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
++                                 const BYTE* llCodeTable,
++                                 const BYTE* mlCodeTable,
++                                 size_t nbSeq,
++                                 const ZSTD_fseCTables_t* fseTables,
++                                 const ZSTD_fseCTablesMetadata_t* fseMetadata,
++                                 void* workspace, size_t wkspSize,
++                                 int writeEntropy)
+ {
+     size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
+     size_t cSeqSizeEstimate = 0;
+     cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,
+-                                         fseTables->offcodeCTable, NULL,
+-                                         OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+-                                         workspace, wkspSize);
++                                    fseTables->offcodeCTable, NULL,
++                                    OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
++                                    workspace, wkspSize);
+     cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,
+-                                         fseTables->litlengthCTable, LL_bits,
+-                                         LL_defaultNorm, LL_defaultNormLog, MaxLL,
+-                                         workspace, wkspSize);
++                                    fseTables->litlengthCTable, LL_bits,
++                                    LL_defaultNorm, LL_defaultNormLog, MaxLL,
++                                    workspace, wkspSize);
+     cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,
+-                                         fseTables->matchlengthCTable, ML_bits,
+-                                         ML_defaultNorm, ML_defaultNormLog, MaxML,
+-                                         workspace, wkspSize);
++                                    fseTables->matchlengthCTable, ML_bits,
++                                    ML_defaultNorm, ML_defaultNormLog, MaxML,
++                                    workspace, wkspSize);
+     if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
+     return cSeqSizeEstimate + sequencesSectionHeaderSize;
+ }
+ 
+ /* Returns the size estimate for a given stream of literals, of, ll, ml */
+-static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
+-                                     const BYTE* ofCodeTable,
+-                                     const BYTE* llCodeTable,
+-                                     const BYTE* mlCodeTable,
+-                                     size_t nbSeq,
+-                                     const ZSTD_entropyCTables_t* entropy,
+-                                     const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+-                                     void* workspace, size_t wkspSize,
+-                                     int writeLitEntropy, int writeSeqEntropy) {
++static size_t
++ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
++                       const BYTE* ofCodeTable,
++                       const BYTE* llCodeTable,
++                       const BYTE* mlCodeTable,
++                       size_t nbSeq,
++                       const ZSTD_entropyCTables_t* entropy,
++                       const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
++                       void* workspace, size_t wkspSize,
++                       int writeLitEntropy, int writeSeqEntropy)
++{
+     size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
+-                                                         &entropy->huf, &entropyMetadata->hufMetadata,
+-                                                         workspace, wkspSize, writeLitEntropy);
++                                    &entropy->huf, &entropyMetadata->hufMetadata,
++                                    workspace, wkspSize, writeLitEntropy);
+     size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
+-                                                         nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
+-                                                         workspace, wkspSize, writeSeqEntropy);
++                                    nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
++                                    workspace, wkspSize, writeSeqEntropy);
+     return seqSize + literalsSize + ZSTD_blockHeaderSize;
+ }
+ 
+ /* Builds entropy statistics and uses them for blocksize estimation.
+  *
+- * Returns the estimated compressed size of the seqStore, or a zstd error.
++ * @return: estimated compressed size of the seqStore, or a zstd error.
+  */
+-static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) {
+-    ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
++static size_t
++ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc)
++{
++    ZSTD_entropyCTablesMetadata_t* const entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
+     DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()");
+     FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
+                     &zc->blockState.prevCBlock->entropy,
+                     &zc->blockState.nextCBlock->entropy,
+                     &zc->appliedParams,
+                     entropyMetadata,
+-                    zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
+-    return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
++                    zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE), "");
++    return ZSTD_estimateBlockSize(
++                    seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
+                     seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
+                     (size_t)(seqStore->sequences - seqStore->sequencesStart),
+-                    &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
++                    &zc->blockState.nextCBlock->entropy,
++                    entropyMetadata,
++                    zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
+                     (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);
+ }
+ 
+ /* Returns literals bytes represented in a seqStore */
+-static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) {
++static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore)
++{
+     size_t literalsBytes = 0;
+-    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
++    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
+     size_t i;
+     for (i = 0; i < nbSeqs; ++i) {
+-        seqDef seq = seqStore->sequencesStart[i];
++        seqDef const seq = seqStore->sequencesStart[i];
+         literalsBytes += seq.litLength;
+         if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {
+             literalsBytes += 0x10000;
+-        }
+-    }
++    }   }
+     return literalsBytes;
+ }
+ 
+ /* Returns match bytes represented in a seqStore */
+-static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
++static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore)
++{
+     size_t matchBytes = 0;
+-    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
++    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
+     size_t i;
+     for (i = 0; i < nbSeqs; ++i) {
+         seqDef seq = seqStore->sequencesStart[i];
+         matchBytes += seq.mlBase + MINMATCH;
+         if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
+             matchBytes += 0x10000;
+-        }
+-    }
++    }   }
+     return matchBytes;
+ }
+ 
+@@ -3307,15 +3846,12 @@ static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
+  */
+ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
+                                const seqStore_t* originalSeqStore,
+-                                     size_t startIdx, size_t endIdx) {
+-    BYTE* const litEnd = originalSeqStore->lit;
+-    size_t literalsBytes;
+-    size_t literalsBytesPreceding = 0;
+-
++                                     size_t startIdx, size_t endIdx)
++{
+     *resultSeqStore = *originalSeqStore;
+     if (startIdx > 0) {
+         resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
+-        literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
++        resultSeqStore->litStart += ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
+     }
+ 
+     /* Move longLengthPos into the correct position if necessary */
+@@ -3328,13 +3864,12 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
+     }
+     resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
+     resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
+-    literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
+-    resultSeqStore->litStart += literalsBytesPreceding;
+     if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
+         /* This accounts for possible last literals if the derived chunk reaches the end of the block */
+-        resultSeqStore->lit = litEnd;
++        assert(resultSeqStore->lit == originalSeqStore->lit);
+     } else {
+-        resultSeqStore->lit = resultSeqStore->litStart+literalsBytes;
++        size_t const literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
++        resultSeqStore->lit = resultSeqStore->litStart + literalsBytes;
+     }
+     resultSeqStore->llCode += startIdx;
+     resultSeqStore->mlCode += startIdx;
+@@ -3342,20 +3877,26 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
+ }
+ 
+ /*
+- * Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
+- * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq().
++ * Returns the raw offset represented by the combination of offBase, ll0, and repcode history.
++ * offBase must represent a repcode in the numeric representation of ZSTD_storeSeq().
+  */
+ static U32
+-ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0)
+-{
+-    U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0;  /* [ 0 - 3 ] */
+-    assert(STORED_IS_REPCODE(offCode));
+-    if (adjustedOffCode == ZSTD_REP_NUM) {
+-        /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
+-        assert(rep[0] > 0);
++ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offBase, const U32 ll0)
++{
++    U32 const adjustedRepCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0;  /* [ 0 - 3 ] */
++    assert(OFFBASE_IS_REPCODE(offBase));
++    if (adjustedRepCode == ZSTD_REP_NUM) {
++        assert(ll0);
++        /* litlength == 0 and offBase == 3 (repcode 3) implies selection of first repcode - 1
++         * This is only valid if it results in a valid offset value, aka > 0.
++         * Note : it may happen that `rep[0]==1` in exceptional circumstances.
++         * In which case this function will return 0, which is an invalid offset.
++         * It's not an issue though, since this value will be
++         * compared and discarded within ZSTD_seqStore_resolveOffCodes().
++         */
+         return rep[0] - 1;
+     }
+-    return rep[adjustedOffCode];
++    return rep[adjustedRepCode];
+ }
+ 
+ /*
+@@ -3371,30 +3912,33 @@ ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, c
+  *        1-3 : repcode 1-3
+  *        4+ : real_offset+3
+  */
+-static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
+-                                          seqStore_t* const seqStore, U32 const nbSeq) {
++static void
++ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
++                        const seqStore_t* const seqStore, U32 const nbSeq)
++{
+     U32 idx = 0;
++    U32 const longLitLenIdx = seqStore->longLengthType == ZSTD_llt_literalLength ? seqStore->longLengthPos : nbSeq;
+     for (; idx < nbSeq; ++idx) {
+         seqDef* const seq = seqStore->sequencesStart + idx;
+-        U32 const ll0 = (seq->litLength == 0);
+-        U32 const offCode = OFFBASE_TO_STORED(seq->offBase);
+-        assert(seq->offBase > 0);
+-        if (STORED_IS_REPCODE(offCode)) {
+-            U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
+-            U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
++        U32 const ll0 = (seq->litLength == 0) && (idx != longLitLenIdx);
++        U32 const offBase = seq->offBase;
++        assert(offBase > 0);
++        if (OFFBASE_IS_REPCODE(offBase)) {
++            U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offBase, ll0);
++            U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offBase, ll0);
+             /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
+              * the repcode with the offset it actually references, determined by the compression
+              * repcode history.
+              */
+             if (dRawOffset != cRawOffset) {
+-                seq->offBase = cRawOffset + ZSTD_REP_NUM;
++                seq->offBase = OFFSET_TO_OFFBASE(cRawOffset);
+             }
+         }
+         /* Compression repcode history is always updated with values directly from the unmodified seqStore.
+          * Decompression repcode history may use modified seq->offset value taken from compression repcode history.
+          */
+-        ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0);
+-        ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
++        ZSTD_updateRep(dRepcodes->rep, seq->offBase, ll0);
++        ZSTD_updateRep(cRepcodes->rep, offBase, ll0);
+     }
+ }
+ 
+@@ -3404,10 +3948,11 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
+  * Returns the total size of that block (including header) or a ZSTD error code.
+  */
+ static size_t
+-ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
++ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc,
++                            const seqStore_t* const seqStore,
+                                   repcodes_t* const dRep, repcodes_t* const cRep,
+                                   void* dst, size_t dstCapacity,
+-                                  const void* src, size_t srcSize,
++                            const void* src, size_t srcSize,
+                                   U32 lastBlock, U32 isPartition)
+ {
+     const U32 rleMaxLength = 25;
+@@ -3442,8 +3987,9 @@ ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
+         cSeqsSize = 1;
+     }
+ 
++    /* Sequence collection not supported when block splitting */
+     if (zc->seqCollector.collectSequences) {
+-        ZSTD_copyBlockSequences(zc);
++        FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, seqStore, dRepOriginal.rep), "copyBlockSequences failed");
+         ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
+         return 0;
+     }
+@@ -3481,45 +4027,49 @@ typedef struct {
+ 
+ /* Helper function to perform the recursive search for block splits.
+  * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
+- * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then
+- * we do not recurse.
++ * If advantageous to split, then we recurse down the two sub-blocks.
++ * If not, or if an error occurred in estimation, then we do not recurse.
+  *
+- * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
++ * Note: The recursion depth is capped by a heuristic minimum number of sequences,
++ * defined by MIN_SEQUENCES_BLOCK_SPLITTING.
+  * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
+  * In practice, recursion depth usually doesn't go beyond 4.
+  *
+- * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
++ * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS.
++ * At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
+  * maximum of 128 KB, this value is actually impossible to reach.
+  */
+ static void
+ ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
+                              ZSTD_CCtx* zc, const seqStore_t* origSeqStore)
+ {
+-    seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
+-    seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
+-    seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
++    seqStore_t* const fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
++    seqStore_t* const firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
++    seqStore_t* const secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
+     size_t estimatedOriginalSize;
+     size_t estimatedFirstHalfSize;
+     size_t estimatedSecondHalfSize;
+     size_t midIdx = (startIdx + endIdx)/2;
+ 
++    DEBUGLOG(5, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
++    assert(endIdx >= startIdx);
+     if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {
+-        DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences");
++        DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences (%zu)", endIdx - startIdx);
+         return;
+     }
+-    DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
+     ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
+     ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);
+     ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);
+     estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc);
+     estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc);
+     estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc);
+-    DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
++    DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
+              estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
+     if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
+         return;
+     }
+     if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
++        DEBUGLOG(5, "split decided at seqNb:%zu", midIdx);
+         ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
+         splits->splitLocations[splits->idx] = (U32)midIdx;
+         splits->idx++;
+@@ -3527,14 +4077,18 @@ ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t end
+     }
+ }
+ 
+-/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio.
++/* Base recursive function.
++ * Populates a table with intra-block partition indices that can improve compression ratio.
+  *
+- * Returns the number of splits made (which equals the size of the partition table - 1).
++ * @return: number of splits made (which equals the size of the partition table - 1).
+  */
+-static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) {
+-    seqStoreSplits splits = {partitions, 0};
++static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
++{
++    seqStoreSplits splits;
++    splits.splitLocations = partitions;
++    splits.idx = 0;
+     if (nbSeq <= 4) {
+-        DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split");
++        DEBUGLOG(5, "ZSTD_deriveBlockSplits: Too few sequences to split (%u <= 4)", nbSeq);
+         /* Refuse to try and split anything with less than 4 sequences */
+         return 0;
+     }
+@@ -3550,18 +4104,20 @@ static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
+  * Returns combined size of all blocks (which includes headers), or a ZSTD error code.
+  */
+ static size_t
+-ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
+-                                       const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq)
++ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc,
++                                    void* dst, size_t dstCapacity,
++                              const void* src, size_t blockSize,
++                                    U32 lastBlock, U32 nbSeq)
+ {
+     size_t cSize = 0;
+     const BYTE* ip = (const BYTE*)src;
+     BYTE* op = (BYTE*)dst;
+     size_t i = 0;
+     size_t srcBytesTotal = 0;
+-    U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
+-    seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
+-    seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore;
+-    size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
++    U32* const partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
++    seqStore_t* const nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
++    seqStore_t* const currSeqStore = &zc->blockSplitCtx.currSeqStore;
++    size_t const numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
+ 
+     /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
+      * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
+@@ -3583,30 +4139,31 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapac
+     ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
+     ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t));
+ 
+-    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
++    DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+                 (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
+                 (unsigned)zc->blockState.matchState.nextToUpdate);
+ 
+     if (numSplits == 0) {
+-        size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
+-                                                                   &dRep, &cRep,
+-                                                                    op, dstCapacity,
+-                                                                    ip, blockSize,
+-                                                                    lastBlock, 0 /* isPartition */);
++        size_t cSizeSingleBlock =
++            ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
++                                            &dRep, &cRep,
++                                            op, dstCapacity,
++                                            ip, blockSize,
++                                            lastBlock, 0 /* isPartition */);
+         FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
+         DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
+-        assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
++        assert(zc->blockSize <= ZSTD_BLOCKSIZE_MAX);
++        assert(cSizeSingleBlock <= zc->blockSize + ZSTD_blockHeaderSize);
+         return cSizeSingleBlock;
+     }
+ 
+     ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]);
+     for (i = 0; i <= numSplits; ++i) {
+-        size_t srcBytes;
+         size_t cSizeChunk;
+         U32 const lastPartition = (i == numSplits);
+         U32 lastBlockEntireSrc = 0;
+ 
+-        srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
++        size_t srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
+         srcBytesTotal += srcBytes;
+         if (lastPartition) {
+             /* This is the final partition, need to account for possible last literals */
+@@ -3621,7 +4178,8 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapac
+                                                        op, dstCapacity,
+                                                        ip, srcBytes,
+                                                        lastBlockEntireSrc, 1 /* isPartition */);
+-        DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
++        DEBUGLOG(5, "Estimated size: %zu vs %zu : actual size",
++                    ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
+         FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
+ 
+         ip += srcBytes;
+@@ -3629,10 +4187,10 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapac
+         dstCapacity -= cSizeChunk;
+         cSize += cSizeChunk;
+         *currSeqStore = *nextSeqStore;
+-        assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
++        assert(cSizeChunk <= zc->blockSize + ZSTD_blockHeaderSize);
+     }
+-    /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes
+-     * for the next block.
++    /* cRep and dRep may have diverged during the compression.
++     * If so, we use the dRep repcodes for the next block.
+      */
+     ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
+     return cSize;
+@@ -3643,8 +4201,6 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize, U32 lastBlock)
+ {
+-    const BYTE* ip = (const BYTE*)src;
+-    BYTE* op = (BYTE*)dst;
+     U32 nbSeq;
+     size_t cSize;
+     DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
+@@ -3655,7 +4211,8 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
+         if (bss == ZSTDbss_noCompress) {
+             if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+                 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+-            cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
++            RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block");
++            cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
+             FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+             DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
+             return cSize;
+@@ -3673,9 +4230,9 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
+                             void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize, U32 frame)
+ {
+-    /* This the upper bound for the length of an rle block.
+-     * This isn't the actual upper bound. Finding the real threshold
+-     * needs further investigation.
++    /* This is an estimated upper bound for the length of an rle block.
++     * This isn't the actual upper bound.
++     * Finding the real threshold needs further investigation.
+      */
+     const U32 rleMaxLength = 25;
+     size_t cSize;
+@@ -3687,11 +4244,15 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
+ 
+     {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+         FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+-        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
++        if (bss == ZSTDbss_noCompress) {
++            RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block");
++            cSize = 0;
++            goto out;
++        }
+     }
+ 
+     if (zc->seqCollector.collectSequences) {
+-        ZSTD_copyBlockSequences(zc);
++        FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, ZSTD_getSeqStore(zc), zc->blockState.prevCBlock->rep), "copyBlockSequences failed");
+         ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
+         return 0;
+     }
+@@ -3767,10 +4328,11 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
+          *   * cSize >= blockBound(srcSize): We have expanded the block too much so
+          *     emit an uncompressed block.
+          */
+-        {
+-            size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
++        {   size_t const cSize =
++                ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
+             if (cSize != ERROR(dstSize_tooSmall)) {
+-                size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
++                size_t const maxCSize =
++                    srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
+                 FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
+                 if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
+                     ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
+@@ -3778,7 +4340,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
+                 }
+             }
+         }
+-    }
++    } /* if (bss == ZSTDbss_compress)*/
+ 
+     DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
+     /* Superblock compression failed, attempt to emit a single no compress block.
+@@ -3836,7 +4398,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
+ *   All blocks will be terminated, all input will be consumed.
+ *   Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
+ *   Frame is supposed already started (header already produced)
+-*   @return : compressed size, or an error code
++*  @return : compressed size, or an error code
+ */
+ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
+                                      void* dst, size_t dstCapacity,
+@@ -3860,7 +4422,9 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
+         ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
+         U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
+ 
+-        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
++        /* TODO: See 3090. MIN_CBLOCK_SIZE was reduced from 3 to 2, so we add 1 here to
++         * compensate. This logic needs to be revisited and made more consistent. */
++        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE + 1,
+                         dstSize_tooSmall,
+                         "not enough space to store compressed block");
+         if (remaining < blockSize) blockSize = remaining;
+@@ -3899,7 +4463,7 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
+                     MEM_writeLE24(op, cBlockHeader);
+                     cSize += ZSTD_blockHeaderSize;
+                 }
+-            }
++            }  /* if (ZSTD_useTargetCBlockSize(&cctx->appliedParams))*/
+ 
+ 
+             ip += blockSize;
+@@ -4001,19 +4565,15 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
+     }
+ }
+ 
+-size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
++void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
+ {
+-    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
+-                    "wrong cctx stage");
+-    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable,
+-                    parameter_unsupported,
+-                    "incompatible with ldm");
++    assert(cctx->stage == ZSTDcs_init);
++    assert(nbSeq == 0 || cctx->appliedParams.ldmParams.enableLdm != ZSTD_ps_enable);
+     cctx->externSeqStore.seq = seq;
+     cctx->externSeqStore.size = nbSeq;
+     cctx->externSeqStore.capacity = nbSeq;
+     cctx->externSeqStore.pos = 0;
+     cctx->externSeqStore.posInSequence = 0;
+-    return 0;
+ }
+ 
+ 
+@@ -4078,31 +4638,51 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
+     }
+ }
+ 
+-size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
+-                              void* dst, size_t dstCapacity,
+-                        const void* src, size_t srcSize)
++size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
++                                        void* dst, size_t dstCapacity,
++                                  const void* src, size_t srcSize)
+ {
+     DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
+     return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
+ }
+ 
++/* NOTE: Must just wrap ZSTD_compressContinue_public() */
++size_t ZSTD_compressContinue(ZSTD_CCtx* cctx,
++                             void* dst, size_t dstCapacity,
++                       const void* src, size_t srcSize)
++{
++    return ZSTD_compressContinue_public(cctx, dst, dstCapacity, src, srcSize);
++}
+ 
+-size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
++static size_t ZSTD_getBlockSize_deprecated(const ZSTD_CCtx* cctx)
+ {
+     ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
+     assert(!ZSTD_checkCParams(cParams));
+-    return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
++    return MIN(cctx->appliedParams.maxBlockSize, (size_t)1 << cParams.windowLog);
+ }
+ 
+-size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
++/* NOTE: Must just wrap ZSTD_getBlockSize_deprecated() */
++size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
++{
++    return ZSTD_getBlockSize_deprecated(cctx);
++}
++
++/* NOTE: ZSTD_compressBlock() below must just wrap this function */
++size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+ {
+     DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
+-    { size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
++    { size_t const blockSizeMax = ZSTD_getBlockSize_deprecated(cctx);
+       RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); }
+ 
+     return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
+ }
+ 
++/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */
++size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
++{
++    return ZSTD_compressBlock_deprecated(cctx, dst, dstCapacity, src, srcSize);
++}
++
+ /*! ZSTD_loadDictionaryContent() :
+  *  @return : 0, or an error code
+  */
+@@ -4111,25 +4691,36 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
+                                          ZSTD_cwksp* ws,
+                                          ZSTD_CCtx_params const* params,
+                                          const void* src, size_t srcSize,
+-                                         ZSTD_dictTableLoadMethod_e dtlm)
++                                         ZSTD_dictTableLoadMethod_e dtlm,
++                                         ZSTD_tableFillPurpose_e tfp)
+ {
+     const BYTE* ip = (const BYTE*) src;
+     const BYTE* const iend = ip + srcSize;
+     int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL;
+ 
+-    /* Assert that we the ms params match the params we're being given */
++    /* Assert that the ms params match the params we're being given */
+     ZSTD_assertEqualCParams(params->cParams, ms->cParams);
+ 
+-    if (srcSize > ZSTD_CHUNKSIZE_MAX) {
++    {   /* Ensure large dictionaries can't cause index overflow */
++
+         /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
+          * Dictionaries right at the edge will immediately trigger overflow
+          * correction, but I don't want to insert extra constraints here.
+          */
+-        U32 const maxDictSize = ZSTD_CURRENT_MAX - 1;
+-        /* We must have cleared our windows when our source is this large. */
+-        assert(ZSTD_window_isEmpty(ms->window));
+-        if (loadLdmDict)
+-            assert(ZSTD_window_isEmpty(ls->window));
++        U32 maxDictSize = ZSTD_CURRENT_MAX - ZSTD_WINDOW_START_INDEX;
++
++        int const CDictTaggedIndices = ZSTD_CDictIndicesAreTagged(&params->cParams);
++        if (CDictTaggedIndices && tfp == ZSTD_tfp_forCDict) {
++            /* Some dictionary matchfinders in zstd use "short cache",
++             * which treats the lower ZSTD_SHORT_CACHE_TAG_BITS of each
++             * CDict hashtable entry as a tag rather than as part of an index.
++             * When short cache is used, we need to truncate the dictionary
++             * so that its indices don't overlap with the tag. */
++            U32 const shortCacheMaxDictSize = (1u << (32 - ZSTD_SHORT_CACHE_TAG_BITS)) - ZSTD_WINDOW_START_INDEX;
++            maxDictSize = MIN(maxDictSize, shortCacheMaxDictSize);
++            assert(!loadLdmDict);
++        }
++
+         /* If the dictionary is too large, only load the suffix of the dictionary. */
+         if (srcSize > maxDictSize) {
+             ip = iend - maxDictSize;
+@@ -4138,35 +4729,58 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
+         }
+     }
+ 
+-    DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
++    if (srcSize > ZSTD_CHUNKSIZE_MAX) {
++        /* We must have cleared our windows when our source is this large. */
++        assert(ZSTD_window_isEmpty(ms->window));
++        if (loadLdmDict) assert(ZSTD_window_isEmpty(ls->window));
++    }
+     ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
+-    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
+-    ms->forceNonContiguous = params->deterministicRefPrefix;
+ 
+-    if (loadLdmDict) {
++    DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
++
++    if (loadLdmDict) { /* Load the entire dict into LDM matchfinders. */
+         ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
+         ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
++        ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
+     }
+ 
++    /* If the dict is larger than we can reasonably index in our tables, only load the suffix. */
++    if (params->cParams.strategy < ZSTD_btultra) {
++        U32 maxDictSize = 8U << MIN(MAX(params->cParams.hashLog, params->cParams.chainLog), 28);
++        if (srcSize > maxDictSize) {
++            ip = iend - maxDictSize;
++            src = ip;
++            srcSize = maxDictSize;
++        }
++    }
++
++    ms->nextToUpdate = (U32)(ip - ms->window.base);
++    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
++    ms->forceNonContiguous = params->deterministicRefPrefix;
++
+     if (srcSize <= HASH_READ_SIZE) return 0;
+ 
+     ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);
+ 
+-    if (loadLdmDict)
+-        ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
+-
+     switch(params->cParams.strategy)
+     {
+     case ZSTD_fast:
+-        ZSTD_fillHashTable(ms, iend, dtlm);
++        ZSTD_fillHashTable(ms, iend, dtlm, tfp);
+         break;
+     case ZSTD_dfast:
+-        ZSTD_fillDoubleHashTable(ms, iend, dtlm);
++#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
++        ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp);
++#else
++        assert(0); /* shouldn't be called: cparams should've been adjusted. */
++#endif
+         break;
+ 
+     case ZSTD_greedy:
+     case ZSTD_lazy:
+     case ZSTD_lazy2:
++#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR)
+         assert(srcSize >= HASH_READ_SIZE);
+         if (ms->dedicatedDictSearch) {
+             assert(ms->chainTable != NULL);
+@@ -4174,7 +4788,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
+         } else {
+             assert(params->useRowMatchFinder != ZSTD_ps_auto);
+             if (params->useRowMatchFinder == ZSTD_ps_enable) {
+-                size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
++                size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog);
+                 ZSTD_memset(ms->tagTable, 0, tagTableSize);
+                 ZSTD_row_update(ms, iend-HASH_READ_SIZE);
+                 DEBUGLOG(4, "Using row-based hash table for lazy dict");
+@@ -4183,14 +4797,23 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
+                 DEBUGLOG(4, "Using chain-based hash table for lazy dict");
+             }
+         }
++#else
++        assert(0); /* shouldn't be called: cparams should've been adjusted. */
++#endif
+         break;
+ 
+     case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+     case ZSTD_btopt:
+     case ZSTD_btultra:
+     case ZSTD_btultra2:
++#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
+         assert(srcSize >= HASH_READ_SIZE);
+         ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
++#else
++        assert(0); /* shouldn't be called: cparams should've been adjusted. */
++#endif
+         break;
+ 
+     default:
+@@ -4237,11 +4860,10 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+ 
+         /* We only set the loaded table as valid if it contains all non-zero
+          * weights. Otherwise, we set it to check */
+-        if (!hasZeroWeights)
++        if (!hasZeroWeights && maxSymbolValue == 255)
+             bs->entropy.huf.repeatMode = HUF_repeat_valid;
+ 
+         RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
+-        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
+         dictPtr += hufHeaderSize;
+     }
+ 
+@@ -4327,6 +4949,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
+                                       ZSTD_CCtx_params const* params,
+                                       const void* dict, size_t dictSize,
+                                       ZSTD_dictTableLoadMethod_e dtlm,
++                                      ZSTD_tableFillPurpose_e tfp,
+                                       void* workspace)
+ {
+     const BYTE* dictPtr = (const BYTE*)dict;
+@@ -4345,7 +4968,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
+     {
+         size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+         FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
+-            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
++            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm, tfp), "");
+     }
+     return dictID;
+ }
+@@ -4361,6 +4984,7 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
+                          const void* dict, size_t dictSize,
+                                ZSTD_dictContentType_e dictContentType,
+                                ZSTD_dictTableLoadMethod_e dtlm,
++                               ZSTD_tableFillPurpose_e tfp,
+                                void* workspace)
+ {
+     DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
+@@ -4373,13 +4997,13 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
+ 
+     /* dict restricted modes */
+     if (dictContentType == ZSTD_dct_rawContent)
+-        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
++        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm, tfp);
+ 
+     if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
+         if (dictContentType == ZSTD_dct_auto) {
+             DEBUGLOG(4, "raw content dictionary detected");
+             return ZSTD_loadDictionaryContent(
+-                ms, ls, ws, params, dict, dictSize, dtlm);
++                ms, ls, ws, params, dict, dictSize, dtlm, tfp);
+         }
+         RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
+         assert(0);   /* impossible */
+@@ -4387,13 +5011,14 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
+ 
+     /* dict as full zstd dictionary */
+     return ZSTD_loadZstdDictionary(
+-        bs, ms, ws, params, dict, dictSize, dtlm, workspace);
++        bs, ms, ws, params, dict, dictSize, dtlm, tfp, workspace);
+ }
+ 
+ #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
+ #define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)
+ 
+ /*! ZSTD_compressBegin_internal() :
++ * Assumption : either @dict OR @cdict (or none) is non-NULL, never both
+  * @return : 0, or an error code */
+ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
+                                     const void* dict, size_t dictSize,
+@@ -4426,11 +5051,11 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
+                         cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+                         &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
+                         cdict->dictContentSize, cdict->dictContentType, dtlm,
+-                        cctx->entropyWorkspace)
++                        ZSTD_tfp_forCCtx, cctx->entropyWorkspace)
+               : ZSTD_compress_insertDictionary(
+                         cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+                         &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
+-                        dictContentType, dtlm, cctx->entropyWorkspace);
++                        dictContentType, dtlm, ZSTD_tfp_forCCtx, cctx->entropyWorkspace);
+         FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
+         assert(dictID <= UINT_MAX);
+         cctx->dictID = (U32)dictID;
+@@ -4471,11 +5096,11 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
+                                             &cctxParams, pledgedSrcSize);
+ }
+ 
+-size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
++static size_t
++ZSTD_compressBegin_usingDict_deprecated(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
+ {
+     ZSTD_CCtx_params cctxParams;
+-    {
+-        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
++    {   ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
+         ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
+     }
+     DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
+@@ -4483,9 +5108,15 @@ size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t di
+                                        &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
+ }
+ 
++size_t
++ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
++{
++    return ZSTD_compressBegin_usingDict_deprecated(cctx, dict, dictSize, compressionLevel);
++}
++
+ size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
+ {
+-    return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
++    return ZSTD_compressBegin_usingDict_deprecated(cctx, NULL, 0, compressionLevel);
+ }
+ 
+ 
+@@ -4496,14 +5127,13 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
+ {
+     BYTE* const ostart = (BYTE*)dst;
+     BYTE* op = ostart;
+-    size_t fhSize = 0;
+ 
+     DEBUGLOG(4, "ZSTD_writeEpilogue");
+     RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
+ 
+     /* special case : empty frame */
+     if (cctx->stage == ZSTDcs_init) {
+-        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
++        size_t fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
+         FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
+         dstCapacity -= fhSize;
+         op += fhSize;
+@@ -4513,8 +5143,9 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
+     if (cctx->stage != ZSTDcs_ending) {
+         /* write one last empty block, make it the "last" block */
+         U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
+-        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
+-        MEM_writeLE32(op, cBlockHeader24);
++        ZSTD_STATIC_ASSERT(ZSTD_BLOCKHEADERSIZE == 3);
++        RETURN_ERROR_IF(dstCapacity<3, dstSize_tooSmall, "no room for epilogue");
++        MEM_writeLE24(op, cBlockHeader24);
+         op += ZSTD_blockHeaderSize;
+         dstCapacity -= ZSTD_blockHeaderSize;
+     }
+@@ -4537,9 +5168,9 @@ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
+     (void)extraCSize;
+ }
+ 
+-size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
+-                         void* dst, size_t dstCapacity,
+-                   const void* src, size_t srcSize)
++size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
++                               void* dst, size_t dstCapacity,
++                         const void* src, size_t srcSize)
+ {
+     size_t endResult;
+     size_t const cSize = ZSTD_compressContinue_internal(cctx,
+@@ -4563,6 +5194,14 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
+     return cSize + endResult;
+ }
+ 
++/* NOTE: Must just wrap ZSTD_compressEnd_public() */
++size_t ZSTD_compressEnd(ZSTD_CCtx* cctx,
++                        void* dst, size_t dstCapacity,
++                  const void* src, size_t srcSize)
++{
++    return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
++}
++
+ size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                          const void* src, size_t srcSize,
+@@ -4591,7 +5230,7 @@ size_t ZSTD_compress_advanced_internal(
+     FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
+                          dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
+                          params, srcSize, ZSTDb_not_buffered) , "");
+-    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
++    return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
+ }
+ 
+ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
+@@ -4709,7 +5348,7 @@ static size_t ZSTD_initCDict_internal(
+         {   size_t const dictID = ZSTD_compress_insertDictionary(
+                     &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
+                     &params, cdict->dictContent, cdict->dictContentSize,
+-                    dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
++                    dictContentType, ZSTD_dtlm_full, ZSTD_tfp_forCDict, cdict->entropyWorkspace);
+             FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
+             assert(dictID <= (size_t)(U32)-1);
+             cdict->dictID = (U32)dictID;
+@@ -4811,7 +5450,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced2(
+                         cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
+                         customMem);
+ 
+-    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
++    if (!cdict || ZSTD_isError( ZSTD_initCDict_internal(cdict,
+                                     dict, dictSize,
+                                     dictLoadMethod, dictContentType,
+                                     cctxParams) )) {
+@@ -4906,6 +5545,7 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
+     params.cParams = cParams;
+     params.useRowMatchFinder = useRowMatchFinder;
+     cdict->useRowMatchFinder = useRowMatchFinder;
++    cdict->compressionLevel = ZSTD_NO_CLEVEL;
+ 
+     if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
+                                               dict, dictSize,
+@@ -4985,12 +5625,17 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
+ 
+ /* ZSTD_compressBegin_usingCDict() :
+  * cdict must be != NULL */
+-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
++size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+ {
+     ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+     return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
+ }
+ 
++size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
++{
++    return ZSTD_compressBegin_usingCDict_deprecated(cctx, cdict);
++}
++
+ /*! ZSTD_compress_usingCDict_internal():
+  * Implementation of various ZSTD_compress_usingCDict* functions.
+  */
+@@ -5000,7 +5645,7 @@ static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,
+                                 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
+ {
+     FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
+-    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
++    return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
+ }
+ 
+ /*! ZSTD_compress_usingCDict_advanced():
+@@ -5197,30 +5842,41 @@ size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
+ 
+ static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
+ {
+-    size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
+-    if (hintInSize==0) hintInSize = cctx->blockSize;
+-    return hintInSize;
++    if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
++        return cctx->blockSize - cctx->stableIn_notConsumed;
++    }
++    assert(cctx->appliedParams.inBufferMode == ZSTD_bm_buffered);
++    {   size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
++        if (hintInSize==0) hintInSize = cctx->blockSize;
++        return hintInSize;
++    }
+ }
+ 
+ /* ZSTD_compressStream_generic():
+  *  internal function for all *compressStream*() variants
+- *  non-static, because can be called from zstdmt_compress.c
+- * @return : hint size for next input */
++ * @return : hint size for next input to complete ongoing block */
+ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
+                                           ZSTD_outBuffer* output,
+                                           ZSTD_inBuffer* input,
+                                           ZSTD_EndDirective const flushMode)
+ {
+-    const char* const istart = (const char*)input->src;
+-    const char* const iend = input->size != 0 ? istart + input->size : istart;
+-    const char* ip = input->pos != 0 ? istart + input->pos : istart;
+-    char* const ostart = (char*)output->dst;
+-    char* const oend = output->size != 0 ? ostart + output->size : ostart;
+-    char* op = output->pos != 0 ? ostart + output->pos : ostart;
++    const char* const istart = (assert(input != NULL), (const char*)input->src);
++    const char* const iend = (istart != NULL) ? istart + input->size : istart;
++    const char* ip = (istart != NULL) ? istart + input->pos : istart;
++    char* const ostart = (assert(output != NULL), (char*)output->dst);
++    char* const oend = (ostart != NULL) ? ostart + output->size : ostart;
++    char* op = (ostart != NULL) ? ostart + output->pos : ostart;
+     U32 someMoreWork = 1;
+ 
+     /* check expectations */
+-    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
++    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%i, srcSize = %zu", (int)flushMode, input->size - input->pos);
++    assert(zcs != NULL);
++    if (zcs->appliedParams.inBufferMode == ZSTD_bm_stable) {
++        assert(input->pos >= zcs->stableIn_notConsumed);
++        input->pos -= zcs->stableIn_notConsumed;
++        if (ip) ip -= zcs->stableIn_notConsumed;
++        zcs->stableIn_notConsumed = 0;
++    }
+     if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
+         assert(zcs->inBuff != NULL);
+         assert(zcs->inBuffSize > 0);
+@@ -5229,8 +5885,10 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
+         assert(zcs->outBuff !=  NULL);
+         assert(zcs->outBuffSize > 0);
+     }
+-    assert(output->pos <= output->size);
++    if (input->src == NULL) assert(input->size == 0);
+     assert(input->pos <= input->size);
++    if (output->dst == NULL) assert(output->size == 0);
++    assert(output->pos <= output->size);
+     assert((U32)flushMode <= (U32)ZSTD_e_end);
+ 
+     while (someMoreWork) {
+@@ -5245,7 +5903,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
+                 || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)  /* OR we are allowed to return dstSizeTooSmall */
+               && (zcs->inBuffPos == 0) ) {
+                 /* shortcut to compression pass directly into output buffer */
+-                size_t const cSize = ZSTD_compressEnd(zcs,
++                size_t const cSize = ZSTD_compressEnd_public(zcs,
+                                                 op, oend-op, ip, iend-ip);
+                 DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
+                 FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
+@@ -5262,8 +5920,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
+                                         zcs->inBuff + zcs->inBuffPos, toLoad,
+                                         ip, iend-ip);
+                 zcs->inBuffPos += loaded;
+-                if (loaded != 0)
+-                    ip += loaded;
++                if (ip) ip += loaded;
+                 if ( (flushMode == ZSTD_e_continue)
+                   && (zcs->inBuffPos < zcs->inBuffTarget) ) {
+                     /* not enough input to fill full block : stop here */
+@@ -5274,6 +5931,20 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
+                     /* empty */
+                     someMoreWork = 0; break;
+                 }
++            } else {
++                assert(zcs->appliedParams.inBufferMode == ZSTD_bm_stable);
++                if ( (flushMode == ZSTD_e_continue)
++                  && ( (size_t)(iend - ip) < zcs->blockSize) ) {
++                    /* can't compress a full block : stop here */
++                    zcs->stableIn_notConsumed = (size_t)(iend - ip);
++                    ip = iend;  /* pretend to have consumed input */
++                    someMoreWork = 0; break;
++                }
++                if ( (flushMode == ZSTD_e_flush)
++                  && (ip == iend) ) {
++                    /* empty */
++                    someMoreWork = 0; break;
++                }
+             }
+             /* compress current block (note : this stage cannot be stopped in the middle) */
+             DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
+@@ -5281,9 +5952,8 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
+                 void* cDst;
+                 size_t cSize;
+                 size_t oSize = oend-op;
+-                size_t const iSize = inputBuffered
+-                    ? zcs->inBuffPos - zcs->inToCompress
+-                    : MIN((size_t)(iend - ip), zcs->blockSize);
++                size_t const iSize = inputBuffered ? zcs->inBuffPos - zcs->inToCompress
++                                                   : MIN((size_t)(iend - ip), zcs->blockSize);
+                 if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
+                     cDst = op;   /* compress into output buffer, to skip flush stage */
+                 else
+@@ -5291,9 +5961,9 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
+                 if (inputBuffered) {
+                     unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
+                     cSize = lastBlock ?
+-                            ZSTD_compressEnd(zcs, cDst, oSize,
++                            ZSTD_compressEnd_public(zcs, cDst, oSize,
+                                         zcs->inBuff + zcs->inToCompress, iSize) :
+-                            ZSTD_compressContinue(zcs, cDst, oSize,
++                            ZSTD_compressContinue_public(zcs, cDst, oSize,
+                                         zcs->inBuff + zcs->inToCompress, iSize);
+                     FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
+                     zcs->frameEnded = lastBlock;
+@@ -5306,19 +5976,16 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
+                     if (!lastBlock)
+                         assert(zcs->inBuffTarget <= zcs->inBuffSize);
+                     zcs->inToCompress = zcs->inBuffPos;
+-                } else {
+-                    unsigned const lastBlock = (ip + iSize == iend);
+-                    assert(flushMode == ZSTD_e_end /* Already validated */);
++                } else { /* !inputBuffered, hence ZSTD_bm_stable */
++                    unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip + iSize == iend);
+                     cSize = lastBlock ?
+-                            ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) :
+-                            ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize);
++                            ZSTD_compressEnd_public(zcs, cDst, oSize, ip, iSize) :
++                            ZSTD_compressContinue_public(zcs, cDst, oSize, ip, iSize);
+                     /* Consume the input prior to error checking to mirror buffered mode. */
+-                    if (iSize > 0)
+-                        ip += iSize;
++                    if (ip) ip += iSize;
+                     FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
+                     zcs->frameEnded = lastBlock;
+-                    if (lastBlock)
+-                        assert(ip == iend);
++                    if (lastBlock) assert(ip == iend);
+                 }
+                 if (cDst == op) {  /* no need to flush */
+                     op += cSize;
+@@ -5388,8 +6055,10 @@ size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuf
+ /* After a compression call set the expected input/output buffer.
+  * This is validated at the start of the next compression call.
+  */
+-static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input)
++static void
++ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, const ZSTD_outBuffer* output, const ZSTD_inBuffer* input)
+ {
++    DEBUGLOG(5, "ZSTD_setBufferExpectations (for advanced stable in/out modes)");
+     if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
+         cctx->expectedInBuffer = *input;
+     }
+@@ -5408,22 +6077,22 @@ static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,
+ {
+     if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
+         ZSTD_inBuffer const expect = cctx->expectedInBuffer;
+-        if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size)
+-            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!");
+-        if (endOp != ZSTD_e_end)
+-            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
++        if (expect.src != input->src || expect.pos != input->pos)
++            RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableInBuffer enabled but input differs!");
+     }
++    (void)endOp;
+     if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
+         size_t const outBufferSize = output->size - output->pos;
+         if (cctx->expectedOutBufferSize != outBufferSize)
+-            RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!");
++            RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableOutBuffer enabled but output size differs!");
+     }
+     return 0;
+ }
+ 
+ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
+                                              ZSTD_EndDirective endOp,
+-                                             size_t inSize) {
++                                             size_t inSize)
++{
+     ZSTD_CCtx_params params = cctx->requestedParams;
+     ZSTD_prefixDict const prefixDict = cctx->prefixDict;
+     FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
+@@ -5437,9 +6106,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
+         params.compressionLevel = cctx->cdict->compressionLevel;
+     }
+     DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
+-    if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-fix pledgedSrcSize */
+-    {
+-        size_t const dictSize = prefixDict.dict
++    if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-determine pledgedSrcSize */
++
++    {   size_t const dictSize = prefixDict.dict
+                 ? prefixDict.dictSize
+                 : (cctx->cdict ? cctx->cdict->dictContentSize : 0);
+         ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
+@@ -5451,6 +6120,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
+     params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, &params.cParams);
+     params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, &params.cParams);
+     params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);
++    params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences);
++    params.maxBlockSize = ZSTD_resolveMaxBlockSize(params.maxBlockSize);
++    params.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(params.searchForExternalRepcodes, params.compressionLevel);
+ 
+     {   U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
+         assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+@@ -5477,6 +6149,8 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
+     return 0;
+ }
+ 
++/* @return provides a minimum amount of data remaining to be flushed from internal buffers
++ */
+ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
+                              ZSTD_outBuffer* output,
+                              ZSTD_inBuffer* input,
+@@ -5491,8 +6165,27 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
+ 
+     /* transparent initialization stage */
+     if (cctx->streamStage == zcss_init) {
+-        FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed");
+-        ZSTD_setBufferExpectations(cctx, output, input);    /* Set initial buffer expectations now that we've initialized */
++        size_t const inputSize = input->size - input->pos;  /* no obligation to start from pos==0 */
++        size_t const totalInputSize = inputSize + cctx->stableIn_notConsumed;
++        if ( (cctx->requestedParams.inBufferMode == ZSTD_bm_stable) /* input is presumed stable, across invocations */
++          && (endOp == ZSTD_e_continue)                             /* no flush requested, more input to come */
++          && (totalInputSize < ZSTD_BLOCKSIZE_MAX) ) {              /* not even reached one block yet */
++            if (cctx->stableIn_notConsumed) {  /* not the first time */
++                /* check stable source guarantees */
++                RETURN_ERROR_IF(input->src != cctx->expectedInBuffer.src, stabilityCondition_notRespected, "stableInBuffer condition not respected: wrong src pointer");
++                RETURN_ERROR_IF(input->pos != cctx->expectedInBuffer.size, stabilityCondition_notRespected, "stableInBuffer condition not respected: externally modified pos");
++            }
++            /* pretend input was consumed, to give a sense of forward progress */
++            input->pos = input->size;
++            /* save stable inBuffer, for later control, and flush/end */
++            cctx->expectedInBuffer = *input;
++            /* but actually input wasn't consumed, so keep track of position from where compression shall resume */
++            cctx->stableIn_notConsumed += inputSize;
++            /* don't initialize yet, wait for the first block or flush() order, for better parameters adaptation */
++            return ZSTD_FRAMEHEADERSIZE_MIN(cctx->requestedParams.format);  /* at least some header to produce */
++        }
++        FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, totalInputSize), "compressStream2 initialization failed");
++        ZSTD_setBufferExpectations(cctx, output, input);   /* Set initial buffer expectations now that we've initialized */
+     }
+     /* end of transparent initialization stage */
+ 
+@@ -5510,13 +6203,20 @@ size_t ZSTD_compressStream2_simpleArgs (
+                       const void* src, size_t srcSize, size_t* srcPos,
+                             ZSTD_EndDirective endOp)
+ {
+-    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
+-    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
++    ZSTD_outBuffer output;
++    ZSTD_inBuffer  input;
++    output.dst = dst;
++    output.size = dstCapacity;
++    output.pos = *dstPos;
++    input.src = src;
++    input.size = srcSize;
++    input.pos = *srcPos;
+     /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
+-    size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
+-    *dstPos = output.pos;
+-    *srcPos = input.pos;
+-    return cErr;
++    {   size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
++        *dstPos = output.pos;
++        *srcPos = input.pos;
++        return cErr;
++    }
+ }
+ 
+ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
+@@ -5539,6 +6239,7 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
+         /* Reset to the original values. */
+         cctx->requestedParams.inBufferMode = originalInBufferMode;
+         cctx->requestedParams.outBufferMode = originalOutBufferMode;
++
+         FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");
+         if (result != 0) {  /* compression not completed, due to lack of output space */
+             assert(oPos == dstCapacity);
+@@ -5549,64 +6250,61 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
+     }
+ }
+ 
+-typedef struct {
+-    U32 idx;             /* Index in array of ZSTD_Sequence */
+-    U32 posInSequence;   /* Position within sequence at idx */
+-    size_t posInSrc;        /* Number of bytes given by sequences provided so far */
+-} ZSTD_sequencePosition;
+-
+ /* ZSTD_validateSequence() :
+  * @offCode : is presumed to follow format required by ZSTD_storeSeq()
+  * @returns a ZSTD error code if sequence is not valid
+  */
+ static size_t
+-ZSTD_validateSequence(U32 offCode, U32 matchLength,
+-                      size_t posInSrc, U32 windowLog, size_t dictSize)
++ZSTD_validateSequence(U32 offCode, U32 matchLength, U32 minMatch,
++                      size_t posInSrc, U32 windowLog, size_t dictSize, int useSequenceProducer)
+ {
+-    U32 const windowSize = 1 << windowLog;
++    U32 const windowSize = 1u << windowLog;
+     /* posInSrc represents the amount of data the decoder would decode up to this point.
+      * As long as the amount of data decoded is less than or equal to window size, offsets may be
+      * larger than the total length of output decoded in order to reference the dict, even larger than
+      * window size. After output surpasses windowSize, we're limited to windowSize offsets again.
+      */
+     size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
+-    RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!");
+-    RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small");
++    size_t const matchLenLowerBound = (minMatch == 3 || useSequenceProducer) ? 3 : 4;
++    RETURN_ERROR_IF(offCode > OFFSET_TO_OFFBASE(offsetBound), externalSequences_invalid, "Offset too large!");
++    /* Reject a matchLength below the lower bound for minMatch (presumably keeps the nb of sequences within maxNbSeq) */
++    RETURN_ERROR_IF(matchLength < matchLenLowerBound, externalSequences_invalid, "Matchlength too small for the minMatch");
+     return 0;
+ }
+ 
+ /* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
+-static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)
++static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)
+ {
+-    U32 offCode = STORE_OFFSET(rawOffset);
++    U32 offBase = OFFSET_TO_OFFBASE(rawOffset);
+ 
+     if (!ll0 && rawOffset == rep[0]) {
+-        offCode = STORE_REPCODE_1;
++        offBase = REPCODE1_TO_OFFBASE;
+     } else if (rawOffset == rep[1]) {
+-        offCode = STORE_REPCODE(2 - ll0);
++        offBase = REPCODE_TO_OFFBASE(2 - ll0);
+     } else if (rawOffset == rep[2]) {
+-        offCode = STORE_REPCODE(3 - ll0);
++        offBase = REPCODE_TO_OFFBASE(3 - ll0);
+     } else if (ll0 && rawOffset == rep[0] - 1) {
+-        offCode = STORE_REPCODE_3;
++        offBase = REPCODE3_TO_OFFBASE;
+     }
+-    return offCode;
++    return offBase;
+ }
+ 
+-/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
+- * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
+- */
+-static size_t
++size_t
+ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
+                                               ZSTD_sequencePosition* seqPos,
+                                         const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+-                                        const void* src, size_t blockSize)
++                                        const void* src, size_t blockSize,
++                                        ZSTD_paramSwitch_e externalRepSearch)
+ {
+     U32 idx = seqPos->idx;
++    U32 const startIdx = idx;
+     BYTE const* ip = (BYTE const*)(src);
+     const BYTE* const iend = ip + blockSize;
+     repcodes_t updatedRepcodes;
+     U32 dictSize;
+ 
++    DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreExplicitBlockDelim (blockSize = %zu)", blockSize);
++
+     if (cctx->cdict) {
+         dictSize = (U32)cctx->cdict->dictContentSize;
+     } else if (cctx->prefixDict.dict) {
+@@ -5615,25 +6313,55 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
+         dictSize = 0;
+     }
+     ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
+-    for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) {
++    for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) {
+         U32 const litLength = inSeqs[idx].litLength;
+-        U32 const ll0 = (litLength == 0);
+         U32 const matchLength = inSeqs[idx].matchLength;
+-        U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
+-        ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
++        U32 offBase;
++
++        if (externalRepSearch == ZSTD_ps_disable) {
++            offBase = OFFSET_TO_OFFBASE(inSeqs[idx].offset);
++        } else {
++            U32 const ll0 = (litLength == 0);
++            offBase = ZSTD_finalizeOffBase(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
++            ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
++        }
+ 
+-        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
++        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
+         if (cctx->appliedParams.validateSequences) {
+             seqPos->posInSrc += litLength + matchLength;
+-            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
+-                                                cctx->appliedParams.cParams.windowLog, dictSize),
++            FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
++                                                cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)),
+                                                 "Sequence validation failed");
+         }
+-        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
++        RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
+                         "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
+-        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
++        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);
+         ip += matchLength + litLength;
+     }
++
++    /* If we skipped repcode search while parsing, we need to update repcodes now */
++    assert(externalRepSearch != ZSTD_ps_auto);
++    assert(idx >= startIdx);
++    if (externalRepSearch == ZSTD_ps_disable && idx != startIdx) {
++        U32* const rep = updatedRepcodes.rep;
++        U32 lastSeqIdx = idx - 1; /* index of last non-block-delimiter sequence */
++
++        if (lastSeqIdx >= startIdx + 2) {
++            rep[2] = inSeqs[lastSeqIdx - 2].offset;
++            rep[1] = inSeqs[lastSeqIdx - 1].offset;
++            rep[0] = inSeqs[lastSeqIdx].offset;
++        } else if (lastSeqIdx == startIdx + 1) {
++            rep[2] = rep[0];
++            rep[1] = inSeqs[lastSeqIdx - 1].offset;
++            rep[0] = inSeqs[lastSeqIdx].offset;
++        } else {
++            assert(lastSeqIdx == startIdx);
++            rep[2] = rep[1];
++            rep[1] = rep[0];
++            rep[0] = inSeqs[lastSeqIdx].offset;
++        }
++    }
++
+     ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
+ 
+     if (inSeqs[idx].litLength) {
+@@ -5642,26 +6370,15 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
+         ip += inSeqs[idx].litLength;
+         seqPos->posInSrc += inSeqs[idx].litLength;
+     }
+-    RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!");
++    RETURN_ERROR_IF(ip != iend, externalSequences_invalid, "Blocksize doesn't agree with block delimiter!");
+     seqPos->idx = idx+1;
+     return 0;
+ }
+ 
+-/* Returns the number of bytes to move the current read position back by. Only non-zero
+- * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something
+- * went wrong.
+- *
+- * This function will attempt to scan through blockSize bytes represented by the sequences
+- * in inSeqs, storing any (partial) sequences.
+- *
+- * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
+- * avoid splitting a match, or to avoid splitting a match such that it would produce a match
+- * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
+- */
+-static size_t
++size_t
+ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+                                    const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+-                                   const void* src, size_t blockSize)
++                                   const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch)
+ {
+     U32 idx = seqPos->idx;
+     U32 startPosInSequence = seqPos->posInSequence;
+@@ -5673,6 +6390,9 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
+     U32 bytesAdjustment = 0;
+     U32 finalMatchSplit = 0;
+ 
++    /* TODO(embg) support fast parsing mode in noBlockDelim mode */
++    (void)externalRepSearch;
++
+     if (cctx->cdict) {
+         dictSize = cctx->cdict->dictContentSize;
+     } else if (cctx->prefixDict.dict) {
+@@ -5680,7 +6400,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
+     } else {
+         dictSize = 0;
+     }
+-    DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
++    DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreNoBlockDelim: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
+     DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
+     ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
+     while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
+@@ -5688,7 +6408,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
+         U32 litLength = currSeq.litLength;
+         U32 matchLength = currSeq.matchLength;
+         U32 const rawOffset = currSeq.offset;
+-        U32 offCode;
++        U32 offBase;
+ 
+         /* Modify the sequence depending on where endPosInSequence lies */
+         if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
+@@ -5702,7 +6422,6 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
+             /* Move to the next sequence */
+             endPosInSequence -= currSeq.litLength + currSeq.matchLength;
+             startPosInSequence = 0;
+-            idx++;
+         } else {
+             /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
+                does not reach the end of the match. So, we have to split the sequence */
+@@ -5742,21 +6461,23 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
+         }
+         /* Check if this offset can be represented with a repcode */
+         {   U32 const ll0 = (litLength == 0);
+-            offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
+-            ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
++            offBase = ZSTD_finalizeOffBase(rawOffset, updatedRepcodes.rep, ll0);
++            ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
+         }
+ 
+         if (cctx->appliedParams.validateSequences) {
+             seqPos->posInSrc += litLength + matchLength;
+-            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
+-                                                   cctx->appliedParams.cParams.windowLog, dictSize),
++            FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
++                                                   cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)),
+                                                    "Sequence validation failed");
+         }
+-        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
+-        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
++        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
++        RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
+                         "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
+-        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
++        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);
+         ip += matchLength + litLength;
++        if (!finalMatchSplit)
++            idx++; /* Next Sequence */
+     }
+     DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
+     assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
+@@ -5779,7 +6500,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
+ 
+ typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+                                        const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+-                                       const void* src, size_t blockSize);
++                                       const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
+ static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
+ {
+     ZSTD_sequenceCopier sequenceCopier = NULL;
+@@ -5793,6 +6514,57 @@ static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
+     return sequenceCopier;
+ }
+ 
++/* Discover the size of the next block by scanning from seqPos.idx for the delimiter
++ * (a sequence with offset == 0 and matchLength == 0).
++ * A block delimiter **must** exist in this mode, otherwise it's an input error.
++ * @return : the block size, or an error code. The size is later compared to ensure it remains within bounds */
++static size_t
++blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos)
++{
++    int end = 0;
++    size_t blockSize = 0;
++    size_t spos = seqPos.idx;
++    DEBUGLOG(6, "blockSize_explicitDelimiter : seq %zu / %zu", spos, inSeqsSize);
++    assert(spos <= inSeqsSize);
++    while (spos < inSeqsSize) {
++        end = (inSeqs[spos].offset == 0);  /* offset == 0 marks a block delimiter */
++        blockSize += inSeqs[spos].litLength + inSeqs[spos].matchLength;  /* added before the delimiter test : a delimiter's litLength counts too */
++        if (end) {
++            if (inSeqs[spos].matchLength != 0)
++                RETURN_ERROR(externalSequences_invalid, "delimiter format error : both matchlength and offset must be == 0");
++            break;
++        }
++        spos++;
++    }
++    if (!end)  /* also the case when spos == inSeqsSize on entry (loop never runs) */
++        RETURN_ERROR(externalSequences_invalid, "Reached end of sequences without finding a block delimiter");
++    return blockSize;
++}
++
++/* More a "target" block size : min(blockSize, remaining) */
++static size_t blockSize_noDelimiter(size_t blockSize, size_t remaining)
++{
++    int const lastBlock = (remaining <= blockSize);
++    return lastBlock ? remaining : blockSize;
++}
++
++static size_t determine_blockSize(ZSTD_sequenceFormat_e mode,
++                           size_t blockSize, size_t remaining,
++                     const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos)
++{   /* @return : size of the next block to process, or an error code */
++    DEBUGLOG(6, "determine_blockSize : remainingSize = %zu", remaining);
++    if (mode == ZSTD_sf_noBlockDelimiters)  /* no delimiters : size depends only on blockSize and remaining */
++        return blockSize_noDelimiter(blockSize, remaining);
++    {   size_t const explicitBlockSize = blockSize_explicitDelimiter(inSeqs, inSeqsSize, seqPos);  /* any other mode : size is dictated by the sequences */
++        FORWARD_IF_ERROR(explicitBlockSize, "Error while determining block size with explicit delimiters");
++        if (explicitBlockSize > blockSize)  /* blockSize acts as the upper limit here */
++            RETURN_ERROR(externalSequences_invalid, "sequences incorrectly define a too large block");
++        if (explicitBlockSize > remaining)  /* sequences describe more data than is left in the source */
++            RETURN_ERROR(externalSequences_invalid, "sequences define a frame longer than source");
++        return explicitBlockSize;
++    }
++}
++
+ /* Compress, block-by-block, all of the sequences given.
+  *
+  * Returns the cumulative size of all compressed blocks (including their headers),
+@@ -5805,9 +6577,6 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
+                           const void* src, size_t srcSize)
+ {
+     size_t cSize = 0;
+-    U32 lastBlock;
+-    size_t blockSize;
+-    size_t compressedSeqsSize;
+     size_t remaining = srcSize;
+     ZSTD_sequencePosition seqPos = {0, 0, 0};
+ 
+@@ -5827,22 +6596,29 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
+     }
+ 
+     while (remaining) {
++        size_t compressedSeqsSize;
+         size_t cBlockSize;
+         size_t additionalByteAdjustment;
+-        lastBlock = remaining <= cctx->blockSize;
+-        blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
++        size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters,
++                                        cctx->blockSize, remaining,
++                                        inSeqs, inSeqsSize, seqPos);
++        U32 const lastBlock = (blockSize == remaining);
++        FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
++        assert(blockSize <= remaining);
+         ZSTD_resetSeqStore(&cctx->seqStore);
+-        DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize);
++        DEBUGLOG(5, "Working on new block. Blocksize: %zu (total:%zu)", blockSize, (ip - (const BYTE*)src) + blockSize);
+ 
+-        additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
++        additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize, cctx->appliedParams.searchForExternalRepcodes);
+         FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
+         blockSize -= additionalByteAdjustment;
+ 
+         /* If blocks are too small, emit as a nocompress block */
+-        if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
++        /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding
++         * additional 1. We need to revisit and change this logic to be more consistent */
++        if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) {
+             cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
+             FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
+-            DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
++            DEBUGLOG(5, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
+             cSize += cBlockSize;
+             ip += blockSize;
+             op += cBlockSize;
+@@ -5851,6 +6627,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
+             continue;
+         }
+ 
++        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block");
+         compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,
+                                 &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
+                                 &cctx->appliedParams,
+@@ -5859,11 +6636,11 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
+                                 cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+                                 cctx->bmi2);
+         FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
+-        DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);
++        DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize);
+ 
+         if (!cctx->isFirstBlock &&
+             ZSTD_maybeRLE(&cctx->seqStore) &&
+-            ZSTD_isRLE((BYTE const*)src, srcSize)) {
++            ZSTD_isRLE(ip, blockSize)) {
+             /* We don't want to emit our first block as a RLE even if it qualifies because
+             * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+             * This is only an issue for zstd <= v1.4.3
+@@ -5874,12 +6651,12 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
+         if (compressedSeqsSize == 0) {
+             /* ZSTD_noCompressBlock writes the block header as well */
+             cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
+-            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
+-            DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize);
++            FORWARD_IF_ERROR(cBlockSize, "ZSTD_noCompressBlock failed");
++            DEBUGLOG(5, "Writing out nocompress block, size: %zu", cBlockSize);
+         } else if (compressedSeqsSize == 1) {
+             cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
+-            FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed");
+-            DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize);
++            FORWARD_IF_ERROR(cBlockSize, "ZSTD_rleCompressBlock failed");
++            DEBUGLOG(5, "Writing out RLE block, size: %zu", cBlockSize);
+         } else {
+             U32 cBlockHeader;
+             /* Error checking and repcodes update */
+@@ -5891,11 +6668,10 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
+             cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
+             MEM_writeLE24(op, cBlockHeader);
+             cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
+-            DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize);
++            DEBUGLOG(5, "Writing out compressed block, size: %zu", cBlockSize);
+         }
+ 
+         cSize += cBlockSize;
+-        DEBUGLOG(4, "cSize running total: %zu", cSize);
+ 
+         if (lastBlock) {
+             break;
+@@ -5906,12 +6682,15 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
+             dstCapacity -= cBlockSize;
+             cctx->isFirstBlock = 0;
+         }
++        DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity);
+     }
+ 
++    DEBUGLOG(4, "cSize final total: %zu", cSize);
+     return cSize;
+ }
+ 
+-size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
++size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
++                              void* dst, size_t dstCapacity,
+                               const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                               const void* src, size_t srcSize)
+ {
+@@ -5921,7 +6700,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapaci
+     size_t frameHeaderSize = 0;
+ 
+     /* Transparent initialization stage, same as compressStream2() */
+-    DEBUGLOG(3, "ZSTD_compressSequences()");
++    DEBUGLOG(4, "ZSTD_compressSequences (dstCapacity=%zu)", dstCapacity);
+     assert(cctx != NULL);
+     FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
+     /* Begin writing output, starting with frame header */
+@@ -5949,26 +6728,34 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapaci
+         cSize += 4;
+     }
+ 
+-    DEBUGLOG(3, "Final compressed size: %zu", cSize);
++    DEBUGLOG(4, "Final compressed size: %zu", cSize);
+     return cSize;
+ }
+ 
+ /*======   Finalize   ======*/
+ 
++/* Input for ZSTD_flushStream()/ZSTD_endStream(): zcs->expectedInBuffer in stable-input mode, otherwise empty. */
++static ZSTD_inBuffer inBuffer_forEndFlush(const ZSTD_CStream* zcs)
++{
++    ZSTD_inBuffer const emptyInput = { NULL, 0, 0 };
++    return (zcs->appliedParams.inBufferMode != ZSTD_bm_stable) ? emptyInput : zcs->expectedInBuffer;
++}
++
+ /*! ZSTD_flushStream() :
+  * @return : amount of data remaining to flush */
+ size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
+ {
+-    ZSTD_inBuffer input = { NULL, 0, 0 };
++    ZSTD_inBuffer input = inBuffer_forEndFlush(zcs);
++    input.size = input.pos; /* do not ingest more input during flush */
+     return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
+ }
+ 
+ 
+ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
+ {
+-    ZSTD_inBuffer input = { NULL, 0, 0 };
++    ZSTD_inBuffer input = inBuffer_forEndFlush(zcs);
+     size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
+-    FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed");
++    FORWARD_IF_ERROR(remainingToFlush , "ZSTD_compressStream2(,,ZSTD_e_end) failed");
+     if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush;   /* minimal estimation */
+     /* single thread mode : attempt to calculate remaining to flush more precisely */
+     {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
+@@ -6090,7 +6877,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel,
+             cp.targetLength = (unsigned)(-clampedCompressionLevel);
+         }
+         /* refine parameters based on srcSize & dictSize */
+-        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
++        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode, ZSTD_ps_auto);
+     }
+ }
+ 
+@@ -6125,3 +6912,29 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH
+     if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
+     return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
+ }
++
++/* CCtx-level entry point: registers the producer on zc->requestedParams via the CCtx_params variant. */
++void ZSTD_registerSequenceProducer(ZSTD_CCtx* zc,
++                                   void* extSeqProdState,
++                                   ZSTD_sequenceProducer_F extSeqProdFunc)
++{
++    ZSTD_CCtx_params* requested;
++    assert(zc != NULL);
++    requested = &zc->requestedParams;
++    ZSTD_CCtxParams_registerSequenceProducer(requested, extSeqProdState, extSeqProdFunc);
++}
++
++/* Stores the external sequence producer callback and its opaque state in @params.
++ * A NULL @extSeqProdFunc unregisters the producer: both fields are then
++ * cleared, and any @extSeqProdState passed alongside it is discarded. */
++void ZSTD_CCtxParams_registerSequenceProducer(ZSTD_CCtx_params* params,
++                                              void* extSeqProdState,
++                                              ZSTD_sequenceProducer_F extSeqProdFunc)
++{
++    int const unregistering = (extSeqProdFunc == NULL);
++    assert(params != NULL);
++    /* same value as the argument in both cases: NULL when unregistering */
++    params->extSeqProdFunc = extSeqProdFunc;
++    /* the state pointer is only kept while a producer function is installed */
++    params->extSeqProdState = unregistering ? NULL : extSeqProdState;
++}
+diff --git a/lib/zstd/compress/zstd_compress_internal.h b/lib/zstd/compress/zstd_compress_internal.h
+index 71697a11ae30..53cb582a8d2b 100644
+--- a/lib/zstd/compress/zstd_compress_internal.h
++++ b/lib/zstd/compress/zstd_compress_internal.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -20,6 +21,7 @@
+ ***************************************/
+ #include "../common/zstd_internal.h"
+ #include "zstd_cwksp.h"
++#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */
+ 
+ 
+ /*-*************************************
+@@ -32,7 +34,7 @@
+                                        It's not a big deal though : candidate will just be sorted again.
+                                        Additionally, candidate position 1 will be lost.
+                                        But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
+-                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
++                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy.
+                                        This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
+ 
+ 
+@@ -111,12 +113,13 @@ typedef struct {
+ /* ZSTD_buildBlockEntropyStats() :
+  *  Builds entropy for the block.
+  *  @return : 0 on success or error code */
+-size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
+-                             const ZSTD_entropyCTables_t* prevEntropy,
+-                                   ZSTD_entropyCTables_t* nextEntropy,
+-                             const ZSTD_CCtx_params* cctxParams,
+-                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+-                                   void* workspace, size_t wkspSize);
++size_t ZSTD_buildBlockEntropyStats(
++                    const seqStore_t* seqStorePtr,
++                    const ZSTD_entropyCTables_t* prevEntropy,
++                          ZSTD_entropyCTables_t* nextEntropy,
++                    const ZSTD_CCtx_params* cctxParams,
++                          ZSTD_entropyCTablesMetadata_t* entropyMetadata,
++                          void* workspace, size_t wkspSize);
+ 
+ /* *******************************
+ *  Compression internals structs *
+@@ -142,26 +145,33 @@ typedef struct {
+   size_t capacity;      /* The capacity starting from `seq` pointer */
+ } rawSeqStore_t;
+ 
++typedef struct {
++    U32 idx;            /* Index in array of ZSTD_Sequence */
++    U32 posInSequence;  /* Position within sequence at idx */
++    size_t posInSrc;    /* Number of bytes given by sequences provided so far */
++} ZSTD_sequencePosition;
++
+ UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
+ 
+ typedef struct {
+-    int price;
+-    U32 off;
+-    U32 mlen;
+-    U32 litlen;
+-    U32 rep[ZSTD_REP_NUM];
++    int price;  /* price from beginning of segment to this position */
++    U32 off;    /* offset of previous match */
++    U32 mlen;   /* length of previous match */
++    U32 litlen; /* nb of literals since previous match */
++    U32 rep[ZSTD_REP_NUM];  /* offset history after previous match */
+ } ZSTD_optimal_t;
+ 
+ typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
+ 
++#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3)
+ typedef struct {
+     /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
+     unsigned* litFreq;           /* table of literals statistics, of size 256 */
+     unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
+     unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
+     unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
+-    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
+-    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
++    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_SIZE */
++    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */
+ 
+     U32  litSum;                 /* nb of literals */
+     U32  litLengthSum;           /* nb of litLength codes */
+@@ -212,8 +222,10 @@ struct ZSTD_matchState_t {
+     U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
+ 
+     U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
+-    U16* tagTable;                           /* For row-based matchFinder: A row-based table containing the hashes and head index. */
++    BYTE* tagTable;                          /* For row-based matchFinder: A row-based table containing the hashes and head index. */
+     U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
++    U64 hashSalt;                            /* For row-based matchFinder: salts the hash for reuse of tag table */
++    U32 hashSaltEntropy;                     /* For row-based matchFinder: collects entropy for salt generation */
+ 
+     U32* hashTable;
+     U32* hashTable3;
+@@ -228,6 +240,18 @@ struct ZSTD_matchState_t {
+     const ZSTD_matchState_t* dictMatchState;
+     ZSTD_compressionParameters cParams;
+     const rawSeqStore_t* ldmSeqStore;
++
++    /* Controls prefetching in some dictMatchState matchfinders.
++     * This behavior is controlled from the cctx ms.
++     * This parameter has no effect in the cdict ms. */
++    int prefetchCDictTables;
++
++    /* When == 0, lazy match finders insert every position.
++     * When != 0, lazy match finders only insert positions they search.
++     * This allows them to skip much faster over incompressible data,
++     * at a small cost to compression ratio.
++     */
++    int lazySkipping;
+ };
+ 
+ typedef struct {
+@@ -324,6 +348,25 @@ struct ZSTD_CCtx_params_s {
+ 
+     /* Internal use, for createCCtxParams() and freeCCtxParams() only */
+     ZSTD_customMem customMem;
++
++    /* Controls prefetching in some dictMatchState matchfinders */
++    ZSTD_paramSwitch_e prefetchCDictTables;
++
++    /* Controls whether zstd will fall back to an internal matchfinder
++     * if the external matchfinder returns an error code. */
++    int enableMatchFinderFallback;
++
++    /* Parameters for the external sequence producer API.
++     * Users set these parameters through ZSTD_registerSequenceProducer().
++     * It is not possible to set these parameters individually through the public API. */
++    void* extSeqProdState;
++    ZSTD_sequenceProducer_F extSeqProdFunc;
++
++    /* Adjust the max block size */
++    size_t maxBlockSize;
++
++    /* Controls repcode search in external sequence parsing */
++    ZSTD_paramSwitch_e searchForExternalRepcodes;
+ };  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
+ 
+ #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
+@@ -404,6 +447,7 @@ struct ZSTD_CCtx_s {
+ 
+     /* Stable in/out buffer verification */
+     ZSTD_inBuffer expectedInBuffer;
++    size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */
+     size_t expectedOutBufferSize;
+ 
+     /* Dictionary */
+@@ -417,9 +461,14 @@ struct ZSTD_CCtx_s {
+ 
+     /* Workspace for block splitter */
+     ZSTD_blockSplitCtx blockSplitCtx;
++
++    /* Buffer for output from external sequence producer */
++    ZSTD_Sequence* extSeqBuf;
++    size_t extSeqBufCapacity;
+ };
+ 
+ typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
++typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e;
+ 
+ typedef enum {
+     ZSTD_noDict = 0,
+@@ -441,7 +490,7 @@ typedef enum {
+                                  * In this mode we take both the source size and the dictionary size
+                                  * into account when selecting and adjusting the parameters.
+                                  */
+-    ZSTD_cpm_unknown = 3,       /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
++    ZSTD_cpm_unknown = 3        /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
+                                  * We don't know what these parameters are for. We default to the legacy
+                                  * behavior of taking both the source size and the dict size into account
+                                  * when selecting and adjusting parameters.
+@@ -500,9 +549,11 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
+ /* ZSTD_noCompressBlock() :
+  * Writes uncompressed block to dst buffer from given src.
+  * Returns the size of the block */
+-MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
++MEM_STATIC size_t
++ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
+ {
+     U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
++    DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity);
+     RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
+                     dstSize_tooSmall, "dst buf too small for uncompressed block");
+     MEM_writeLE24(dst, cBlockHeader24);
+@@ -510,7 +561,8 @@ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const voi
+     return ZSTD_blockHeaderSize + srcSize;
+ }
+ 
+-MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
++MEM_STATIC size_t
++ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
+ {
+     BYTE* const op = (BYTE*)dst;
+     U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
+@@ -529,7 +581,7 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
+ {
+     U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
+     ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
+-    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
++    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat));
+     return (srcSize >> minlog) + 2;
+ }
+ 
+@@ -565,29 +617,27 @@ ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE con
+     while (ip < iend) *op++ = *ip++;
+ }
+ 
+-#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
+-#define STORE_REPCODE_1 STORE_REPCODE(1)
+-#define STORE_REPCODE_2 STORE_REPCODE(2)
+-#define STORE_REPCODE_3 STORE_REPCODE(3)
+-#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
+-#define STORE_OFFSET(o)  (assert((o)>0), o + ZSTD_REP_MOVE)
+-#define STORED_IS_OFFSET(o)  ((o) > ZSTD_REP_MOVE)
+-#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
+-#define STORED_OFFSET(o)  (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
+-#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1)  /* returns ID 1,2,3 */
+-#define STORED_TO_OFFBASE(o) ((o)+1)
+-#define OFFBASE_TO_STORED(o) ((o)-1)
++
++#define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1)
++#define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2)
++#define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3)
++#define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */
++#define OFFSET_TO_OFFBASE(o)  (assert((o)>0), (o) + ZSTD_REP_NUM)  /* real offsets map above the repcode IDs; (o) parenthesized so any expression is safe */
++#define OFFBASE_IS_OFFSET(o)  ((o) > ZSTD_REP_NUM)
++#define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM)
++#define OFFBASE_TO_OFFSET(o)  (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
++#define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o))  /* returns ID 1,2,3 */
+ 
+ /*! ZSTD_storeSeq() :
+- *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
+- *  @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET().
++ *  Store a sequence (litlen, litPtr, offBase and matchLength) into seqStore_t.
++ *  @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
+  *  @matchLength : must be >= MINMATCH
+- *  Allowed to overread literals up to litLimit.
++ *  Allowed to over-read literals up to litLimit.
+ */
+ HINT_INLINE UNUSED_ATTR void
+ ZSTD_storeSeq(seqStore_t* seqStorePtr,
+               size_t litLength, const BYTE* literals, const BYTE* litLimit,
+-              U32 offBase_minus1,
++              U32 offBase,
+               size_t matchLength)
+ {
+     BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
+@@ -596,8 +646,8 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
+     static const BYTE* g_start = NULL;
+     if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
+     {   U32 const pos = (U32)((const BYTE*)literals - g_start);
+-        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
+-               pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
++        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u",
++               pos, (U32)litLength, (U32)matchLength, (U32)offBase);
+     }
+ #endif
+     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
+@@ -607,9 +657,9 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
+     assert(literals + litLength <= litLimit);
+     if (litEnd <= litLimit_w) {
+         /* Common case we can use wildcopy.
+-	 * First copy 16 bytes, because literals are likely short.
+-	 */
+-        assert(WILDCOPY_OVERLENGTH >= 16);
++         * First copy 16 bytes, because literals are likely short.
++         */
++        ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16);
+         ZSTD_copy16(seqStorePtr->lit, literals);
+         if (litLength > 16) {
+             ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
+@@ -628,7 +678,7 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
+     seqStorePtr->sequences[0].litLength = (U16)litLength;
+ 
+     /* match offset */
+-    seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);
++    seqStorePtr->sequences[0].offBase = offBase;
+ 
+     /* match Length */
+     assert(matchLength >= MINMATCH);
+@@ -646,17 +696,17 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
+ 
+ /* ZSTD_updateRep() :
+  * updates in-place @rep (array of repeat offsets)
+- * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
++ * @offBase : sum-type, using numeric representation of ZSTD_storeSeq()
+  */
+ MEM_STATIC void
+-ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
++ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
+ {
+-    if (STORED_IS_OFFSET(offBase_minus1)) {  /* full offset */
++    if (OFFBASE_IS_OFFSET(offBase)) {  /* full offset */
+         rep[2] = rep[1];
+         rep[1] = rep[0];
+-        rep[0] = STORED_OFFSET(offBase_minus1);
++        rep[0] = OFFBASE_TO_OFFSET(offBase);
+     } else {   /* repcode */
+-        U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
++        U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0;
+         if (repCode > 0) {  /* note : if repCode==0, no change */
+             U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+             rep[2] = (repCode >= 2) ? rep[1] : rep[2];
+@@ -673,11 +723,11 @@ typedef struct repcodes_s {
+ } repcodes_t;
+ 
+ MEM_STATIC repcodes_t
+-ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
++ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
+ {
+     repcodes_t newReps;
+     ZSTD_memcpy(&newReps, rep, sizeof(newReps));
+-    ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
++    ZSTD_updateRep(newReps.rep, offBase, ll0);
+     return newReps;
+ }
+ 
+@@ -685,59 +735,6 @@ ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0
+ /*-*************************************
+ *  Match length counter
+ ***************************************/
+-static unsigned ZSTD_NbCommonBytes (size_t val)
+-{
+-    if (MEM_isLittleEndian()) {
+-        if (MEM_64bits()) {
+-#       if (__GNUC__ >= 4)
+-            return (__builtin_ctzll((U64)val) >> 3);
+-#       else
+-            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
+-                                                     0, 3, 1, 3, 1, 4, 2, 7,
+-                                                     0, 2, 3, 6, 1, 5, 3, 5,
+-                                                     1, 3, 4, 4, 2, 5, 6, 7,
+-                                                     7, 0, 1, 2, 3, 3, 4, 6,
+-                                                     2, 6, 5, 5, 3, 4, 5, 6,
+-                                                     7, 1, 2, 4, 6, 4, 4, 5,
+-                                                     7, 2, 6, 5, 7, 6, 7, 7 };
+-            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+-#       endif
+-        } else { /* 32 bits */
+-#       if (__GNUC__ >= 3)
+-            return (__builtin_ctz((U32)val) >> 3);
+-#       else
+-            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
+-                                                     3, 2, 2, 1, 3, 2, 0, 1,
+-                                                     3, 3, 1, 2, 2, 2, 2, 0,
+-                                                     3, 1, 2, 0, 1, 0, 1, 1 };
+-            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+-#       endif
+-        }
+-    } else {  /* Big Endian CPU */
+-        if (MEM_64bits()) {
+-#       if (__GNUC__ >= 4)
+-            return (__builtin_clzll(val) >> 3);
+-#       else
+-            unsigned r;
+-            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
+-            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
+-            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+-            r += (!val);
+-            return r;
+-#       endif
+-        } else { /* 32 bits */
+-#       if (__GNUC__ >= 3)
+-            return (__builtin_clz((U32)val) >> 3);
+-#       else
+-            unsigned r;
+-            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+-            r += (!val);
+-            return r;
+-#       endif
+-    }   }
+-}
+-
+-
+ MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
+ {
+     const BYTE* const pStart = pIn;
+@@ -783,32 +780,43 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
+  *  Hashes
+  ***************************************/
+ static const U32 prime3bytes = 506832829U;
+-static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
+-MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
++static U32    ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s)  >> (32-h) ; }
++MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
++MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }
+ 
+ static const U32 prime4bytes = 2654435761U;
+-static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
+-static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
++static U32    ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
++static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
++static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }
+ 
+ static const U64 prime5bytes = 889523592379ULL;
+-static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
+-static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
++static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
++static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
++static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }
+ 
+ static const U64 prime6bytes = 227718039650203ULL;
+-static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
+-static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
++static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
++static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
++static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }
+ 
+ static const U64 prime7bytes = 58295818150454627ULL;
+-static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
+-static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
++static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
++static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
++static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }
+ 
+ static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
+-static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
+-static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
++static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes)  ^ s) >> (64-h)) ; }
++static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
++static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }
++
+ 
+ MEM_STATIC FORCE_INLINE_ATTR
+ size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
+ {
++    /* Although some of these hashes do support hBits up to 64, some do not.
++     * To be on the safe side, always avoid hBits > 32. */
++    assert(hBits <= 32);
++
+     switch(mls)
+     {
+     default:
+@@ -820,6 +828,24 @@ size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
+     }
+ }
+ 
++/* Salted variant of ZSTD_hashPtr(): dispatches on @mls to ZSTD_hash{4..8}PtrS(),
++ * passing @hashSalt along. Any @mls outside 5..8 uses the 4-byte hash. */
++MEM_STATIC FORCE_INLINE_ATTR
++size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt)
++{
++    /* Not every per-length hash tolerates hBits up to 64,
++     * so the 32-bit ceiling is enforced for all of them. */
++    assert(hBits <= 32);
++
++    if (mls == 8) return ZSTD_hash8PtrS(p, hBits, hashSalt);
++    if (mls == 7) return ZSTD_hash7PtrS(p, hBits, hashSalt);
++    if (mls == 6) return ZSTD_hash6PtrS(p, hBits, hashSalt);
++    if (mls == 5) return ZSTD_hash5PtrS(p, hBits, hashSalt);
++    /* mls == 4, and every other value: the 4-byte hash only takes a 32-bit salt */
++    return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt);
++}
++
++
+ /* ZSTD_ipow() :
+  * Return base^exponent.
+  */
+@@ -1011,7 +1037,9 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
+  * The least significant cycleLog bits of the indices must remain the same,
+  * which may be 0. Every index up to maxDist in the past must be valid.
+  */
+-MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
++MEM_STATIC
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
+                                            U32 maxDist, void const* src)
+ {
+     /* preemptive overflow correction:
+@@ -1167,10 +1195,15 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
+                     (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+         assert(blockEndIdx >= loadedDictEnd);
+ 
+-        if (blockEndIdx > loadedDictEnd + maxDist) {
++        if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) {
+             /* On reaching window size, dictionaries are invalidated.
+              * For simplification, if window size is reached anywhere within next block,
+              * the dictionary is invalidated for the full block.
++             *
++             * We also have to invalidate the dictionary if ZSTD_window_update() has detected
++             * non-contiguous segments, which means that loadedDictEnd != window->dictLimit.
++             * loadedDictEnd may be 0, if forceWindow is true, but in that case we never use
++             * dictMatchState, so setting it to NULL is not a problem.
+              */
+             DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
+             *loadedDictEndPtr = 0;
+@@ -1199,7 +1232,9 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
+  * forget about the extDict. Handles overlap of the prefix and extDict.
+  * Returns non-zero if the segment is contiguous.
+  */
+-MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
++MEM_STATIC
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++U32 ZSTD_window_update(ZSTD_window_t* window,
+                                   void const* src, size_t srcSize,
+                                   int forceNonContiguous)
+ {
+@@ -1302,6 +1337,42 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
+ 
+ #endif
+ 
++/* Short Cache */
++
++/* Normally, zstd matchfinders follow this flow:
++ *     1. Compute hash at ip
++ *     2. Load index from hashTable[hash]
++ *     3. Check if *ip == *(base + index)
++ * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss.
++ *
++ * Short cache is an optimization which allows us to avoid step 3 most of the time
++ * when the data doesn't actually match. With short cache, the flow becomes:
++ *     1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip.
++ *     2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works.
++ *     3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue.
++ *
++ * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to
++ * dictMatchState matchfinders.
++ */
++#define ZSTD_SHORT_CACHE_TAG_BITS 8
++#define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1)
++
++/* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable.
++ * Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */
++MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) {
++    size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;  /* upper bits select the table slot */
++    U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK);  /* low bits are the tag */
++    assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0);  /* index must leave room for the tag */
++    hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag;  /* entry: index in high bits, tag in low bits */
++}
++
++/* Helper function for short cache matchfinders.
++ * Returns 1 if the low ZSTD_SHORT_CACHE_TAG_BITS bits of packedTag1 and packedTag2 are equal, 0 otherwise. */
++MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) {
++    U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK;  /* keep only the tag bits */
++    U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK;
++    return tag1 == tag2;
++}
+ 
+ 
+ /* ===============================================================
+@@ -1381,11 +1452,10 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
+  * This cannot be used when long range matching is enabled.
+  * Zstd will use these sequences, and pass the literals to a secondary block
+  * compressor.
+- * @return : An error code on failure.
+  * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
+  * access and data corruption.
+  */
+-size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
++void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+ 
+ /* ZSTD_cycleLog() :
+  *  condition for correct operation : hashLog > 1 */
+@@ -1396,4 +1466,55 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
+  */
+ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
+ 
++/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
++ * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
++ * Note that the block delimiter must include the last literals of the block.
++ */
++size_t
++ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
++                                              ZSTD_sequencePosition* seqPos,
++                                        const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
++                                        const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
++
++/* Returns the number of bytes to move the current read position back by.
++ * Only non-zero if we ended up splitting a sequence.
++ * Otherwise, it may return a ZSTD error if something went wrong.
++ *
++ * This function will attempt to scan through blockSize bytes
++ * represented by the sequences in @inSeqs,
++ * storing any (partial) sequences.
++ *
++ * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
++ * avoid splitting a match, or to avoid splitting a match such that it would produce a match
++ * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
++ */
++size_t
++ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
++                                   const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
++                                   const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
++
++/* Returns 1 if an external sequence producer is registered (params->extSeqProdFunc != NULL), otherwise returns 0. */
++MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) {
++    return params->extSeqProdFunc != NULL;  /* params is dereferenced unconditionally: must not be NULL */
++}
++
++/* ===============================================================
++ * Deprecated definitions that are still used internally to avoid
++ * deprecation warnings. These functions are exactly equivalent to
++ * their public variants, but avoid the deprecation warnings.
++ * =============================================================== */
++
++size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
++
++size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
++                                    void* dst, size_t dstCapacity,
++                              const void* src, size_t srcSize);
++
++size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
++                               void* dst, size_t dstCapacity,
++                         const void* src, size_t srcSize);
++
++size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
++
++
+ #endif /* ZSTD_COMPRESS_H */
+diff --git a/lib/zstd/compress/zstd_compress_literals.c b/lib/zstd/compress/zstd_compress_literals.c
+index 52b0a8059aba..3e9ea46a670a 100644
+--- a/lib/zstd/compress/zstd_compress_literals.c
++++ b/lib/zstd/compress/zstd_compress_literals.c
+@@ -1,5 +1,6 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -13,11 +14,36 @@
+  ***************************************/
+ #include "zstd_compress_literals.h"
+ 
++
++/* **************************************************************
++*  Debug Traces
++****************************************************************/
++#if DEBUGLEVEL >= 2
++
++static size_t showHexa(const void* src, size_t srcSize)
++{   /* Debug helper: logs each of the srcSize bytes at src as 2-digit hex (RAWLOG level 5). */
++    const BYTE* const ip = (const BYTE*)src;
++    size_t u;
++    for (u=0; u<srcSize; u++) {
++        RAWLOG(5, " %02X", ip[u]); (void)ip;  /* (void)ip: presumably silences an unused warning when RAWLOG is compiled out */
++    }
++    RAWLOG(5, " \n");
++    return srcSize;  /* returned so the call can be passed as a DEBUGLOG argument */
++}
++
++#endif
++
++
++/* **************************************************************
++*  Literals compression - special cases
++****************************************************************/
+ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+ {
+     BYTE* const ostart = (BYTE*)dst;
+     U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+ 
++    DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
++
+     RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
+ 
+     switch(flSize)
+@@ -36,16 +62,30 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
+     }
+ 
+     ZSTD_memcpy(ostart + flSize, src, srcSize);
+-    DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
++    DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
+     return srcSize + flSize;
+ }
+ 
++static int allBytesIdentical(const void* src, size_t srcSize)
++{   /* Returns 1 if all srcSize bytes at src equal the first byte, 0 otherwise. */
++    assert(srcSize >= 1);  /* src[0] is read unconditionally below */
++    assert(src != NULL);
++    {   const BYTE b = ((const BYTE*)src)[0];
++        size_t p;
++        for (p=1; p<srcSize; p++) {
++            if (((const BYTE*)src)[p] != b) return 0;  /* first mismatch ends the scan */
++        }
++        return 1;
++    }
++}
++
+ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+ {
+     BYTE* const ostart = (BYTE*)dst;
+     U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+ 
+-    (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */
++    assert(dstCapacity >= 4); (void)dstCapacity;
++    assert(allBytesIdentical(src, srcSize));
+ 
+     switch(flSize)
+     {
+@@ -63,28 +103,51 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void*
+     }
+ 
+     ostart[flSize] = *(const BYTE*)src;
+-    DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
++    DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
+     return flSize+1;
+ }
+ 
+-size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+-                              ZSTD_hufCTables_t* nextHuf,
+-                              ZSTD_strategy strategy, int disableLiteralCompression,
+-                              void* dst, size_t dstCapacity,
+-                        const void* src, size_t srcSize,
+-                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+-                        const int bmi2,
+-                        unsigned suspectUncompressible)
++/* ZSTD_minLiteralsToCompress() :
++ * @return : minimal amount of literals for literal compression to even be attempted
++ *           (the caller emits raw literals when srcSize is below this value).
++ * The minimum gets lower as the compression strategy increases.
++ */
++static size_t
++ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
++{
++    assert((int)strategy >= 0);
++    assert((int)strategy <= 9);
++    /* strategy 9 (btultra2) : min 8 bytes;
++     * then 2x larger for each step down in strategy, capped at 64 bytes;
++     * always 6 bytes when huf_repeat == HUF_repeat_valid */
++    {   int const shift = MIN(9-(int)strategy, 3);
++        size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
++        DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
++        return mintc;
++    }
++}
++
++size_t ZSTD_compressLiterals (
++                  void* dst, size_t dstCapacity,
++            const void* src, size_t srcSize,
++                  void* entropyWorkspace, size_t entropyWorkspaceSize,
++            const ZSTD_hufCTables_t* prevHuf,
++                  ZSTD_hufCTables_t* nextHuf,
++                  ZSTD_strategy strategy,
++                  int disableLiteralCompression,
++                  int suspectUncompressible,
++                  int bmi2)
+ {
+-    size_t const minGain = ZSTD_minGain(srcSize, strategy);
+     size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
+     BYTE*  const ostart = (BYTE*)dst;
+     U32 singleStream = srcSize < 256;
+     symbolEncodingType_e hType = set_compressed;
+     size_t cLitSize;
+ 
+-    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
+-                disableLiteralCompression, (U32)srcSize);
++    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
++                disableLiteralCompression, (U32)srcSize, dstCapacity);
++
++    DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
+ 
+     /* Prepare nextEntropy assuming reusing the existing table */
+     ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+@@ -92,40 +155,51 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+     if (disableLiteralCompression)
+         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+ 
+-    /* small ? don't even attempt compression (speed opt) */
+-#   define COMPRESS_LITERALS_SIZE_MIN 63
+-    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+-        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+-    }
++    /* if too small, don't even attempt compression (speed opt) */
++    if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
++        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+ 
+     RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
+     {   HUF_repeat repeat = prevHuf->repeatMode;
+-        int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
++        int const flags = 0
++            | (bmi2 ? HUF_flags_bmi2 : 0)
++            | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
++            | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
++            | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
++
++        typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
++        huf_compress_f huf_compress;
+         if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
+-        cLitSize = singleStream ?
+-            HUF_compress1X_repeat(
+-                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+-                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
+-                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) :
+-            HUF_compress4X_repeat(
+-                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+-                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
+-                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
++        huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
++        cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
++                                src, srcSize,
++                                HUF_SYMBOLVALUE_MAX, LitHufLog,
++                                entropyWorkspace, entropyWorkspaceSize,
++                                (HUF_CElt*)nextHuf->CTable,
++                                &repeat, flags);
++        DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
+         if (repeat != HUF_repeat_none) {
+             /* reused the existing table */
+-            DEBUGLOG(5, "Reusing previous huffman table");
++            DEBUGLOG(5, "reusing statistics from previous huffman block");
+             hType = set_repeat;
+         }
+     }
+ 
+-    if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
+-        ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+-        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+-    }
++    {   size_t const minGain = ZSTD_minGain(srcSize, strategy);
++        if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
++            ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
++            return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
++    }   }
+     if (cLitSize==1) {
+-        ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+-        return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
+-    }
++        /* A return value of 1 signals that the alphabet consists of a single symbol.
++         * However, in some rare circumstances, it could be the compressed size (a single byte).
++         * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
++         * (it's also necessary to not generate statistics).
++         * Therefore, in such a case, actively check that all bytes are identical. */
++        if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
++            ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
++            return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
++    }   }
+ 
+     if (hType == set_compressed) {
+         /* using a newly constructed table */
+@@ -136,16 +210,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+     switch(lhSize)
+     {
+     case 3: /* 2 - 2 - 10 - 10 */
+-        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
++        if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
++        {   U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
+             MEM_writeLE24(ostart, lhc);
+             break;
+         }
+     case 4: /* 2 - 2 - 14 - 14 */
++        assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
+         {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
+             MEM_writeLE32(ostart, lhc);
+             break;
+         }
+     case 5: /* 2 - 2 - 18 - 18 */
++        assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
+         {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
+             MEM_writeLE32(ostart, lhc);
+             ostart[4] = (BYTE)(cLitSize >> 10);
+diff --git a/lib/zstd/compress/zstd_compress_literals.h b/lib/zstd/compress/zstd_compress_literals.h
+index 9775fb97cb70..a2a85d6b69e5 100644
+--- a/lib/zstd/compress/zstd_compress_literals.h
++++ b/lib/zstd/compress/zstd_compress_literals.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -16,16 +17,24 @@
+ 
+ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ 
++/* ZSTD_compressRleLiteralsBlock() :
++ * Conditions :
++ * - All bytes in @src are identical
++ * - dstCapacity >= 4 */
+ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ 
+-/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
+-size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+-                              ZSTD_hufCTables_t* nextHuf,
+-                              ZSTD_strategy strategy, int disableLiteralCompression,
+-                              void* dst, size_t dstCapacity,
++/* ZSTD_compressLiterals():
++ * @entropyWorkspace: must be aligned on 4-bytes boundaries
++ * @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE
++ * @suspectUncompressible: if non-zero, sampling checks are run to potentially skip huffman coding
++ */
++size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                               void* entropyWorkspace, size_t entropyWorkspaceSize,
+-                        const int bmi2,
+-                        unsigned suspectUncompressible);
++                        const ZSTD_hufCTables_t* prevHuf,
++                              ZSTD_hufCTables_t* nextHuf,
++                              ZSTD_strategy strategy, int disableLiteralCompression,
++                              int suspectUncompressible,
++                              int bmi2);
+ 
+ #endif /* ZSTD_COMPRESS_LITERALS_H */
+diff --git a/lib/zstd/compress/zstd_compress_sequences.c b/lib/zstd/compress/zstd_compress_sequences.c
+index 21ddc1b37acf..5c028c78d889 100644
+--- a/lib/zstd/compress/zstd_compress_sequences.c
++++ b/lib/zstd/compress/zstd_compress_sequences.c
+@@ -1,5 +1,6 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -58,7 +59,7 @@ static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
+ {
+     /* Heuristic: This should cover most blocks <= 16K and
+      * start to fade out after 16K to about 32K depending on
+-     * comprssibility.
++     * compressibility.
+      */
+     return nbSeq >= 2048;
+ }
+@@ -166,7 +167,7 @@ ZSTD_selectEncodingType(
+     if (mostFrequent == nbSeq) {
+         *repeatMode = FSE_repeat_none;
+         if (isDefaultAllowed && nbSeq <= 2) {
+-            /* Prefer set_basic over set_rle when there are 2 or less symbols,
++            /* Prefer set_basic over set_rle when there are 2 or fewer symbols,
+              * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
+              * If basic encoding isn't possible, always choose RLE.
+              */
+diff --git a/lib/zstd/compress/zstd_compress_sequences.h b/lib/zstd/compress/zstd_compress_sequences.h
+index 7991364c2f71..7fe6f4ff5cf2 100644
+--- a/lib/zstd/compress/zstd_compress_sequences.h
++++ b/lib/zstd/compress/zstd_compress_sequences.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+diff --git a/lib/zstd/compress/zstd_compress_superblock.c b/lib/zstd/compress/zstd_compress_superblock.c
+index 17d836cc84e8..41f6521b27cd 100644
+--- a/lib/zstd/compress/zstd_compress_superblock.c
++++ b/lib/zstd/compress/zstd_compress_superblock.c
+@@ -1,5 +1,6 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -36,13 +37,14 @@
+  *      If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
+  *      and the following sub-blocks' literals sections will be Treeless_Literals_Block.
+  *  @return : compressed size of literals section of a sub-block
+- *            Or 0 if it unable to compress.
++ *            Or 0 if unable to compress.
+  *            Or error code */
+-static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
+-                                    const ZSTD_hufCTablesMetadata_t* hufMetadata,
+-                                    const BYTE* literals, size_t litSize,
+-                                    void* dst, size_t dstSize,
+-                                    const int bmi2, int writeEntropy, int* entropyWritten)
++static size_t
++ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
++                              const ZSTD_hufCTablesMetadata_t* hufMetadata,
++                              const BYTE* literals, size_t litSize,
++                              void* dst, size_t dstSize,
++                              const int bmi2, int writeEntropy, int* entropyWritten)
+ {
+     size_t const header = writeEntropy ? 200 : 0;
+     size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
+@@ -53,8 +55,6 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
+     symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
+     size_t cLitSize = 0;
+ 
+-    (void)bmi2; /* TODO bmi2... */
+-
+     DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
+ 
+     *entropyWritten = 0;
+@@ -76,9 +76,9 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
+         DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
+     }
+ 
+-    /* TODO bmi2 */
+-    {   const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
+-                                          : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
++    {   int const flags = bmi2 ? HUF_flags_bmi2 : 0;
++        const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags)
++                                          : HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags);
+         op += cSize;
+         cLitSize += cSize;
+         if (cSize == 0 || ERR_isError(cSize)) {
+@@ -103,7 +103,7 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
+     switch(lhSize)
+     {
+     case 3: /* 2 - 2 - 10 - 10 */
+-        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
++        {   U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
+             MEM_writeLE24(ostart, lhc);
+             break;
+         }
+@@ -123,26 +123,30 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
+     }
+     *entropyWritten = 1;
+     DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
+-    return op-ostart;
++    return (size_t)(op-ostart);
+ }
+ 
+-static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
+-    const seqDef* const sstart = sequences;
+-    const seqDef* const send = sequences + nbSeq;
+-    const seqDef* sp = sstart;
++static size_t
++ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
++                   const seqDef* sequences, size_t nbSeqs,
++                         size_t litSize, int lastSubBlock)
++{
+     size_t matchLengthSum = 0;
+     size_t litLengthSum = 0;
+-    (void)(litLengthSum); /* suppress unused variable warning on some environments */
+-    while (send-sp > 0) {
+-        ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
++    size_t n;
++    for (n=0; n<nbSeqs; n++) {
++        const ZSTD_sequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n);
+         litLengthSum += seqLen.litLength;
+         matchLengthSum += seqLen.matchLength;
+-        sp++;
+     }
+-    assert(litLengthSum <= litSize);
+-    if (!lastSequence) {
++    DEBUGLOG(5, "ZSTD_seqDecompressedSize: %u sequences from %p: %u literals + %u matchlength",
++                (unsigned)nbSeqs, (const void*)sequences,
++                (unsigned)litLengthSum, (unsigned)matchLengthSum);
++    if (!lastSubBlock)
+         assert(litLengthSum == litSize);
+-    }
++    else
++        assert(litLengthSum <= litSize);
++    (void)litLengthSum;
+     return matchLengthSum + litSize;
+ }
+ 
+@@ -156,13 +160,14 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
+  *  @return : compressed size of sequences section of a sub-block
+  *            Or 0 if it is unable to compress
+  *            Or error code. */
+-static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
+-                                              const ZSTD_fseCTablesMetadata_t* fseMetadata,
+-                                              const seqDef* sequences, size_t nbSeq,
+-                                              const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
+-                                              const ZSTD_CCtx_params* cctxParams,
+-                                              void* dst, size_t dstCapacity,
+-                                              const int bmi2, int writeEntropy, int* entropyWritten)
++static size_t
++ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
++                                const ZSTD_fseCTablesMetadata_t* fseMetadata,
++                                const seqDef* sequences, size_t nbSeq,
++                                const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
++                                const ZSTD_CCtx_params* cctxParams,
++                                void* dst, size_t dstCapacity,
++                                const int bmi2, int writeEntropy, int* entropyWritten)
+ {
+     const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
+     BYTE* const ostart = (BYTE*)dst;
+@@ -176,14 +181,14 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
+     /* Sequences Header */
+     RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
+                     dstSize_tooSmall, "");
+-    if (nbSeq < 0x7F)
++    if (nbSeq < 128)
+         *op++ = (BYTE)nbSeq;
+     else if (nbSeq < LONGNBSEQ)
+         op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
+     else
+         op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
+     if (nbSeq==0) {
+-        return op - ostart;
++        return (size_t)(op - ostart);
+     }
+ 
+     /* seqHead : flags for FSE encoding type */
+@@ -205,7 +210,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
+     }
+ 
+     {   size_t const bitstreamSize = ZSTD_encodeSequences(
+-                                        op, oend - op,
++                                        op, (size_t)(oend - op),
+                                         fseTables->matchlengthCTable, mlCode,
+                                         fseTables->offcodeCTable, ofCode,
+                                         fseTables->litlengthCTable, llCode,
+@@ -249,7 +254,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
+ #endif
+ 
+     *entropyWritten = 1;
+-    return op - ostart;
++    return (size_t)(op - ostart);
+ }
+ 
+ /* ZSTD_compressSubBlock() :
+@@ -275,7 +280,8 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
+                 litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
+     {   size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
+                                                         &entropyMetadata->hufMetadata, literals, litSize,
+-                                                        op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
++                                                        op, (size_t)(oend-op),
++                                                        bmi2, writeLitEntropy, litEntropyWritten);
+         FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
+         if (cLitSize == 0) return 0;
+         op += cLitSize;
+@@ -285,18 +291,18 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
+                                                   sequences, nbSeq,
+                                                   llCode, mlCode, ofCode,
+                                                   cctxParams,
+-                                                  op, oend-op,
++                                                  op, (size_t)(oend-op),
+                                                   bmi2, writeSeqEntropy, seqEntropyWritten);
+         FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
+         if (cSeqSize == 0) return 0;
+         op += cSeqSize;
+     }
+     /* Write block header */
+-    {   size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
++    {   size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize;
+         U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+         MEM_writeLE24(ostart, cBlockHeader24);
+     }
+-    return op-ostart;
++    return (size_t)(op-ostart);
+ }
+ 
+ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
+@@ -385,7 +391,11 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
+     return cSeqSizeEstimate + sequencesSectionHeaderSize;
+ }
+ 
+-static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
++typedef struct {
++    size_t estLitSize;    /* estimated size of the literals section */
++    size_t estBlockSize;  /* estimated size of the whole sub-block: literals + sequences + block header */
++} EstimatedBlockSize;
++static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
+                                         const BYTE* ofCodeTable,
+                                         const BYTE* llCodeTable,
+                                         const BYTE* mlCodeTable,
+@@ -393,15 +403,17 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
+                                         const ZSTD_entropyCTables_t* entropy,
+                                         const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                         void* workspace, size_t wkspSize,
+-                                        int writeLitEntropy, int writeSeqEntropy) {
+-    size_t cSizeEstimate = 0;
+-    cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
+-                                                         &entropy->huf, &entropyMetadata->hufMetadata,
+-                                                         workspace, wkspSize, writeLitEntropy);
+-    cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
++                                        int writeLitEntropy, int writeSeqEntropy)
++{
++    EstimatedBlockSize ebs;
++    ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize,
++                                                        &entropy->huf, &entropyMetadata->hufMetadata,
++                                                        workspace, wkspSize, writeLitEntropy);
++    ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
+                                                          nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
+                                                          workspace, wkspSize, writeSeqEntropy);
+-    return cSizeEstimate + ZSTD_blockHeaderSize;
++    ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize;
++    return ebs;
+ }
+ 
+ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
+@@ -415,13 +427,56 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe
+     return 0;
+ }
+ 
++static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t seqCount)
++{
++    size_t n, total = 0;
++    assert(sp != NULL);
++    for (n=0; n<seqCount; n++) {
++        total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
++    }
++    DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
++    return total;
++}
++
++#define BYTESCALE 256
++
++static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
++                size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
++                int firstSubBlock)
++{
++    size_t n, budget = 0, inSize=0;
++    /* entropy headers */
++    size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
++    assert(firstSubBlock==0 || firstSubBlock==1);
++    budget += headerSize;
++
++    /* first sequence => at least one sequence*/
++    budget += sp[0].litLength * avgLitCost + avgSeqCost;
++    if (budget > targetBudget) return 1;
++    inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
++
++    /* loop over sequences */
++    for (n=1; n<nbSeqs; n++) {
++        size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
++        budget += currentCost;
++        inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
++        /* stop when sub-block budget is reached */
++        if ( (budget > targetBudget)
++            /* though continue to expand until the sub-block is deemed compressible */
++          && (budget < inSize * BYTESCALE) )
++            break;
++    }
++
++    return n;
++}
++
+ /* ZSTD_compressSubBlock_multi() :
+  *  Breaks super-block into multiple sub-blocks and compresses them.
+- *  Entropy will be written to the first block.
+- *  The following blocks will use repeat mode to compress.
+- *  All sub-blocks are compressed blocks (no raw or rle blocks).
+- *  @return : compressed size of the super block (which is multiple ZSTD blocks)
+- *            Or 0 if it failed to compress. */
++ *  Entropy will be written into the first block.
++ *  The following blocks use repeat_mode to compress.
++ *  Sub-blocks are all compressed, except the last one when beneficial.
++ *  @return : compressed size of the super block (which features multiple ZSTD blocks)
++ *            or 0 if it failed to compress. */
+ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
+                             const ZSTD_compressedBlockState_t* prevCBlock,
+                             ZSTD_compressedBlockState_t* nextCBlock,
+@@ -434,10 +489,12 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
+ {
+     const seqDef* const sstart = seqStorePtr->sequencesStart;
+     const seqDef* const send = seqStorePtr->sequences;
+-    const seqDef* sp = sstart;
++    const seqDef* sp = sstart; /* tracks progress within seqStorePtr->sequences */
++    size_t const nbSeqs = (size_t)(send - sstart);
+     const BYTE* const lstart = seqStorePtr->litStart;
+     const BYTE* const lend = seqStorePtr->lit;
+     const BYTE* lp = lstart;
++    size_t const nbLiterals = (size_t)(lend - lstart);
+     BYTE const* ip = (BYTE const*)src;
+     BYTE const* const iend = ip + srcSize;
+     BYTE* const ostart = (BYTE*)dst;
+@@ -446,112 +503,171 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
+     const BYTE* llCodePtr = seqStorePtr->llCode;
+     const BYTE* mlCodePtr = seqStorePtr->mlCode;
+     const BYTE* ofCodePtr = seqStorePtr->ofCode;
+-    size_t targetCBlockSize = cctxParams->targetCBlockSize;
+-    size_t litSize, seqCount;
+-    int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
++    size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
++    size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
++    int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
+     int writeSeqEntropy = 1;
+-    int lastSequence = 0;
+-
+-    DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
+-                (unsigned)(lend-lp), (unsigned)(send-sstart));
+-
+-    litSize = 0;
+-    seqCount = 0;
+-    do {
+-        size_t cBlockSizeEstimate = 0;
+-        if (sstart == send) {
+-            lastSequence = 1;
+-        } else {
+-            const seqDef* const sequence = sp + seqCount;
+-            lastSequence = sequence == send - 1;
+-            litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
+-            seqCount++;
+-        }
+-        if (lastSequence) {
+-            assert(lp <= lend);
+-            assert(litSize <= (size_t)(lend - lp));
+-            litSize = (size_t)(lend - lp);
++
++    DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
++               (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart));
++
++        /* let's start by a general estimation for the full block */
++    if (nbSeqs > 0) {
++        EstimatedBlockSize const ebs =
++                ZSTD_estimateSubBlockSize(lp, nbLiterals,
++                                        ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
++                                        &nextCBlock->entropy, entropyMetadata,
++                                        workspace, wkspSize,
++                                        writeLitEntropy, writeSeqEntropy);
++        /* quick estimation */
++        size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
++        size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
++        const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1);
++        size_t n, avgBlockBudget, blockBudgetSupp=0;
++        avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks;
++        DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
++                    (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
++                    (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
++        /* simplification: if the estimate states that the full superblock doesn't compress, just bail out immediately;
++         * this will result in the production of a single uncompressed block covering @srcSize. */
++        if (ebs.estBlockSize > srcSize) return 0;
++
++        /* compress and write sub-blocks */
++        assert(nbSubBlocks>0);
++        for (n=0; n < nbSubBlocks-1; n++) {
++            /* determine nb of sequences for current sub-block + nbLiterals from next sequence */
++            size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp),
++                                        avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
++            /* if reached last sequence : break to last sub-block (simplification) */
++            assert(seqCount <= (size_t)(send-sp));
++            if (sp + seqCount == send) break;
++            assert(seqCount > 0);
++            /* compress sub-block */
++            {   int litEntropyWritten = 0;
++                int seqEntropyWritten = 0;
++                size_t litSize = countLiterals(seqStorePtr, sp, seqCount);
++                const size_t decompressedSize =
++                        ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0);
++                size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
++                                                sp, seqCount,
++                                                lp, litSize,
++                                                llCodePtr, mlCodePtr, ofCodePtr,
++                                                cctxParams,
++                                                op, (size_t)(oend-op),
++                                                bmi2, writeLitEntropy, writeSeqEntropy,
++                                                &litEntropyWritten, &seqEntropyWritten,
++                                                0);
++                FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
++
++                /* check compressibility, update state components */
++                if (cSize > 0 && cSize < decompressedSize) {
++                    DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes",
++                                (unsigned)decompressedSize, (unsigned)cSize);
++                    assert(ip + decompressedSize <= iend);
++                    ip += decompressedSize;
++                    lp += litSize;
++                    op += cSize;
++                    llCodePtr += seqCount;
++                    mlCodePtr += seqCount;
++                    ofCodePtr += seqCount;
++                    /* Entropy only needs to be written once */
++                    if (litEntropyWritten) {
++                        writeLitEntropy = 0;
++                    }
++                    if (seqEntropyWritten) {
++                        writeSeqEntropy = 0;
++                    }
++                    sp += seqCount;
++                    blockBudgetSupp = 0;
++            }   }
++            /* otherwise : do not compress yet, coalesce current sub-block with following one */
+         }
+-        /* I think there is an optimization opportunity here.
+-         * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
+-         * since it recalculates estimate from scratch.
+-         * For example, it would recount literal distribution and symbol codes every time.
+-         */
+-        cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
+-                                                       &nextCBlock->entropy, entropyMetadata,
+-                                                       workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
+-        if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
+-            int litEntropyWritten = 0;
+-            int seqEntropyWritten = 0;
+-            const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
+-            const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
+-                                                       sp, seqCount,
+-                                                       lp, litSize,
+-                                                       llCodePtr, mlCodePtr, ofCodePtr,
+-                                                       cctxParams,
+-                                                       op, oend-op,
+-                                                       bmi2, writeLitEntropy, writeSeqEntropy,
+-                                                       &litEntropyWritten, &seqEntropyWritten,
+-                                                       lastBlock && lastSequence);
+-            FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
+-            if (cSize > 0 && cSize < decompressedSize) {
+-                DEBUGLOG(5, "Committed the sub-block");
+-                assert(ip + decompressedSize <= iend);
+-                ip += decompressedSize;
+-                sp += seqCount;
+-                lp += litSize;
+-                op += cSize;
+-                llCodePtr += seqCount;
+-                mlCodePtr += seqCount;
+-                ofCodePtr += seqCount;
+-                litSize = 0;
+-                seqCount = 0;
+-                /* Entropy only needs to be written once */
+-                if (litEntropyWritten) {
+-                    writeLitEntropy = 0;
+-                }
+-                if (seqEntropyWritten) {
+-                    writeSeqEntropy = 0;
+-                }
++    } /* if (nbSeqs > 0) */
++
++    /* write last block */
++    DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
++    {   int litEntropyWritten = 0;
++        int seqEntropyWritten = 0;
++        size_t litSize = (size_t)(lend - lp);
++        size_t seqCount = (size_t)(send - sp);
++        const size_t decompressedSize =
++                ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
++        size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
++                                            sp, seqCount,
++                                            lp, litSize,
++                                            llCodePtr, mlCodePtr, ofCodePtr,
++                                            cctxParams,
++                                            op, (size_t)(oend-op),
++                                            bmi2, writeLitEntropy, writeSeqEntropy,
++                                            &litEntropyWritten, &seqEntropyWritten,
++                                            lastBlock);
++        FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
++
++        /* update pointers, the nb of literals borrowed from next sequence must be preserved */
++        if (cSize > 0 && cSize < decompressedSize) {
++            DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes",
++                        (unsigned)decompressedSize, (unsigned)cSize);
++            assert(ip + decompressedSize <= iend);
++            ip += decompressedSize;
++            lp += litSize;
++            op += cSize;
++            llCodePtr += seqCount;
++            mlCodePtr += seqCount;
++            ofCodePtr += seqCount;
++            /* Entropy only needs to be written once */
++            if (litEntropyWritten) {
++                writeLitEntropy = 0;
+             }
++            if (seqEntropyWritten) {
++                writeSeqEntropy = 0;
++            }
++            sp += seqCount;
+         }
+-    } while (!lastSequence);
++    }
++
++
+     if (writeLitEntropy) {
+-        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
++        DEBUGLOG(5, "Literal entropy tables were never written");
+         ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
+     }
+     if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
+         /* If we haven't written our entropy tables, then we've violated our contract and
+          * must emit an uncompressed block.
+          */
+-        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
++        DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block");
+         return 0;
+     }
++
+     if (ip < iend) {
+-        size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
+-        DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
++        /* some data left : last part of the block sent uncompressed */
++        size_t const rSize = (size_t)((iend - ip));
++        size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
++        DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
+         FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+         assert(cSize != 0);
+         op += cSize;
+         /* We have to regenerate the repcodes because we've skipped some sequences */
+         if (sp < send) {
+-            seqDef const* seq;
++            const seqDef* seq;
+             repcodes_t rep;
+             ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
+             for (seq = sstart; seq < sp; ++seq) {
+-                ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
++                ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
+             }
+             ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
+         }
+     }
+-    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
+-    return op-ostart;
++
++    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u",
++                (unsigned)(op-ostart));
++    return (size_t)(op-ostart);
+ }
+ 
+ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
+                                void* dst, size_t dstCapacity,
+-                               void const* src, size_t srcSize,
+-                               unsigned lastBlock) {
++                               const void* src, size_t srcSize,
++                               unsigned lastBlock)
++{
+     ZSTD_entropyCTablesMetadata_t entropyMetadata;
+ 
+     FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
+diff --git a/lib/zstd/compress/zstd_compress_superblock.h b/lib/zstd/compress/zstd_compress_superblock.h
+index 224ece79546e..826bbc9e029b 100644
+--- a/lib/zstd/compress/zstd_compress_superblock.h
++++ b/lib/zstd/compress/zstd_compress_superblock.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+diff --git a/lib/zstd/compress/zstd_cwksp.h b/lib/zstd/compress/zstd_cwksp.h
+index 349fc923c355..86bc3c2c23c7 100644
+--- a/lib/zstd/compress/zstd_cwksp.h
++++ b/lib/zstd/compress/zstd_cwksp.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -14,7 +15,9 @@
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
++#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customFree */
+ #include "../common/zstd_internal.h"
++#include "../common/portability_macros.h"
+ 
+ 
+ /*-*************************************
+@@ -41,8 +44,9 @@
+ ***************************************/
+ typedef enum {
+     ZSTD_cwksp_alloc_objects,
+-    ZSTD_cwksp_alloc_buffers,
+-    ZSTD_cwksp_alloc_aligned
++    ZSTD_cwksp_alloc_aligned_init_once,
++    ZSTD_cwksp_alloc_aligned,
++    ZSTD_cwksp_alloc_buffers
+ } ZSTD_cwksp_alloc_phase_e;
+ 
+ /*
+@@ -95,8 +99,8 @@ typedef enum {
+  *
+  * Workspace Layout:
+  *
+- * [                        ... workspace ...                         ]
+- * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
++ * [                        ... workspace ...                           ]
++ * [objects][tables ->] free space [<- buffers][<- aligned][<- init once]
+  *
+  * The various objects that live in the workspace are divided into the
+  * following categories, and are allocated separately:
+@@ -120,9 +124,18 @@ typedef enum {
+  *   uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
+  *   Their sizes depend on the cparams. These tables are 64-byte aligned.
+  *
+- * - Aligned: these buffers are used for various purposes that require 4 byte
+- *   alignment, but don't require any initialization before they're used. These
+- *   buffers are each aligned to 64 bytes.
++ * - Init once: these buffers must be initialized at least once before
++ *   use. They should be used when we want to skip memory initialization
++ *   while not triggering memory checkers (like Valgrind) when reading
++ *   from this memory without writing to it first.
++ *   These buffers should be used carefully as they might contain data
++ *   from previous compressions.
++ *   Buffers are aligned to 64 bytes.
++ *
++ * - Aligned: these buffers don't require any initialization before they're
++ *   used. The user of the buffer should make sure they write into a buffer
++ *   location before reading from it.
++ *   Buffers are aligned to 64 bytes.
+  *
+  * - Buffers: these buffers are used for various purposes that don't require
+  *   any alignment or initialization before they're used. This means they can
+@@ -134,8 +147,9 @@ typedef enum {
+  * correctly packed into the workspace buffer. That order is:
+  *
+  * 1. Objects
+- * 2. Buffers
+- * 3. Aligned/Tables
++ * 2. Init once / Tables
++ * 3. Aligned / Tables
++ * 4. Buffers / Tables
+  *
+  * Attempts to reserve objects of different types out of order will fail.
+  */
+@@ -147,6 +161,7 @@ typedef struct {
+     void* tableEnd;
+     void* tableValidEnd;
+     void* allocStart;
++    void* initOnceStart;
+ 
+     BYTE allocFailed;
+     int workspaceOversizedDuration;
+@@ -159,6 +174,7 @@ typedef struct {
+ ***************************************/
+ 
+ MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
++MEM_STATIC void*  ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws);
+ 
+ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
+     (void)ws;
+@@ -168,6 +184,8 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
+     assert(ws->tableEnd <= ws->allocStart);
+     assert(ws->tableValidEnd <= ws->allocStart);
+     assert(ws->allocStart <= ws->workspaceEnd);
++    assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws));
++    assert(ws->workspace <= ws->initOnceStart);
+ }
+ 
+ /*
+@@ -210,14 +228,10 @@ MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
+  * for internal purposes (currently only alignment).
+  */
+ MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
+-    /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
+-     * to align the beginning of tables section, as well as another n_2=[0, 63] bytes
+-     * to align the beginning of the aligned section.
+-     *
+-     * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
+-     * aligneds being sized in multiples of 64 bytes.
++    /* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES
++     * bytes to align the beginning of tables section and end of buffers;
+      */
+-    size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
++    size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2;
+     return slackSpace;
+ }
+ 
+@@ -230,10 +244,18 @@ MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignByt
+     size_t const alignBytesMask = alignBytes - 1;
+     size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
+     assert((alignBytes & alignBytesMask) == 0);
+-    assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
++    assert(bytes < alignBytes);
+     return bytes;
+ }
+ 
++/*
++ * Returns the initial value for allocStart which is used to determine the position from
++ * which we can allocate from the end of the workspace.
++ */
++MEM_STATIC void*  ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) {
++    return (void*)((size_t)ws->workspaceEnd & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1));
++}
++
+ /*
+  * Internal function. Do not use directly.
+  * Reserves the given number of bytes within the aligned/buffer segment of the wksp,
+@@ -274,27 +296,16 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
+ {
+     assert(phase >= ws->phase);
+     if (phase > ws->phase) {
+-        /* Going from allocating objects to allocating buffers */
+-        if (ws->phase < ZSTD_cwksp_alloc_buffers &&
+-                phase >= ZSTD_cwksp_alloc_buffers) {
++        /* Going from allocating objects to allocating initOnce / tables */
++        if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once &&
++            phase >= ZSTD_cwksp_alloc_aligned_init_once) {
+             ws->tableValidEnd = ws->objectEnd;
+-        }
++            ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
+ 
+-        /* Going from allocating buffers to allocating aligneds/tables */
+-        if (ws->phase < ZSTD_cwksp_alloc_aligned &&
+-                phase >= ZSTD_cwksp_alloc_aligned) {
+-            {   /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
+-                size_t const bytesToAlign =
+-                    ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
+-                DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
+-                ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
+-                RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
+-                                memory_allocation, "aligned phase - alignment initial allocation failed!");
+-            }
+             {   /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
+-                void* const alloc = ws->objectEnd;
++                void *const alloc = ws->objectEnd;
+                 size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
+-                void* const objectEnd = (BYTE*)alloc + bytesToAlign;
++                void *const objectEnd = (BYTE *) alloc + bytesToAlign;
+                 DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
+                 RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
+                                 "table phase - alignment initial allocation failed!");
+@@ -302,7 +313,9 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
+                 ws->tableEnd = objectEnd;  /* table area starts being empty */
+                 if (ws->tableValidEnd < ws->tableEnd) {
+                     ws->tableValidEnd = ws->tableEnd;
+-        }   }   }
++                }
++            }
++        }
+         ws->phase = phase;
+         ZSTD_cwksp_assert_internal_consistency(ws);
+     }
+@@ -314,7 +327,7 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
+  */
+ MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
+ {
+-    return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
++    return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd);
+ }
+ 
+ /*
+@@ -343,6 +356,33 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
+     return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
+ }
+ 
++/*
++ * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
++ * This memory has been initialized at least once in the past.
++ * This doesn't mean it has been initialized this time, and it might contain data from previous
++ * operations.
++ * The main usage is for algorithms that might need read access into uninitialized memory.
++ * The algorithm must maintain safety under these conditions and must make sure it doesn't
++ * leak any of the past data (directly or in side channels).
++ */
++MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes)
++{
++    size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES);
++    void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once);
++    assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
++    if(ptr && ptr < ws->initOnceStart) {
++        /* We assume the memory following the current allocation is either:
++         * 1. Not usable as initOnce memory (end of workspace)
++         * 2. Another initOnce buffer that has been allocated before (and so was previously memset)
++         * 3. An ASAN redzone, in which case we don't want to write on it
++         * For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart.
++         * Note that we assume here that MSAN and ASAN cannot run at the same time. */
++        ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes));
++        ws->initOnceStart = ptr;
++    }
++    return ptr;
++}
++
+ /*
+  * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
+  */
+@@ -356,18 +396,22 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
+ 
+ /*
+  * Aligned on 64 bytes. These buffers have the special property that
+- * their values remain constrained, allowing us to re-use them without
++ * their values remain constrained, allowing us to reuse them without
+  * memset()-ing them.
+  */
+ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
+ {
+-    const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
++    const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once;
+     void* alloc;
+     void* end;
+     void* top;
+ 
+-    if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
+-        return NULL;
++    /* We can only start allocating tables after we are done reserving space for objects at the
++     * start of the workspace */
++    if(ws->phase < phase) {
++        if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
++            return NULL;
++        }
+     }
+     alloc = ws->tableEnd;
+     end = (BYTE *)alloc + bytes;
+@@ -451,7 +495,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
+     assert(ws->tableValidEnd >= ws->objectEnd);
+     assert(ws->tableValidEnd <= ws->allocStart);
+     if (ws->tableValidEnd < ws->tableEnd) {
+-        ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
++        ZSTD_memset(ws->tableValidEnd, 0, (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd));
+     }
+     ZSTD_cwksp_mark_tables_clean(ws);
+ }
+@@ -478,14 +522,23 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
+ 
+ 
+     ws->tableEnd = ws->objectEnd;
+-    ws->allocStart = ws->workspaceEnd;
++    ws->allocStart = ZSTD_cwksp_initialAllocStart(ws);
+     ws->allocFailed = 0;
+-    if (ws->phase > ZSTD_cwksp_alloc_buffers) {
+-        ws->phase = ZSTD_cwksp_alloc_buffers;
++    if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) {
++        ws->phase = ZSTD_cwksp_alloc_aligned_init_once;
+     }
+     ZSTD_cwksp_assert_internal_consistency(ws);
+ }
+ 
++MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
++    return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
++}
++
++MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
++    return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)  /* bytes from workspace start up to tableEnd */
++         + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);  /* plus bytes from allocStart up to workspaceEnd */
++}
++
+ /*
+  * The provided workspace takes ownership of the buffer [start, start+size).
+  * Any existing values in the workspace are ignored (the previously managed
+@@ -498,6 +551,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_c
+     ws->workspaceEnd = (BYTE*)start + size;
+     ws->objectEnd = ws->workspace;
+     ws->tableValidEnd = ws->objectEnd;
++    ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
+     ws->phase = ZSTD_cwksp_alloc_objects;
+     ws->isStatic = isStatic;
+     ZSTD_cwksp_clear(ws);
+@@ -529,15 +583,6 @@ MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
+     ZSTD_memset(src, 0, sizeof(ZSTD_cwksp));
+ }
+ 
+-MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
+-    return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
+-}
+-
+-MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
+-    return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
+-         + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
+-}
+-
+ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
+     return ws->allocFailed;
+ }
+@@ -550,17 +595,11 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
+  * Returns if the estimated space needed for a wksp is within an acceptable limit of the
+  * actual amount of space used.
+  */
+-MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
+-                                                        size_t const estimatedSpace, int resizedWorkspace) {
+-    if (resizedWorkspace) {
+-        /* Resized/newly allocated wksp should have exact bounds */
+-        return ZSTD_cwksp_used(ws) == estimatedSpace;
+-    } else {
+-        /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
+-         * than estimatedSpace. See the comments in zstd_cwksp.h for details.
+-         */
+-        return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
+-    }
++MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) {
++    /* We have an alignment space between objects and tables, and between tables and buffers, so we can have up to
++     * twice the alignment bytes difference between estimation and actual usage */
++    return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) &&  /* NOTE(review): an unsigned subtraction here would wrap if estimatedSpace < slack -- confirm callers */
++           ZSTD_cwksp_used(ws) <= estimatedSpace;  /* actual usage may not exceed the estimate */
+ }
+ 
+ 
+diff --git a/lib/zstd/compress/zstd_double_fast.c b/lib/zstd/compress/zstd_double_fast.c
+index 76933dea2624..5ff54f17d92f 100644
+--- a/lib/zstd/compress/zstd_double_fast.c
++++ b/lib/zstd/compress/zstd_double_fast.c
+@@ -1,5 +1,6 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -11,8 +12,49 @@
+ #include "zstd_compress_internal.h"
+ #include "zstd_double_fast.h"
+ 
++#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
+ 
+-void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
++static
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
++                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
++{   /* Fill the large (ms->hashTable) and small (ms->chainTable) tables from ms->nextToUpdate onward, storing entries via ZSTD_writeTaggedIndex(). */
++    const ZSTD_compressionParameters* const cParams = &ms->cParams;
++    U32* const hashLarge = ms->hashTable;
++    U32  const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;  /* hash width includes the tag bits; table slot = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS */
++    U32  const mls = cParams->minMatch;
++    U32* const hashSmall = ms->chainTable;
++    U32  const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;  /* same widening for the small table */
++    const BYTE* const base = ms->window.base;
++    const BYTE* ip = base + ms->nextToUpdate;
++    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;  /* loop bound: HASH_READ_SIZE bytes before end */
++    const U32 fastHashFillStep = 3;
++
++    /* Always insert every fastHashFillStep position into the hash tables.
++     * Insert the other positions into the large hash table if their entry
++     * is empty.
++     */
++    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
++        U32 const curr = (U32)(ip - base);
++        U32 i;
++        for (i = 0; i < fastHashFillStep; ++i) {
++            size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
++            size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
++            if (i == 0) {  /* small table: only the first position of each step is written */
++                ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
++            }
++            if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {  /* large table: first position always, later ones only into slots that read 0 */
++                ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
++            }
++            /* Only load extra positions for ZSTD_dtlm_full */
++            if (dtlm == ZSTD_dtlm_fast)
++                break;
++    }   }  /* closes the inner for (i) and the outer for (ip) */
++}
++
++static
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
+                               void const* end, ZSTD_dictTableLoadMethod_e dtlm)
+ {
+     const ZSTD_compressionParameters* const cParams = &ms->cParams;
+@@ -43,11 +85,24 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+             /* Only load extra positions for ZSTD_dtlm_full */
+             if (dtlm == ZSTD_dtlm_fast)
+                 break;
+-    }   }
++        }   }
++}
++
++void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
++                        const void* const end,
++                        ZSTD_dictTableLoadMethod_e dtlm,
++                        ZSTD_tableFillPurpose_e tfp)
++{   /* Dispatch on tfp; ms, end and dtlm are forwarded unchanged to the selected filler. */
++    if (tfp == ZSTD_tfp_forCDict) {
++        ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);  /* CDict variant writes tagged entries */
++    } else {
++        ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);  /* every other tfp value takes the CCtx variant */
++    }
+ }
+ 
+ 
+ FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize, U32 const mls /* template */)
+@@ -67,7 +122,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
+     const BYTE* const iend = istart + srcSize;
+     const BYTE* const ilimit = iend - HASH_READ_SIZE;
+     U32 offset_1=rep[0], offset_2=rep[1];
+-    U32 offsetSaved = 0;
++    U32 offsetSaved1 = 0, offsetSaved2 = 0;
+ 
+     size_t mLength;
+     U32 offset;
+@@ -100,8 +155,8 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
+         U32 const current = (U32)(ip - base);
+         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
+         U32 const maxRep = current - windowLow;
+-        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
+-        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
++        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
++        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
+     }
+ 
+     /* Outer Loop: one iteration per match found and stored */
+@@ -131,7 +186,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
+             if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
+                 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
+                 ip++;
+-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
++                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
+                 goto _match_stored;
+             }
+ 
+@@ -175,9 +230,13 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
+         } while (ip1 <= ilimit);
+ 
+ _cleanup:
++        /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
++         * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
++        offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
++
+         /* save reps for next block */
+-        rep[0] = offset_1 ? offset_1 : offsetSaved;
+-        rep[1] = offset_2 ? offset_2 : offsetSaved;
++        rep[0] = offset_1 ? offset_1 : offsetSaved1;
++        rep[1] = offset_2 ? offset_2 : offsetSaved2;
+ 
+         /* Return the last literals size */
+         return (size_t)(iend - anchor);
+@@ -217,7 +276,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
+             hashLong[hl1] = (U32)(ip1 - base);
+         }
+ 
+-        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
++        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
+ 
+ _match_stored:
+         /* match found */
+@@ -243,7 +302,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
+                 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
+                 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
+                 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
+-                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength);
++                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
+                 ip += rLength;
+                 anchor = ip;
+                 continue;   /* faster when present ... (?) */
+@@ -254,6 +313,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
+ 
+ 
+ FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize,
+@@ -275,7 +335,6 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
+     const BYTE* const iend = istart + srcSize;
+     const BYTE* const ilimit = iend - HASH_READ_SIZE;
+     U32 offset_1=rep[0], offset_2=rep[1];
+-    U32 offsetSaved = 0;
+ 
+     const ZSTD_matchState_t* const dms = ms->dictMatchState;
+     const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
+@@ -286,8 +345,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
+     const BYTE* const dictStart    = dictBase + dictStartIndex;
+     const BYTE* const dictEnd      = dms->window.nextSrc;
+     const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
+-    const U32 dictHBitsL           = dictCParams->hashLog;
+-    const U32 dictHBitsS           = dictCParams->chainLog;
++    const U32 dictHBitsL           = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
++    const U32 dictHBitsS           = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
+     const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
+ 
+     DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
+@@ -295,6 +354,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
+     /* if a dictionary is attached, it must be within window range */
+     assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
+ 
++    if (ms->prefetchCDictTables) {
++        size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
++        size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
++        PREFETCH_AREA(dictHashLong, hashTableBytes);
++        PREFETCH_AREA(dictHashSmall, chainTableBytes);
++    }
++
+     /* init */
+     ip += (dictAndPrefixLength == 0);
+ 
+@@ -309,8 +375,12 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
+         U32 offset;
+         size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
+         size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
+-        size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
+-        size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
++        size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
++        size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
++        U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
++        U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
++        int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
++        int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
+         U32 const curr = (U32)(ip-base);
+         U32 const matchIndexL = hashLong[h2];
+         U32 matchIndexS = hashSmall[h];
+@@ -328,7 +398,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
+             const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+             ip++;
+-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
++            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
+             goto _match_stored;
+         }
+ 
+@@ -340,9 +410,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
+                 while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
+                 goto _match_found;
+             }
+-        } else {
++        } else if (dictTagsMatchL) {
+             /* check dictMatchState long match */
+-            U32 const dictMatchIndexL = dictHashLong[dictHL];
++            U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
+             const BYTE* dictMatchL = dictBase + dictMatchIndexL;
+             assert(dictMatchL < dictEnd);
+ 
+@@ -358,9 +428,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
+             if (MEM_read32(match) == MEM_read32(ip)) {
+                 goto _search_next_long;
+             }
+-        } else {
++        } else if (dictTagsMatchS) {
+             /* check dictMatchState short match */
+-            U32 const dictMatchIndexS = dictHashSmall[dictHS];
++            U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
+             match = dictBase + dictMatchIndexS;
+             matchIndexS = dictMatchIndexS + dictIndexDelta;
+ 
+@@ -375,10 +445,11 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
+         continue;
+ 
+ _search_next_long:
+-
+         {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
+-            size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
++            size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
+             U32 const matchIndexL3 = hashLong[hl3];
++            U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
++            int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
+             const BYTE* matchL3 = base + matchIndexL3;
+             hashLong[hl3] = curr + 1;
+ 
+@@ -391,9 +462,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
+                     while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
+                     goto _match_found;
+                 }
+-            } else {
++            } else if (dictTagsMatchL3) {
+                 /* check dict long +1 match */
+-                U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
++                U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
+                 const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
+                 assert(dictMatchL3 < dictEnd);
+                 if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
+@@ -419,7 +490,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
+         offset_2 = offset_1;
+         offset_1 = offset;
+ 
+-        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
++        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
+ 
+ _match_stored:
+         /* match found */
+@@ -448,7 +519,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
+                     const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
+                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
+                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
++                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
+                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
+                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
+                     ip += repLength2;
+@@ -461,8 +532,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
+     }   /* while (ip < ilimit) */
+ 
+     /* save reps for next block */
+-    rep[0] = offset_1 ? offset_1 : offsetSaved;
+-    rep[1] = offset_2 ? offset_2 : offsetSaved;
++    rep[0] = offset_1;
++    rep[1] = offset_2;
+ 
+     /* Return the last literals size */
+     return (size_t)(iend - anchor);
+@@ -527,7 +598,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
+ }
+ 
+ 
+-static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
++static
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++size_t ZSTD_compressBlock_doubleFast_extDict_generic(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize,
+         U32 const mls /* template */)
+@@ -585,7 +658,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
+             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
+             ip++;
+-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
++            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
+         } else {
+             if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
+                 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
+@@ -596,7 +669,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
+                 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
+                 offset_2 = offset_1;
+                 offset_1 = offset;
+-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
++                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
+ 
+             } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
+                 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
+@@ -621,7 +694,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
+                 }
+                 offset_2 = offset_1;
+                 offset_1 = offset;
+-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
++                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
+ 
+             } else {
+                 ip += ((ip-anchor) >> kSearchStrength) + 1;
+@@ -653,7 +726,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
+                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+                     U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
++                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
+                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
+                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
+                     ip += repLength2;
+@@ -694,3 +767,5 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
+         return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
+     }
+ }
++
++#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
+diff --git a/lib/zstd/compress/zstd_double_fast.h b/lib/zstd/compress/zstd_double_fast.h
+index 6822bde65a1d..b7ddc714f13e 100644
+--- a/lib/zstd/compress/zstd_double_fast.h
++++ b/lib/zstd/compress/zstd_double_fast.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -15,8 +16,12 @@
+ #include "../common/mem.h"      /* U32 */
+ #include "zstd_compress_internal.h"     /* ZSTD_CCtx, size_t */
+ 
++#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
++
+ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+-                              void const* end, ZSTD_dictTableLoadMethod_e dtlm);
++                              void const* end, ZSTD_dictTableLoadMethod_e dtlm,
++                              ZSTD_tableFillPurpose_e tfp);
++
+ size_t ZSTD_compressBlock_doubleFast(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+@@ -27,6 +32,14 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+ 
++#define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast
++#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE ZSTD_compressBlock_doubleFast_dictMatchState
++#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT ZSTD_compressBlock_doubleFast_extDict
++#else
++#define ZSTD_COMPRESSBLOCK_DOUBLEFAST NULL
++#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE NULL
++#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL
++#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
+ 
+ 
+ #endif /* ZSTD_DOUBLE_FAST_H */
+diff --git a/lib/zstd/compress/zstd_fast.c b/lib/zstd/compress/zstd_fast.c
+index a752e6beab52..b7a63ba4ce56 100644
+--- a/lib/zstd/compress/zstd_fast.c
++++ b/lib/zstd/compress/zstd_fast.c
+@@ -1,5 +1,6 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -11,8 +12,46 @@
+ #include "zstd_compress_internal.h"  /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
+ #include "zstd_fast.h"
+ 
++static
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
++                        const void* const end,
++                        ZSTD_dictTableLoadMethod_e dtlm)
++{   /* Fill ms->hashTable from ms->nextToUpdate onward, storing entries via ZSTD_writeTaggedIndex(). */
++    const ZSTD_compressionParameters* const cParams = &ms->cParams;
++    U32* const hashTable = ms->hashTable;
++    U32  const hBits = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;  /* hash width includes the tag bits; table slot = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS */
++    U32  const mls = cParams->minMatch;
++    const BYTE* const base = ms->window.base;
++    const BYTE* ip = base + ms->nextToUpdate;
++    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;  /* loop bound: HASH_READ_SIZE bytes before end */
++    const U32 fastHashFillStep = 3;
+ 
+-void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
++    /* Currently, we always use ZSTD_dtlm_full for filling CDict tables.
++     * Feel free to remove this assert if there's a good reason! */
++    assert(dtlm == ZSTD_dtlm_full);
++
++    /* Always insert every fastHashFillStep position into the hash table.
++     * Insert the other positions if their hash entry is empty.
++     */
++    for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
++        U32 const curr = (U32)(ip - base);
++        {   size_t const hashAndTag = ZSTD_hashPtr(ip, hBits, mls);
++            ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr);   }
++
++        if (dtlm == ZSTD_dtlm_fast) continue;  /* only taken if the assert above is compiled out or relaxed */
++        /* Only load extra positions for ZSTD_dtlm_full */
++        {   U32 p;
++            for (p = 1; p < fastHashFillStep; ++p) {
++                size_t const hashAndTag = ZSTD_hashPtr(ip + p, hBits, mls);
++                if (hashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {  /* not yet filled */
++                    ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr + p);
++                }   }   }   }
++}
++
++static
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
+                         const void* const end,
+                         ZSTD_dictTableLoadMethod_e dtlm)
+ {
+@@ -25,6 +64,10 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+     const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
+     const U32 fastHashFillStep = 3;
+ 
++    /* Currently, we always use ZSTD_dtlm_fast for filling CCtx tables.
++     * Feel free to remove this assert if there's a good reason! */
++    assert(dtlm == ZSTD_dtlm_fast);
++
+     /* Always insert every fastHashFillStep position into the hash table.
+      * Insert the other positions if their hash entry is empty.
+      */
+@@ -42,6 +85,18 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+     }   }   }   }
+ }
+ 
++void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
++                        const void* const end,
++                        ZSTD_dictTableLoadMethod_e dtlm,
++                        ZSTD_tableFillPurpose_e tfp)
++{   /* Dispatch on tfp; ms, end and dtlm are forwarded unchanged to the selected filler. */
++    if (tfp == ZSTD_tfp_forCDict) {
++        ZSTD_fillHashTableForCDict(ms, end, dtlm);  /* CDict variant writes tagged entries and asserts dtlm == ZSTD_dtlm_full */
++    } else {
++        ZSTD_fillHashTableForCCtx(ms, end, dtlm);  /* every other tfp value takes the CCtx variant, which asserts dtlm == ZSTD_dtlm_fast */
++    }
++}
++
+ 
+ /*
+  * If you squint hard enough (and ignore repcodes), the search operation at any
+@@ -89,8 +144,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+  *
+  * This is also the work we do at the beginning to enter the loop initially.
+  */
+-FORCE_INLINE_TEMPLATE size_t
+-ZSTD_compressBlock_fast_noDict_generic(
++FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++size_t ZSTD_compressBlock_fast_noDict_generic(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize,
+         U32 const mls, U32 const hasStep)
+@@ -117,7 +173,7 @@ ZSTD_compressBlock_fast_noDict_generic(
+ 
+     U32 rep_offset1 = rep[0];
+     U32 rep_offset2 = rep[1];
+-    U32 offsetSaved = 0;
++    U32 offsetSaved1 = 0, offsetSaved2 = 0;
+ 
+     size_t hash0; /* hash for ip0 */
+     size_t hash1; /* hash for ip1 */
+@@ -141,8 +197,8 @@ ZSTD_compressBlock_fast_noDict_generic(
+     {   U32 const curr = (U32)(ip0 - base);
+         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
+         U32 const maxRep = curr - windowLow;
+-        if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
+-        if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
++        if (rep_offset2 > maxRep) offsetSaved2 = rep_offset2, rep_offset2 = 0;
++        if (rep_offset1 > maxRep) offsetSaved1 = rep_offset1, rep_offset1 = 0;
+     }
+ 
+     /* start each op */
+@@ -180,8 +236,14 @@ ZSTD_compressBlock_fast_noDict_generic(
+             mLength = ip0[-1] == match0[-1];
+             ip0 -= mLength;
+             match0 -= mLength;
+-            offcode = STORE_REPCODE_1;
++            offcode = REPCODE1_TO_OFFBASE;
+             mLength += 4;
++
++            /* First write next hash table entry; we've already calculated it.
++             * This write is known to be safe because the ip1 is before the
++             * repcode (ip2). */
++            hashTable[hash1] = (U32)(ip1 - base);
++
+             goto _match;
+         }
+ 
+@@ -195,6 +257,12 @@ ZSTD_compressBlock_fast_noDict_generic(
+         /* check match at ip[0] */
+         if (MEM_read32(ip0) == mval) {
+             /* found a match! */
++
++            /* First write next hash table entry; we've already calculated it.
++             * This write is known to be safe because the ip1 == ip0 + 1, so
++             * we know we will resume searching after ip1 */
++            hashTable[hash1] = (U32)(ip1 - base);
++
+             goto _offset;
+         }
+ 
+@@ -224,6 +292,21 @@ ZSTD_compressBlock_fast_noDict_generic(
+         /* check match at ip[0] */
+         if (MEM_read32(ip0) == mval) {
+             /* found a match! */
++
++            /* first write next hash table entry; we've already calculated it */
++            if (step <= 4) {
++                /* We need to avoid writing an index into the hash table >= the
++                 * position at which we will pick up our searching after we've
++                 * taken this match.
++                 *
++                 * The minimum possible match has length 4, so the earliest ip0
++                 * can be after we take this match will be the current ip0 + 4.
++                 * ip1 is ip0 + step - 1. If ip1 is >= ip0 + 4, we can't safely
++                 * write this position.
++                 */
++                hashTable[hash1] = (U32)(ip1 - base);
++            }
++
+             goto _offset;
+         }
+ 
+@@ -254,9 +337,24 @@ ZSTD_compressBlock_fast_noDict_generic(
+      * However, it seems to be a meaningful performance hit to try to search
+      * them. So let's not. */
+ 
++    /* When the repcodes are outside of the prefix, we set them to zero before the loop.
++     * When the offsets are still zero, we need to restore them after the block to have a correct
++     * repcode history. If only one offset was invalid, it is easy. The tricky case is when both
++     * offsets were invalid. We need to figure out which offset to refill with.
++     *     - If both offsets are zero they are in the same order.
++     *     - If both offsets are non-zero, we won't restore the offsets from `offsetSaved[12]`.
++     *     - If only one is zero, we need to decide which offset to restore.
++     *         - If rep_offset1 is non-zero, then rep_offset2 must be offsetSaved1.
++     *         - It is impossible for rep_offset2 to be non-zero.
++     *
++     * So if rep_offset1 started invalid (offsetSaved1 != 0) and became valid (rep_offset1 != 0), then
++     * set rep[0] = rep_offset1 and rep[1] = offsetSaved1.
++     */
++    offsetSaved2 = ((offsetSaved1 != 0) && (rep_offset1 != 0)) ? offsetSaved1 : offsetSaved2;
++
+     /* save reps for next block */
+-    rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
+-    rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;
++    rep[0] = rep_offset1 ? rep_offset1 : offsetSaved1;
++    rep[1] = rep_offset2 ? rep_offset2 : offsetSaved2;
+ 
+     /* Return the last literals size */
+     return (size_t)(iend - anchor);
+@@ -267,7 +365,7 @@ ZSTD_compressBlock_fast_noDict_generic(
+     match0 = base + idx;
+     rep_offset2 = rep_offset1;
+     rep_offset1 = (U32)(ip0-match0);
+-    offcode = STORE_OFFSET(rep_offset1);
++    offcode = OFFSET_TO_OFFBASE(rep_offset1);
+     mLength = 4;
+ 
+     /* Count the backwards match length. */
+@@ -287,11 +385,6 @@ ZSTD_compressBlock_fast_noDict_generic(
+     ip0 += mLength;
+     anchor = ip0;
+ 
+-    /* write next hash table entry */
+-    if (ip1 < ip0) {
+-        hashTable[hash1] = (U32)(ip1 - base);
+-    }
+-
+     /* Fill table and check for immediate repcode. */
+     if (ip0 <= ilimit) {
+         /* Fill Table */
+@@ -306,7 +399,7 @@ ZSTD_compressBlock_fast_noDict_generic(
+                 { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
+                 hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
+                 ip0 += rLength;
+-                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength);
++                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
+                 anchor = ip0;
+                 continue;   /* faster when present (confirmed on gcc-8) ... (?) */
+     }   }   }
+@@ -369,6 +462,7 @@ size_t ZSTD_compressBlock_fast(
+ }
+ 
+ FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
+@@ -380,14 +474,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
+     U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
+     const BYTE* const base = ms->window.base;
+     const BYTE* const istart = (const BYTE*)src;
+-    const BYTE* ip = istart;
++    const BYTE* ip0 = istart;
++    const BYTE* ip1 = ip0 + stepSize; /* we assert below that stepSize >= 1 */
+     const BYTE* anchor = istart;
+     const U32   prefixStartIndex = ms->window.dictLimit;
+     const BYTE* const prefixStart = base + prefixStartIndex;
+     const BYTE* const iend = istart + srcSize;
+     const BYTE* const ilimit = iend - HASH_READ_SIZE;
+     U32 offset_1=rep[0], offset_2=rep[1];
+-    U32 offsetSaved = 0;
+ 
+     const ZSTD_matchState_t* const dms = ms->dictMatchState;
+     const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
+@@ -397,13 +491,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
+     const BYTE* const dictStart    = dictBase + dictStartIndex;
+     const BYTE* const dictEnd      = dms->window.nextSrc;
+     const U32 dictIndexDelta       = prefixStartIndex - (U32)(dictEnd - dictBase);
+-    const U32 dictAndPrefixLength  = (U32)(ip - prefixStart + dictEnd - dictStart);
+-    const U32 dictHLog             = dictCParams->hashLog;
++    const U32 dictAndPrefixLength  = (U32)(istart - prefixStart + dictEnd - dictStart);
++    const U32 dictHBits            = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
+ 
+     /* if a dictionary is still attached, it necessarily means that
+      * it is within window size. So we just check it. */
+     const U32 maxDistance = 1U << cParams->windowLog;
+-    const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
++    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
+     assert(endIndex - prefixStartIndex <= maxDistance);
+     (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */
+ 
+@@ -413,106 +507,155 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
+      * when translating a dict index into a local index */
+     assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
+ 
++    if (ms->prefetchCDictTables) {
++        size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
++        PREFETCH_AREA(dictHashTable, hashTableBytes);
++    }
++
+     /* init */
+     DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
+-    ip += (dictAndPrefixLength == 0);
++    ip0 += (dictAndPrefixLength == 0);
+     /* dictMatchState repCode checks don't currently handle repCode == 0
+      * disabling. */
+     assert(offset_1 <= dictAndPrefixLength);
+     assert(offset_2 <= dictAndPrefixLength);
+ 
+-    /* Main Search Loop */
+-    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
++    /* Outer search loop */
++    assert(stepSize >= 1);
++    while (ip1 <= ilimit) {   /* repcode check at (ip0 + 1) is safe because ip0 < ip1 */
+         size_t mLength;
+-        size_t const h = ZSTD_hashPtr(ip, hlog, mls);
+-        U32 const curr = (U32)(ip-base);
+-        U32 const matchIndex = hashTable[h];
+-        const BYTE* match = base + matchIndex;
+-        const U32 repIndex = curr + 1 - offset_1;
+-        const BYTE* repMatch = (repIndex < prefixStartIndex) ?
+-                               dictBase + (repIndex - dictIndexDelta) :
+-                               base + repIndex;
+-        hashTable[h] = curr;   /* update hash table */
+-
+-        if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
+-          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+-            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+-            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
+-            ip++;
+-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
+-        } else if ( (matchIndex <= prefixStartIndex) ) {
+-            size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
+-            U32 const dictMatchIndex = dictHashTable[dictHash];
+-            const BYTE* dictMatch = dictBase + dictMatchIndex;
+-            if (dictMatchIndex <= dictStartIndex ||
+-                MEM_read32(dictMatch) != MEM_read32(ip)) {
+-                assert(stepSize >= 1);
+-                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
+-                continue;
+-            } else {
+-                /* found a dict match */
+-                U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
+-                mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
+-                while (((ip>anchor) & (dictMatch>dictStart))
+-                     && (ip[-1] == dictMatch[-1])) {
+-                    ip--; dictMatch--; mLength++;
++        size_t hash0 = ZSTD_hashPtr(ip0, hlog, mls);
++
++        size_t const dictHashAndTag0 = ZSTD_hashPtr(ip0, dictHBits, mls);
++        U32 dictMatchIndexAndTag = dictHashTable[dictHashAndTag0 >> ZSTD_SHORT_CACHE_TAG_BITS];
++        int dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag0);
++
++        U32 matchIndex = hashTable[hash0];
++        U32 curr = (U32)(ip0 - base);
++        size_t step = stepSize;
++        const size_t kStepIncr = 1 << kSearchStrength;
++        const BYTE* nextStep = ip0 + kStepIncr;
++
++        /* Inner search loop */
++        while (1) {
++            const BYTE* match = base + matchIndex;
++            const U32 repIndex = curr + 1 - offset_1;
++            const BYTE* repMatch = (repIndex < prefixStartIndex) ?
++                                   dictBase + (repIndex - dictIndexDelta) :
++                                   base + repIndex;
++            const size_t hash1 = ZSTD_hashPtr(ip1, hlog, mls);
++            size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls);
++            hashTable[hash0] = curr;   /* update hash table */
++
++            if (((U32) ((prefixStartIndex - 1) - repIndex) >=
++                 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
++                && (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) {
++                const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
++                mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4;
++                ip0++;
++                ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
++                break;
++            }
++
++            if (dictTagsMatch) {
++                /* Found a possible dict match */
++                const U32 dictMatchIndex = dictMatchIndexAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
++                const BYTE* dictMatch = dictBase + dictMatchIndex;
++                if (dictMatchIndex > dictStartIndex &&
++                    MEM_read32(dictMatch) == MEM_read32(ip0)) {
++                    /* To replicate extDict parse behavior, we only use dict matches when the normal matchIndex is invalid */
++                    if (matchIndex <= prefixStartIndex) {
++                        U32 const offset = (U32) (curr - dictMatchIndex - dictIndexDelta);
++                        mLength = ZSTD_count_2segments(ip0 + 4, dictMatch + 4, iend, dictEnd, prefixStart) + 4;
++                        while (((ip0 > anchor) & (dictMatch > dictStart))
++                            && (ip0[-1] == dictMatch[-1])) {
++                            ip0--;
++                            dictMatch--;
++                            mLength++;
++                        } /* catch up */
++                        offset_2 = offset_1;
++                        offset_1 = offset;
++                        ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
++                        break;
++                    }
++                }
++            }
++
++            if (matchIndex > prefixStartIndex && MEM_read32(match) == MEM_read32(ip0)) {
++                /* found a regular match */
++                U32 const offset = (U32) (ip0 - match);
++                mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4;
++                while (((ip0 > anchor) & (match > prefixStart))
++                       && (ip0[-1] == match[-1])) {
++                    ip0--;
++                    match--;
++                    mLength++;
+                 } /* catch up */
+                 offset_2 = offset_1;
+                 offset_1 = offset;
+-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
++                ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
++                break;
+             }
+-        } else if (MEM_read32(match) != MEM_read32(ip)) {
+-            /* it's not a match, and we're not going to check the dictionary */
+-            assert(stepSize >= 1);
+-            ip += ((ip-anchor) >> kSearchStrength) + stepSize;
+-            continue;
+-        } else {
+-            /* found a regular match */
+-            U32 const offset = (U32)(ip-match);
+-            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
+-            while (((ip>anchor) & (match>prefixStart))
+-                 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+-            offset_2 = offset_1;
+-            offset_1 = offset;
+-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
+-        }
++
++            /* Prepare for next iteration */
++            dictMatchIndexAndTag = dictHashTable[dictHashAndTag1 >> ZSTD_SHORT_CACHE_TAG_BITS];
++            dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag1);
++            matchIndex = hashTable[hash1];
++
++            if (ip1 >= nextStep) {
++                step++;
++                nextStep += kStepIncr;
++            }
++            ip0 = ip1;
++            ip1 = ip1 + step;
++            if (ip1 > ilimit) goto _cleanup;
++
++            curr = (U32)(ip0 - base);
++            hash0 = hash1;
++        }   /* end inner search loop */
+ 
+         /* match found */
+-        ip += mLength;
+-        anchor = ip;
++        assert(mLength);
++        ip0 += mLength;
++        anchor = ip0;
+ 
+-        if (ip <= ilimit) {
++        if (ip0 <= ilimit) {
+             /* Fill Table */
+             assert(base+curr+2 > istart);  /* check base overflow */
+             hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;  /* here because curr+2 could be > iend-8 */
+-            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
++            hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
+ 
+             /* check immediate repcode */
+-            while (ip <= ilimit) {
+-                U32 const current2 = (U32)(ip-base);
++            while (ip0 <= ilimit) {
++                U32 const current2 = (U32)(ip0-base);
+                 U32 const repIndex2 = current2 - offset_2;
+                 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
+                         dictBase - dictIndexDelta + repIndex2 :
+                         base + repIndex2;
+                 if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
+-                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
++                   && (MEM_read32(repMatch2) == MEM_read32(ip0))) {
+                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+-                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
++                    size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
+-                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
+-                    ip += repLength2;
+-                    anchor = ip;
++                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
++                    hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = current2;
++                    ip0 += repLength2;
++                    anchor = ip0;
+                     continue;
+                 }
+                 break;
+             }
+         }
++
++        /* Prepare for next iteration */
++        assert(ip0 == anchor);
++        ip1 = ip0 + stepSize;
+     }
+ 
++_cleanup:
+     /* save reps for next block */
+-    rep[0] = offset_1 ? offset_1 : offsetSaved;
+-    rep[1] = offset_2 ? offset_2 : offsetSaved;
++    rep[0] = offset_1;
++    rep[1] = offset_2;
+ 
+     /* Return the last literals size */
+     return (size_t)(iend - anchor);
+@@ -545,7 +688,9 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
+ }
+ 
+ 
+-static size_t ZSTD_compressBlock_fast_extDict_generic(
++static
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++size_t ZSTD_compressBlock_fast_extDict_generic(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
+ {
+@@ -553,11 +698,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
+     U32* const hashTable = ms->hashTable;
+     U32 const hlog = cParams->hashLog;
+     /* support stepSize of 0 */
+-    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
++    size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
+     const BYTE* const base = ms->window.base;
+     const BYTE* const dictBase = ms->window.dictBase;
+     const BYTE* const istart = (const BYTE*)src;
+-    const BYTE* ip = istart;
+     const BYTE* anchor = istart;
+     const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+     const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
+@@ -570,6 +714,28 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
+     const BYTE* const iend = istart + srcSize;
+     const BYTE* const ilimit = iend - 8;
+     U32 offset_1=rep[0], offset_2=rep[1];
++    U32 offsetSaved1 = 0, offsetSaved2 = 0;
++
++    const BYTE* ip0 = istart;
++    const BYTE* ip1;
++    const BYTE* ip2;
++    const BYTE* ip3;
++    U32 current0;
++
++
++    size_t hash0; /* hash for ip0 */
++    size_t hash1; /* hash for ip1 */
++    U32 idx; /* match idx for ip0 */
++    const BYTE* idxBase; /* base pointer for idx */
++
++    U32 offcode;
++    const BYTE* match0;
++    size_t mLength;
++    const BYTE* matchEnd = 0; /* initialize to avoid warning, assert != 0 later */
++
++    size_t step;
++    const BYTE* nextStep;
++    const size_t kStepIncr = (1 << (kSearchStrength - 1));
+ 
+     (void)hasStep; /* not currently specialized on whether it's accelerated */
+ 
+@@ -579,75 +745,202 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
+     if (prefixStartIndex == dictStartIndex)
+         return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
+ 
+-    /* Search Loop */
+-    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
+-        const size_t h = ZSTD_hashPtr(ip, hlog, mls);
+-        const U32    matchIndex = hashTable[h];
+-        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
+-        const BYTE*  match = matchBase + matchIndex;
+-        const U32    curr = (U32)(ip-base);
+-        const U32    repIndex = curr + 1 - offset_1;
+-        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
+-        const BYTE* const repMatch = repBase + repIndex;
+-        hashTable[h] = curr;   /* update hash table */
+-        DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
+-
+-        if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
+-             & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
+-           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+-            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+-            size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
+-            ip++;
+-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength);
+-            ip += rLength;
+-            anchor = ip;
+-        } else {
+-            if ( (matchIndex < dictStartIndex) ||
+-                 (MEM_read32(match) != MEM_read32(ip)) ) {
+-                assert(stepSize >= 1);
+-                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
+-                continue;
++    {   U32 const curr = (U32)(ip0 - base);
++        U32 const maxRep = curr - dictStartIndex;
++        if (offset_2 >= maxRep) offsetSaved2 = offset_2, offset_2 = 0;
++        if (offset_1 >= maxRep) offsetSaved1 = offset_1, offset_1 = 0;
++    }
++
++    /* start each op */
++_start: /* Requires: ip0 */
++
++    step = stepSize;
++    nextStep = ip0 + kStepIncr;
++
++    /* calculate positions, ip0 - anchor == 0, so we skip step calc */
++    ip1 = ip0 + 1;
++    ip2 = ip0 + step;
++    ip3 = ip2 + 1;
++
++    if (ip3 >= ilimit) {
++        goto _cleanup;
++    }
++
++    hash0 = ZSTD_hashPtr(ip0, hlog, mls);
++    hash1 = ZSTD_hashPtr(ip1, hlog, mls);
++
++    idx = hashTable[hash0];
++    idxBase = idx < prefixStartIndex ? dictBase : base;
++
++    do {
++        {   /* load repcode match for ip[2] */
++            U32 const current2 = (U32)(ip2 - base);
++            U32 const repIndex = current2 - offset_1;
++            const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
++            U32 rval;
++            if ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */
++                 & (offset_1 > 0) ) {
++                rval = MEM_read32(repBase + repIndex);
++            } else {
++                rval = MEM_read32(ip2) ^ 1; /* guaranteed to not match. */
+             }
+-            {   const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
+-                const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
+-                U32 const offset = curr - matchIndex;
+-                size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
+-                while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
+-                offset_2 = offset_1; offset_1 = offset;  /* update offset history */
+-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
+-                ip += mLength;
+-                anchor = ip;
++
++            /* write back hash table entry */
++            current0 = (U32)(ip0 - base);
++            hashTable[hash0] = current0;
++
++            /* check repcode at ip[2] */
++            if (MEM_read32(ip2) == rval) {
++                ip0 = ip2;
++                match0 = repBase + repIndex;
++                matchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
++                assert((match0 != prefixStart) & (match0 != dictStart));
++                mLength = ip0[-1] == match0[-1];
++                ip0 -= mLength;
++                match0 -= mLength;
++                offcode = REPCODE1_TO_OFFBASE;
++                mLength += 4;
++                goto _match;
+         }   }
+ 
+-        if (ip <= ilimit) {
+-            /* Fill Table */
+-            hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
+-            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
+-            /* check immediate repcode */
+-            while (ip <= ilimit) {
+-                U32 const current2 = (U32)(ip-base);
+-                U32 const repIndex2 = current2 - offset_2;
+-                const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
+-                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex))  /* intentional overflow */
+-                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+-                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+-                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+-                    { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
+-                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
+-                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
+-                    ip += repLength2;
+-                    anchor = ip;
+-                    continue;
+-                }
+-                break;
+-    }   }   }
++        {   /* load match for ip[0] */
++            U32 const mval = idx >= dictStartIndex ?
++                    MEM_read32(idxBase + idx) :
++                    MEM_read32(ip0) ^ 1; /* guaranteed not to match */
++
++            /* check match at ip[0] */
++            if (MEM_read32(ip0) == mval) {
++                /* found a match! */
++                goto _offset;
++        }   }
++
++        /* lookup ip[1] */
++        idx = hashTable[hash1];
++        idxBase = idx < prefixStartIndex ? dictBase : base;
++
++        /* hash ip[2] */
++        hash0 = hash1;
++        hash1 = ZSTD_hashPtr(ip2, hlog, mls);
++
++        /* advance to next positions */
++        ip0 = ip1;
++        ip1 = ip2;
++        ip2 = ip3;
++
++        /* write back hash table entry */
++        current0 = (U32)(ip0 - base);
++        hashTable[hash0] = current0;
++
++        {   /* load match for ip[0] */
++            U32 const mval = idx >= dictStartIndex ?
++                    MEM_read32(idxBase + idx) :
++                    MEM_read32(ip0) ^ 1; /* guaranteed not to match */
++
++            /* check match at ip[0] */
++            if (MEM_read32(ip0) == mval) {
++                /* found a match! */
++                goto _offset;
++        }   }
++
++        /* lookup ip[1] */
++        idx = hashTable[hash1];
++        idxBase = idx < prefixStartIndex ? dictBase : base;
++
++        /* hash ip[2] */
++        hash0 = hash1;
++        hash1 = ZSTD_hashPtr(ip2, hlog, mls);
++
++        /* advance to next positions */
++        ip0 = ip1;
++        ip1 = ip2;
++        ip2 = ip0 + step;
++        ip3 = ip1 + step;
++
++        /* calculate step */
++        if (ip2 >= nextStep) {
++            step++;
++            PREFETCH_L1(ip1 + 64);
++            PREFETCH_L1(ip1 + 128);
++            nextStep += kStepIncr;
++        }
++    } while (ip3 < ilimit);
++
++_cleanup:
++    /* Note that there are probably still a couple positions we could search.
++     * However, it seems to be a meaningful performance hit to try to search
++     * them. So let's not. */
++
++    /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
++     * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict_generic for more context. */
++    offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
+ 
+     /* save reps for next block */
+-    rep[0] = offset_1;
+-    rep[1] = offset_2;
++    rep[0] = offset_1 ? offset_1 : offsetSaved1;
++    rep[1] = offset_2 ? offset_2 : offsetSaved2;
+ 
+     /* Return the last literals size */
+     return (size_t)(iend - anchor);
++
++_offset: /* Requires: ip0, idx, idxBase */
++
++    /* Compute the offset code. */
++    {   U32 const offset = current0 - idx;
++        const BYTE* const lowMatchPtr = idx < prefixStartIndex ? dictStart : prefixStart;
++        matchEnd = idx < prefixStartIndex ? dictEnd : iend;
++        match0 = idxBase + idx;
++        offset_2 = offset_1;
++        offset_1 = offset;
++        offcode = OFFSET_TO_OFFBASE(offset);
++        mLength = 4;
++
++        /* Count the backwards match length. */
++        while (((ip0>anchor) & (match0>lowMatchPtr)) && (ip0[-1] == match0[-1])) {
++            ip0--;
++            match0--;
++            mLength++;
++    }   }
++
++_match: /* Requires: ip0, match0, offcode, matchEnd */
++
++    /* Count the forward length. */
++    assert(matchEnd != 0);
++    mLength += ZSTD_count_2segments(ip0 + mLength, match0 + mLength, iend, matchEnd, prefixStart);
++
++    ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
++
++    ip0 += mLength;
++    anchor = ip0;
++
++    /* write next hash table entry */
++    if (ip1 < ip0) {
++        hashTable[hash1] = (U32)(ip1 - base);
++    }
++
++    /* Fill table and check for immediate repcode. */
++    if (ip0 <= ilimit) {
++        /* Fill Table */
++        assert(base+current0+2 > istart);  /* check base overflow */
++        hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
++        hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
++
++        while (ip0 <= ilimit) {
++            U32 const repIndex2 = (U32)(ip0-base) - offset_2;
++            const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
++            if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 > 0))  /* intentional underflow */
++                 && (MEM_read32(repMatch2) == MEM_read32(ip0)) ) {
++                const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
++                size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
++                { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
++                ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
++                hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
++                ip0 += repLength2;
++                anchor = ip0;
++                continue;
++            }
++            break;
++    }   }
++
++    goto _start;
+ }
+ 
+ ZSTD_GEN_FAST_FN(extDict, 4, 0)
+@@ -660,6 +953,7 @@ size_t ZSTD_compressBlock_fast_extDict(
+         void const* src, size_t srcSize)
+ {
+     U32 const mls = ms->cParams.minMatch;
++    assert(ms->dictMatchState == NULL);
+     switch(mls)
+     {
+     default: /* includes case 3 */
+diff --git a/lib/zstd/compress/zstd_fast.h b/lib/zstd/compress/zstd_fast.h
+index fddc2f532d21..e64d9e1b2d39 100644
+--- a/lib/zstd/compress/zstd_fast.h
++++ b/lib/zstd/compress/zstd_fast.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -16,7 +17,8 @@
+ #include "zstd_compress_internal.h"
+ 
+ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+-                        void const* end, ZSTD_dictTableLoadMethod_e dtlm);
++                        void const* end, ZSTD_dictTableLoadMethod_e dtlm,
++                        ZSTD_tableFillPurpose_e tfp);
+ size_t ZSTD_compressBlock_fast(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+diff --git a/lib/zstd/compress/zstd_lazy.c b/lib/zstd/compress/zstd_lazy.c
+index 0298a01a7504..3e88d8a1a136 100644
+--- a/lib/zstd/compress/zstd_lazy.c
++++ b/lib/zstd/compress/zstd_lazy.c
+@@ -1,5 +1,6 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -10,14 +11,23 @@
+ 
+ #include "zstd_compress_internal.h"
+ #include "zstd_lazy.h"
++#include "../common/bits.h" /* ZSTD_countTrailingZeros64 */
++
++#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
++
++#define kLazySkippingStep 8
+ 
+ 
+ /*-*************************************
+ *  Binary Tree search
+ ***************************************/
+ 
+-static void
+-ZSTD_updateDUBT(ZSTD_matchState_t* ms,
++static
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++void ZSTD_updateDUBT(ZSTD_matchState_t* ms,
+                 const BYTE* ip, const BYTE* iend,
+                 U32 mls)
+ {
+@@ -60,8 +70,9 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
+  *  sort one already inserted but unsorted position
+  *  assumption : curr >= btlow == (curr - btmask)
+  *  doesn't fail */
+-static void
+-ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
++static
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++void ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
+                  U32 curr, const BYTE* inputEnd,
+                  U32 nbCompares, U32 btLow,
+                  const ZSTD_dictMode_e dictMode)
+@@ -149,8 +160,9 @@ ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
+ }
+ 
+ 
+-static size_t
+-ZSTD_DUBT_findBetterDictMatch (
++static
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++size_t ZSTD_DUBT_findBetterDictMatch (
+         const ZSTD_matchState_t* ms,
+         const BYTE* const ip, const BYTE* const iend,
+         size_t* offsetPtr,
+@@ -197,8 +209,8 @@ ZSTD_DUBT_findBetterDictMatch (
+             U32 matchIndex = dictMatchIndex + dictIndexDelta;
+             if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
+                 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
+-                    curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, STORE_OFFSET(curr - matchIndex), dictMatchIndex, matchIndex);
+-                bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex);
++                    curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, OFFSET_TO_OFFBASE(curr - matchIndex), dictMatchIndex, matchIndex);
++                bestLength = matchLength, *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
+             }
+             if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
+                 break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+@@ -218,7 +230,7 @@ ZSTD_DUBT_findBetterDictMatch (
+     }
+ 
+     if (bestLength >= MINMATCH) {
+-        U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex;
++        U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offsetPtr); (void)mIndex;
+         DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
+                     curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
+     }
+@@ -227,10 +239,11 @@ ZSTD_DUBT_findBetterDictMatch (
+ }
+ 
+ 
+-static size_t
+-ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
++static
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++size_t ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
+                         const BYTE* const ip, const BYTE* const iend,
+-                        size_t* offsetPtr,
++                        size_t* offBasePtr,
+                         U32 const mls,
+                         const ZSTD_dictMode_e dictMode)
+ {
+@@ -327,8 +340,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
+             if (matchLength > bestLength) {
+                 if (matchLength > matchEndIdx - matchIndex)
+                     matchEndIdx = matchIndex + (U32)matchLength;
+-                if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
+-                    bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex);
++                if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)*offBasePtr)) )
++                    bestLength = matchLength, *offBasePtr = OFFSET_TO_OFFBASE(curr - matchIndex);
+                 if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
+                     if (dictMode == ZSTD_dictMatchState) {
+                         nbCompares = 0; /* in addition to avoiding checking any
+@@ -361,16 +374,16 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
+         if (dictMode == ZSTD_dictMatchState && nbCompares) {
+             bestLength = ZSTD_DUBT_findBetterDictMatch(
+                     ms, ip, iend,
+-                    offsetPtr, bestLength, nbCompares,
++                    offBasePtr, bestLength, nbCompares,
+                     mls, dictMode);
+         }
+ 
+         assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
+         ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
+         if (bestLength >= MINMATCH) {
+-            U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex;
++            U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offBasePtr); (void)mIndex;
+             DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
+-                        curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
++                        curr, (U32)bestLength, (U32)*offBasePtr, mIndex);
+         }
+         return bestLength;
+     }
+@@ -378,17 +391,18 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
+ 
+ 
+ /* ZSTD_BtFindBestMatch() : Tree updater, providing best match */
+-FORCE_INLINE_TEMPLATE size_t
+-ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
++FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++size_t ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
+                 const BYTE* const ip, const BYTE* const iLimit,
+-                      size_t* offsetPtr,
++                      size_t* offBasePtr,
+                 const U32 mls /* template */,
+                 const ZSTD_dictMode_e dictMode)
+ {
+     DEBUGLOG(7, "ZSTD_BtFindBestMatch");
+     if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
+     ZSTD_updateDUBT(ms, ip, iLimit, mls);
+-    return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
++    return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offBasePtr, mls, dictMode);
+ }
+ 
+ /* *********************************
+@@ -561,7 +575,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
+         /* save best solution */
+         if (currentMl > ml) {
+             ml = currentMl;
+-            *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta));
++            *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta));
+             if (ip+currentMl == iLimit) {
+                 /* best possible, avoids read overflow on next attempt */
+                 return ml;
+@@ -598,7 +612,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
+             /* save best solution */
+             if (currentMl > ml) {
+                 ml = currentMl;
+-                *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta));
++                *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta));
+                 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+             }
+         }
+@@ -614,10 +628,12 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
+ 
+ /* Update chains up to ip (excluded)
+    Assumption : always within prefix (i.e. not within extDict) */
+-FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
++FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++U32 ZSTD_insertAndFindFirstIndex_internal(
+                         ZSTD_matchState_t* ms,
+                         const ZSTD_compressionParameters* const cParams,
+-                        const BYTE* ip, U32 const mls)
++                        const BYTE* ip, U32 const mls, U32 const lazySkipping)
+ {
+     U32* const hashTable  = ms->hashTable;
+     const U32 hashLog = cParams->hashLog;
+@@ -632,6 +648,9 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
+         NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
+         hashTable[h] = idx;
+         idx++;
++        /* Stop inserting every position when in the lazy skipping mode. */
++        if (lazySkipping)
++            break;
+     }
+ 
+     ms->nextToUpdate = target;
+@@ -640,11 +659,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
+ 
+ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
+     const ZSTD_compressionParameters* const cParams = &ms->cParams;
+-    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
++    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0);
+ }
+ 
+ /* inlining is important to hardwire a hot branch (template emulation) */
+ FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ size_t ZSTD_HcFindBestMatch(
+                         ZSTD_matchState_t* ms,
+                         const BYTE* const ip, const BYTE* const iLimit,
+@@ -684,14 +704,15 @@ size_t ZSTD_HcFindBestMatch(
+     }
+ 
+     /* HC4 match finder */
+-    matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
++    matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping);
+ 
+     for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
+         size_t currentMl=0;
+         if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
+             const BYTE* const match = base + matchIndex;
+             assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
+-            if (match[ml] == ip[ml])   /* potentially better */
++            /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
++            if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3))   /* potentially better */
+                 currentMl = ZSTD_count(ip, match, iLimit);
+         } else {
+             const BYTE* const match = dictBase + matchIndex;
+@@ -703,7 +724,7 @@ size_t ZSTD_HcFindBestMatch(
+         /* save best solution */
+         if (currentMl > ml) {
+             ml = currentMl;
+-            *offsetPtr = STORE_OFFSET(curr - matchIndex);
++            *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
+             if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+         }
+ 
+@@ -739,7 +760,7 @@ size_t ZSTD_HcFindBestMatch(
+             if (currentMl > ml) {
+                 ml = currentMl;
+                 assert(curr > matchIndex + dmsIndexDelta);
+-                *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta));
++                *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
+                 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+             }
+ 
+@@ -756,8 +777,6 @@ size_t ZSTD_HcFindBestMatch(
+ * (SIMD) Row-based matchfinder
+ ***********************************/
+ /* Constants for row-based hash */
+-#define ZSTD_ROW_HASH_TAG_OFFSET 16     /* byte offset of hashes in the match state's tagTable from the beginning of a row */
+-#define ZSTD_ROW_HASH_TAG_BITS 8        /* nb bits to use for the tag */
+ #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
+ #define ZSTD_ROW_HASH_MAX_ENTRIES 64    /* absolute maximum number of entries per row, for all configurations */
+ 
+@@ -769,64 +788,19 @@ typedef U64 ZSTD_VecMask;   /* Clarifies when we are interacting with a U64 repr
+  * Starting from the LSB, returns the idx of the next non-zero bit.
+  * Basically counting the nb of trailing zeroes.
+  */
+-static U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
+-    assert(val != 0);
+-#   if (defined(__GNUC__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))
+-    if (sizeof(size_t) == 4) {
+-        U32 mostSignificantWord = (U32)(val >> 32);
+-        U32 leastSignificantWord = (U32)val;
+-        if (leastSignificantWord == 0) {
+-            return 32 + (U32)__builtin_ctz(mostSignificantWord);
+-        } else {
+-            return (U32)__builtin_ctz(leastSignificantWord);
+-        }
+-    } else {
+-        return (U32)__builtin_ctzll(val);
+-    }
+-#   else
+-    /* Software ctz version: http://aggregate.org/MAGIC/#Trailing%20Zero%20Count
+-     * and: https://stackoverflow.com/questions/2709430/count-number-of-bits-in-a-64-bit-long-big-integer
+-     */
+-    val = ~val & (val - 1ULL); /* Lowest set bit mask */
+-    val = val - ((val >> 1) & 0x5555555555555555);
+-    val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL);
+-    return (U32)((((val + (val >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56);
+-#   endif
+-}
+-
+-/* ZSTD_rotateRight_*():
+- * Rotates a bitfield to the right by "count" bits.
+- * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
+- */
+-FORCE_INLINE_TEMPLATE
+-U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
+-    assert(count < 64);
+-    count &= 0x3F; /* for fickle pattern recognition */
+-    return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
+-}
+-
+-FORCE_INLINE_TEMPLATE
+-U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
+-    assert(count < 32);
+-    count &= 0x1F; /* for fickle pattern recognition */
+-    return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
+-}
+-
+-FORCE_INLINE_TEMPLATE
+-U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
+-    assert(count < 16);
+-    count &= 0x0F; /* for fickle pattern recognition */
+-    return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
++MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
++    return ZSTD_countTrailingZeros64(val);
+ }
+ 
+ /* ZSTD_row_nextIndex():
+  * Returns the next index to insert at within a tagTable row, and updates the "head"
+- * value to reflect the update. Essentially cycles backwards from [0, {entries per row})
++ * value to reflect the update. Essentially cycles backwards from [1, {entries per row})
+  */
+ FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
+-  U32 const next = (*tagRow - 1) & rowMask;
+-  *tagRow = (BYTE)next;
+-  return next;
++    U32 next = (*tagRow-1) & rowMask;
++    next += (next == 0) ? rowMask : 0; /* skip first position */
++    *tagRow = (BYTE)next;
++    return next;
+ }
+ 
+ /* ZSTD_isAligned():
+@@ -840,7 +814,7 @@ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
+ /* ZSTD_row_prefetch():
+  * Performs prefetching for the hashTable and tagTable at a given row.
+  */
+-FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
++FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) {
+     PREFETCH_L1(hashTable + relRow);
+     if (rowLog >= 5) {
+         PREFETCH_L1(hashTable + relRow + 16);
+@@ -859,18 +833,20 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* ta
+  * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries,
+  * but not beyond iLimit.
+  */
+-FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
++FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
+                                    U32 const rowLog, U32 const mls,
+                                    U32 idx, const BYTE* const iLimit)
+ {
+     U32 const* const hashTable = ms->hashTable;
+-    U16 const* const tagTable = ms->tagTable;
++    BYTE const* const tagTable = ms->tagTable;
+     U32 const hashLog = ms->rowHashLog;
+     U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
+     U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
+ 
+     for (; idx < lim; ++idx) {
+-        U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
++        U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
+         U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+         ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
+         ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
+@@ -885,12 +861,15 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
+  * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at
+  * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
+  */
+-FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
+-                                                  U16 const* tagTable, BYTE const* base,
++FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
++                                                  BYTE const* tagTable, BYTE const* base,
+                                                   U32 idx, U32 const hashLog,
+-                                                  U32 const rowLog, U32 const mls)
++                                                  U32 const rowLog, U32 const mls,
++                                                  U64 const hashSalt)
+ {
+-    U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
++    U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
+     U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+     ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
+     {   U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
+@@ -902,28 +881,29 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTab
+ /* ZSTD_row_update_internalImpl():
+  * Updates the hash table with positions starting from updateStartIdx until updateEndIdx.
+  */
+-FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
+-                                                        U32 updateStartIdx, U32 const updateEndIdx,
+-                                                        U32 const mls, U32 const rowLog,
+-                                                        U32 const rowMask, U32 const useCache)
++FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
++                                  U32 updateStartIdx, U32 const updateEndIdx,
++                                  U32 const mls, U32 const rowLog,
++                                  U32 const rowMask, U32 const useCache)
+ {
+     U32* const hashTable = ms->hashTable;
+-    U16* const tagTable = ms->tagTable;
++    BYTE* const tagTable = ms->tagTable;
+     U32 const hashLog = ms->rowHashLog;
+     const BYTE* const base = ms->window.base;
+ 
+     DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx);
+     for (; updateStartIdx < updateEndIdx; ++updateStartIdx) {
+-        U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls)
+-                                  : (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
++        U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt)
++                                  : (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
+         U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+         U32* const row = hashTable + relRow;
+-        BYTE* tagRow = (BYTE*)(tagTable + relRow);  /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
+-                                                       Explicit cast allows us to get exact desired position within each row */
++        BYTE* tagRow = tagTable + relRow;
+         U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
+ 
+-        assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
+-        ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
++        assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt));
++        tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK;
+         row[pos] = updateStartIdx;
+     }
+ }
+@@ -932,9 +912,11 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
+  * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate.
+  * Skips sections of long matches as is necessary.
+  */
+-FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
+-                                                    U32 const mls, U32 const rowLog,
+-                                                    U32 const rowMask, U32 const useCache)
++FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
++                              U32 const mls, U32 const rowLog,
++                              U32 const rowMask, U32 const useCache)
+ {
+     U32 idx = ms->nextToUpdate;
+     const BYTE* const base = ms->window.base;
+@@ -971,7 +953,35 @@ void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) {
+     const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);
+ 
+     DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog);
+-    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */);
++    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* don't use cache */);
++}
++
++/* Returns the width, in bits, of the group that represents each row entry in
++ * the match mask. Since not all architectures have an easy movemask
++ * instruction, this makes iterating over groups of bits easier and faster.
++ */
++FORCE_INLINE_TEMPLATE U32
++ZSTD_row_matchMaskGroupWidth(const U32 rowEntries)
++{
++    assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
++    assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
++    (void)rowEntries;
++#if defined(ZSTD_ARCH_ARM_NEON)
++    /* NEON path only works for little endian */
++    if (!MEM_isLittleEndian()) {
++        return 1;
++    }
++    if (rowEntries == 16) {
++        return 4;
++    }
++    if (rowEntries == 32) {
++        return 2;
++    }
++    if (rowEntries == 64) {
++        return 1;
++    }
++#endif
++    return 1;
+ }
+ 
+ #if defined(ZSTD_ARCH_X86_SSE2)
+@@ -994,71 +1004,82 @@ ZSTD_row_getSSEMask(int nbChunks, const BYTE* const src, const BYTE tag, const U
+ }
+ #endif
+ 
+-/* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches
+- * the hash at the nth position in a row of the tagTable.
+- * Each row is a circular buffer beginning at the value of "head". So we must rotate the "matches" bitfield
+- * to match up with the actual layout of the entries within the hashTable */
++#if defined(ZSTD_ARCH_ARM_NEON)
++FORCE_INLINE_TEMPLATE ZSTD_VecMask
++ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag, const U32 headGrouped)
++{
++    assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
++    if (rowEntries == 16) {
++        /* vshrn_n_u16 shifts by 4 every u16 and narrows to 8 lower bits.
++         * After that groups of 4 bits represent the equalMask. We lower
++         * all bits except the highest in these groups by doing AND with
++         * 0x88 = 0b10001000.
++         */
++        const uint8x16_t chunk = vld1q_u8(src);
++        const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
++        const uint8x8_t res = vshrn_n_u16(equalMask, 4);
++        const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0);
++        return ZSTD_rotateRight_U64(matches, headGrouped) & 0x8888888888888888ull;
++    } else if (rowEntries == 32) {
++        /* Same idea as with rowEntries == 16 but doing AND with
++         * 0x55 = 0b01010101.
++         */
++        const uint16x8x2_t chunk = vld2q_u16((const uint16_t*)(const void*)src);
++        const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
++        const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
++        const uint8x16_t dup = vdupq_n_u8(tag);
++        const uint8x8_t t0 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk0, dup)), 6);
++        const uint8x8_t t1 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk1, dup)), 6);
++        const uint8x8_t res = vsli_n_u8(t0, t1, 4);
++        const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0) ;
++        return ZSTD_rotateRight_U64(matches, headGrouped) & 0x5555555555555555ull;
++    } else { /* rowEntries == 64 */
++        const uint8x16x4_t chunk = vld4q_u8(src);
++        const uint8x16_t dup = vdupq_n_u8(tag);
++        const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
++        const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
++        const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
++        const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
++
++        const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
++        const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
++        const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
++        const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
++        const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
++        const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
++        return ZSTD_rotateRight_U64(matches, headGrouped);
++    }
++}
++#endif
++
++/* Returns a ZSTD_VecMask (U64) that has the nth group (determined by
++ * ZSTD_row_matchMaskGroupWidth) of bits set to 1 if the newly-computed "tag"
++ * matches the hash at the nth position in a row of the tagTable.
++ * Each row is a circular buffer beginning at the value of "headGrouped". So we
++ * must rotate the "matches" bitfield to match up with the actual layout of the
++ * entries within the hashTable */
+ FORCE_INLINE_TEMPLATE ZSTD_VecMask
+-ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries)
++ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
+ {
+-    const BYTE* const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET;
++    const BYTE* const src = tagRow;
+     assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
+     assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
++    assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);
+ 
+ #if defined(ZSTD_ARCH_X86_SSE2)
+ 
+-    return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, head);
++    return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, headGrouped);
+ 
+ #else /* SW or NEON-LE */
+ 
+ # if defined(ZSTD_ARCH_ARM_NEON)
+   /* This NEON path only works for little endian - otherwise use SWAR below */
+     if (MEM_isLittleEndian()) {
+-        if (rowEntries == 16) {
+-            const uint8x16_t chunk = vld1q_u8(src);
+-            const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
+-            const uint16x8_t t0 = vshlq_n_u16(equalMask, 7);
+-            const uint32x4_t t1 = vreinterpretq_u32_u16(vsriq_n_u16(t0, t0, 14));
+-            const uint64x2_t t2 = vreinterpretq_u64_u32(vshrq_n_u32(t1, 14));
+-            const uint8x16_t t3 = vreinterpretq_u8_u64(vsraq_n_u64(t2, t2, 28));
+-            const U16 hi = (U16)vgetq_lane_u8(t3, 8);
+-            const U16 lo = (U16)vgetq_lane_u8(t3, 0);
+-            return ZSTD_rotateRight_U16((hi << 8) | lo, head);
+-        } else if (rowEntries == 32) {
+-            const uint16x8x2_t chunk = vld2q_u16((const U16*)(const void*)src);
+-            const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
+-            const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
+-            const uint8x16_t equalMask0 = vceqq_u8(chunk0, vdupq_n_u8(tag));
+-            const uint8x16_t equalMask1 = vceqq_u8(chunk1, vdupq_n_u8(tag));
+-            const int8x8_t pack0 = vqmovn_s16(vreinterpretq_s16_u8(equalMask0));
+-            const int8x8_t pack1 = vqmovn_s16(vreinterpretq_s16_u8(equalMask1));
+-            const uint8x8_t t0 = vreinterpret_u8_s8(pack0);
+-            const uint8x8_t t1 = vreinterpret_u8_s8(pack1);
+-            const uint8x8_t t2 = vsri_n_u8(t1, t0, 2);
+-            const uint8x8x2_t t3 = vuzp_u8(t2, t0);
+-            const uint8x8_t t4 = vsri_n_u8(t3.val[1], t3.val[0], 4);
+-            const U32 matches = vget_lane_u32(vreinterpret_u32_u8(t4), 0);
+-            return ZSTD_rotateRight_U32(matches, head);
+-        } else { /* rowEntries == 64 */
+-            const uint8x16x4_t chunk = vld4q_u8(src);
+-            const uint8x16_t dup = vdupq_n_u8(tag);
+-            const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
+-            const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
+-            const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
+-            const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
+-
+-            const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
+-            const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
+-            const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
+-            const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
+-            const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
+-            const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
+-            return ZSTD_rotateRight_U64(matches, head);
+-        }
++        return ZSTD_row_getNEONMask(rowEntries, src, tag, headGrouped);
+     }
+ # endif /* ZSTD_ARCH_ARM_NEON */
+     /* SWAR */
+-    {   const size_t chunkSize = sizeof(size_t);
++    {   const int chunkSize = sizeof(size_t);
+         const size_t shiftAmount = ((chunkSize * 8) - chunkSize);
+         const size_t xFF = ~((size_t)0);
+         const size_t x01 = xFF / 0xFF;
+@@ -1091,11 +1112,11 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head,
+         }
+         matches = ~matches;
+         if (rowEntries == 16) {
+-            return ZSTD_rotateRight_U16((U16)matches, head);
++            return ZSTD_rotateRight_U16((U16)matches, headGrouped);
+         } else if (rowEntries == 32) {
+-            return ZSTD_rotateRight_U32((U32)matches, head);
++            return ZSTD_rotateRight_U32((U32)matches, headGrouped);
+         } else {
+-            return ZSTD_rotateRight_U64((U64)matches, head);
++            return ZSTD_rotateRight_U64((U64)matches, headGrouped);
+         }
+     }
+ #endif
+@@ -1103,20 +1124,21 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head,
+ 
+ /* The high-level approach of the SIMD row based match finder is as follows:
+  * - Figure out where to insert the new entry:
+- *      - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag"
+- *      - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines
++ *      - Generate a hash for the current input position and split it into a one-byte tag and `rowHashLog` bits of index.
++ *           - The hash is salted by a value that changes on every context reset, so when the same table is used
++ *             we will avoid collisions that would otherwise slow us down by introducing phantom matches.
++ *      - The hashTable is effectively split into groups or "rows" of 15 or 31 entries of U32, and the index determines
+  *        which row to insert into.
+- *      - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can
+- *        be considered as a circular buffer with a "head" index that resides in the tagTable.
+- *      - Also insert the "tag" into the equivalent row and position in the tagTable.
+- *          - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry.
+- *                  The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively,
+- *                  for alignment/performance reasons, leaving some bytes unused.
+- * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and
++ *      - Determine the correct position within the row to insert the entry into. Each row of 15 or 31 can
++ *        be considered as a circular buffer with a "head" index that resides in the tagTable (overall 16 or 32 bytes
++ *        per row).
++ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the position and
+  *   generate a bitfield that we can cycle through to check the collisions in the hash table.
+  * - Pick the longest match.
++ * - Insert the tag into the equivalent row and position in the tagTable.
+  */
+ FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ size_t ZSTD_RowFindBestMatch(
+                         ZSTD_matchState_t* ms,
+                         const BYTE* const ip, const BYTE* const iLimit,
+@@ -1125,7 +1147,7 @@ size_t ZSTD_RowFindBestMatch(
+                         const U32 rowLog)
+ {
+     U32* const hashTable = ms->hashTable;
+-    U16* const tagTable = ms->tagTable;
++    BYTE* const tagTable = ms->tagTable;
+     U32* const hashCache = ms->hashCache;
+     const U32 hashLog = ms->rowHashLog;
+     const ZSTD_compressionParameters* const cParams = &ms->cParams;
+@@ -1143,8 +1165,11 @@ size_t ZSTD_RowFindBestMatch(
+     const U32 rowEntries = (1U << rowLog);
+     const U32 rowMask = rowEntries - 1;
+     const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
++    const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries);
++    const U64 hashSalt = ms->hashSalt;
+     U32 nbAttempts = 1U << cappedSearchLog;
+     size_t ml=4-1;
++    U32 hash;
+ 
+     /* DMS/DDS variables that may be referenced laster */
+     const ZSTD_matchState_t* const dms = ms->dictMatchState;
+@@ -1168,7 +1193,7 @@ size_t ZSTD_RowFindBestMatch(
+     if (dictMode == ZSTD_dictMatchState) {
+         /* Prefetch DMS rows */
+         U32* const dmsHashTable = dms->hashTable;
+-        U16* const dmsTagTable = dms->tagTable;
++        BYTE* const dmsTagTable = dms->tagTable;
+         U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+         U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+         dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
+@@ -1178,23 +1203,34 @@ size_t ZSTD_RowFindBestMatch(
+     }
+ 
+     /* Update the hashTable and tagTable up to (but not including) ip */
+-    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
++    if (!ms->lazySkipping) {
++        ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
++        hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt);
++    } else {
++        /* Stop inserting every position when in the lazy skipping mode.
++         * The hash cache is also not kept up to date in this mode.
++         */
++        hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
++        ms->nextToUpdate = curr;
++    }
++    ms->hashSaltEntropy += hash; /* collect salt entropy */
++
+     {   /* Get the hash for ip, compute the appropriate row */
+-        U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
+         U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+         U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
+         U32* const row = hashTable + relRow;
+         BYTE* tagRow = (BYTE*)(tagTable + relRow);
+-        U32 const head = *tagRow & rowMask;
++        U32 const headGrouped = (*tagRow & rowMask) * groupWidth;
+         U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
+         size_t numMatches = 0;
+         size_t currMatch = 0;
+-        ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries);
++        ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries);
+ 
+         /* Cycle through the matches and prefetch */
+-        for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+-            U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
++        for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
++            U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
+             U32 const matchIndex = row[matchPos];
++            if(matchPos == 0) continue;
+             assert(numMatches < rowEntries);
+             if (matchIndex < lowLimit)
+                 break;
+@@ -1204,13 +1240,14 @@ size_t ZSTD_RowFindBestMatch(
+                 PREFETCH_L1(dictBase + matchIndex);
+             }
+             matchBuffer[numMatches++] = matchIndex;
++            --nbAttempts;
+         }
+ 
+         /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
+            in ZSTD_row_update_internal() at the next search. */
+         {
+             U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
+-            tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
++            tagRow[pos] = (BYTE)tag;
+             row[pos] = ms->nextToUpdate++;
+         }
+ 
+@@ -1224,7 +1261,8 @@ size_t ZSTD_RowFindBestMatch(
+             if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
+                 const BYTE* const match = base + matchIndex;
+                 assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
+-                if (match[ml] == ip[ml])   /* potentially better */
++                /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
++                if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3))   /* potentially better */
+                     currentMl = ZSTD_count(ip, match, iLimit);
+             } else {
+                 const BYTE* const match = dictBase + matchIndex;
+@@ -1236,7 +1274,7 @@ size_t ZSTD_RowFindBestMatch(
+             /* Save best solution */
+             if (currentMl > ml) {
+                 ml = currentMl;
+-                *offsetPtr = STORE_OFFSET(curr - matchIndex);
++                *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
+                 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+             }
+         }
+@@ -1254,19 +1292,21 @@ size_t ZSTD_RowFindBestMatch(
+         const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
+         const U32 dmsIndexDelta        = dictLimit - dmsSize;
+ 
+-        {   U32 const head = *dmsTagRow & rowMask;
++        {   U32 const headGrouped = (*dmsTagRow & rowMask) * groupWidth;
+             U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
+             size_t numMatches = 0;
+             size_t currMatch = 0;
+-            ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries);
++            ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries);
+ 
+-            for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+-                U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
++            for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
++                U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
+                 U32 const matchIndex = dmsRow[matchPos];
++                if(matchPos == 0) continue;
+                 if (matchIndex < dmsLowestIndex)
+                     break;
+                 PREFETCH_L1(dmsBase + matchIndex);
+                 matchBuffer[numMatches++] = matchIndex;
++                --nbAttempts;
+             }
+ 
+             /* Return the longest match */
+@@ -1285,7 +1325,7 @@ size_t ZSTD_RowFindBestMatch(
+                 if (currentMl > ml) {
+                     ml = currentMl;
+                     assert(curr > matchIndex + dmsIndexDelta);
+-                    *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta));
++                    *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
+                     if (ip+currentMl == iLimit) break;
+                 }
+             }
+@@ -1472,8 +1512,9 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax(
+ *  Common parser - lazy strategy
+ *********************************/
+ 
+-FORCE_INLINE_TEMPLATE size_t
+-ZSTD_compressBlock_lazy_generic(
++FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++size_t ZSTD_compressBlock_lazy_generic(
+                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
+                         U32 rep[ZSTD_REP_NUM],
+                         const void* src, size_t srcSize,
+@@ -1491,7 +1532,8 @@ ZSTD_compressBlock_lazy_generic(
+     const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
+     const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
+ 
+-    U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
++    U32 offset_1 = rep[0], offset_2 = rep[1];
++    U32 offsetSaved1 = 0, offsetSaved2 = 0;
+ 
+     const int isDMS = dictMode == ZSTD_dictMatchState;
+     const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
+@@ -1512,8 +1554,8 @@ ZSTD_compressBlock_lazy_generic(
+         U32 const curr = (U32)(ip - base);
+         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
+         U32 const maxRep = curr - windowLow;
+-        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
+-        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
++        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
++        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
+     }
+     if (isDxS) {
+         /* dictMatchState repCode checks don't currently handle repCode == 0
+@@ -1522,10 +1564,11 @@ ZSTD_compressBlock_lazy_generic(
+         assert(offset_2 <= dictAndPrefixLength);
+     }
+ 
++    /* Reset the lazy skipping state */
++    ms->lazySkipping = 0;
++
+     if (searchMethod == search_rowHash) {
+-        ZSTD_row_fillHashCache(ms, base, rowLog,
+-                            MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
+-                            ms->nextToUpdate, ilimit);
++        ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
+     }
+ 
+     /* Match Loop */
+@@ -1537,7 +1580,7 @@ ZSTD_compressBlock_lazy_generic(
+ #endif
+     while (ip < ilimit) {
+         size_t matchLength=0;
+-        size_t offcode=STORE_REPCODE_1;
++        size_t offBase = REPCODE1_TO_OFFBASE;
+         const BYTE* start=ip+1;
+         DEBUGLOG(7, "search baseline (depth 0)");
+ 
+@@ -1562,14 +1605,23 @@ ZSTD_compressBlock_lazy_generic(
+         }
+ 
+         /* first search (depth 0) */
+-        {   size_t offsetFound = 999999999;
+-            size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offsetFound, mls, rowLog, searchMethod, dictMode);
++        {   size_t offbaseFound = 999999999;
++            size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offbaseFound, mls, rowLog, searchMethod, dictMode);
+             if (ml2 > matchLength)
+-                matchLength = ml2, start = ip, offcode=offsetFound;
++                matchLength = ml2, start = ip, offBase = offbaseFound;
+         }
+ 
+         if (matchLength < 4) {
+-            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
++            size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
++            ip += step;
++            /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
++             * In this mode we stop inserting every position into our tables, and only insert
++             * positions that we search, which is one in step positions.
++             * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
++             * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
++             * triggered once we've gone 2KB without finding any matches.
++             */
++            ms->lazySkipping = step > kLazySkippingStep;
+             continue;
+         }
+ 
+@@ -1579,12 +1631,12 @@ ZSTD_compressBlock_lazy_generic(
+             DEBUGLOG(7, "search depth 1");
+             ip ++;
+             if ( (dictMode == ZSTD_noDict)
+-              && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
++              && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
+                 int const gain2 = (int)(mlRep * 3);
+-                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
++                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
+                 if ((mlRep >= 4) && (gain2 > gain1))
+-                    matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
++                    matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
+             }
+             if (isDxS) {
+                 const U32 repIndex = (U32)(ip - base) - offset_1;
+@@ -1596,17 +1648,17 @@ ZSTD_compressBlock_lazy_generic(
+                     const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+                     size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                     int const gain2 = (int)(mlRep * 3);
+-                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
++                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
+                     if ((mlRep >= 4) && (gain2 > gain1))
+-                        matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
++                        matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
+                 }
+             }
+-            {   size_t offset2=999999999;
+-                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, dictMode);
+-                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
+-                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4);
++            {   size_t ofbCandidate=999999999;
++                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
++                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
++                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
+                 if ((ml2 >= 4) && (gain2 > gain1)) {
+-                    matchLength = ml2, offcode = offset2, start = ip;
++                    matchLength = ml2, offBase = ofbCandidate, start = ip;
+                     continue;   /* search a better one */
+             }   }
+ 
+@@ -1615,12 +1667,12 @@ ZSTD_compressBlock_lazy_generic(
+                 DEBUGLOG(7, "search depth 2");
+                 ip ++;
+                 if ( (dictMode == ZSTD_noDict)
+-                  && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
++                  && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                     size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
+                     int const gain2 = (int)(mlRep * 4);
+-                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
++                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
+                     if ((mlRep >= 4) && (gain2 > gain1))
+-                        matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
++                        matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
+                 }
+                 if (isDxS) {
+                     const U32 repIndex = (U32)(ip - base) - offset_1;
+@@ -1632,17 +1684,17 @@ ZSTD_compressBlock_lazy_generic(
+                         const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+                         size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                         int const gain2 = (int)(mlRep * 4);
+-                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
++                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
+                         if ((mlRep >= 4) && (gain2 > gain1))
+-                            matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
++                            matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
+                     }
+                 }
+-                {   size_t offset2=999999999;
+-                    size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, dictMode);
+-                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
+-                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7);
++                {   size_t ofbCandidate=999999999;
++                    size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
++                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
++                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
+                     if ((ml2 >= 4) && (gain2 > gain1)) {
+-                        matchLength = ml2, offcode = offset2, start = ip;
++                        matchLength = ml2, offBase = ofbCandidate, start = ip;
+                         continue;
+             }   }   }
+             break;  /* nothing found : store previous solution */
+@@ -1653,26 +1705,33 @@ ZSTD_compressBlock_lazy_generic(
+          * notably if `value` is unsigned, resulting in a large positive `-value`.
+          */
+         /* catch up */
+-        if (STORED_IS_OFFSET(offcode)) {
++        if (OFFBASE_IS_OFFSET(offBase)) {
+             if (dictMode == ZSTD_noDict) {
+-                while ( ((start > anchor) & (start - STORED_OFFSET(offcode) > prefixLowest))
+-                     && (start[-1] == (start-STORED_OFFSET(offcode))[-1]) )  /* only search for offset within prefix */
++                while ( ((start > anchor) & (start - OFFBASE_TO_OFFSET(offBase) > prefixLowest))
++                     && (start[-1] == (start-OFFBASE_TO_OFFSET(offBase))[-1]) )  /* only search for offset within prefix */
+                     { start--; matchLength++; }
+             }
+             if (isDxS) {
+-                U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode));
++                U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
+                 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
+                 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
+                 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
+             }
+-            offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode);
++            offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase);
+         }
+         /* store sequence */
+ _storeSequence:
+         {   size_t const litLength = (size_t)(start - anchor);
+-            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength);
++            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
+             anchor = ip = start + matchLength;
+         }
++        if (ms->lazySkipping) {
++            /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
++            if (searchMethod == search_rowHash) {
++                ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
++            }
++            ms->lazySkipping = 0;
++        }
+ 
+         /* check immediate repcode */
+         if (isDxS) {
+@@ -1686,8 +1745,8 @@ ZSTD_compressBlock_lazy_generic(
+                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
+                     const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
+                     matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
+-                    offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode;   /* swap offset_2 <=> offset_1 */
+-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
++                    offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase;   /* swap offset_2 <=> offset_1 */
++                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
+                     ip += matchLength;
+                     anchor = ip;
+                     continue;
+@@ -1701,166 +1760,181 @@ ZSTD_compressBlock_lazy_generic(
+                  && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
+                 /* store sequence */
+                 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
+-                offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap repcodes */
+-                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
++                offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap repcodes */
++                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
+                 ip += matchLength;
+                 anchor = ip;
+                 continue;   /* faster when present ... (?) */
+     }   }   }
+ 
+-    /* Save reps for next block */
+-    rep[0] = offset_1 ? offset_1 : savedOffset;
+-    rep[1] = offset_2 ? offset_2 : savedOffset;
++    /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
++     * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
++    offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
++
++    /* save reps for next block */
++    rep[0] = offset_1 ? offset_1 : offsetSaved1;
++    rep[1] = offset_2 ? offset_2 : offsetSaved2;
+ 
+     /* Return the last literals size */
+     return (size_t)(iend - anchor);
+ }
++#endif /* build exclusions */
+ 
+ 
+-size_t ZSTD_compressBlock_btlazy2(
++#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
++size_t ZSTD_compressBlock_greedy(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
+ }
+ 
+-size_t ZSTD_compressBlock_lazy2(
++size_t ZSTD_compressBlock_greedy_dictMatchState(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
+ }
+ 
+-size_t ZSTD_compressBlock_lazy(
++size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
+ }
+ 
+-size_t ZSTD_compressBlock_greedy(
++size_t ZSTD_compressBlock_greedy_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
+ }
+ 
+-size_t ZSTD_compressBlock_btlazy2_dictMatchState(
++size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
+ }
+ 
+-size_t ZSTD_compressBlock_lazy2_dictMatchState(
++size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
+ }
++#endif
+ 
+-size_t ZSTD_compressBlock_lazy_dictMatchState(
++#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
++size_t ZSTD_compressBlock_lazy(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
+ }
+ 
+-size_t ZSTD_compressBlock_greedy_dictMatchState(
++size_t ZSTD_compressBlock_lazy_dictMatchState(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
+ }
+ 
+-
+-size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
++size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
+ }
+ 
+-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
++size_t ZSTD_compressBlock_lazy_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
+ }
+ 
+-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
++size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
+ }
+ 
+-/* Row-based matchfinder */
+-size_t ZSTD_compressBlock_lazy2_row(
++size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
+ }
++#endif
+ 
+-size_t ZSTD_compressBlock_lazy_row(
++#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
++size_t ZSTD_compressBlock_lazy2(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
+ }
+ 
+-size_t ZSTD_compressBlock_greedy_row(
++size_t ZSTD_compressBlock_lazy2_dictMatchState(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
+ }
+ 
+-size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
++size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
+ }
+ 
+-size_t ZSTD_compressBlock_lazy_dictMatchState_row(
++size_t ZSTD_compressBlock_lazy2_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
+ }
+ 
+-size_t ZSTD_compressBlock_greedy_dictMatchState_row(
++size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
+ }
+ 
+-
+ size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
+ }
++#endif
+ 
+-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
++#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
++size_t ZSTD_compressBlock_btlazy2(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
+ }
+ 
+-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
++size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
++    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
+ }
++#endif
+ 
++#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
+ FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ size_t ZSTD_compressBlock_lazy_extDict_generic(
+                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
+                         U32 rep[ZSTD_REP_NUM],
+@@ -1886,12 +1960,13 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
+ 
+     DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
+ 
++    /* Reset the lazy skipping state */
++    ms->lazySkipping = 0;
++
+     /* init */
+     ip += (ip == prefixStart);
+     if (searchMethod == search_rowHash) {
+-        ZSTD_row_fillHashCache(ms, base, rowLog,
+-                               MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
+-                               ms->nextToUpdate, ilimit);
++        ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
+     }
+ 
+     /* Match Loop */
+@@ -1903,7 +1978,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
+ #endif
+     while (ip < ilimit) {
+         size_t matchLength=0;
+-        size_t offcode=STORE_REPCODE_1;
++        size_t offBase = REPCODE1_TO_OFFBASE;
+         const BYTE* start=ip+1;
+         U32 curr = (U32)(ip-base);
+ 
+@@ -1922,14 +1997,23 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
+         }   }
+ 
+         /* first search (depth 0) */
+-        {   size_t offsetFound = 999999999;
+-            size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offsetFound, mls, rowLog, searchMethod, ZSTD_extDict);
++        {   size_t ofbCandidate = 999999999;
++            size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
+             if (ml2 > matchLength)
+-                matchLength = ml2, start = ip, offcode=offsetFound;
++                matchLength = ml2, start = ip, offBase = ofbCandidate;
+         }
+ 
+         if (matchLength < 4) {
+-            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
++            size_t const step = ((size_t)(ip-anchor) >> kSearchStrength);
++            ip += step + 1;   /* jump faster over incompressible sections */
++            /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
++             * In this mode we stop inserting every position into our tables, and only insert
++             * positions that we search, which is one in step positions.
++             * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
++             * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
++             * triggered once we've gone 2KB without finding any matches.
++             */
++            ms->lazySkipping = step > kLazySkippingStep;
+             continue;
+         }
+ 
+@@ -1939,7 +2023,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
+             ip ++;
+             curr++;
+             /* check repCode */
+-            if (offcode) {
++            if (offBase) {
+                 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
+                 const U32 repIndex = (U32)(curr - offset_1);
+                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+@@ -1951,18 +2035,18 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
+                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                     size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                     int const gain2 = (int)(repLength * 3);
+-                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
++                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
+                     if ((repLength >= 4) && (gain2 > gain1))
+-                        matchLength = repLength, offcode = STORE_REPCODE_1, start = ip;
++                        matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip;
+             }   }
+ 
+             /* search match, depth 1 */
+-            {   size_t offset2=999999999;
+-                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, ZSTD_extDict);
+-                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
+-                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4);
++            {   size_t ofbCandidate = 999999999;
++                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
++                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
++                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
+                 if ((ml2 >= 4) && (gain2 > gain1)) {
+-                    matchLength = ml2, offcode = offset2, start = ip;
++                    matchLength = ml2, offBase = ofbCandidate, start = ip;
+                     continue;   /* search a better one */
+             }   }
+ 
+@@ -1971,7 +2055,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
+                 ip ++;
+                 curr++;
+                 /* check repCode */
+-                if (offcode) {
++                if (offBase) {
+                     const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
+                     const U32 repIndex = (U32)(curr - offset_1);
+                     const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+@@ -1983,38 +2067,45 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
+                         const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                         size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                         int const gain2 = (int)(repLength * 4);
+-                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
++                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
+                         if ((repLength >= 4) && (gain2 > gain1))
+-                            matchLength = repLength, offcode = STORE_REPCODE_1, start = ip;
++                            matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip;
+                 }   }
+ 
+                 /* search match, depth 2 */
+-                {   size_t offset2=999999999;
+-                    size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, ZSTD_extDict);
+-                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
+-                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7);
++                {   size_t ofbCandidate = 999999999;
++                    size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
++                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
++                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
+                     if ((ml2 >= 4) && (gain2 > gain1)) {
+-                        matchLength = ml2, offcode = offset2, start = ip;
++                        matchLength = ml2, offBase = ofbCandidate, start = ip;
+                         continue;
+             }   }   }
+             break;  /* nothing found : store previous solution */
+         }
+ 
+         /* catch up */
+-        if (STORED_IS_OFFSET(offcode)) {
+-            U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode));
++        if (OFFBASE_IS_OFFSET(offBase)) {
++            U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
+             const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
+             const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
+             while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
+-            offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode);
++            offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase);
+         }
+ 
+         /* store sequence */
+ _storeSequence:
+         {   size_t const litLength = (size_t)(start - anchor);
+-            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength);
++            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
+             anchor = ip = start + matchLength;
+         }
++        if (ms->lazySkipping) {
++            /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
++            if (searchMethod == search_rowHash) {
++                ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
++            }
++            ms->lazySkipping = 0;
++        }
+ 
+         /* check immediate repcode */
+         while (ip <= ilimit) {
+@@ -2029,8 +2120,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
+                 /* repcode detected we should take it */
+                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+-                offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode;   /* swap offset history */
+-                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
++                offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase;   /* swap offset history */
++                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
+                 ip += matchLength;
+                 anchor = ip;
+                 continue;   /* faster when present ... (?) */
+@@ -2045,8 +2136,9 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
+     /* Return the last literals size */
+     return (size_t)(iend - anchor);
+ }
++#endif /* build exclusions */
+ 
+-
++#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
+ size_t ZSTD_compressBlock_greedy_extDict(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+@@ -2054,49 +2146,55 @@ size_t ZSTD_compressBlock_greedy_extDict(
+     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
+ }
+ 
+-size_t ZSTD_compressBlock_lazy_extDict(
++size_t ZSTD_compressBlock_greedy_extDict_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+-
+ {
+-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
++    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
+ }
++#endif
+ 
+-size_t ZSTD_compressBlock_lazy2_extDict(
++#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
++size_t ZSTD_compressBlock_lazy_extDict(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ 
+ {
+-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
++    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
+ }
+ 
+-size_t ZSTD_compressBlock_btlazy2_extDict(
++size_t ZSTD_compressBlock_lazy_extDict_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ 
+ {
+-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
++    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
+ }
++#endif
+ 
+-size_t ZSTD_compressBlock_greedy_extDict_row(
++#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
++size_t ZSTD_compressBlock_lazy2_extDict(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
++
+ {
+-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
++    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
+ }
+ 
+-size_t ZSTD_compressBlock_lazy_extDict_row(
++size_t ZSTD_compressBlock_lazy2_extDict_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+-
+ {
+-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
++    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
+ }
++#endif
+ 
+-size_t ZSTD_compressBlock_lazy2_extDict_row(
++#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
++size_t ZSTD_compressBlock_btlazy2_extDict(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize)
+ 
+ {
+-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
++    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
+ }
++#endif
+diff --git a/lib/zstd/compress/zstd_lazy.h b/lib/zstd/compress/zstd_lazy.h
+index e5bdf4df8dde..22c9201f4e63 100644
+--- a/lib/zstd/compress/zstd_lazy.h
++++ b/lib/zstd/compress/zstd_lazy.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -22,98 +23,175 @@
+  */
+ #define ZSTD_LAZY_DDSS_BUCKET_LOG 2
+ 
++#define ZSTD_ROW_HASH_TAG_BITS 8        /* nb bits to use for the tag */
++
++#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
+ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
+ void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
+ 
+ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
+ 
+ void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue);  /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
++#endif
+ 
+-size_t ZSTD_compressBlock_btlazy2(
++#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
++size_t ZSTD_compressBlock_greedy(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_lazy2(
++size_t ZSTD_compressBlock_greedy_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_lazy(
++size_t ZSTD_compressBlock_greedy_dictMatchState(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_greedy(
++size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_lazy2_row(
++size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_lazy_row(
++size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_greedy_row(
++size_t ZSTD_compressBlock_greedy_extDict(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-
+-size_t ZSTD_compressBlock_btlazy2_dictMatchState(
++size_t ZSTD_compressBlock_greedy_extDict_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_lazy2_dictMatchState(
++
++#define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy
++#define ZSTD_COMPRESSBLOCK_GREEDY_ROW ZSTD_compressBlock_greedy_row
++#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE ZSTD_compressBlock_greedy_dictMatchState
++#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW ZSTD_compressBlock_greedy_dictMatchState_row
++#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH ZSTD_compressBlock_greedy_dedicatedDictSearch
++#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_greedy_dedicatedDictSearch_row
++#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT ZSTD_compressBlock_greedy_extDict
++#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW ZSTD_compressBlock_greedy_extDict_row
++#else
++#define ZSTD_COMPRESSBLOCK_GREEDY NULL
++#define ZSTD_COMPRESSBLOCK_GREEDY_ROW NULL
++#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE NULL
++#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW NULL
++#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH NULL
++#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW NULL
++#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT NULL
++#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW NULL
++#endif
++
++#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
++size_t ZSTD_compressBlock_lazy(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_lazy_dictMatchState(
++size_t ZSTD_compressBlock_lazy_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_greedy_dictMatchState(
++size_t ZSTD_compressBlock_lazy_dictMatchState(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
++size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_lazy_dictMatchState_row(
++size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_greedy_dictMatchState_row(
++size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-
+-size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
++size_t ZSTD_compressBlock_lazy_extDict(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
++size_t ZSTD_compressBlock_lazy_extDict_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
++
++#define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy
++#define ZSTD_COMPRESSBLOCK_LAZY_ROW ZSTD_compressBlock_lazy_row
++#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE ZSTD_compressBlock_lazy_dictMatchState
++#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy_dictMatchState_row
++#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy_dedicatedDictSearch
++#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy_dedicatedDictSearch_row
++#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT ZSTD_compressBlock_lazy_extDict
++#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW ZSTD_compressBlock_lazy_extDict_row
++#else
++#define ZSTD_COMPRESSBLOCK_LAZY NULL
++#define ZSTD_COMPRESSBLOCK_LAZY_ROW NULL
++#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE NULL
++#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW NULL
++#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH NULL
++#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW NULL
++#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT NULL
++#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW NULL
++#endif
++
++#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
++size_t ZSTD_compressBlock_lazy2(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
++size_t ZSTD_compressBlock_lazy2_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
++size_t ZSTD_compressBlock_lazy2_dictMatchState(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
++size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-
+-size_t ZSTD_compressBlock_greedy_extDict(
++size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_lazy_extDict(
++size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+ size_t ZSTD_compressBlock_lazy2_extDict(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_greedy_extDict_row(
++size_t ZSTD_compressBlock_lazy2_extDict_row(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_lazy_extDict_row(
++
++#define ZSTD_COMPRESSBLOCK_LAZY2 ZSTD_compressBlock_lazy2
++#define ZSTD_COMPRESSBLOCK_LAZY2_ROW ZSTD_compressBlock_lazy2_row
++#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE ZSTD_compressBlock_lazy2_dictMatchState
++#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy2_dictMatchState_row
++#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy2_dedicatedDictSearch
++#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy2_dedicatedDictSearch_row
++#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT ZSTD_compressBlock_lazy2_extDict
++#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW ZSTD_compressBlock_lazy2_extDict_row
++#else
++#define ZSTD_COMPRESSBLOCK_LAZY2 NULL
++#define ZSTD_COMPRESSBLOCK_LAZY2_ROW NULL
++#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE NULL
++#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW NULL
++#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH NULL
++#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW NULL
++#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT NULL
++#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW NULL
++#endif
++
++#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
++size_t ZSTD_compressBlock_btlazy2(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_lazy2_extDict_row(
++size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+ size_t ZSTD_compressBlock_btlazy2_extDict(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-        
++
++#define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2
++#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE ZSTD_compressBlock_btlazy2_dictMatchState
++#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT ZSTD_compressBlock_btlazy2_extDict
++#else
++#define ZSTD_COMPRESSBLOCK_BTLAZY2 NULL
++#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE NULL
++#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT NULL
++#endif
++
+ 
+ 
+ #endif /* ZSTD_LAZY_H */
+diff --git a/lib/zstd/compress/zstd_ldm.c b/lib/zstd/compress/zstd_ldm.c
+index dd86fc83e7dd..07f3bc6437ce 100644
+--- a/lib/zstd/compress/zstd_ldm.c
++++ b/lib/zstd/compress/zstd_ldm.c
+@@ -1,5 +1,6 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -242,11 +243,15 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
+     switch(ms->cParams.strategy)
+     {
+     case ZSTD_fast:
+-        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
++        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
+         break;
+ 
+     case ZSTD_dfast:
+-        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
++#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
++        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
++#else
++        assert(0); /* shouldn't be called: cparams should've been adjusted. */
++#endif
+         break;
+ 
+     case ZSTD_greedy:
+@@ -318,7 +323,9 @@ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
+     }
+ }
+ 
+-static size_t ZSTD_ldm_generateSequences_internal(
++static
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++size_t ZSTD_ldm_generateSequences_internal(
+         ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
+         ldmParams_t const* params, void const* src, size_t srcSize)
+ {
+@@ -549,7 +556,7 @@ size_t ZSTD_ldm_generateSequences(
+          * the window through early invalidation.
+          * TODO: * Test the chunk size.
+          *       * Try invalidation after the sequence generation and test the
+-         *         the offset against maxDist directly.
++         *         offset against maxDist directly.
+          *
+          * NOTE: Because of dictionaries + sequence splitting we MUST make sure
+          * that any offset used is valid at the END of the sequence, since it may
+@@ -689,7 +696,6 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+         /* maybeSplitSequence updates rawSeqStore->pos */
+         rawSeq const sequence = maybeSplitSequence(rawSeqStore,
+                                                    (U32)(iend - ip), minMatch);
+-        int i;
+         /* End signal */
+         if (sequence.offset == 0)
+             break;
+@@ -702,6 +708,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+         /* Run the block compressor */
+         DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
+         {
++            int i;
+             size_t const newLitLength =
+                 blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
+             ip += sequence.litLength;
+@@ -711,7 +718,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+             rep[0] = sequence.offset;
+             /* Store the sequence */
+             ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
+-                          STORE_OFFSET(sequence.offset),
++                          OFFSET_TO_OFFBASE(sequence.offset),
+                           sequence.matchLength);
+             ip += sequence.matchLength;
+         }
+diff --git a/lib/zstd/compress/zstd_ldm.h b/lib/zstd/compress/zstd_ldm.h
+index fbc6a5e88fd7..c540731abde7 100644
+--- a/lib/zstd/compress/zstd_ldm.h
++++ b/lib/zstd/compress/zstd_ldm.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+diff --git a/lib/zstd/compress/zstd_ldm_geartab.h b/lib/zstd/compress/zstd_ldm_geartab.h
+index 647f865be290..cfccfc46f6f7 100644
+--- a/lib/zstd/compress/zstd_ldm_geartab.h
++++ b/lib/zstd/compress/zstd_ldm_geartab.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+diff --git a/lib/zstd/compress/zstd_opt.c b/lib/zstd/compress/zstd_opt.c
+index fd82acfda62f..a87b66ac8d24 100644
+--- a/lib/zstd/compress/zstd_opt.c
++++ b/lib/zstd/compress/zstd_opt.c
+@@ -1,5 +1,6 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -12,11 +13,14 @@
+ #include "hist.h"
+ #include "zstd_opt.h"
+ 
++#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
+ 
+ #define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
+ #define ZSTD_MAX_PRICE     (1<<30)
+ 
+-#define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
++#define ZSTD_PREDEF_THRESHOLD 8   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
+ 
+ 
+ /*-*************************************
+@@ -26,27 +30,35 @@
+ #if 0    /* approximation at bit level (for tests) */
+ #  define BITCOST_ACCURACY 0
+ #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+-#  define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
++#  define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
+ #elif 0  /* fractional bit accuracy (for tests) */
+ #  define BITCOST_ACCURACY 8
+ #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+-#  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
++#  define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
+ #else    /* opt==approx, ultra==accurate */
+ #  define BITCOST_ACCURACY 8
+ #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+-#  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
++#  define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
+ #endif
+ 
++/* ZSTD_bitWeight() :
++ * provide estimated "cost" of a stat in full bits only */
+ MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
+ {
+     return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
+ }
+ 
++/* ZSTD_fracWeight() :
++ * provide fractional-bit "cost" of a stat,
++ * using linear interpolation approximation */
+ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
+ {
+     U32 const stat = rawStat + 1;
+     U32 const hb = ZSTD_highbit32(stat);
+     U32 const BWeight = hb * BITCOST_MULTIPLIER;
++    /* Fweight was meant for "Fractional weight"
++     * but it's effectively a value between 1 and 2
++     * using fixed point arithmetic */
+     U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
+     U32 const weight = BWeight + FWeight;
+     assert(hb + BITCOST_ACCURACY < 31);
+@@ -57,7 +69,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
+ /* debugging function,
+  * @return price in bytes as fractional value
+  * for debug messages only */
+-MEM_STATIC double ZSTD_fCost(U32 price)
++MEM_STATIC double ZSTD_fCost(int price)
+ {
+     return (double)price / (BITCOST_MULTIPLIER*8);
+ }
+@@ -88,20 +100,26 @@ static U32 sum_u32(const unsigned table[], size_t nbElts)
+     return total;
+ }
+ 
+-static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
++typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
++
++static U32
++ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
+ {
+     U32 s, sum=0;
+-    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
++    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
++            (unsigned)lastEltIndex+1, (unsigned)shift );
+     assert(shift < 30);
+     for (s=0; s<lastEltIndex+1; s++) {
+-        table[s] = 1 + (table[s] >> shift);
+-        sum += table[s];
++        unsigned const base = base1 ? 1 : (table[s]>0);
++        unsigned const newStat = base + (table[s] >> shift);
++        sum += newStat;
++        table[s] = newStat;
+     }
+     return sum;
+ }
+ 
+ /* ZSTD_scaleStats() :
+- * reduce all elements in table is sum too large
++ * reduce all elt frequencies in table if sum too large
+  * return the resulting sum of elements */
+ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
+ {
+@@ -110,7 +128,7 @@ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
+     DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
+     assert(logTarget < 30);
+     if (factor <= 1) return prevsum;
+-    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
++    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
+ }
+ 
+ /* ZSTD_rescaleFreqs() :
+@@ -129,18 +147,22 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
+     DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
+     optPtr->priceType = zop_dynamic;
+ 
+-    if (optPtr->litLengthSum == 0) {  /* first block : init */
+-        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
+-            DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
++    if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */
++
++        /* heuristic: use pre-defined stats for too small inputs */
++        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
++            DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
+             optPtr->priceType = zop_predef;
+         }
+ 
+         assert(optPtr->symbolCosts != NULL);
+         if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
+-            /* huffman table presumed generated by dictionary */
++
++            /* huffman stats covering the full value set : table presumed generated by dictionary */
+             optPtr->priceType = zop_dynamic;
+ 
+             if (compressedLiterals) {
++                /* generate literals statistics from huffman table */
+                 unsigned lit;
+                 assert(optPtr->litFreq != NULL);
+                 optPtr->litSum = 0;
+@@ -188,13 +210,14 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
+                     optPtr->offCodeSum += optPtr->offCodeFreq[of];
+             }   }
+ 
+-        } else {  /* not a dictionary */
++        } else {  /* first block, no dictionary */
+ 
+             assert(optPtr->litFreq != NULL);
+             if (compressedLiterals) {
++                /* base initial cost of literals on direct frequency within src */
+                 unsigned lit = MaxLit;
+                 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
+-                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
++                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
+             }
+ 
+             {   unsigned const baseLLfreqs[MaxLL+1] = {
+@@ -224,10 +247,9 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
+                 optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
+             }
+ 
+-
+         }
+ 
+-    } else {   /* new block : re-use previous statistics, scaled down */
++    } else {   /* new block : scale down accumulated statistics */
+ 
+         if (compressedLiterals)
+             optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
+@@ -246,6 +268,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
+                                 const optState_t* const optPtr,
+                                 int optLevel)
+ {
++    DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
+     if (litLength == 0) return 0;
+ 
+     if (!ZSTD_compressedLiterals(optPtr))
+@@ -255,11 +278,14 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
+         return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */
+ 
+     /* dynamic statistics */
+-    {   U32 price = litLength * optPtr->litSumBasePrice;
++    {   U32 price = optPtr->litSumBasePrice * litLength;
++        U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
+         U32 u;
++        assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
+         for (u=0; u < litLength; u++) {
+-            assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice);   /* literal cost should never be negative */
+-            price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
++            U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
++            if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
++            price -= litPrice;
+         }
+         return price;
+     }
+@@ -272,10 +298,11 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
+     assert(litLength <= ZSTD_BLOCKSIZE_MAX);
+     if (optPtr->priceType == zop_predef)
+         return WEIGHT(litLength, optLevel);
+-    /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
+-     * because it isn't representable in the zstd format. So instead just
+-     * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
+-     * would be all literals.
++
++    /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
++     * because it isn't representable in the zstd format.
++     * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
++     * In such a case, the block would be all literals.
+      */
+     if (litLength == ZSTD_BLOCKSIZE_MAX)
+         return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
+@@ -289,24 +316,25 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
+ }
+ 
+ /* ZSTD_getMatchPrice() :
+- * Provides the cost of the match part (offset + matchLength) of a sequence
++ * Provides the cost of the match part (offset + matchLength) of a sequence.
+  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
+- * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
++ * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
+  * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
+  */
+ FORCE_INLINE_TEMPLATE U32
+-ZSTD_getMatchPrice(U32 const offcode,
++ZSTD_getMatchPrice(U32 const offBase,
+                    U32 const matchLength,
+              const optState_t* const optPtr,
+                    int const optLevel)
+ {
+     U32 price;
+-    U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
++    U32 const offCode = ZSTD_highbit32(offBase);
+     U32 const mlBase = matchLength - MINMATCH;
+     assert(matchLength >= MINMATCH);
+ 
+-    if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
+-        return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
++    if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
++        return WEIGHT(mlBase, optLevel)
++             + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
+ 
+     /* dynamic statistics */
+     price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
+@@ -325,10 +353,10 @@ ZSTD_getMatchPrice(U32 const offcode,
+ }
+ 
+ /* ZSTD_updateStats() :
+- * assumption : literals + litLengtn <= iend */
++ * assumption : literals + litLength <= iend */
+ static void ZSTD_updateStats(optState_t* const optPtr,
+                              U32 litLength, const BYTE* literals,
+-                             U32 offsetCode, U32 matchLength)
++                             U32 offBase, U32 matchLength)
+ {
+     /* literals */
+     if (ZSTD_compressedLiterals(optPtr)) {
+@@ -344,8 +372,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
+         optPtr->litLengthSum++;
+     }
+ 
+-    /* offset code : expected to follow storeSeq() numeric representation */
+-    {   U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
++    /* offset code : follows storeSeq() numeric representation */
++    {   U32 const offCode = ZSTD_highbit32(offBase);
+         assert(offCode <= MaxOff);
+         optPtr->offCodeFreq[offCode]++;
+         optPtr->offCodeSum++;
+@@ -379,9 +407,11 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
+ 
+ /* Update hashTable3 up to ip (excluded)
+    Assumption : always within prefix (i.e. not within extDict) */
+-static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
+-                                              U32* nextToUpdate3,
+-                                              const BYTE* const ip)
++static
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
++                                       U32* nextToUpdate3,
++                                       const BYTE* const ip)
+ {
+     U32* const hashTable3 = ms->hashTable3;
+     U32 const hashLog3 = ms->hashLog3;
+@@ -408,7 +438,9 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
+  * @param ip assumed <= iend-8 .
+  * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
+  * @return : nb of positions added */
+-static U32 ZSTD_insertBt1(
++static
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++U32 ZSTD_insertBt1(
+                 const ZSTD_matchState_t* ms,
+                 const BYTE* const ip, const BYTE* const iend,
+                 U32 const target,
+@@ -527,6 +559,7 @@ static U32 ZSTD_insertBt1(
+ }
+ 
+ FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ void ZSTD_updateTree_internal(
+                 ZSTD_matchState_t* ms,
+                 const BYTE* const ip, const BYTE* const iend,
+@@ -535,7 +568,7 @@ void ZSTD_updateTree_internal(
+     const BYTE* const base = ms->window.base;
+     U32 const target = (U32)(ip - base);
+     U32 idx = ms->nextToUpdate;
+-    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
++    DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
+                 idx, target, dictMode);
+ 
+     while(idx < target) {
+@@ -553,15 +586,18 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
+ }
+ 
+ FORCE_INLINE_TEMPLATE
+-U32 ZSTD_insertBtAndGetAllMatches (
+-                    ZSTD_match_t* matches,   /* store result (found matches) in this table (presumed large enough) */
+-                    ZSTD_matchState_t* ms,
+-                    U32* nextToUpdate3,
+-                    const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
+-                    const U32 rep[ZSTD_REP_NUM],
+-                    U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
+-                    const U32 lengthToBeat,
+-                    U32 const mls /* template */)
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++U32
++ZSTD_insertBtAndGetAllMatches (
++                ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
++                ZSTD_matchState_t* ms,
++                U32* nextToUpdate3,
++                const BYTE* const ip, const BYTE* const iLimit,
++                const ZSTD_dictMode_e dictMode,
++                const U32 rep[ZSTD_REP_NUM],
++                const U32 ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
++                const U32 lengthToBeat,
++                const U32 mls /* template */)
+ {
+     const ZSTD_compressionParameters* const cParams = &ms->cParams;
+     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
+@@ -644,7 +680,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
+                 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
+                             repCode, ll0, repOffset, repLen);
+                 bestLength = repLen;
+-                matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
++                matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
+                 matches[mnum].len = (U32)repLen;
+                 mnum++;
+                 if ( (repLen > sufficient_len)
+@@ -673,7 +709,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
+                 bestLength = mlen;
+                 assert(curr > matchIndex3);
+                 assert(mnum==0);  /* no prior solution */
+-                matches[0].off = STORE_OFFSET(curr - matchIndex3);
++                matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
+                 matches[0].len = (U32)mlen;
+                 mnum = 1;
+                 if ( (mlen > sufficient_len) |
+@@ -706,13 +742,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
+         }
+ 
+         if (matchLength > bestLength) {
+-            DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
+-                    (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
++            DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
++                    (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
+             assert(matchEndIdx > matchIndex);
+             if (matchLength > matchEndIdx - matchIndex)
+                 matchEndIdx = matchIndex + (U32)matchLength;
+             bestLength = matchLength;
+-            matches[mnum].off = STORE_OFFSET(curr - matchIndex);
++            matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
+             matches[mnum].len = (U32)matchLength;
+             mnum++;
+             if ( (matchLength > ZSTD_OPT_NUM)
+@@ -754,12 +790,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
+ 
+             if (matchLength > bestLength) {
+                 matchIndex = dictMatchIndex + dmsIndexDelta;
+-                DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
+-                        (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
++                DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
++                        (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
+                 if (matchLength > matchEndIdx - matchIndex)
+                     matchEndIdx = matchIndex + (U32)matchLength;
+                 bestLength = matchLength;
+-                matches[mnum].off = STORE_OFFSET(curr - matchIndex);
++                matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
+                 matches[mnum].len = (U32)matchLength;
+                 mnum++;
+                 if ( (matchLength > ZSTD_OPT_NUM)
+@@ -792,7 +828,9 @@ typedef U32 (*ZSTD_getAllMatchesFn)(
+     U32 const ll0,
+     U32 const lengthToBeat);
+ 
+-FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
++FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++U32 ZSTD_btGetAllMatches_internal(
+         ZSTD_match_t* matches,
+         ZSTD_matchState_t* ms,
+         U32* nextToUpdate3,
+@@ -960,7 +998,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
+                                       const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
+ {
+     U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
+-    /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
++    /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
+     U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
+ 
+     /* Ensure that current block position is not outside of the match */
+@@ -971,11 +1009,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
+     }
+ 
+     if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
+-        U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
+-        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
+-                 candidateOffCode, candidateMatchLength, currPosInBlock);
++        U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
++        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
++                 candidateOffBase, candidateMatchLength, currPosInBlock);
+         matches[*nbMatches].len = candidateMatchLength;
+-        matches[*nbMatches].off = candidateOffCode;
++        matches[*nbMatches].off = candidateOffBase;
+         (*nbMatches)++;
+     }
+ }
+@@ -1011,11 +1049,6 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
+ *  Optimal parser
+ *********************************/
+ 
+-static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
+-{
+-    return sol.litlen + sol.mlen;
+-}
+-
+ #if 0 /* debug */
+ 
+ static void
+@@ -1033,7 +1066,13 @@ listStats(const U32* table, int lastEltID)
+ 
+ #endif
+ 
+-FORCE_INLINE_TEMPLATE size_t
++#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
++#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
++#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
++
++FORCE_INLINE_TEMPLATE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++size_t
+ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
+                                seqStore_t* seqStore,
+                                U32 rep[ZSTD_REP_NUM],
+@@ -1059,9 +1098,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
+ 
+     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
+     ZSTD_match_t* const matches = optStatePtr->matchTable;
+-    ZSTD_optimal_t lastSequence;
++    ZSTD_optimal_t lastStretch;
+     ZSTD_optLdm_t optLdm;
+ 
++    ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
++
+     optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
+     optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
+     ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
+@@ -1082,103 +1123,139 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
+             U32 const ll0 = !litlen;
+             U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
+             ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
+-                                              (U32)(ip-istart), (U32)(iend - ip));
+-            if (!nbMatches) { ip++; continue; }
++                                              (U32)(ip-istart), (U32)(iend-ip));
++            if (!nbMatches) {
++                DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
++                ip++;
++                continue;
++            }
++
++            /* Match found: let's store this solution, and eventually find more candidates.
++             * During this forward pass, @opt is used to store stretches,
++             * defined as "a match followed by N literals".
++             * Note how this is different from a Sequence, which is "N literals followed by a match".
++             * Storing stretches allows us to store different match predecessors
++             * for each literal position part of a literals run. */
+ 
+             /* initialize opt[0] */
+-            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
+-            opt[0].mlen = 0;  /* means is_a_literal */
++            opt[0].mlen = 0;  /* there are only literals so far */
+             opt[0].litlen = litlen;
+-            /* We don't need to include the actual price of the literals because
+-             * it is static for the duration of the forward pass, and is included
+-             * in every price. We include the literal length to avoid negative
+-             * prices when we subtract the previous literal length.
++            /* No need to include the actual price of the literals before the first match
++             * because it is static for the duration of the forward pass, and is included
++             * in every subsequent price. But, we include the literal length because
++             * the cost variation of litlen depends on the value of litlen.
+              */
+-            opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
++            opt[0].price = LL_PRICE(litlen);
++            ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
++            ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
+ 
+             /* large match -> immediate encoding */
+             {   U32 const maxML = matches[nbMatches-1].len;
+-                U32 const maxOffcode = matches[nbMatches-1].off;
+-                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
+-                            nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));
++                U32 const maxOffBase = matches[nbMatches-1].off;
++                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
++                            nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
+ 
+                 if (maxML > sufficient_len) {
+-                    lastSequence.litlen = litlen;
+-                    lastSequence.mlen = maxML;
+-                    lastSequence.off = maxOffcode;
+-                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
++                    lastStretch.litlen = 0;
++                    lastStretch.mlen = maxML;
++                    lastStretch.off = maxOffBase;
++                    DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
+                                 maxML, sufficient_len);
+                     cur = 0;
+-                    last_pos = ZSTD_totalLen(lastSequence);
++                    last_pos = maxML;
+                     goto _shortestPath;
+             }   }
+ 
+             /* set prices for first matches starting position == 0 */
+             assert(opt[0].price >= 0);
+-            {   U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+-                U32 pos;
++            {   U32 pos;
+                 U32 matchNb;
+                 for (pos = 1; pos < minMatch; pos++) {
+-                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
++                    opt[pos].price = ZSTD_MAX_PRICE;
++                    opt[pos].mlen = 0;
++                    opt[pos].litlen = litlen + pos;
+                 }
+                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
+-                    U32 const offcode = matches[matchNb].off;
++                    U32 const offBase = matches[matchNb].off;
+                     U32 const end = matches[matchNb].len;
+                     for ( ; pos <= end ; pos++ ) {
+-                        U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
+-                        U32 const sequencePrice = literalsPrice + matchPrice;
++                        int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
++                        int const sequencePrice = opt[0].price + matchPrice;
+                         DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
+                                     pos, ZSTD_fCost(sequencePrice));
+                         opt[pos].mlen = pos;
+-                        opt[pos].off = offcode;
+-                        opt[pos].litlen = litlen;
+-                        opt[pos].price = (int)sequencePrice;
+-                }   }
++                        opt[pos].off = offBase;
++                        opt[pos].litlen = 0; /* end of match */
++                        opt[pos].price = sequencePrice + LL_PRICE(0);
++                    }
++                }
+                 last_pos = pos-1;
++                opt[pos].price = ZSTD_MAX_PRICE;
+             }
+         }
+ 
+         /* check further positions */
+         for (cur = 1; cur <= last_pos; cur++) {
+             const BYTE* const inr = ip + cur;
+-            assert(cur < ZSTD_OPT_NUM);
+-            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
++            assert(cur <= ZSTD_OPT_NUM);
++            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
+ 
+             /* Fix current position with one literal if cheaper */
+-            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
++            {   U32 const litlen = opt[cur-1].litlen + 1;
+                 int const price = opt[cur-1].price
+-                                + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
+-                                + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
+-                                - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
++                                + LIT_PRICE(ip+cur-1)
++                                + LL_INCPRICE(litlen);
+                 assert(price < 1000000000); /* overflow check */
+                 if (price <= opt[cur].price) {
++                    ZSTD_optimal_t const prevMatch = opt[cur];
+                     DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
+                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
+                                 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
+-                    opt[cur].mlen = 0;
+-                    opt[cur].off = 0;
++                    opt[cur] = opt[cur-1];
+                     opt[cur].litlen = litlen;
+                     opt[cur].price = price;
++                    if ( (optLevel >= 1) /* additional check only for higher modes */
++                      && (prevMatch.litlen == 0) /* replace a match */
++                      && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
++                      && LIKELY(ip + cur < iend)
++                    ) {
++                        /* check next position, in case it would be cheaper */
++                        int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
++                        int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
++                        DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
++                                cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
++                        if ( (with1literal < withMoreLiterals)
++                          && (with1literal < opt[cur+1].price) ) {
++                            /* update offset history - before it disappears */
++                            U32 const prev = cur - prevMatch.mlen;
++                            repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
++                            assert(cur >= prevMatch.mlen);
++                            DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
++                                        ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
++                                        newReps.rep[0], newReps.rep[1], newReps.rep[2] );
++                            opt[cur+1] = prevMatch;  /* mlen & offbase */
++                            ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
++                            opt[cur+1].litlen = 1;
++                            opt[cur+1].price = with1literal;
++                            if (last_pos < cur+1) last_pos = cur+1;
++                        }
++                    }
+                 } else {
+-                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
+-                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
+-                                opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
++                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
++                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
+                 }
+             }
+ 
+-            /* Set the repcodes of the current position. We must do it here
+-             * because we rely on the repcodes of the 2nd to last sequence being
+-             * correct to set the next chunks repcodes during the backward
+-             * traversal.
++            /* Offset history is not updated during match comparison.
++             * Do it here, now that the match is selected and confirmed.
+              */
+             ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
+             assert(cur >= opt[cur].mlen);
+-            if (opt[cur].mlen != 0) {
++            if (opt[cur].litlen == 0) {
++                /* just finished a match => alter offset history */
+                 U32 const prev = cur - opt[cur].mlen;
+-                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
++                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
+                 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
+-            } else {
+-                ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
+             }
+ 
+             /* last match must start at a minimum distance of 8 from oend */
+@@ -1188,15 +1265,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
+ 
+             if ( (optLevel==0) /*static_test*/
+               && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
+-                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
++                DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
+                 continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
+             }
+ 
+             assert(opt[cur].price >= 0);
+-            {   U32 const ll0 = (opt[cur].mlen != 0);
+-                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
+-                U32 const previousPrice = (U32)opt[cur].price;
+-                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
++            {   U32 const ll0 = (opt[cur].litlen == 0);
++                int const previousPrice = opt[cur].price;
++                int const basePrice = previousPrice + LL_PRICE(0);
+                 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
+                 U32 matchNb;
+ 
+@@ -1208,18 +1284,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
+                     continue;
+                 }
+ 
+-                {   U32 const maxML = matches[nbMatches-1].len;
+-                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
+-                                inr-istart, cur, nbMatches, maxML);
+-
+-                    if ( (maxML > sufficient_len)
+-                      || (cur + maxML >= ZSTD_OPT_NUM) ) {
+-                        lastSequence.mlen = maxML;
+-                        lastSequence.off = matches[nbMatches-1].off;
+-                        lastSequence.litlen = litlen;
+-                        cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;  /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
+-                        last_pos = cur + ZSTD_totalLen(lastSequence);
+-                        if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
++                {   U32 const longestML = matches[nbMatches-1].len;
++                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
++                                inr-istart, cur, nbMatches, longestML);
++
++                    if ( (longestML > sufficient_len)
++                      || (cur + longestML >= ZSTD_OPT_NUM)
++                      || (ip + cur + longestML >= iend) ) {
++                        lastStretch.mlen = longestML;
++                        lastStretch.off = matches[nbMatches-1].off;
++                        lastStretch.litlen = 0;
++                        last_pos = cur + longestML;
+                         goto _shortestPath;
+                 }   }
+ 
+@@ -1230,20 +1305,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
+                     U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
+                     U32 mlen;
+ 
+-                    DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
+-                                matchNb, matches[matchNb].off, lastML, litlen);
++                    DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
++                                matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
+ 
+                     for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
+                         U32 const pos = cur + mlen;
+-                        int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
++                        int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
+ 
+                         if ((pos > last_pos) || (price < opt[pos].price)) {
+                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
+                                         pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
+-                            while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
++                            while (last_pos < pos) {
++                                /* fill empty positions, for future comparisons */
++                                last_pos++;
++                                opt[last_pos].price = ZSTD_MAX_PRICE;
++                                opt[last_pos].litlen = !0;  /* just needs to be != 0, to mean "not an end of match" */
++                            }
+                             opt[pos].mlen = mlen;
+                             opt[pos].off = offset;
+-                            opt[pos].litlen = litlen;
++                            opt[pos].litlen = 0;
+                             opt[pos].price = price;
+                         } else {
+                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
+@@ -1251,52 +1331,86 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
+                             if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
+                         }
+             }   }   }
++            opt[last_pos+1].price = ZSTD_MAX_PRICE;
+         }  /* for (cur = 1; cur <= last_pos; cur++) */
+ 
+-        lastSequence = opt[last_pos];
+-        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
+-        assert(cur < ZSTD_OPT_NUM);  /* control overflow*/
++        lastStretch = opt[last_pos];
++        assert(cur >= lastStretch.mlen);
++        cur = last_pos - lastStretch.mlen;
+ 
+ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
+         assert(opt[0].mlen == 0);
++        assert(last_pos >= lastStretch.mlen);
++        assert(cur == last_pos - lastStretch.mlen);
+ 
+-        /* Set the next chunk's repcodes based on the repcodes of the beginning
+-         * of the last match, and the last sequence. This avoids us having to
+-         * update them while traversing the sequences.
+-         */
+-        if (lastSequence.mlen != 0) {
+-            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
+-            ZSTD_memcpy(rep, &reps, sizeof(reps));
++        if (lastStretch.mlen==0) {
++            /* no solution : all matches have been converted into literals */
++            assert(lastStretch.litlen == (ip - anchor) + last_pos);
++            ip += last_pos;
++            continue;
++        }
++        assert(lastStretch.off > 0);
++
++        /* Update offset history */
++        if (lastStretch.litlen == 0) {
++            /* finishing on a match : update offset history */
++            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
++            ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
+         } else {
+-            ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
++            ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
++            assert(cur >= lastStretch.litlen);
++            cur -= lastStretch.litlen;
+         }
+ 
+-        {   U32 const storeEnd = cur + 1;
++        /* Let's write the shortest path solution.
++         * It is stored in @opt in reverse order,
++         * starting from @storeEnd (==cur+2),
++         * effectively partially @opt overwriting.
++         * Content is changed too:
++         * - So far, @opt stored stretches, aka a match followed by literals
++         * - Now, it will store sequences, aka literals followed by a match
++         */
++        {   U32 const storeEnd = cur + 2;
+             U32 storeStart = storeEnd;
+-            U32 seqPos = cur;
++            U32 stretchPos = cur;
+ 
+             DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
+                         last_pos, cur); (void)last_pos;
+-            assert(storeEnd < ZSTD_OPT_NUM);
+-            DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+-                        storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
+-            opt[storeEnd] = lastSequence;
+-            while (seqPos > 0) {
+-                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
++            assert(storeEnd < ZSTD_OPT_SIZE);
++            DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
++                        storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
++            if (lastStretch.litlen > 0) {
++                /* last "sequence" is unfinished: just a bunch of literals */
++                opt[storeEnd].litlen = lastStretch.litlen;
++                opt[storeEnd].mlen = 0;
++                storeStart = storeEnd-1;
++                opt[storeStart] = lastStretch;
++            } {
++                opt[storeEnd] = lastStretch;  /* note: litlen will be fixed */
++                storeStart = storeEnd;
++            }
++            while (1) {
++                ZSTD_optimal_t nextStretch = opt[stretchPos];
++                opt[storeStart].litlen = nextStretch.litlen;
++                DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
++                            opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
++                if (nextStretch.mlen == 0) {
++                    /* reaching beginning of segment */
++                    break;
++                }
+                 storeStart--;
+-                DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+-                            seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
+-                opt[storeStart] = opt[seqPos];
+-                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
++                opt[storeStart] = nextStretch; /* note: litlen will be fixed */
++                assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
++                stretchPos -= nextStretch.litlen + nextStretch.mlen;
+             }
+ 
+             /* save sequences */
+-            DEBUGLOG(6, "sending selected sequences into seqStore")
++            DEBUGLOG(6, "sending selected sequences into seqStore");
+             {   U32 storePos;
+                 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
+                     U32 const llen = opt[storePos].litlen;
+                     U32 const mlen = opt[storePos].mlen;
+-                    U32 const offCode = opt[storePos].off;
++                    U32 const offBase = opt[storePos].off;
+                     U32 const advance = llen + mlen;
+                     DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
+                                 anchor - istart, (unsigned)llen, (unsigned)mlen);
+@@ -1308,11 +1422,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
+                     }
+ 
+                     assert(anchor + llen <= iend);
+-                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
+-                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
++                    ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
++                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
+                     anchor += advance;
+                     ip = anchor;
+             }   }
++            DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
++
++            /* update all costs */
+             ZSTD_setBasePrices(optStatePtr, optLevel);
+         }
+     }   /* while (ip < ilimit) */
+@@ -1320,21 +1437,27 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
+     /* Return the last literals size */
+     return (size_t)(iend - anchor);
+ }
++#endif /* build exclusions */
+ 
++#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
+ static size_t ZSTD_compressBlock_opt0(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
+ {
+     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
+ }
++#endif
+ 
++#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+ static size_t ZSTD_compressBlock_opt2(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
+ {
+     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
+ }
++#endif
+ 
++#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
+ size_t ZSTD_compressBlock_btopt(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         const void* src, size_t srcSize)
+@@ -1342,20 +1465,23 @@ size_t ZSTD_compressBlock_btopt(
+     DEBUGLOG(5, "ZSTD_compressBlock_btopt");
+     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
+ }
++#endif
+ 
+ 
+ 
+ 
++#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+ /* ZSTD_initStats_ultra():
+  * make a first compression pass, just to seed stats with more accurate starting values.
+  * only works on first block, with no dictionary and no ldm.
+- * this function cannot error, hence its contract must be respected.
++ * this function cannot error out, its narrow contract must be respected.
+  */
+-static void
+-ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+-                     seqStore_t* seqStore,
+-                     U32 rep[ZSTD_REP_NUM],
+-               const void* src, size_t srcSize)
++static
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
++                          seqStore_t* seqStore,
++                          U32 rep[ZSTD_REP_NUM],
++                    const void* src, size_t srcSize)
+ {
+     U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
+     ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
+@@ -1368,7 +1494,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+ 
+     ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt*/
+ 
+-    /* invalidate first scan from history */
++    /* invalidate first scan from history, only keep entropy stats */
+     ZSTD_resetSeqStore(seqStore);
+     ms->window.base -= srcSize;
+     ms->window.dictLimit += (U32)srcSize;
+@@ -1392,10 +1518,10 @@ size_t ZSTD_compressBlock_btultra2(
+     U32 const curr = (U32)((const BYTE*)src - ms->window.base);
+     DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
+ 
+-    /* 2-pass strategy:
++    /* 2-passes strategy:
+      * this strategy makes a first pass over first block to collect statistics
+-     * and seed next round's statistics with it.
+-     * After 1st pass, function forgets everything, and starts a new block.
++     * in order to seed next round's statistics with it.
++     * After 1st pass, function forgets history, and starts a new block.
+      * Consequently, this can only work if no data has been previously loaded in tables,
+      * aka, no dictionary, no prefix, no ldm preprocessing.
+      * The compression ratio gain is generally small (~0.5% on first block),
+@@ -1404,15 +1530,17 @@ size_t ZSTD_compressBlock_btultra2(
+     if ( (ms->opt.litLengthSum==0)   /* first block */
+       && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
+       && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
+-      && (curr == ms->window.dictLimit)   /* start of frame, nothing already loaded nor skipped */
+-      && (srcSize > ZSTD_PREDEF_THRESHOLD)
++      && (curr == ms->window.dictLimit)    /* start of frame, nothing already loaded nor skipped */
++      && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
+       ) {
+         ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
+     }
+ 
+     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
+ }
++#endif
+ 
++#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
+ size_t ZSTD_compressBlock_btopt_dictMatchState(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         const void* src, size_t srcSize)
+@@ -1420,18 +1548,20 @@ size_t ZSTD_compressBlock_btopt_dictMatchState(
+     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
+ }
+ 
+-size_t ZSTD_compressBlock_btultra_dictMatchState(
++size_t ZSTD_compressBlock_btopt_extDict(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         const void* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
++    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
+ }
++#endif
+ 
+-size_t ZSTD_compressBlock_btopt_extDict(
++#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
++size_t ZSTD_compressBlock_btultra_dictMatchState(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         const void* src, size_t srcSize)
+ {
+-    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
++    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
+ }
+ 
+ size_t ZSTD_compressBlock_btultra_extDict(
+@@ -1440,6 +1570,7 @@ size_t ZSTD_compressBlock_btultra_extDict(
+ {
+     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
+ }
++#endif
+ 
+ /* note : no btultra2 variant for extDict nor dictMatchState,
+  * because btultra2 is not meant to work with dictionaries
+diff --git a/lib/zstd/compress/zstd_opt.h b/lib/zstd/compress/zstd_opt.h
+index 22b862858ba7..ac1b743d27cd 100644
+--- a/lib/zstd/compress/zstd_opt.h
++++ b/lib/zstd/compress/zstd_opt.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -14,30 +15,40 @@
+ 
+ #include "zstd_compress_internal.h"
+ 
++#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
++ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
+ /* used in ZSTD_loadDictionaryContent() */
+ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
++#endif
+ 
++#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
+ size_t ZSTD_compressBlock_btopt(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_btultra(
++size_t ZSTD_compressBlock_btopt_dictMatchState(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-size_t ZSTD_compressBlock_btultra2(
++size_t ZSTD_compressBlock_btopt_extDict(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+ 
++#define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt
++#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE ZSTD_compressBlock_btopt_dictMatchState
++#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT ZSTD_compressBlock_btopt_extDict
++#else
++#define ZSTD_COMPRESSBLOCK_BTOPT NULL
++#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE NULL
++#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT NULL
++#endif
+ 
+-size_t ZSTD_compressBlock_btopt_dictMatchState(
++#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
++size_t ZSTD_compressBlock_btultra(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+ size_t ZSTD_compressBlock_btultra_dictMatchState(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+-
+-size_t ZSTD_compressBlock_btopt_extDict(
+-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+-        void const* src, size_t srcSize);
+ size_t ZSTD_compressBlock_btultra_extDict(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         void const* src, size_t srcSize);
+@@ -45,6 +56,20 @@ size_t ZSTD_compressBlock_btultra_extDict(
+         /* note : no btultra2 variant for extDict nor dictMatchState,
+          * because btultra2 is not meant to work with dictionaries
+          * and is only specific for the first block (no prefix) */
++size_t ZSTD_compressBlock_btultra2(
++        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
++        void const* src, size_t srcSize);
++
++#define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra
++#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE ZSTD_compressBlock_btultra_dictMatchState
++#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT ZSTD_compressBlock_btultra_extDict
++#define ZSTD_COMPRESSBLOCK_BTULTRA2 ZSTD_compressBlock_btultra2
++#else
++#define ZSTD_COMPRESSBLOCK_BTULTRA NULL
++#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE NULL
++#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT NULL
++#define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL
++#endif
+ 
+ 
+ #endif /* ZSTD_OPT_H */
+diff --git a/lib/zstd/decompress/huf_decompress.c b/lib/zstd/decompress/huf_decompress.c
+index 60958afebc41..ac8b87f48f84 100644
+--- a/lib/zstd/decompress/huf_decompress.c
++++ b/lib/zstd/decompress/huf_decompress.c
+@@ -1,7 +1,8 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /* ******************************************************************
+  * huff0 huffman decoder,
+  * part of Finite State Entropy library
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  *
+  *  You can contact the author at :
+  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+@@ -19,10 +20,10 @@
+ #include "../common/compiler.h"
+ #include "../common/bitstream.h"  /* BIT_* */
+ #include "../common/fse.h"        /* to compress headers */
+-#define HUF_STATIC_LINKING_ONLY
+ #include "../common/huf.h"
+ #include "../common/error_private.h"
+ #include "../common/zstd_internal.h"
++#include "../common/bits.h"       /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */
+ 
+ /* **************************************************************
+ *  Constants
+@@ -34,6 +35,12 @@
+ *  Macros
+ ****************************************************************/
+ 
++#ifdef HUF_DISABLE_FAST_DECODE
++# define HUF_ENABLE_FAST_DECODE 0
++#else
++# define HUF_ENABLE_FAST_DECODE 1
++#endif
++
+ /* These two optional macros force the use one way or another of the two
+  * Huffman decompression implementations. You can't force in both directions
+  * at the same time.
+@@ -43,27 +50,25 @@
+ #error "Cannot force the use of the X1 and X2 decoders at the same time!"
+ #endif
+ 
+-#if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
+-# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
++/* When DYNAMIC_BMI2 is enabled, fast decoders are only called when bmi2 is
++ * supported at runtime, so we can add the BMI2 target attribute.
++ * When it is disabled, we will still get BMI2 if it is enabled statically.
++ */
++#if DYNAMIC_BMI2
++# define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
+ #else
+-# define HUF_ASM_X86_64_BMI2_ATTRS
++# define HUF_FAST_BMI2_ATTRS
+ #endif
+ 
+ #define HUF_EXTERN_C
+ #define HUF_ASM_DECL HUF_EXTERN_C
+ 
+-#if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
++#if DYNAMIC_BMI2
+ # define HUF_NEED_BMI2_FUNCTION 1
+ #else
+ # define HUF_NEED_BMI2_FUNCTION 0
+ #endif
+ 
+-#if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
+-# define HUF_NEED_DEFAULT_FUNCTION 1
+-#else
+-# define HUF_NEED_DEFAULT_FUNCTION 0
+-#endif
+-
+ /* **************************************************************
+ *  Error Management
+ ****************************************************************/
+@@ -80,6 +85,11 @@
+ /* **************************************************************
+ *  BMI2 Variant Wrappers
+ ****************************************************************/
++typedef size_t (*HUF_DecompressUsingDTableFn)(void *dst, size_t dstSize,
++                                              const void *cSrc,
++                                              size_t cSrcSize,
++                                              const HUF_DTable *DTable);
++
+ #if DYNAMIC_BMI2
+ 
+ #define HUF_DGEN(fn)                                                        \
+@@ -101,9 +111,9 @@
+     }                                                                       \
+                                                                             \
+     static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
+-                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
++                     size_t cSrcSize, HUF_DTable const* DTable, int flags)  \
+     {                                                                       \
+-        if (bmi2) {                                                         \
++        if (flags & HUF_flags_bmi2) {                                       \
+             return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
+         }                                                                   \
+         return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
+@@ -113,9 +123,9 @@
+ 
+ #define HUF_DGEN(fn)                                                        \
+     static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
+-                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
++                     size_t cSrcSize, HUF_DTable const* DTable, int flags)  \
+     {                                                                       \
+-        (void)bmi2;                                                         \
++        (void)flags;                                                        \
+         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
+     }
+ 
+@@ -134,43 +144,66 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
+     return dtd;
+ }
+ 
+-#if ZSTD_ENABLE_ASM_X86_64_BMI2
+-
+-static size_t HUF_initDStream(BYTE const* ip) {
++static size_t HUF_initFastDStream(BYTE const* ip) {
+     BYTE const lastByte = ip[7];
+-    size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
++    size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
+     size_t const value = MEM_readLEST(ip) | 1;
+     assert(bitsConsumed <= 8);
++    assert(sizeof(size_t) == 8);
+     return value << bitsConsumed;
+ }
++
++
++/*
++ * The input/output arguments to the Huffman fast decoding loop:
++ *
++ * ip [in/out] - The input pointers, must be updated to reflect what is consumed.
++ * op [in/out] - The output pointers, must be updated to reflect what is written.
++ * bits [in/out] - The bitstream containers, must be updated to reflect the current state.
++ * dt [in] - The decoding table.
++ * ilowest [in] - The beginning of the valid range of the input. Decoders may read
++ *                down to this pointer. It may be below iend[0].
++ * oend [in] - The end of the output stream. op[3] must not cross oend.
++ * iend [in] - The end of each input stream. ip[i] may cross iend[i],
++ *             as long as it is above ilowest, but that indicates corruption.
++ */
+ typedef struct {
+     BYTE const* ip[4];
+     BYTE* op[4];
+     U64 bits[4];
+     void const* dt;
+-    BYTE const* ilimit;
++    BYTE const* ilowest;
+     BYTE* oend;
+     BYTE const* iend[4];
+-} HUF_DecompressAsmArgs;
++} HUF_DecompressFastArgs;
++
++typedef void (*HUF_DecompressFastLoopFn)(HUF_DecompressFastArgs*);
+ 
+ /*
+- * Initializes args for the asm decoding loop.
+- * @returns 0 on success
+- *          1 if the fallback implementation should be used.
++ * Initializes args for the fast decoding loop.
++ * @returns 1 on success
++ *          0 if the fallback implementation should be used.
+  *          Or an error code on failure.
+  */
+-static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
++static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
+ {
+     void const* dt = DTable + 1;
+     U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
+ 
+-    const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
++    const BYTE* const istart = (const BYTE*)src;
+ 
+-    BYTE* const oend = (BYTE*)dst + dstSize;
++    BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
+ 
+-    /* The following condition is false on x32 platform,
+-     * but HUF_asm is not compatible with this ABI */
+-    if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1;
++    /* The fast decoding loop assumes 64-bit little-endian.
++     * This condition is false on x32.
++     */
++    if (!MEM_isLittleEndian() || MEM_32bits())
++        return 0;
++
++    /* Avoid nullptr addition */
++    if (dstSize == 0)
++        return 0;
++    assert(dst != NULL);
+ 
+     /* strict minimum : jump table + 1 byte per stream */
+     if (srcSize < 10)
+@@ -181,11 +214,10 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
+      * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
+      */
+     if (dtLog != HUF_DECODER_FAST_TABLELOG)
+-        return 1;
++        return 0;
+ 
+     /* Read the jump table. */
+     {
+-        const BYTE* const istart = (const BYTE*)src;
+         size_t const length1 = MEM_readLE16(istart);
+         size_t const length2 = MEM_readLE16(istart+2);
+         size_t const length3 = MEM_readLE16(istart+4);
+@@ -195,13 +227,11 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
+         args->iend[2] = args->iend[1] + length2;
+         args->iend[3] = args->iend[2] + length3;
+ 
+-        /* HUF_initDStream() requires this, and this small of an input
++        /* HUF_initFastDStream() requires this, and this small of an input
+          * won't benefit from the ASM loop anyways.
+-         * length1 must be >= 16 so that ip[0] >= ilimit before the loop
+-         * starts.
+          */
+-        if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
+-            return 1;
++        if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8)
++            return 0;
+         if (length4 > srcSize) return ERROR(corruption_detected);   /* overflow */
+     }
+     /* ip[] contains the position that is currently loaded into bits[]. */
+@@ -218,7 +248,7 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
+ 
+     /* No point to call the ASM loop for tiny outputs. */
+     if (args->op[3] >= oend)
+-        return 1;
++        return 0;
+ 
+     /* bits[] is the bit container.
+         * It is read from the MSB down to the LSB.
+@@ -227,24 +257,25 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
+         * set, so that CountTrailingZeros(bits[]) can be used
+         * to count how many bits we've consumed.
+         */
+-    args->bits[0] = HUF_initDStream(args->ip[0]);
+-    args->bits[1] = HUF_initDStream(args->ip[1]);
+-    args->bits[2] = HUF_initDStream(args->ip[2]);
+-    args->bits[3] = HUF_initDStream(args->ip[3]);
+-
+-    /* If ip[] >= ilimit, it is guaranteed to be safe to
+-        * reload bits[]. It may be beyond its section, but is
+-        * guaranteed to be valid (>= istart).
+-        */
+-    args->ilimit = ilimit;
++    args->bits[0] = HUF_initFastDStream(args->ip[0]);
++    args->bits[1] = HUF_initFastDStream(args->ip[1]);
++    args->bits[2] = HUF_initFastDStream(args->ip[2]);
++    args->bits[3] = HUF_initFastDStream(args->ip[3]);
++
++    /* The decoders must be sure to never read beyond ilowest.
++     * This is lower than iend[0], but allowing decoders to read
++     * down to ilowest can allow an extra iteration or two in the
++     * fast loop.
++     */
++    args->ilowest = istart;
+ 
+     args->oend = oend;
+     args->dt = dt;
+ 
+-    return 0;
++    return 1;
+ }
+ 
+-static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd)
++static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArgs const* args, int stream, BYTE* segmentEnd)
+ {
+     /* Validate that we haven't overwritten. */
+     if (args->op[stream] > segmentEnd)
+@@ -258,15 +289,33 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs
+         return ERROR(corruption_detected);
+ 
+     /* Construct the BIT_DStream_t. */
+-    bit->bitContainer = MEM_readLE64(args->ip[stream]);
+-    bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]);
+-    bit->start = (const char*)args->iend[0];
++    assert(sizeof(size_t) == 8);
++    bit->bitContainer = MEM_readLEST(args->ip[stream]);
++    bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
++    bit->start = (const char*)args->ilowest;
+     bit->limitPtr = bit->start + sizeof(size_t);
+     bit->ptr = (const char*)args->ip[stream];
+ 
+     return 0;
+ }
+-#endif
++
++/* Calls X(N) for each stream 0, 1, 2, 3. */
++#define HUF_4X_FOR_EACH_STREAM(X) \
++    do {                          \
++        X(0);                     \
++        X(1);                     \
++        X(2);                     \
++        X(3);                     \
++    } while (0)
++
++/* Calls X(N, var) for each stream 0, 1, 2, 3. */
++#define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \
++    do {                                        \
++        X(0, (var));                            \
++        X(1, (var));                            \
++        X(2, (var));                            \
++        X(3, (var));                            \
++    } while (0)
+ 
+ 
+ #ifndef HUF_FORCE_DECOMPRESS_X2
+@@ -283,10 +332,11 @@ typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1;   /* single-symbol decodi
+ static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
+     U64 D4;
+     if (MEM_isLittleEndian()) {
+-        D4 = (symbol << 8) + nbBits;
++        D4 = (U64)((symbol << 8) + nbBits);
+     } else {
+-        D4 = symbol + (nbBits << 8);
++        D4 = (U64)(symbol + (nbBits << 8));
+     }
++    assert(D4 < (1U << 16));
+     D4 *= 0x0001000100010001ULL;
+     return D4;
+ }
+@@ -329,13 +379,7 @@ typedef struct {
+         BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
+ } HUF_ReadDTableX1_Workspace;
+ 
+-
+-size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
+-{
+-    return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
+-}
+-
+-size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
++size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags)
+ {
+     U32 tableLog = 0;
+     U32 nbSymbols = 0;
+@@ -350,7 +394,7 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
+     DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
+     /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
+ 
+-    iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
++    iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags);
+     if (HUF_isError(iSize)) return iSize;
+ 
+ 
+@@ -377,9 +421,8 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
+      * rankStart[0] is not filled because there are no entries in the table for
+      * weight 0.
+      */
+-    {
+-        int n;
+-        int nextRankStart = 0;
++    {   int n;
++        U32 nextRankStart = 0;
+         int const unroll = 4;
+         int const nLimit = (int)nbSymbols - unroll + 1;
+         for (n=0; n<(int)tableLog+1; n++) {
+@@ -406,10 +449,9 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
+      * We can switch based on the length to a different inner loop which is
+      * optimized for that particular case.
+      */
+-    {
+-        U32 w;
+-        int symbol=wksp->rankVal[0];
+-        int rankStart=0;
++    {   U32 w;
++        int symbol = wksp->rankVal[0];
++        int rankStart = 0;
+         for (w=1; w<tableLog+1; ++w) {
+             int const symbolCount = wksp->rankVal[w];
+             int const length = (1 << w) >> 1;
+@@ -483,15 +525,19 @@ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog
+ }
+ 
+ #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
+-    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
++    do { *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); } while (0)
+ 
+-#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
+-    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+-        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
++#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)      \
++    do {                                            \
++        if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
++            HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
++    } while (0)
+ 
+-#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
+-    if (MEM_64bits()) \
+-        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
++#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr)      \
++    do {                                            \
++        if (MEM_64bits())                           \
++            HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
++    } while (0)
+ 
+ HINT_INLINE size_t
+ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
+@@ -519,7 +565,7 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons
+     while (p < pEnd)
+         HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+ 
+-    return pEnd-pStart;
++    return (size_t)(pEnd-pStart);
+ }
+ 
+ FORCE_INLINE_TEMPLATE size_t
+@@ -529,7 +575,7 @@ HUF_decompress1X1_usingDTable_internal_body(
+     const HUF_DTable* DTable)
+ {
+     BYTE* op = (BYTE*)dst;
+-    BYTE* const oend = op + dstSize;
++    BYTE* const oend = ZSTD_maybeNullPtrAdd(op, dstSize);
+     const void* dtPtr = DTable + 1;
+     const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
+     BIT_DStream_t bitD;
+@@ -545,6 +591,10 @@ HUF_decompress1X1_usingDTable_internal_body(
+     return dstSize;
+ }
+ 
++/* HUF_decompress4X1_usingDTable_internal_body():
++ * Conditions :
++ * @dstSize >= 6
++ */
+ FORCE_INLINE_TEMPLATE size_t
+ HUF_decompress4X1_usingDTable_internal_body(
+           void* dst,  size_t dstSize,
+@@ -553,6 +603,7 @@ HUF_decompress4X1_usingDTable_internal_body(
+ {
+     /* Check */
+     if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
++    if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
+ 
+     {   const BYTE* const istart = (const BYTE*) cSrc;
+         BYTE* const ostart = (BYTE*) dst;
+@@ -588,6 +639,7 @@ HUF_decompress4X1_usingDTable_internal_body(
+ 
+         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+         if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
++        assert(dstSize >= 6); /* validated above */
+         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+@@ -650,52 +702,173 @@ size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, vo
+ }
+ #endif
+ 
+-#if HUF_NEED_DEFAULT_FUNCTION
+ static
+ size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
+                     size_t cSrcSize, HUF_DTable const* DTable) {
+     return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
+ }
+-#endif
+ 
+ #if ZSTD_ENABLE_ASM_X86_64_BMI2
+ 
+-HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
++HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
++
++#endif
++
++static HUF_FAST_BMI2_ATTRS
++void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
++{
++    U64 bits[4];
++    BYTE const* ip[4];
++    BYTE* op[4];
++    U16 const* const dtable = (U16 const*)args->dt;
++    BYTE* const oend = args->oend;
++    BYTE const* const ilowest = args->ilowest;
++
++    /* Copy the arguments to local variables */
++    ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
++    ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
++    ZSTD_memcpy(&op, &args->op, sizeof(op));
++
++    assert(MEM_isLittleEndian());
++    assert(!MEM_32bits());
++
++    for (;;) {
++        BYTE* olimit;
++        int stream;
++
++        /* Assert loop preconditions */
++#ifndef NDEBUG
++        for (stream = 0; stream < 4; ++stream) {
++            assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
++            assert(ip[stream] >= ilowest);
++        }
++#endif
++        /* Compute olimit */
++        {
++            /* Each iteration produces 5 output symbols per stream */
++            size_t const oiters = (size_t)(oend - op[3]) / 5;
++            /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
++             * per stream.
++             */
++            size_t const iiters = (size_t)(ip[0] - ilowest) / 7;
++            /* We can safely run iters iterations before running bounds checks */
++            size_t const iters = MIN(oiters, iiters);
++            size_t const symbols = iters * 5;
++
++            /* We can simply check that op[3] < olimit, instead of checking all
++             * of our bounds, since we can't hit the other bounds until we've run
++             * iters iterations, which only happens when op[3] == olimit.
++             */
++            olimit = op[3] + symbols;
++
++            /* Exit fast decoding loop once we reach the end. */
++            if (op[3] == olimit)
++                break;
++
++            /* Exit the decoding loop if any input pointer has crossed the
++             * previous one. This indicates corruption, and a precondition
++             * to our loop is that ip[i] >= ip[0].
++             */
++            for (stream = 1; stream < 4; ++stream) {
++                if (ip[stream] < ip[stream - 1])
++                    goto _out;
++            }
++        }
++
++#ifndef NDEBUG
++        for (stream = 1; stream < 4; ++stream) {
++            assert(ip[stream] >= ip[stream - 1]);
++        }
++#endif
++
++#define HUF_4X1_DECODE_SYMBOL(_stream, _symbol)                 \
++    do {                                                        \
++        int const index = (int)(bits[(_stream)] >> 53);         \
++        int const entry = (int)dtable[index];                   \
++        bits[(_stream)] <<= (entry & 0x3F);                     \
++        op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \
++    } while (0)
++
++#define HUF_4X1_RELOAD_STREAM(_stream)                              \
++    do {                                                            \
++        int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
++        int const nbBits = ctz & 7;                                 \
++        int const nbBytes = ctz >> 3;                               \
++        op[(_stream)] += 5;                                         \
++        ip[(_stream)] -= nbBytes;                                   \
++        bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1;            \
++        bits[(_stream)] <<= nbBits;                                 \
++    } while (0)
++
++        /* Manually unroll the loop because compilers don't consistently
++         * unroll the inner loops, which destroys performance.
++         */
++        do {
++            /* Decode 5 symbols in each of the 4 streams */
++            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0);
++            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1);
++            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2);
++            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3);
++            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4);
++
++            /* Reload each of the 4 bitstreams */
++            HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM);
++        } while (op[3] < olimit);
++
++#undef HUF_4X1_DECODE_SYMBOL
++#undef HUF_4X1_RELOAD_STREAM
++    }
+ 
+-static HUF_ASM_X86_64_BMI2_ATTRS
++_out:
++
++    /* Save the final values of each of the state variables back to args. */
++    ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
++    ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
++    ZSTD_memcpy(&args->op, &op, sizeof(op));
++}
++
++/*
++ * @returns @p dstSize on success (>= 6)
++ *          0 if the fallback implementation should be used
++ *          An error if an error occurred
++ */
++static HUF_FAST_BMI2_ATTRS
+ size_t
+-HUF_decompress4X1_usingDTable_internal_bmi2_asm(
++HUF_decompress4X1_usingDTable_internal_fast(
+           void* dst,  size_t dstSize,
+     const void* cSrc, size_t cSrcSize,
+-    const HUF_DTable* DTable)
++    const HUF_DTable* DTable,
++    HUF_DecompressFastLoopFn loopFn)
+ {
+     void const* dt = DTable + 1;
+-    const BYTE* const iend = (const BYTE*)cSrc + 6;
+-    BYTE* const oend = (BYTE*)dst + dstSize;
+-    HUF_DecompressAsmArgs args;
+-    {
+-        size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
+-        FORWARD_IF_ERROR(ret, "Failed to init asm args");
+-        if (ret != 0)
+-            return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
++    BYTE const* const ilowest = (BYTE const*)cSrc;
++    BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
++    HUF_DecompressFastArgs args;
++    {   size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
++        FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
++        if (ret == 0)
++            return 0;
+     }
+ 
+-    assert(args.ip[0] >= args.ilimit);
+-    HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args);
++    assert(args.ip[0] >= args.ilowest);
++    loopFn(&args);
+ 
+-    /* Our loop guarantees that ip[] >= ilimit and that we haven't
++    /* Our loop guarantees that ip[] >= ilowest and that we haven't
+     * overwritten any op[].
+     */
+-    assert(args.ip[0] >= iend);
+-    assert(args.ip[1] >= iend);
+-    assert(args.ip[2] >= iend);
+-    assert(args.ip[3] >= iend);
++    assert(args.ip[0] >= ilowest);
++    assert(args.ip[0] >= ilowest);
++    assert(args.ip[1] >= ilowest);
++    assert(args.ip[2] >= ilowest);
++    assert(args.ip[3] >= ilowest);
+     assert(args.op[3] <= oend);
+-    (void)iend;
++
++    assert(ilowest == args.ilowest);
++    assert(ilowest + 6 == args.iend[0]);
++    (void)ilowest;
+ 
+     /* finish bit streams one by one. */
+-    {
+-        size_t const segmentSize = (dstSize+3) / 4;
++    {   size_t const segmentSize = (dstSize+3) / 4;
+         BYTE* segmentEnd = (BYTE*)dst;
+         int i;
+         for (i = 0; i < 4; ++i) {
+@@ -712,97 +885,59 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm(
+     }
+ 
+     /* decoded size */
++    assert(dstSize != 0);
+     return dstSize;
+ }
+-#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
+-
+-typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
+-                                               const void *cSrc,
+-                                               size_t cSrcSize,
+-                                               const HUF_DTable *DTable);
+ 
+ HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
+ 
+ static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
+-                    size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
++                    size_t cSrcSize, HUF_DTable const* DTable, int flags)
+ {
++    HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X1_usingDTable_internal_default;
++    HUF_DecompressFastLoopFn loopFn = HUF_decompress4X1_usingDTable_internal_fast_c_loop;
++
+ #if DYNAMIC_BMI2
+-    if (bmi2) {
++    if (flags & HUF_flags_bmi2) {
++        fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2;
+ # if ZSTD_ENABLE_ASM_X86_64_BMI2
+-        return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
+-# else
+-        return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
++        if (!(flags & HUF_flags_disableAsm)) {
++            loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
++        }
+ # endif
++    } else {
++        return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
+     }
+-#else
+-    (void)bmi2;
+ #endif
+ 
+ #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
+-    return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
+-#else
+-    return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
++    if (!(flags & HUF_flags_disableAsm)) {
++        loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
++    }
+ #endif
+-}
+-
+-
+-size_t HUF_decompress1X1_usingDTable(
+-          void* dst,  size_t dstSize,
+-    const void* cSrc, size_t cSrcSize,
+-    const HUF_DTable* DTable)
+-{
+-    DTableDesc dtd = HUF_getDTableDesc(DTable);
+-    if (dtd.tableType != 0) return ERROR(GENERIC);
+-    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+-}
+ 
+-size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+-                                   const void* cSrc, size_t cSrcSize,
+-                                   void* workSpace, size_t wkspSize)
+-{
+-    const BYTE* ip = (const BYTE*) cSrc;
+-
+-    size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
+-    if (HUF_isError(hSize)) return hSize;
+-    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+-    ip += hSize; cSrcSize -= hSize;
+-
+-    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+-}
+-
+-
+-size_t HUF_decompress4X1_usingDTable(
+-          void* dst,  size_t dstSize,
+-    const void* cSrc, size_t cSrcSize,
+-    const HUF_DTable* DTable)
+-{
+-    DTableDesc dtd = HUF_getDTableDesc(DTable);
+-    if (dtd.tableType != 0) return ERROR(GENERIC);
+-    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
++    if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
++        size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
++        if (ret != 0)
++            return ret;
++    }
++    return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
+ }
+ 
+-static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
++static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                    const void* cSrc, size_t cSrcSize,
+-                                   void* workSpace, size_t wkspSize, int bmi2)
++                                   void* workSpace, size_t wkspSize, int flags)
+ {
+     const BYTE* ip = (const BYTE*) cSrc;
+ 
+-    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
++    size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
+     if (HUF_isError(hSize)) return hSize;
+     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+     ip += hSize; cSrcSize -= hSize;
+ 
+-    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+-}
+-
+-size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+-                                   const void* cSrc, size_t cSrcSize,
+-                                   void* workSpace, size_t wkspSize)
+-{
+-    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
++    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
+ }
+ 
+-
+ #endif /* HUF_FORCE_DECOMPRESS_X2 */
+ 
+ 
+@@ -985,7 +1120,7 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32
+ 
+ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
+                            const sortedSymbol_t* sortedList,
+-                           const U32* rankStart, rankValCol_t *rankValOrigin, const U32 maxWeight,
++                           const U32* rankStart, rankValCol_t* rankValOrigin, const U32 maxWeight,
+                            const U32 nbBitsBaseline)
+ {
+     U32* const rankVal = rankValOrigin[0];
+@@ -1040,14 +1175,7 @@ typedef struct {
+ 
+ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
+                        const void* src, size_t srcSize,
+-                             void* workSpace, size_t wkspSize)
+-{
+-    return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
+-}
+-
+-size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
+-                       const void* src, size_t srcSize,
+-                             void* workSpace, size_t wkspSize, int bmi2)
++                             void* workSpace, size_t wkspSize, int flags)
+ {
+     U32 tableLog, maxW, nbSymbols;
+     DTableDesc dtd = HUF_getDTableDesc(DTable);
+@@ -1069,7 +1197,7 @@ size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
+     if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+     /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
+ 
+-    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2);
++    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), flags);
+     if (HUF_isError(iSize)) return iSize;
+ 
+     /* check result */
+@@ -1159,15 +1287,19 @@ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, c
+ }
+ 
+ #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+-    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
++    do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0)
+ 
+-#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+-    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+-        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
++#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)                     \
++    do {                                                           \
++        if (MEM_64bits() || (HUF_TABLELOG_MAX<=12))                \
++            ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
++    } while (0)
+ 
+-#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+-    if (MEM_64bits()) \
+-        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
++#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr)                     \
++    do {                                                           \
++        if (MEM_64bits())                                          \
++            ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
++    } while (0)
+ 
+ HINT_INLINE size_t
+ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
+@@ -1227,7 +1359,7 @@ HUF_decompress1X2_usingDTable_internal_body(
+ 
+     /* decode */
+     {   BYTE* const ostart = (BYTE*) dst;
+-        BYTE* const oend = ostart + dstSize;
++        BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, dstSize);
+         const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
+         const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+         DTableDesc const dtd = HUF_getDTableDesc(DTable);
+@@ -1240,6 +1372,11 @@ HUF_decompress1X2_usingDTable_internal_body(
+     /* decoded size */
+     return dstSize;
+ }
++
++/* HUF_decompress4X2_usingDTable_internal_body():
++ * Conditions:
++ * @dstSize >= 6
++ */
+ FORCE_INLINE_TEMPLATE size_t
+ HUF_decompress4X2_usingDTable_internal_body(
+           void* dst,  size_t dstSize,
+@@ -1247,6 +1384,7 @@ HUF_decompress4X2_usingDTable_internal_body(
+     const HUF_DTable* DTable)
+ {
+     if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
++    if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
+ 
+     {   const BYTE* const istart = (const BYTE*) cSrc;
+         BYTE* const ostart = (BYTE*) dst;
+@@ -1280,8 +1418,9 @@ HUF_decompress4X2_usingDTable_internal_body(
+         DTableDesc const dtd = HUF_getDTableDesc(DTable);
+         U32 const dtLog = dtd.tableLog;
+ 
+-        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+-        if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
++        if (length4 > cSrcSize) return ERROR(corruption_detected);  /* overflow */
++        if (opStart4 > oend) return ERROR(corruption_detected);     /* overflow */
++        assert(dstSize >= 6 /* validated above */);
+         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+@@ -1366,44 +1505,191 @@ size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, vo
+ }
+ #endif
+ 
+-#if HUF_NEED_DEFAULT_FUNCTION
+ static
+ size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
+                     size_t cSrcSize, HUF_DTable const* DTable) {
+     return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
+ }
+-#endif
+ 
+ #if ZSTD_ENABLE_ASM_X86_64_BMI2
+ 
+-HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
++HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
++
++#endif
++
++static HUF_FAST_BMI2_ATTRS
++void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
++{
++    U64 bits[4];
++    BYTE const* ip[4];
++    BYTE* op[4];
++    BYTE* oend[4];
++    HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
++    BYTE const* const ilowest = args->ilowest;
++
++    /* Copy the arguments to local registers. */
++    ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
++    ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
++    ZSTD_memcpy(&op, &args->op, sizeof(op));
++
++    oend[0] = op[1];
++    oend[1] = op[2];
++    oend[2] = op[3];
++    oend[3] = args->oend;
++
++    assert(MEM_isLittleEndian());
++    assert(!MEM_32bits());
++
++    for (;;) {
++        BYTE* olimit;
++        int stream;
++
++        /* Assert loop preconditions */
++#ifndef NDEBUG
++        for (stream = 0; stream < 4; ++stream) {
++            assert(op[stream] <= oend[stream]);
++            assert(ip[stream] >= ilowest);
++        }
++#endif
++        /* Compute olimit */
++        {
++            /* Each loop does 5 table lookups for each of the 4 streams.
++             * Each table lookup consumes up to 11 bits of input, and produces
++             * up to 2 bytes of output.
++             */
++            /* We can consume up to 7 bytes of input per iteration per stream.
++             * We also know that each input pointer is >= ip[0]. So we can run
++             * iters loops before running out of input.
++             */
++            size_t iters = (size_t)(ip[0] - ilowest) / 7;
++            /* Each iteration can produce up to 10 bytes of output per stream.
++             * Each output stream may advance at different rates. So take the
++             * minimum number of safe iterations among all the output streams.
++             */
++            for (stream = 0; stream < 4; ++stream) {
++                size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
++                iters = MIN(iters, oiters);
++            }
++
++            /* Each iteration produces at least 5 output symbols. So until
++             * op[3] crosses olimit, we know we haven't executed iters
++             * iterations yet. This saves us maintaining an iters counter,
++             * at the expense of computing the remaining # of iterations
++             * more frequently.
++             */
++            olimit = op[3] + (iters * 5);
++
++            /* Exit the fast decoding loop once we reach the end. */
++            if (op[3] == olimit)
++                break;
++
++            /* Exit the decoding loop if any input pointer has crossed the
++             * previous one. This indicates corruption, and a precondition
++             * to our loop is that ip[i] >= ip[0].
++             */
++            for (stream = 1; stream < 4; ++stream) {
++                if (ip[stream] < ip[stream - 1])
++                    goto _out;
++            }
++        }
++
++#ifndef NDEBUG
++        for (stream = 1; stream < 4; ++stream) {
++            assert(ip[stream] >= ip[stream - 1]);
++        }
++#endif
+ 
+-static HUF_ASM_X86_64_BMI2_ATTRS size_t
+-HUF_decompress4X2_usingDTable_internal_bmi2_asm(
++#define HUF_4X2_DECODE_SYMBOL(_stream, _decode3)                      \
++    do {                                                              \
++        if ((_decode3) || (_stream) != 3) {                           \
++            int const index = (int)(bits[(_stream)] >> 53);           \
++            HUF_DEltX2 const entry = dtable[index];                   \
++            MEM_write16(op[(_stream)], entry.sequence); \
++            bits[(_stream)] <<= (entry.nbBits) & 0x3F;                \
++            op[(_stream)] += (entry.length);                          \
++        }                                                             \
++    } while (0)
++
++#define HUF_4X2_RELOAD_STREAM(_stream)                                  \
++    do {                                                                \
++        HUF_4X2_DECODE_SYMBOL(3, 1);                                    \
++        {                                                               \
++            int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
++            int const nbBits = ctz & 7;                                 \
++            int const nbBytes = ctz >> 3;                               \
++            ip[(_stream)] -= nbBytes;                                   \
++            bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1;            \
++            bits[(_stream)] <<= nbBits;                                 \
++        }                                                               \
++    } while (0)
++
++        /* Manually unroll the loop because compilers don't consistently
++         * unroll the inner loops, which destroys performance.
++         */
++        do {
++            /* Decode 5 symbols from each of the first 3 streams.
++             * The final stream will be decoded during the reload phase
++             * to reduce register pressure.
++             */
++            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
++            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
++            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
++            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
++            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
++
++            /* Decode one symbol from the final stream */
++            HUF_4X2_DECODE_SYMBOL(3, 1);
++
++            /* Decode 4 symbols from the final stream & reload bitstreams.
++             * The final stream is reloaded last, meaning that all 5 symbols
++             * are decoded from the final stream before it is reloaded.
++             */
++            HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM);
++        } while (op[3] < olimit);
++    }
++
++#undef HUF_4X2_DECODE_SYMBOL
++#undef HUF_4X2_RELOAD_STREAM
++
++_out:
++
++    /* Save the final values of each of the state variables back to args. */
++    ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
++    ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
++    ZSTD_memcpy(&args->op, &op, sizeof(op));
++}
++
++
++static HUF_FAST_BMI2_ATTRS size_t
++HUF_decompress4X2_usingDTable_internal_fast(
+           void* dst,  size_t dstSize,
+     const void* cSrc, size_t cSrcSize,
+-    const HUF_DTable* DTable) {
++    const HUF_DTable* DTable,
++    HUF_DecompressFastLoopFn loopFn) {
+     void const* dt = DTable + 1;
+-    const BYTE* const iend = (const BYTE*)cSrc + 6;
+-    BYTE* const oend = (BYTE*)dst + dstSize;
+-    HUF_DecompressAsmArgs args;
++    const BYTE* const ilowest = (const BYTE*)cSrc;
++    BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
++    HUF_DecompressFastArgs args;
+     {
+-        size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
++        size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
+         FORWARD_IF_ERROR(ret, "Failed to init asm args");
+-        if (ret != 0)
+-            return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
++        if (ret == 0)
++            return 0;
+     }
+ 
+-    assert(args.ip[0] >= args.ilimit);
+-    HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args);
++    assert(args.ip[0] >= args.ilowest);
++    loopFn(&args);
+ 
+     /* note : op4 already verified within main loop */
+-    assert(args.ip[0] >= iend);
+-    assert(args.ip[1] >= iend);
+-    assert(args.ip[2] >= iend);
+-    assert(args.ip[3] >= iend);
++    assert(args.ip[0] >= ilowest);
++    assert(args.ip[1] >= ilowest);
++    assert(args.ip[2] >= ilowest);
++    assert(args.ip[3] >= ilowest);
+     assert(args.op[3] <= oend);
+-    (void)iend;
++
++    assert(ilowest == args.ilowest);
++    assert(ilowest + 6 == args.iend[0]);
++    (void)ilowest;
+ 
+     /* finish bitStreams one by one */
+     {
+@@ -1426,91 +1712,72 @@ HUF_decompress4X2_usingDTable_internal_bmi2_asm(
+     /* decoded size */
+     return dstSize;
+ }
+-#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
+ 
+ static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
+-                    size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
++                    size_t cSrcSize, HUF_DTable const* DTable, int flags)
+ {
++    HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X2_usingDTable_internal_default;
++    HUF_DecompressFastLoopFn loopFn = HUF_decompress4X2_usingDTable_internal_fast_c_loop;
++
+ #if DYNAMIC_BMI2
+-    if (bmi2) {
++    if (flags & HUF_flags_bmi2) {
++        fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2;
+ # if ZSTD_ENABLE_ASM_X86_64_BMI2
+-        return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
+-# else
+-        return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
++        if (!(flags & HUF_flags_disableAsm)) {
++            loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
++        }
+ # endif
++    } else {
++        return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
+     }
+-#else
+-    (void)bmi2;
+ #endif
+ 
+ #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
+-    return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
+-#else
+-    return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
++    if (!(flags & HUF_flags_disableAsm)) {
++        loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
++    }
+ #endif
++
++    if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
++        size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
++        if (ret != 0)
++            return ret;
++    }
++    return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
+ }
+ 
+ HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
+ 
+-size_t HUF_decompress1X2_usingDTable(
+-          void* dst,  size_t dstSize,
+-    const void* cSrc, size_t cSrcSize,
+-    const HUF_DTable* DTable)
+-{
+-    DTableDesc dtd = HUF_getDTableDesc(DTable);
+-    if (dtd.tableType != 1) return ERROR(GENERIC);
+-    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+-}
+-
+ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                                    const void* cSrc, size_t cSrcSize,
+-                                   void* workSpace, size_t wkspSize)
++                                   void* workSpace, size_t wkspSize, int flags)
+ {
+     const BYTE* ip = (const BYTE*) cSrc;
+ 
+     size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
+-                                               workSpace, wkspSize);
++                                               workSpace, wkspSize, flags);
+     if (HUF_isError(hSize)) return hSize;
+     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+     ip += hSize; cSrcSize -= hSize;
+ 
+-    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
++    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags);
+ }
+ 
+-
+-size_t HUF_decompress4X2_usingDTable(
+-          void* dst,  size_t dstSize,
+-    const void* cSrc, size_t cSrcSize,
+-    const HUF_DTable* DTable)
+-{
+-    DTableDesc dtd = HUF_getDTableDesc(DTable);
+-    if (dtd.tableType != 1) return ERROR(GENERIC);
+-    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+-}
+-
+-static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
++static size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                    const void* cSrc, size_t cSrcSize,
+-                                   void* workSpace, size_t wkspSize, int bmi2)
++                                   void* workSpace, size_t wkspSize, int flags)
+ {
+     const BYTE* ip = (const BYTE*) cSrc;
+ 
+     size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
+-                                         workSpace, wkspSize);
++                                         workSpace, wkspSize, flags);
+     if (HUF_isError(hSize)) return hSize;
+     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+     ip += hSize; cSrcSize -= hSize;
+ 
+-    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
++    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
+ }
+ 
+-size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+-                                   const void* cSrc, size_t cSrcSize,
+-                                   void* workSpace, size_t wkspSize)
+-{
+-    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
+-}
+-
+-
+ #endif /* HUF_FORCE_DECOMPRESS_X1 */
+ 
+ 
+@@ -1518,44 +1785,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ /* Universal decompression selectors */
+ /* ***********************************/
+ 
+-size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
+-                                    const void* cSrc, size_t cSrcSize,
+-                                    const HUF_DTable* DTable)
+-{
+-    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+-#if defined(HUF_FORCE_DECOMPRESS_X1)
+-    (void)dtd;
+-    assert(dtd.tableType == 0);
+-    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+-#elif defined(HUF_FORCE_DECOMPRESS_X2)
+-    (void)dtd;
+-    assert(dtd.tableType == 1);
+-    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+-#else
+-    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+-                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+-#endif
+-}
+-
+-size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
+-                                    const void* cSrc, size_t cSrcSize,
+-                                    const HUF_DTable* DTable)
+-{
+-    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+-#if defined(HUF_FORCE_DECOMPRESS_X1)
+-    (void)dtd;
+-    assert(dtd.tableType == 0);
+-    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+-#elif defined(HUF_FORCE_DECOMPRESS_X2)
+-    (void)dtd;
+-    assert(dtd.tableType == 1);
+-    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+-#else
+-    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+-                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+-#endif
+-}
+-
+ 
+ #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
+ typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+@@ -1610,36 +1839,9 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
+ #endif
+ }
+ 
+-
+-size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
+-                                     size_t dstSize, const void* cSrc,
+-                                     size_t cSrcSize, void* workSpace,
+-                                     size_t wkspSize)
+-{
+-    /* validation checks */
+-    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+-    if (cSrcSize == 0) return ERROR(corruption_detected);
+-
+-    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+-#if defined(HUF_FORCE_DECOMPRESS_X1)
+-        (void)algoNb;
+-        assert(algoNb == 0);
+-        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+-#elif defined(HUF_FORCE_DECOMPRESS_X2)
+-        (void)algoNb;
+-        assert(algoNb == 1);
+-        return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+-#else
+-        return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+-                            cSrcSize, workSpace, wkspSize):
+-                        HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+-#endif
+-    }
+-}
+-
+ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+-                                  void* workSpace, size_t wkspSize)
++                                  void* workSpace, size_t wkspSize, int flags)
+ {
+     /* validation checks */
+     if (dstSize == 0) return ERROR(dstSize_tooSmall);
+@@ -1652,71 +1854,71 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+         (void)algoNb;
+         assert(algoNb == 0);
+         return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
+-                                cSrcSize, workSpace, wkspSize);
++                                cSrcSize, workSpace, wkspSize, flags);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+         (void)algoNb;
+         assert(algoNb == 1);
+         return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+-                                cSrcSize, workSpace, wkspSize);
++                                cSrcSize, workSpace, wkspSize, flags);
+ #else
+         return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+-                                cSrcSize, workSpace, wkspSize):
++                                cSrcSize, workSpace, wkspSize, flags):
+                         HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
+-                                cSrcSize, workSpace, wkspSize);
++                                cSrcSize, workSpace, wkspSize, flags);
+ #endif
+     }
+ }
+ 
+ 
+-size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
++size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
+ {
+     DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+     (void)dtd;
+     assert(dtd.tableType == 0);
+-    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
++    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+     (void)dtd;
+     assert(dtd.tableType == 1);
+-    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
++    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
+ #else
+-    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+-                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
++    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
++                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
+ #endif
+ }
+ 
+ #ifndef HUF_FORCE_DECOMPRESS_X2
+-size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
++size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
+ {
+     const BYTE* ip = (const BYTE*) cSrc;
+ 
+-    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
++    size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
+     if (HUF_isError(hSize)) return hSize;
+     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+     ip += hSize; cSrcSize -= hSize;
+ 
+-    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
++    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
+ }
+ #endif
+ 
+-size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
++size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
+ {
+     DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+     (void)dtd;
+     assert(dtd.tableType == 0);
+-    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
++    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+     (void)dtd;
+     assert(dtd.tableType == 1);
+-    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
++    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
+ #else
+-    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+-                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
++    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
++                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
+ #endif
+ }
+ 
+-size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
++size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
+ {
+     /* validation checks */
+     if (dstSize == 0) return ERROR(dstSize_tooSmall);
+@@ -1726,15 +1928,14 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+         (void)algoNb;
+         assert(algoNb == 0);
+-        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
++        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+         (void)algoNb;
+         assert(algoNb == 1);
+-        return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
++        return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
+ #else
+-        return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
+-                        HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
++        return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags) :
++                        HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
+ #endif
+     }
+ }
+-
+diff --git a/lib/zstd/decompress/zstd_ddict.c b/lib/zstd/decompress/zstd_ddict.c
+index dbbc7919de53..30ef65e1ab5c 100644
+--- a/lib/zstd/decompress/zstd_ddict.c
++++ b/lib/zstd/decompress/zstd_ddict.c
+@@ -1,5 +1,6 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -14,12 +15,12 @@
+ /*-*******************************************************
+ *  Dependencies
+ *********************************************************/
++#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customFree */
+ #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+ #include "../common/cpu.h"         /* bmi2 */
+ #include "../common/mem.h"         /* low level memory routines */
+ #define FSE_STATIC_LINKING_ONLY
+ #include "../common/fse.h"
+-#define HUF_STATIC_LINKING_ONLY
+ #include "../common/huf.h"
+ #include "zstd_decompress_internal.h"
+ #include "zstd_ddict.h"
+@@ -131,7 +132,7 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
+         ZSTD_memcpy(internalBuffer, dict, dictSize);
+     }
+     ddict->dictSize = dictSize;
+-    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
++    ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001);  /* cover both little and big endian */
+ 
+     /* parse dictionary content */
+     FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
+@@ -237,5 +238,5 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
+ unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
+ {
+     if (ddict==NULL) return 0;
+-    return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
++    return ddict->dictID;
+ }
+diff --git a/lib/zstd/decompress/zstd_ddict.h b/lib/zstd/decompress/zstd_ddict.h
+index 8c1a79d666f8..de459a0dacd1 100644
+--- a/lib/zstd/decompress/zstd_ddict.h
++++ b/lib/zstd/decompress/zstd_ddict.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c
+index 6b3177c94711..c9cbc45f6ed9 100644
+--- a/lib/zstd/decompress/zstd_decompress.c
++++ b/lib/zstd/decompress/zstd_decompress.c
+@@ -1,5 +1,6 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -53,13 +54,15 @@
+ *  Dependencies
+ *********************************************************/
+ #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
++#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
++#include "../common/error_private.h"
++#include "../common/zstd_internal.h"  /* blockProperties_t */
+ #include "../common/mem.h"         /* low level memory routines */
++#include "../common/bits.h"  /* ZSTD_highbit32 */
+ #define FSE_STATIC_LINKING_ONLY
+ #include "../common/fse.h"
+-#define HUF_STATIC_LINKING_ONLY
+ #include "../common/huf.h"
+ #include <linux/xxhash.h> /* xxh64_reset, xxh64_update, xxh64_digest, XXH64 */
+-#include "../common/zstd_internal.h"  /* blockProperties_t */
+ #include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
+ #include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
+ #include "zstd_decompress_block.h"   /* ZSTD_decompressBlock_internal */
+@@ -72,11 +75,11 @@
+  *************************************/
+ 
+ #define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
+-#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3   /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
+-                                                     * Currently, that means a 0.75 load factor.
+-                                                     * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
+-                                                     * the load factor of the ddict hash set.
+-                                                     */
++#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3  /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
++                                                    * Currently, that means a 0.75 load factor.
++                                                    * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
++                                                    * the load factor of the ddict hash set.
++                                                    */
+ 
+ #define DDICT_HASHSET_TABLE_BASE_SIZE 64
+ #define DDICT_HASHSET_RESIZE_FACTOR 2
+@@ -237,6 +240,8 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
+     dctx->outBufferMode = ZSTD_bm_buffered;
+     dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
+     dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
++    dctx->disableHufAsm = 0;
++    dctx->maxBlockSizeParam = 0;
+ }
+ 
+ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
+@@ -253,6 +258,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
+     dctx->streamStage = zdss_init;
+     dctx->noForwardProgress = 0;
+     dctx->oversizedDuration = 0;
++    dctx->isFrameDecompression = 1;
+ #if DYNAMIC_BMI2
+     dctx->bmi2 = ZSTD_cpuSupportsBmi2();
+ #endif
+@@ -421,16 +427,40 @@ size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
+  *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
+  * @return : 0, `zfhPtr` is correctly filled,
+  *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
+- *           or an error code, which can be tested using ZSTD_isError() */
++ *           or an error code, which can be tested using ZSTD_isError() */
+ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
+ {
+     const BYTE* ip = (const BYTE*)src;
+     size_t const minInputSize = ZSTD_startingInputLength(format);
+ 
+-    ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));   /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
+-    if (srcSize < minInputSize) return minInputSize;
+-    RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
++    DEBUGLOG(5, "ZSTD_getFrameHeader_advanced: minInputSize = %zu, srcSize = %zu", minInputSize, srcSize);
++
++    if (srcSize > 0) {
++        /* note : technically could be considered an assert(), since it's an invalid entry */
++        RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter : src==NULL, but srcSize>0");
++    }
++    if (srcSize < minInputSize) {
++        if (srcSize > 0 && format != ZSTD_f_zstd1_magicless) {
++            /* when receiving less than @minInputSize bytes,
++             * check that these bytes at least correspond to a supported magic number
++             * in order to error out early if they don't.
++             */
++            size_t const toCopy = MIN(4, srcSize);
++            unsigned char hbuf[4]; MEM_writeLE32(hbuf, ZSTD_MAGICNUMBER);
++            assert(src != NULL);
++            ZSTD_memcpy(hbuf, src, toCopy);
++            if ( MEM_readLE32(hbuf) != ZSTD_MAGICNUMBER ) {
++                /* not a zstd frame : let's check if it's a skippable frame */
++                MEM_writeLE32(hbuf, ZSTD_MAGIC_SKIPPABLE_START);
++                ZSTD_memcpy(hbuf, src, toCopy);
++                if ((MEM_readLE32(hbuf) & ZSTD_MAGIC_SKIPPABLE_MASK) != ZSTD_MAGIC_SKIPPABLE_START) {
++                    RETURN_ERROR(prefix_unknown,
++                                "first bytes don't correspond to any supported magic number");
++        }   }   }
++        return minInputSize;
++    }
+ 
++    ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));   /* not strictly necessary, but static analyzers may not understand that zfhPtr will be read only if return value is zero, since they are 2 different signals */
+     if ( (format != ZSTD_f_zstd1_magicless)
+       && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
+         if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+@@ -540,61 +570,62 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
+     sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
+     RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
+                     frameParameter_unsupported, "");
+-    {
+-        size_t const skippableSize = skippableHeaderSize + sizeU32;
++    {   size_t const skippableSize = skippableHeaderSize + sizeU32;
+         RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
+         return skippableSize;
+     }
+ }
+ 
+ /*! ZSTD_readSkippableFrame() :
+- * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.
++ * Retrieves content of a skippable frame, and writes it to dst buffer.
+  *
+  * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
+  * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START.  This can be NULL if the caller is not interested
+  * in the magicVariant.
+  *
+- * Returns an error if destination buffer is not large enough, or if the frame is not skippable.
++ * Returns an error if destination buffer is not large enough, or if this is not a valid skippable frame.
+  *
+  * @return : number of bytes written or a ZSTD error.
+  */
+-ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
+-                                            const void* src, size_t srcSize)
++size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity,
++                               unsigned* magicVariant,  /* optional, can be NULL */
++                         const void* src, size_t srcSize)
+ {
+-    U32 const magicNumber = MEM_readLE32(src);
+-    size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
+-    size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
+-
+-    /* check input validity */
+-    RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
+-    RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
+-    RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
++    RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
+ 
+-    /* deliver payload */
+-    if (skippableContentSize > 0  && dst != NULL)
+-        ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
+-    if (magicVariant != NULL)
+-        *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
+-    return skippableContentSize;
++    {   U32 const magicNumber = MEM_readLE32(src);
++        size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
++        size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
++
++        /* check input validity */
++        RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
++        RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
++        RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
++
++        /* deliver payload */
++        if (skippableContentSize > 0  && dst != NULL)
++            ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
++        if (magicVariant != NULL)
++            *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
++        return skippableContentSize;
++    }
+ }
+ 
+ /* ZSTD_findDecompressedSize() :
+- *  compatible with legacy mode
+  *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
+  *      skippable frames
+- *  @return : decompressed size of the frames contained */
++ *  note: compatible with legacy mode
++ * @return : decompressed size of the frames contained */
+ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
+ {
+-    unsigned long long totalDstSize = 0;
++    U64 totalDstSize = 0;
+ 
+     while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
+         U32 const magicNumber = MEM_readLE32(src);
+ 
+         if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+             size_t const skippableSize = readSkippableFrameSize(src, srcSize);
+-            if (ZSTD_isError(skippableSize)) {
+-                return ZSTD_CONTENTSIZE_ERROR;
+-            }
++            if (ZSTD_isError(skippableSize)) return ZSTD_CONTENTSIZE_ERROR;
+             assert(skippableSize <= srcSize);
+ 
+             src = (const BYTE *)src + skippableSize;
+@@ -602,17 +633,17 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
+             continue;
+         }
+ 
+-        {   unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
+-            if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
++        {   unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize);
++            if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs;
+ 
+-            /* check for overflow */
+-            if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
+-            totalDstSize += ret;
++            if (U64_MAX - totalDstSize < fcs)
++                return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */
++            totalDstSize += fcs;
+         }
++        /* skip to next frame */
+         {   size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
+-            if (ZSTD_isError(frameSrcSize)) {
+-                return ZSTD_CONTENTSIZE_ERROR;
+-            }
++            if (ZSTD_isError(frameSrcSize)) return ZSTD_CONTENTSIZE_ERROR;
++            assert(frameSrcSize <= srcSize);
+ 
+             src = (const BYTE *)src + frameSrcSize;
+             srcSize -= frameSrcSize;
+@@ -676,13 +707,13 @@ static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
+     return frameSizeInfo;
+ }
+ 
+-static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
++static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize, ZSTD_format_e format)
+ {
+     ZSTD_frameSizeInfo frameSizeInfo;
+     ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
+ 
+ 
+-    if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
++    if (format == ZSTD_f_zstd1 && (srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
+         && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+         frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
+         assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
+@@ -696,7 +727,7 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
+         ZSTD_frameHeader zfh;
+ 
+         /* Extract Frame Header */
+-        {   size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
++        {   size_t const ret = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize, format);
+             if (ZSTD_isError(ret))
+                 return ZSTD_errorFrameSizeInfo(ret);
+             if (ret > 0)
+@@ -730,23 +761,26 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
+             ip += 4;
+         }
+ 
++        frameSizeInfo.nbBlocks = nbBlocks;
+         frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
+         frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
+                                         ? zfh.frameContentSize
+-                                        : nbBlocks * zfh.blockSizeMax;
++                                        : (unsigned long long)nbBlocks * zfh.blockSizeMax;
+         return frameSizeInfo;
+     }
+ }
+ 
++static size_t ZSTD_findFrameCompressedSize_advanced(const void *src, size_t srcSize, ZSTD_format_e format) {  /* compressed size of the frame at `src`, parsed according to `format` */
++    ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, format);  /* `format` is passed through unchanged to the frame scanner */
++    return frameSizeInfo.compressedSize;  /* may hold an error code instead of a size (see ZSTD_errorFrameSizeInfo) */
++}
++
+ /* ZSTD_findFrameCompressedSize() :
+- *  compatible with legacy mode
+- *  `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
+- *  `srcSize` must be at least as large as the frame contained
+- *  @return : the compressed size of the frame starting at `src` */
++ * See docs in zstd.h
++ * Note: compatible with legacy mode */
+ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
+ {
+-    ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
+-    return frameSizeInfo.compressedSize;
++    return ZSTD_findFrameCompressedSize_advanced(src, srcSize, ZSTD_f_zstd1);
+ }
+ 
+ /* ZSTD_decompressBound() :
+@@ -760,7 +794,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
+     unsigned long long bound = 0;
+     /* Iterate over each frame */
+     while (srcSize > 0) {
+-        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
++        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);
+         size_t const compressedSize = frameSizeInfo.compressedSize;
+         unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
+         if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
+@@ -773,6 +807,48 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
+     return bound;
+ }
+ 
++size_t ZSTD_decompressionMargin(void const* src, size_t srcSize)  /* sums per-frame overhead over all frames in [src, src+srcSize) plus the largest blockSizeMax seen; returns an error code on a bad frame */
++{
++    size_t margin = 0;  /* running total that is returned to the caller */
++    unsigned maxBlockSize = 0;  /* largest zfh.blockSizeMax among the regular (non-skippable) frames */
++
++    /* Iterate over each frame until the whole input has been consumed */
++    while (srcSize > 0) {
++        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);  /* format is fixed to ZSTD_f_zstd1 here */
++        size_t const compressedSize = frameSizeInfo.compressedSize;  /* validated below, after the header parse */
++        unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;  /* only inspected for the error sentinel */
++        ZSTD_frameHeader zfh;
++
++        FORWARD_IF_ERROR(ZSTD_getFrameHeader(&zfh, src, srcSize), "");  /* a header parse error is returned to the caller as-is */
++        if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
++            return ERROR(corruption_detected);  /* frame scan failed: report corruption rather than the scanner's own code */
++
++        if (zfh.frameType == ZSTD_frame) {
++            /* Add the frame header to our margin */
++            margin += zfh.headerSize;
++            /* Add the checksum to our margin (4 bytes, only when the frame carries one) */
++            margin += zfh.checksumFlag ? 4 : 0;
++            /* Add 3 bytes per block (presumably the block header size; confirm against ZSTD_blockHeaderSize) */
++            margin += 3 * frameSizeInfo.nbBlocks;
++
++            /* Compute the max block size across all regular frames seen so far */
++            maxBlockSize = MAX(maxBlockSize, zfh.blockSizeMax);
++        } else {
++            assert(zfh.frameType == ZSTD_skippableFrame);  /* the only other frame type handled here */
++            /* Add the entire skippable frame size to our margin. */
++            margin += compressedSize;
++        }
++
++        assert(srcSize >= compressedSize);  /* guards the unsigned subtraction below against wrap-around */
++        src = (const BYTE*)src + compressedSize;  /* advance to the start of the next frame */
++        srcSize -= compressedSize;
++    }
++
++    /* Add the max block size back to the margin (added once, not once per frame). */
++    margin += maxBlockSize;
++
++    return margin;
++}
+ 
+ /*-*************************************************************
+  *   Frame decoding
+@@ -856,6 +932,10 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
+         ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
+     }
+ 
++    /* Shrink the blockSizeMax if enabled */
++    if (dctx->maxBlockSizeParam != 0)
++        dctx->fParams.blockSizeMax = MIN(dctx->fParams.blockSizeMax, (unsigned)dctx->maxBlockSizeParam);
++
+     /* Loop on each block */
+     while (1) {
+         BYTE* oBlockEnd = oend;
+@@ -888,7 +968,8 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
+         switch(blockProperties.blockType)
+         {
+         case bt_compressed:
+-            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming);
++            assert(dctx->isFrameDecompression == 1);
++            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, not_streaming);
+             break;
+         case bt_raw :
+             /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. */
+@@ -901,12 +982,14 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
+         default:
+             RETURN_ERROR(corruption_detected, "invalid block type");
+         }
+-
+-        if (ZSTD_isError(decodedSize)) return decodedSize;
+-        if (dctx->validateChecksum)
++        FORWARD_IF_ERROR(decodedSize, "Block decompression failure");
++        DEBUGLOG(5, "Decompressed block of dSize = %u", (unsigned)decodedSize);
++        if (dctx->validateChecksum) {
+             xxh64_update(&dctx->xxhState, op, decodedSize);
+-        if (decodedSize != 0)
++        }
++        if (decodedSize) /* support dst = NULL,0 */ {
+             op += decodedSize;
++        }
+         assert(ip != NULL);
+         ip += cBlockSize;
+         remainingSrcSize -= cBlockSize;
+@@ -930,12 +1013,15 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
+     }
+     ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
+     /* Allow caller to get size read */
++    DEBUGLOG(4, "ZSTD_decompressFrame: decompressed frame of size %zi, consuming %zi bytes of input", op-ostart, ip - (const BYTE*)*srcPtr);
+     *srcPtr = ip;
+     *srcSizePtr = remainingSrcSize;
+     return (size_t)(op-ostart);
+ }
+ 
+-static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
++static
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
++size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
+                                         void* dst, size_t dstCapacity,
+                                   const void* src, size_t srcSize,
+                                   const void* dict, size_t dictSize,
+@@ -955,17 +1041,18 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
+     while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
+ 
+ 
+-        {   U32 const magicNumber = MEM_readLE32(src);
+-            DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
+-                        (unsigned)magicNumber, ZSTD_MAGICNUMBER);
++        if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) {
++            U32 const magicNumber = MEM_readLE32(src);
++            DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
+             if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
++                /* skippable frame detected : skip it */
+                 size_t const skippableSize = readSkippableFrameSize(src, srcSize);
+-                FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
++                FORWARD_IF_ERROR(skippableSize, "invalid skippable frame");
+                 assert(skippableSize <= srcSize);
+ 
+                 src = (const BYTE *)src + skippableSize;
+                 srcSize -= skippableSize;
+-                continue;
++                continue; /* check next frame */
+         }   }
+ 
+         if (ddict) {
+@@ -1061,8 +1148,8 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
+ size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
+ 
+ /*
+- * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed,
+- * we allow taking a partial block as the input. Currently only raw uncompressed blocks can
++ * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, we
++ * allow taking a partial block as the input. Currently only raw uncompressed blocks can
+  * be streamed.
+  *
+  * For blocks that can be streamed, this allows us to reduce the latency until we produce
+@@ -1181,7 +1268,8 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
+             {
+             case bt_compressed:
+                 DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
+-                rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming);
++                assert(dctx->isFrameDecompression == 1);
++                rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, is_streaming);
+                 dctx->expected = 0;  /* Streaming not supported */
+                 break;
+             case bt_raw :
+@@ -1250,6 +1338,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
+     case ZSTDds_decodeSkippableHeader:
+         assert(src != NULL);
+         assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
++        assert(dctx->format != ZSTD_f_zstd1_magicless);
+         ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize);   /* complete skippable header */
+         dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE);   /* note : dctx->expected can grow seriously large, beyond local buffer size */
+         dctx->stage = ZSTDds_skipFrame;
+@@ -1262,7 +1351,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
+ 
+     default:
+         assert(0);   /* impossible */
+-        RETURN_ERROR(GENERIC, "impossible to reach");   /* some compiler require default to do something */
++        RETURN_ERROR(GENERIC, "impossible to reach");   /* some compilers require default to do something */
+     }
+ }
+ 
+@@ -1303,11 +1392,11 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
+         /* in minimal huffman, we always use X1 variants */
+         size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
+                                                 dictPtr, dictEnd - dictPtr,
+-                                                workspace, workspaceSize);
++                                                workspace, workspaceSize, /* flags */ 0);
+ #else
+         size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
+                                                 dictPtr, (size_t)(dictEnd - dictPtr),
+-                                                workspace, workspaceSize);
++                                                workspace, workspaceSize, /* flags */ 0);
+ #endif
+         RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
+         dictPtr += hSize;
+@@ -1403,10 +1492,11 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
+     dctx->prefixStart = NULL;
+     dctx->virtualStart = NULL;
+     dctx->dictEnd = NULL;
+-    dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
++    dctx->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001);  /* cover both little and big endian */
+     dctx->litEntropy = dctx->fseEntropy = 0;
+     dctx->dictID = 0;
+     dctx->bType = bt_reserved;
++    dctx->isFrameDecompression = 1;
+     ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
+     ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue));  /* initial repcodes */
+     dctx->LLTptr = dctx->entropy.LLTable;
+@@ -1465,7 +1555,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
+  *  This could for one of the following reasons :
+  *  - The frame does not require a dictionary (most common case).
+  *  - The frame was built with dictID intentionally removed.
+- *    Needed dictionary is a hidden information.
++ *    Needed dictionary is a hidden piece of information.
+  *    Note : this use case also happens when using a non-conformant dictionary.
+  *  - `srcSize` is too small, and as a result, frame header could not be decoded.
+  *    Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
+@@ -1474,7 +1564,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
+  *  ZSTD_getFrameHeader(), which will provide a more precise error code. */
+ unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
+ {
+-    ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 };
++    ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0 };
+     size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
+     if (ZSTD_isError(hError)) return 0;
+     return zfp.dictID;
+@@ -1581,7 +1671,9 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di
+ size_t ZSTD_initDStream(ZSTD_DStream* zds)
+ {
+     DEBUGLOG(4, "ZSTD_initDStream");
+-    return ZSTD_initDStream_usingDDict(zds, NULL);
++    FORWARD_IF_ERROR(ZSTD_DCtx_reset(zds, ZSTD_reset_session_only), "");
++    FORWARD_IF_ERROR(ZSTD_DCtx_refDDict(zds, NULL), "");
++    return ZSTD_startingInputLength(zds->format);
+ }
+ 
+ /* ZSTD_initDStream_usingDDict() :
+@@ -1589,6 +1681,7 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds)
+  * this function cannot fail */
+ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
+ {
++    DEBUGLOG(4, "ZSTD_initDStream_usingDDict");
+     FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , "");
+     FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , "");
+     return ZSTD_startingInputLength(dctx->format);
+@@ -1599,6 +1692,7 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
+  * this function cannot fail */
+ size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
+ {
++    DEBUGLOG(4, "ZSTD_resetDStream");
+     FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), "");
+     return ZSTD_startingInputLength(dctx->format);
+ }
+@@ -1670,6 +1764,15 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
+             bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
+             bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
+             return bounds;
++        case ZSTD_d_disableHuffmanAssembly:
++            bounds.lowerBound = 0;
++            bounds.upperBound = 1;
++            return bounds;
++        case ZSTD_d_maxBlockSize:
++            bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN;
++            bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
++            return bounds;
++
+         default:;
+     }
+     bounds.error = ERROR(parameter_unsupported);
+@@ -1710,6 +1813,12 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value
+         case ZSTD_d_refMultipleDDicts:
+             *value = (int)dctx->refMultipleDDicts;
+             return 0;
++        case ZSTD_d_disableHuffmanAssembly:
++            *value = (int)dctx->disableHufAsm;
++            return 0;
++        case ZSTD_d_maxBlockSize:
++            *value = dctx->maxBlockSizeParam;
++            return 0;
+         default:;
+     }
+     RETURN_ERROR(parameter_unsupported, "");
+@@ -1743,6 +1852,14 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
+             }
+             dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
+             return 0;
++        case ZSTD_d_disableHuffmanAssembly:
++            CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value);
++            dctx->disableHufAsm = value != 0;
++            return 0;
++        case ZSTD_d_maxBlockSize:
++            if (value != 0) CHECK_DBOUNDS(ZSTD_d_maxBlockSize, value);
++            dctx->maxBlockSizeParam = value;
++            return 0;
+         default:;
+     }
+     RETURN_ERROR(parameter_unsupported, "");
+@@ -1754,6 +1871,7 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
+       || (reset == ZSTD_reset_session_and_parameters) ) {
+         dctx->streamStage = zdss_init;
+         dctx->noForwardProgress = 0;
++        dctx->isFrameDecompression = 1;
+     }
+     if ( (reset == ZSTD_reset_parameters)
+       || (reset == ZSTD_reset_session_and_parameters) ) {
+@@ -1770,11 +1888,17 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
+     return ZSTD_sizeof_DCtx(dctx);
+ }
+ 
+-size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
++static size_t ZSTD_decodingBufferSize_internal(unsigned long long windowSize, unsigned long long frameContentSize, size_t blockSizeMax)
+ {
+-    size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+-    /* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/
+-    unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2);
++    size_t const blockSize = MIN((size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX), blockSizeMax);
++    /* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block
++     * ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing
++     * the block at the beginning of the output buffer, and maintain a full window.
++     *
++     * We need another blockSize worth of buffer so that we can store split
++     * literals at the end of the block without overwriting the extDict window.
++     */
++    unsigned long long const neededRBSize = windowSize + (blockSize * 2) + (WILDCOPY_OVERLENGTH * 2);
+     unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
+     size_t const minRBSize = (size_t) neededSize;
+     RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
+@@ -1782,6 +1906,11 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
+     return minRBSize;
+ }
+ 
++size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)  /* non-static wrapper that keeps the two-argument signature */
++{
++    return ZSTD_decodingBufferSize_internal(windowSize, frameContentSize, ZSTD_BLOCKSIZE_MAX);  /* no caller-supplied block-size limit: pass ZSTD_BLOCKSIZE_MAX */
++}
++
+ size_t ZSTD_estimateDStreamSize(size_t windowSize)
+ {
+     size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+@@ -1918,7 +2047,6 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
+                 if (zds->refMultipleDDicts && zds->ddictSet) {
+                     ZSTD_DCtx_selectFrameDDict(zds);
+                 }
+-                DEBUGLOG(5, "header size : %u", (U32)hSize);
+                 if (ZSTD_isError(hSize)) {
+                     return hSize;   /* error */
+                 }
+@@ -1932,6 +2060,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
+                             zds->lhSize += remainingInput;
+                         }
+                         input->pos = input->size;
++                        /* check first few bytes */
++                        FORWARD_IF_ERROR(
++                            ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format),
++                            "First few bytes detected incorrect" );
++                        /* return hint input size */
+                         return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
+                     }
+                     assert(ip != NULL);
+@@ -1943,14 +2076,15 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
+             if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+                 && zds->fParams.frameType != ZSTD_skippableFrame
+                 && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
+-                size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart));
++                size_t const cSize = ZSTD_findFrameCompressedSize_advanced(istart, (size_t)(iend-istart), zds->format);
+                 if (cSize <= (size_t)(iend-istart)) {
+                     /* shortcut : using single-pass mode */
+                     size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
+                     if (ZSTD_isError(decompressedSize)) return decompressedSize;
+-                    DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
++                    DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()");
++                    assert(istart != NULL);
+                     ip = istart + cSize;
+-                    op += decompressedSize;
++                    op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */
+                     zds->expected = 0;
+                     zds->streamStage = zdss_init;
+                     someMoreWork = 0;
+@@ -1969,7 +2103,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
+             DEBUGLOG(4, "Consume header");
+             FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
+ 
+-            if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
++            if (zds->format == ZSTD_f_zstd1
++                && (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
+                 zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
+                 zds->stage = ZSTDds_skipFrame;
+             } else {
+@@ -1985,11 +2120,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
+             zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
+             RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
+                             frameParameter_windowTooLarge, "");
++            if (zds->maxBlockSizeParam != 0)
++                zds->fParams.blockSizeMax = MIN(zds->fParams.blockSizeMax, (unsigned)zds->maxBlockSizeParam);
+ 
+             /* Adapt buffer sizes to frame header instructions */
+             {   size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
+                 size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
+-                        ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
++                        ? ZSTD_decodingBufferSize_internal(zds->fParams.windowSize, zds->fParams.frameContentSize, zds->fParams.blockSizeMax)
+                         : 0;
+ 
+                 ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);
+@@ -2034,6 +2171,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
+                 }
+                 if ((size_t)(iend-ip) >= neededInSize) {  /* decode directly from src */
+                     FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), "");
++                    assert(ip != NULL);
+                     ip += neededInSize;
+                     /* Function modifies the stage so we must break */
+                     break;
+@@ -2048,7 +2186,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
+                 int const isSkipFrame = ZSTD_isSkipFrame(zds);
+                 size_t loadedSize;
+                 /* At this point we shouldn't be decompressing a block that we can stream. */
+-                assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip));
++                assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)));
+                 if (isSkipFrame) {
+                     loadedSize = MIN(toLoad, (size_t)(iend-ip));
+                 } else {
+@@ -2057,8 +2195,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
+                                     "should never happen");
+                     loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip));
+                 }
+-                ip += loadedSize;
+-                zds->inPos += loadedSize;
++                if (loadedSize != 0) {
++                    /* ip may be NULL */
++                    ip += loadedSize;
++                    zds->inPos += loadedSize;
++                }
+                 if (loadedSize < toLoad) { someMoreWork = 0; break; }   /* not enough input, wait for more */
+ 
+                 /* decode loaded input */
+@@ -2068,14 +2209,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
+                 break;
+             }
+         case zdss_flush:
+-            {   size_t const toFlushSize = zds->outEnd - zds->outStart;
++            {
++                size_t const toFlushSize = zds->outEnd - zds->outStart;
+                 size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize);
+-                op += flushedSize;
++
++                op = op ? op + flushedSize : op;
++
+                 zds->outStart += flushedSize;
+                 if (flushedSize == toFlushSize) {  /* flush completed */
+                     zds->streamStage = zdss_read;
+                     if ( (zds->outBuffSize < zds->fParams.frameContentSize)
+-                      && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
++                        && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
+                         DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
+                                 (int)(zds->outBuffSize - zds->outStart),
+                                 (U32)zds->fParams.blockSizeMax);
+@@ -2089,7 +2233,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
+ 
+         default:
+             assert(0);    /* impossible */
+-            RETURN_ERROR(GENERIC, "impossible to reach");   /* some compiler require default to do something */
++            RETURN_ERROR(GENERIC, "impossible to reach");   /* some compilers require default to do something */
+     }   }
+ 
+     /* result */
+@@ -2102,8 +2246,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
+     if ((ip==istart) && (op==ostart)) {  /* no forward progress */
+         zds->noForwardProgress ++;
+         if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
+-            RETURN_ERROR_IF(op==oend, dstSize_tooSmall, "");
+-            RETURN_ERROR_IF(ip==iend, srcSize_wrong, "");
++            RETURN_ERROR_IF(op==oend, noForwardProgress_destFull, "");
++            RETURN_ERROR_IF(ip==iend, noForwardProgress_inputEmpty, "");
+             assert(0);
+         }
+     } else {
+@@ -2140,11 +2284,17 @@ size_t ZSTD_decompressStream_simpleArgs (
+                             void* dst, size_t dstCapacity, size_t* dstPos,
+                       const void* src, size_t srcSize, size_t* srcPos)
+ {
+-    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
+-    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
+-    /* ZSTD_compress_generic() will check validity of dstPos and srcPos */
+-    size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
+-    *dstPos = output.pos;
+-    *srcPos = input.pos;
+-    return cErr;
++    ZSTD_outBuffer output;
++    ZSTD_inBuffer  input;
++    output.dst = dst;
++    output.size = dstCapacity;
++    output.pos = *dstPos;
++    input.src = src;
++    input.size = srcSize;
++    input.pos = *srcPos;
++    {   size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
++        *dstPos = output.pos;
++        *srcPos = input.pos;
++        return cErr;
++    }
+ }
+diff --git a/lib/zstd/decompress/zstd_decompress_block.c b/lib/zstd/decompress/zstd_decompress_block.c
+index c1913b8e7c89..9fe9a12c8a2c 100644
+--- a/lib/zstd/decompress/zstd_decompress_block.c
++++ b/lib/zstd/decompress/zstd_decompress_block.c
+@@ -1,5 +1,6 @@
++// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -20,12 +21,12 @@
+ #include "../common/mem.h"         /* low level memory routines */
+ #define FSE_STATIC_LINKING_ONLY
+ #include "../common/fse.h"
+-#define HUF_STATIC_LINKING_ONLY
+ #include "../common/huf.h"
+ #include "../common/zstd_internal.h"
+ #include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
+ #include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
+ #include "zstd_decompress_block.h"
++#include "../common/bits.h"  /* ZSTD_highbit32 */
+ 
+ /*_*******************************************************
+ *  Macros
+@@ -51,6 +52,13 @@ static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
+  *   Block decoding
+  ***************************************************************/
+ 
++static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx)  /* block-size limit that applies to the current decode operation of `dctx` */
++{
++    size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX;  /* frame decode: use the frame's own limit; otherwise fall back to the global maximum */
++    assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX);  /* the frame's limit is expected never to exceed the global maximum */
++    return blockSizeMax;
++}
++
+ /*! ZSTD_getcBlockSize() :
+  *  Provides the size of compressed block from block header `src` */
+ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+@@ -73,41 +81,49 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+ static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
+     const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
+ {
+-    if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
+-    {
+-        /* room for litbuffer to fit without read faulting */
+-        dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
++    size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
++    assert(litSize <= blockSizeMax);
++    assert(dctx->isFrameDecompression || streaming == not_streaming);
++    assert(expectedWriteSize <= blockSizeMax);
++    if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) {
++        /* If we aren't streaming, we can just put the literals after the output
++         * of the current block. We don't need to worry about overwriting the
++         * extDict of our window, because it doesn't exist.
++         * So if we have space after the end of the block, just put it there.
++         */
++        dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH;
+         dctx->litBufferEnd = dctx->litBuffer + litSize;
+         dctx->litBufferLocation = ZSTD_in_dst;
+-    }
+-    else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
+-    {
+-        /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
++    } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) {
++        /* Literals fit entirely within the extra buffer, put them there to avoid
++         * having to split the literals.
++         */
++        dctx->litBuffer = dctx->litExtraBuffer;
++        dctx->litBufferEnd = dctx->litBuffer + litSize;
++        dctx->litBufferLocation = ZSTD_not_in_dst;
++    } else {
++        assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE);
++        /* Literals must be split between the output block and the extra lit
++         * buffer. We fill the extra lit buffer with the tail of the literals,
++         * and put the rest of the literals at the end of the block, with
++         * WILDCOPY_OVERLENGTH of buffer room to allow for overreads.
++         * This MUST not write more than our maxBlockSize beyond dst, because in
++         * streaming mode, that could overwrite part of our extDict window.
++         */
+         if (splitImmediately) {
+             /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
+             dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
+             dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
+-        }
+-        else {
+-            /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */
++        } else {
++            /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
+             dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
+             dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
+         }
+         dctx->litBufferLocation = ZSTD_split;
+-    }
+-    else
+-    {
+-        /* fits entirely within litExtraBuffer, so no split is necessary */
+-        dctx->litBuffer = dctx->litExtraBuffer;
+-        dctx->litBufferEnd = dctx->litBuffer + litSize;
+-        dctx->litBufferLocation = ZSTD_not_in_dst;
++        assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize);
+     }
+ }
+ 
+-/* Hidden declaration for fullbench */
+-size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+-                          const void* src, size_t srcSize,
+-                          void* dst, size_t dstCapacity, const streaming_operation streaming);
+ /*! ZSTD_decodeLiteralsBlock() :
+  * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
+  * in the dstBuffer.  If there is room to do so, it will be stored in full in the excess dst space after where the current
+@@ -116,7 +132,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+  *
+  * @return : nb of bytes read from src (< srcSize )
+  *  note : symbol not declared but exposed for fullbench */
+-size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
++static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+                           const void* src, size_t srcSize,   /* note : srcSize < BLOCKSIZE */
+                           void* dst, size_t dstCapacity, const streaming_operation streaming)
+ {
+@@ -125,6 +141,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+ 
+     {   const BYTE* const istart = (const BYTE*) src;
+         symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
++        size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
+ 
+         switch(litEncType)
+         {
+@@ -134,13 +151,16 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+             ZSTD_FALLTHROUGH;
+ 
+         case set_compressed:
+-            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
++            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
+             {   size_t lhSize, litSize, litCSize;
+                 U32 singleStream=0;
+                 U32 const lhlCode = (istart[0] >> 2) & 3;
+                 U32 const lhc = MEM_readLE32(istart);
+                 size_t hufSuccess;
+-                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
++                size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
++                int const flags = 0
++                    | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
++                    | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
+                 switch(lhlCode)
+                 {
+                 case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
+@@ -164,7 +184,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+                     break;
+                 }
+                 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
+-                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
++                RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
++                if (!singleStream)
++                    RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
++                        "Not enough literals (%zu) for the 4-streams mode (min %u)",
++                        litSize, MIN_LITERALS_FOR_4_STREAMS);
+                 RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
+                 RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
+                 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
+@@ -176,13 +200,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+ 
+                 if (litEncType==set_repeat) {
+                     if (singleStream) {
+-                        hufSuccess = HUF_decompress1X_usingDTable_bmi2(
++                        hufSuccess = HUF_decompress1X_usingDTable(
+                             dctx->litBuffer, litSize, istart+lhSize, litCSize,
+-                            dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
++                            dctx->HUFptr, flags);
+                     } else {
+-                        hufSuccess = HUF_decompress4X_usingDTable_bmi2(
++                        assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
++                        hufSuccess = HUF_decompress4X_usingDTable(
+                             dctx->litBuffer, litSize, istart+lhSize, litCSize,
+-                            dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
++                            dctx->HUFptr, flags);
+                     }
+                 } else {
+                     if (singleStream) {
+@@ -190,26 +215,28 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+                         hufSuccess = HUF_decompress1X_DCtx_wksp(
+                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
+                             istart+lhSize, litCSize, dctx->workspace,
+-                            sizeof(dctx->workspace));
++                            sizeof(dctx->workspace), flags);
+ #else
+-                        hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
++                        hufSuccess = HUF_decompress1X1_DCtx_wksp(
+                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
+                             istart+lhSize, litCSize, dctx->workspace,
+-                            sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
++                            sizeof(dctx->workspace), flags);
+ #endif
+                     } else {
+-                        hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
++                        hufSuccess = HUF_decompress4X_hufOnly_wksp(
+                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
+                             istart+lhSize, litCSize, dctx->workspace,
+-                            sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
++                            sizeof(dctx->workspace), flags);
+                     }
+                 }
+                 if (dctx->litBufferLocation == ZSTD_split)
+                 {
++                    assert(litSize > ZSTD_LITBUFFEREXTRASIZE);
+                     ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
+                     ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
+                     dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
+                     dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
++                    assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax);
+                 }
+ 
+                 RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
+@@ -224,7 +251,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+         case set_basic:
+             {   size_t litSize, lhSize;
+                 U32 const lhlCode = ((istart[0]) >> 2) & 3;
+-                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
++                size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
+                 switch(lhlCode)
+                 {
+                 case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
+@@ -237,11 +264,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+                     break;
+                 case 3:
+                     lhSize = 3;
++                    RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
+                     litSize = MEM_readLE24(istart) >> 4;
+                     break;
+                 }
+ 
+                 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
++                RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
+                 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
+                 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
+                 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
+@@ -270,7 +299,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+         case set_rle:
+             {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
+                 size_t litSize, lhSize;
+-                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
++                size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
+                 switch(lhlCode)
+                 {
+                 case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
+@@ -279,16 +308,17 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+                     break;
+                 case 1:
+                     lhSize = 2;
++                    RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
+                     litSize = MEM_readLE16(istart) >> 4;
+                     break;
+                 case 3:
+                     lhSize = 3;
++                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
+                     litSize = MEM_readLE24(istart) >> 4;
+-                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
+                     break;
+                 }
+                 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
+-                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
++                RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
+                 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
+                 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
+                 if (dctx->litBufferLocation == ZSTD_split)
+@@ -310,6 +340,18 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+     }
+ }
+ 
++/* Hidden declaration for fullbench */
++size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
++                          const void* src, size_t srcSize,
++                          void* dst, size_t dstCapacity);
++size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
++                          const void* src, size_t srcSize,
++                          void* dst, size_t dstCapacity)
++{
++    dctx->isFrameDecompression = 0;
++    return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming);
++}
++
+ /* Default FSE distribution tables.
+  * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
+  * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
+@@ -506,14 +548,15 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
+                 for (i = 8; i < n; i += 8) {
+                     MEM_write64(spread + pos + i, sv);
+                 }
+-                pos += n;
++                assert(n>=0);
++                pos += (size_t)n;
+             }
+         }
+         /* Now we spread those positions across the table.
+-         * The benefit of doing it in two stages is that we avoid the the
++         * The benefit of doing it in two stages is that we avoid the
+          * variable size inner loop, which caused lots of branch misses.
+          * Now we can run through all the positions without any branch misses.
+-         * We unroll the loop twice, since that is what emperically worked best.
++         * We unroll the loop twice, since that is what empirically worked best.
+          */
+         {
+             size_t position = 0;
+@@ -540,7 +583,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
+             for (i=0; i<n; i++) {
+                 tableDecode[position].baseValue = s;
+                 position = (position + step) & tableMask;
+-                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
++                while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask;   /* lowprob area */
+         }   }
+         assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+     }
+@@ -551,7 +594,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
+         for (u=0; u<tableSize; u++) {
+             U32 const symbol = tableDecode[u].baseValue;
+             U32 const nextState = symbolNext[symbol]++;
+-            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
++            tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
+             tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+             assert(nbAdditionalBits[symbol] < 255);
+             tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
+@@ -664,11 +707,6 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
+ 
+     /* SeqHead */
+     nbSeq = *ip++;
+-    if (!nbSeq) {
+-        *nbSeqPtr=0;
+-        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
+-        return 1;
+-    }
+     if (nbSeq > 0x7F) {
+         if (nbSeq == 0xFF) {
+             RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
+@@ -681,8 +719,16 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
+     }
+     *nbSeqPtr = nbSeq;
+ 
++    if (nbSeq == 0) {
++        /* No sequence : section ends immediately */
++        RETURN_ERROR_IF(ip != iend, corruption_detected,
++            "extraneous data present in the Sequences section");
++        return (size_t)(ip - istart);
++    }
++
+     /* FSE table descriptors */
+     RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
++    RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */
+     {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
+         symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
+         symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
+@@ -829,7 +875,7 @@ static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, pt
+ /* ZSTD_safecopyDstBeforeSrc():
+  * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
+  * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
+-static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
++static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) {
+     ptrdiff_t const diff = op - ip;
+     BYTE* const oend = op + length;
+ 
+@@ -858,6 +904,7 @@ static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length
+  * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
+  */
+ FORCE_NOINLINE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ size_t ZSTD_execSequenceEnd(BYTE* op,
+     BYTE* const oend, seq_t sequence,
+     const BYTE** litPtr, const BYTE* const litLimit,
+@@ -905,6 +952,7 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
+  * This version is intended to be used during instances where the litBuffer is still split.  It is kept separate to avoid performance impact for the good case.
+  */
+ FORCE_NOINLINE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
+     BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
+     const BYTE** litPtr, const BYTE* const litLimit,
+@@ -950,6 +998,7 @@ size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
+ }
+ 
+ HINT_INLINE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ size_t ZSTD_execSequence(BYTE* op,
+     BYTE* const oend, seq_t sequence,
+     const BYTE** litPtr, const BYTE* const litLimit,
+@@ -964,6 +1013,11 @@ size_t ZSTD_execSequence(BYTE* op,
+ 
+     assert(op != NULL /* Precondition */);
+     assert(oend_w < oend /* No underflow */);
++
++#if defined(__aarch64__)
++    /* prefetch sequence starting from match that will be used for copy later */
++    PREFETCH_L1(match);
++#endif
+     /* Handle edge cases in a slow path:
+      *   - Read beyond end of literals
+      *   - Match end is within WILDCOPY_OVERLIMIT of oend
+@@ -1043,6 +1097,7 @@ size_t ZSTD_execSequence(BYTE* op,
+ }
+ 
+ HINT_INLINE
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
+     BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
+     const BYTE** litPtr, const BYTE* const litLimit,
+@@ -1154,7 +1209,7 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16
+ }
+ 
+ /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
+- * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
++ * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
+  * bits before reloading. This value is the maximum number of bytes we read
+  * after reloading when we are decoding long offsets.
+  */
+@@ -1165,13 +1220,37 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16
+ 
+ typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
+ 
++/*
++ * ZSTD_decodeSequence():
++ * @p longOffsets : tells the decoder to reload more bits while decoding large offsets
++ *                  only used in 32-bit mode
++ * @return : Sequence (litL + matchL + offset)
++ */
+ FORCE_INLINE_TEMPLATE seq_t
+-ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
++ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq)
+ {
+     seq_t seq;
++    /*
++     * ZSTD_seqSymbol is a 64 bits wide structure.
++     * It can be loaded in one operation
++     * and its fields extracted by simply shifting or bit-extracting on aarch64.
++     * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
++     * operations that cause performance drop. This can be avoided by using this
++     * ZSTD_memcpy hack.
++     */
++#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
++    ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
++    ZSTD_seqSymbol* const llDInfo = &llDInfoS;
++    ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
++    ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
++    ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
++    ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
++    ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
++#else
+     const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
+     const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
+     const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
++#endif
+     seq.matchLength = mlDInfo->baseValue;
+     seq.litLength = llDInfo->baseValue;
+     {   U32 const ofBase = ofDInfo->baseValue;
+@@ -1186,28 +1265,31 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
+         U32 const llnbBits = llDInfo->nbBits;
+         U32 const mlnbBits = mlDInfo->nbBits;
+         U32 const ofnbBits = ofDInfo->nbBits;
++
++        assert(llBits <= MaxLLBits);
++        assert(mlBits <= MaxMLBits);
++        assert(ofBits <= MaxOff);
+         /*
+          * As gcc has better branch and block analyzers, sometimes it is only
+-         * valuable to mark likelyness for clang, it gives around 3-4% of
++         * valuable to mark likeliness for clang, it gives around 3-4% of
+          * performance.
+          */
+ 
+         /* sequence */
+         {   size_t offset;
+-    #if defined(__clang__)
+-            if (LIKELY(ofBits > 1)) {
+-    #else
+             if (ofBits > 1) {
+-    #endif
+                 ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
+                 ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
+-                assert(ofBits <= MaxOff);
++                ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
++                ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
+                 if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
+-                    U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
++                    /* Always read extra bits, this keeps the logic simple,
++                     * avoids branches, and avoids accidentally reading 0 bits.
++                     */
++                    U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
+                     offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
+                     BIT_reloadDStream(&seqState->DStream);
+-                    if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
+-                    assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
++                    offset += BIT_readBitsFast(&seqState->DStream, extraBits);
+                 } else {
+                     offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
+                     if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
+@@ -1224,7 +1306,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
+                 } else {
+                     offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
+                     {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+-                        temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
++                        temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */
+                         if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+                         seqState->prevOffset[1] = seqState->prevOffset[0];
+                         seqState->prevOffset[0] = offset = temp;
+@@ -1232,11 +1314,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
+             seq.offset = offset;
+         }
+ 
+-    #if defined(__clang__)
+-        if (UNLIKELY(mlBits > 0))
+-    #else
+         if (mlBits > 0)
+-    #endif
+             seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
+ 
+         if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
+@@ -1246,11 +1324,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
+         /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
+         ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
+ 
+-    #if defined(__clang__)
+-        if (UNLIKELY(llBits > 0))
+-    #else
+         if (llBits > 0)
+-    #endif
+             seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
+ 
+         if (MEM_32bits())
+@@ -1259,17 +1333,22 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
+         DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
+                     (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+ 
+-        ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */
+-        ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */
+-        if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+-        ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */
++        if (!isLastSeq) {
++            /* don't update FSE state for last Sequence */
++            ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */
++            ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */
++            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
++            ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */
++            BIT_reloadDStream(&seqState->DStream);
++        }
+     }
+ 
+     return seq;
+ }
+ 
+-#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+-MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
++#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
++#if DEBUGLEVEL >= 1
++static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
+ {
+     size_t const windowSize = dctx->fParams.windowSize;
+     /* No dictionary used. */
+@@ -1283,30 +1362,33 @@ MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefix
+     /* Dictionary is active. */
+     return 1;
+ }
++#endif
+ 
+-MEM_STATIC void ZSTD_assertValidSequence(
++static void ZSTD_assertValidSequence(
+         ZSTD_DCtx const* dctx,
+         BYTE const* op, BYTE const* oend,
+         seq_t const seq,
+         BYTE const* prefixStart, BYTE const* virtualStart)
+ {
+ #if DEBUGLEVEL >= 1
+-    size_t const windowSize = dctx->fParams.windowSize;
+-    size_t const sequenceSize = seq.litLength + seq.matchLength;
+-    BYTE const* const oLitEnd = op + seq.litLength;
+-    DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
+-            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+-    assert(op <= oend);
+-    assert((size_t)(oend - op) >= sequenceSize);
+-    assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
+-    if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
+-        size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
+-        /* Offset must be within the dictionary. */
+-        assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
+-        assert(seq.offset <= windowSize + dictSize);
+-    } else {
+-        /* Offset must be within our window. */
+-        assert(seq.offset <= windowSize);
++    if (dctx->isFrameDecompression) {
++        size_t const windowSize = dctx->fParams.windowSize;
++        size_t const sequenceSize = seq.litLength + seq.matchLength;
++        BYTE const* const oLitEnd = op + seq.litLength;
++        DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
++                (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
++        assert(op <= oend);
++        assert((size_t)(oend - op) >= sequenceSize);
++        assert(sequenceSize <= ZSTD_blockSizeMax(dctx));
++        if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
++            size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
++            /* Offset must be within the dictionary. */
++            assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
++            assert(seq.offset <= windowSize + dictSize);
++        } else {
++            /* Offset must be within our window. */
++            assert(seq.offset <= windowSize);
++        }
+     }
+ #else
+     (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
+@@ -1322,23 +1404,21 @@ DONT_VECTORIZE
+ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
+                                void* dst, size_t maxDstSize,
+                          const void* seqStart, size_t seqSize, int nbSeq,
+-                         const ZSTD_longOffset_e isLongOffset,
+-                         const int frame)
++                         const ZSTD_longOffset_e isLongOffset)
+ {
+     const BYTE* ip = (const BYTE*)seqStart;
+     const BYTE* const iend = ip + seqSize;
+     BYTE* const ostart = (BYTE*)dst;
+-    BYTE* const oend = ostart + maxDstSize;
++    BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
+     BYTE* op = ostart;
+     const BYTE* litPtr = dctx->litPtr;
+     const BYTE* litBufferEnd = dctx->litBufferEnd;
+     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+     const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
+     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+-    DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
+-    (void)frame;
++    DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq);
+ 
+-    /* Regen sequences */
++    /* Literals are split between internal buffer & output buffer */
+     if (nbSeq) {
+         seqState_t seqState;
+         dctx->fseEntropy = 1;
+@@ -1357,8 +1437,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
+                 BIT_DStream_completed < BIT_DStream_overflow);
+ 
+         /* decompress without overrunning litPtr begins */
+-        {
+-            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
++        {   seq_t sequence = {0,0,0};  /* some static analyzers believe that @sequence is not initialized (it necessarily is, since the for(;;) loop has at least one iteration) */
+             /* Align the decompression loop to 32 + 16 bytes.
+                 *
+                 * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
+@@ -1420,27 +1499,26 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
+ #endif
+ 
+             /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
+-            for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
+-                size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
++            for ( ; nbSeq; nbSeq--) {
++                sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
++                if (litPtr + sequence.litLength > dctx->litBufferEnd) break;
++                {   size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+-                assert(!ZSTD_isError(oneSeqSize));
+-                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
++                    assert(!ZSTD_isError(oneSeqSize));
++                    ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+ #endif
+-                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+-                    return oneSeqSize;
+-                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+-                op += oneSeqSize;
+-                if (UNLIKELY(!--nbSeq))
+-                    break;
+-                BIT_reloadDStream(&(seqState.DStream));
+-                sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+-            }
++                    if (UNLIKELY(ZSTD_isError(oneSeqSize)))
++                        return oneSeqSize;
++                    DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
++                    op += oneSeqSize;
++            }   }
++            DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)");
+ 
+             /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
+             if (nbSeq > 0) {
+                 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
+-                if (leftoverLit)
+-                {
++                DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength);
++                if (leftoverLit) {
+                     RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
+                     ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
+                     sequence.litLength -= leftoverLit;
+@@ -1449,24 +1527,22 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
+                 litPtr = dctx->litExtraBuffer;
+                 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+                 dctx->litBufferLocation = ZSTD_not_in_dst;
+-                {
+-                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
++                {   size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                     assert(!ZSTD_isError(oneSeqSize));
+-                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
++                    ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+ #endif
+                     if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                         return oneSeqSize;
+                     DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+                     op += oneSeqSize;
+-                    if (--nbSeq)
+-                        BIT_reloadDStream(&(seqState.DStream));
+                 }
++                nbSeq--;
+             }
+         }
+ 
+-        if (nbSeq > 0) /* there is remaining lit from extra buffer */
+-        {
++        if (nbSeq > 0) {
++            /* there is remaining lit from extra buffer */
+ 
+ #if defined(__x86_64__)
+             __asm__(".p2align 6");
+@@ -1485,35 +1561,34 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
+ #  endif
+ #endif
+ 
+-            for (; ; ) {
+-                seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
++            for ( ; nbSeq ; nbSeq--) {
++                seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
+                 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                 assert(!ZSTD_isError(oneSeqSize));
+-                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
++                ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+ #endif
+                 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                     return oneSeqSize;
+                 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+                 op += oneSeqSize;
+-                if (UNLIKELY(!--nbSeq))
+-                    break;
+-                BIT_reloadDStream(&(seqState.DStream));
+             }
+         }
+ 
+         /* check if reached exact end */
+         DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
+         RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+-        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
++        DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed);
++        RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
+         /* save reps for next block */
+         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+     }
+ 
+     /* last literal segment */
+-    if (dctx->litBufferLocation == ZSTD_split)  /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
+-    {
+-        size_t const lastLLSize = litBufferEnd - litPtr;
++    if (dctx->litBufferLocation == ZSTD_split) {
++        /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
++        size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
++        DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize);
+         RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
+         if (op != NULL) {
+             ZSTD_memmove(op, litPtr, lastLLSize);
+@@ -1523,15 +1598,17 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
+         litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+         dctx->litBufferLocation = ZSTD_not_in_dst;
+     }
+-    {   size_t const lastLLSize = litBufferEnd - litPtr;
++    /* copy last literals from internal buffer */
++    {   size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
++        DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize);
+         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+         if (op != NULL) {
+             ZSTD_memcpy(op, litPtr, lastLLSize);
+             op += lastLLSize;
+-        }
+-    }
++    }   }
+ 
+-    return op-ostart;
++    DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
++    return (size_t)(op - ostart);
+ }
+ 
+ FORCE_INLINE_TEMPLATE size_t
+@@ -1539,21 +1616,19 @@ DONT_VECTORIZE
+ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
+     void* dst, size_t maxDstSize,
+     const void* seqStart, size_t seqSize, int nbSeq,
+-    const ZSTD_longOffset_e isLongOffset,
+-    const int frame)
++    const ZSTD_longOffset_e isLongOffset)
+ {
+     const BYTE* ip = (const BYTE*)seqStart;
+     const BYTE* const iend = ip + seqSize;
+     BYTE* const ostart = (BYTE*)dst;
+-    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
++    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer;
+     BYTE* op = ostart;
+     const BYTE* litPtr = dctx->litPtr;
+     const BYTE* const litEnd = litPtr + dctx->litSize;
+     const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
+     const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
+     const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
+-    DEBUGLOG(5, "ZSTD_decompressSequences_body");
+-    (void)frame;
++    DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
+ 
+     /* Regen sequences */
+     if (nbSeq) {
+@@ -1568,11 +1643,6 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
+         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+         assert(dst != NULL);
+ 
+-        ZSTD_STATIC_ASSERT(
+-            BIT_DStream_unfinished < BIT_DStream_completed &&
+-            BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+-            BIT_DStream_completed < BIT_DStream_overflow);
+-
+ #if defined(__x86_64__)
+             __asm__(".p2align 6");
+             __asm__("nop");
+@@ -1587,73 +1657,70 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
+ #  endif
+ #endif
+ 
+-        for ( ; ; ) {
+-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
++        for ( ; nbSeq ; nbSeq--) {
++            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
+             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+             assert(!ZSTD_isError(oneSeqSize));
+-            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
++            ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+ #endif
+             if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                 return oneSeqSize;
+             DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+             op += oneSeqSize;
+-            if (UNLIKELY(!--nbSeq))
+-                break;
+-            BIT_reloadDStream(&(seqState.DStream));
+         }
+ 
+         /* check if reached exact end */
+-        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
+-        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+-        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
++        assert(nbSeq == 0);
++        RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
+         /* save reps for next block */
+         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+     }
+ 
+     /* last literal segment */
+-    {   size_t const lastLLSize = litEnd - litPtr;
++    {   size_t const lastLLSize = (size_t)(litEnd - litPtr);
++        DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize);
+         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+         if (op != NULL) {
+             ZSTD_memcpy(op, litPtr, lastLLSize);
+             op += lastLLSize;
+-        }
+-    }
++    }   }
+ 
+-    return op-ostart;
++    DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
++    return (size_t)(op - ostart);
+ }
+ 
+ static size_t
+ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
+                                  void* dst, size_t maxDstSize,
+                            const void* seqStart, size_t seqSize, int nbSeq,
+-                           const ZSTD_longOffset_e isLongOffset,
+-                           const int frame)
++                           const ZSTD_longOffset_e isLongOffset)
+ {
+-    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
++    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ }
+ 
+ static size_t
+ ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
+                                                void* dst, size_t maxDstSize,
+                                          const void* seqStart, size_t seqSize, int nbSeq,
+-                                         const ZSTD_longOffset_e isLongOffset,
+-                                         const int frame)
++                                         const ZSTD_longOffset_e isLongOffset)
+ {
+-    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
++    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ }
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+ 
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+ 
+-FORCE_INLINE_TEMPLATE size_t
+-ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
++FORCE_INLINE_TEMPLATE
++
++size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
+                    const BYTE* const prefixStart, const BYTE* const dictEnd)
+ {
+     prefetchPos += sequence.litLength;
+     {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
+-        const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+-                                                                              * No consequence though : memory address is only used for prefetching, not for dereferencing */
++        /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
++         * No consequence though : memory address is only used for prefetching, not for dereferencing */
++        const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset);
+         PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+     }
+     return prefetchPos + sequence.matchLength;
+@@ -1668,20 +1735,18 @@ ZSTD_decompressSequencesLong_body(
+                                ZSTD_DCtx* dctx,
+                                void* dst, size_t maxDstSize,
+                          const void* seqStart, size_t seqSize, int nbSeq,
+-                         const ZSTD_longOffset_e isLongOffset,
+-                         const int frame)
++                         const ZSTD_longOffset_e isLongOffset)
+ {
+     const BYTE* ip = (const BYTE*)seqStart;
+     const BYTE* const iend = ip + seqSize;
+     BYTE* const ostart = (BYTE*)dst;
+-    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
++    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
+     BYTE* op = ostart;
+     const BYTE* litPtr = dctx->litPtr;
+     const BYTE* litBufferEnd = dctx->litBufferEnd;
+     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+     const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
+     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+-    (void)frame;
+ 
+     /* Regen sequences */
+     if (nbSeq) {
+@@ -1706,20 +1771,17 @@ ZSTD_decompressSequencesLong_body(
+         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+ 
+         /* prepare in advance */
+-        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
+-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
++        for (seqNb=0; seqNb<seqAdvance; seqNb++) {
++            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
+             prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+             sequences[seqNb] = sequence;
+         }
+-        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
+ 
+         /* decompress without stomping litBuffer */
+-        for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
+-            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+-            size_t oneSeqSize;
++        for (; seqNb < nbSeq; seqNb++) {
++            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
+ 
+-            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
+-            {
++            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) {
+                 /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
+                 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
+                 if (leftoverLit)
+@@ -1732,26 +1794,26 @@ ZSTD_decompressSequencesLong_body(
+                 litPtr = dctx->litExtraBuffer;
+                 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+                 dctx->litBufferLocation = ZSTD_not_in_dst;
+-                oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
++                {   size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+-                assert(!ZSTD_isError(oneSeqSize));
+-                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
++                    assert(!ZSTD_isError(oneSeqSize));
++                    ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+ #endif
+-                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
++                    if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+ 
+-                prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+-                sequences[seqNb & STORED_SEQS_MASK] = sequence;
+-                op += oneSeqSize;
+-            }
++                    prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
++                    sequences[seqNb & STORED_SEQS_MASK] = sequence;
++                    op += oneSeqSize;
++            }   }
+             else
+             {
+                 /* lit buffer is either wholly contained in first or second split, or not split at all*/
+-                oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
++                size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
+                     ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
+                     ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                 assert(!ZSTD_isError(oneSeqSize));
+-                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
++                ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+ #endif
+                 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+ 
+@@ -1760,17 +1822,15 @@ ZSTD_decompressSequencesLong_body(
+                 op += oneSeqSize;
+             }
+         }
+-        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
++        RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
+ 
+         /* finish queue */
+         seqNb -= seqAdvance;
+         for ( ; seqNb<nbSeq ; seqNb++) {
+             seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
+-            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
+-            {
++            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) {
+                 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
+-                if (leftoverLit)
+-                {
++                if (leftoverLit) {
+                     RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
+                     ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
+                     sequence->litLength -= leftoverLit;
+@@ -1779,11 +1839,10 @@ ZSTD_decompressSequencesLong_body(
+                 litPtr = dctx->litExtraBuffer;
+                 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+                 dctx->litBufferLocation = ZSTD_not_in_dst;
+-                {
+-                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
++                {   size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                     assert(!ZSTD_isError(oneSeqSize));
+-                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
++                    ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+ #endif
+                     if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+                     op += oneSeqSize;
+@@ -1796,7 +1855,7 @@ ZSTD_decompressSequencesLong_body(
+                     ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                 assert(!ZSTD_isError(oneSeqSize));
+-                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
++                ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+ #endif
+                 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+                 op += oneSeqSize;
+@@ -1808,8 +1867,7 @@ ZSTD_decompressSequencesLong_body(
+     }
+ 
+     /* last literal segment */
+-    if (dctx->litBufferLocation == ZSTD_split)  /* first deplete literal buffer in dst, then copy litExtraBuffer */
+-    {
++    if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */
+         size_t const lastLLSize = litBufferEnd - litPtr;
+         RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
+         if (op != NULL) {
+@@ -1827,17 +1885,16 @@ ZSTD_decompressSequencesLong_body(
+         }
+     }
+ 
+-    return op-ostart;
++    return (size_t)(op - ostart);
+ }
+ 
+ static size_t
+ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
+                                  void* dst, size_t maxDstSize,
+                            const void* seqStart, size_t seqSize, int nbSeq,
+-                           const ZSTD_longOffset_e isLongOffset,
+-                           const int frame)
++                           const ZSTD_longOffset_e isLongOffset)
+ {
+-    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
++    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ }
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+ 
+@@ -1851,20 +1908,18 @@ DONT_VECTORIZE
+ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
+                                  void* dst, size_t maxDstSize,
+                            const void* seqStart, size_t seqSize, int nbSeq,
+-                           const ZSTD_longOffset_e isLongOffset,
+-                           const int frame)
++                           const ZSTD_longOffset_e isLongOffset)
+ {
+-    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
++    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ }
+ static BMI2_TARGET_ATTRIBUTE size_t
+ DONT_VECTORIZE
+ ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
+                                  void* dst, size_t maxDstSize,
+                            const void* seqStart, size_t seqSize, int nbSeq,
+-                           const ZSTD_longOffset_e isLongOffset,
+-                           const int frame)
++                           const ZSTD_longOffset_e isLongOffset)
+ {
+-    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
++    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ }
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+ 
+@@ -1873,10 +1928,9 @@ static BMI2_TARGET_ATTRIBUTE size_t
+ ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
+                                  void* dst, size_t maxDstSize,
+                            const void* seqStart, size_t seqSize, int nbSeq,
+-                           const ZSTD_longOffset_e isLongOffset,
+-                           const int frame)
++                           const ZSTD_longOffset_e isLongOffset)
+ {
+-    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
++    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ }
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+ 
+@@ -1886,37 +1940,34 @@ typedef size_t (*ZSTD_decompressSequences_t)(
+                             ZSTD_DCtx* dctx,
+                             void* dst, size_t maxDstSize,
+                             const void* seqStart, size_t seqSize, int nbSeq,
+-                            const ZSTD_longOffset_e isLongOffset,
+-                            const int frame);
++                            const ZSTD_longOffset_e isLongOffset);
+ 
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+ static size_t
+ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
+                    const void* seqStart, size_t seqSize, int nbSeq,
+-                   const ZSTD_longOffset_e isLongOffset,
+-                   const int frame)
++                   const ZSTD_longOffset_e isLongOffset)
+ {
+     DEBUGLOG(5, "ZSTD_decompressSequences");
+ #if DYNAMIC_BMI2
+     if (ZSTD_DCtx_get_bmi2(dctx)) {
+-        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
++        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+     }
+ #endif
+-    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
++    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ }
+ static size_t
+ ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
+                                  const void* seqStart, size_t seqSize, int nbSeq,
+-                                 const ZSTD_longOffset_e isLongOffset,
+-                                 const int frame)
++                                 const ZSTD_longOffset_e isLongOffset)
+ {
+     DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
+ #if DYNAMIC_BMI2
+     if (ZSTD_DCtx_get_bmi2(dctx)) {
+-        return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
++        return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+     }
+ #endif
+-    return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
++    return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ }
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+ 
+@@ -1931,69 +1982,114 @@ static size_t
+ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
+                              void* dst, size_t maxDstSize,
+                              const void* seqStart, size_t seqSize, int nbSeq,
+-                             const ZSTD_longOffset_e isLongOffset,
+-                             const int frame)
++                             const ZSTD_longOffset_e isLongOffset)
+ {
+     DEBUGLOG(5, "ZSTD_decompressSequencesLong");
+ #if DYNAMIC_BMI2
+     if (ZSTD_DCtx_get_bmi2(dctx)) {
+-        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
++        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+     }
+ #endif
+-  return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
++  return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ }
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+ 
+ 
++/*
++ * @returns The total size of the history referenceable by zstd, including
++ * both the prefix and the extDict. At @p op any offset larger than this
++ * is invalid.
++ */
++static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
++{
++    return (size_t)(op - virtualStart);
++}
++
++typedef struct {
++    unsigned longOffsetShare;
++    unsigned maxNbAdditionalBits;
++} ZSTD_OffsetInfo;
+ 
+-#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+-    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+-/* ZSTD_getLongOffsetsShare() :
++/* ZSTD_getOffsetInfo() :
+  * condition : offTable must be valid
+  * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
+- *           compared to maximum possible of (1<<OffFSELog) */
+-static unsigned
+-ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
++ *           compared to maximum possible of (1<<OffFSELog),
++ *           as well as the maximum number of additional bits required.
++ */
++static ZSTD_OffsetInfo
++ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
+ {
+-    const void* ptr = offTable;
+-    U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
+-    const ZSTD_seqSymbol* table = offTable + 1;
+-    U32 const max = 1 << tableLog;
+-    U32 u, total = 0;
+-    DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
+-
+-    assert(max <= (1 << OffFSELog));  /* max not too large */
+-    for (u=0; u<max; u++) {
+-        if (table[u].nbAdditionalBits > 22) total += 1;
++    ZSTD_OffsetInfo info = {0, 0};
++    /* If nbSeq == 0, then the offTable is uninitialized, but we have
++     * no sequences, so both values should be 0.
++     */
++    if (nbSeq != 0) {
++        const void* ptr = offTable;
++        U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
++        const ZSTD_seqSymbol* table = offTable + 1;
++        U32 const max = 1 << tableLog;
++        U32 u;
++        DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
++
++        assert(max <= (1 << OffFSELog));  /* max not too large */
++        for (u=0; u<max; u++) {
++            info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
++            if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
++        }
++
++        assert(tableLog <= OffFSELog);
++        info.longOffsetShare <<= (OffFSELog - tableLog);  /* scale to OffFSELog */
+     }
+ 
+-    assert(tableLog <= OffFSELog);
+-    total <<= (OffFSELog - tableLog);  /* scale to OffFSELog */
++    return info;
++}
+ 
+-    return total;
++/*
++ * @returns The maximum offset we can decode in one read of our bitstream, without
++ * reloading more bits in the middle of the offset bits read. Any offsets larger
++ * than this must use the long offset decoder.
++ */
++static size_t ZSTD_maxShortOffset(void)
++{
++    if (MEM_64bits()) {
++        /* We can decode any offset without reloading bits.
++         * This might change if the max window size grows.
++         */
++        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
++        return (size_t)-1;
++    } else {
++        /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
++         * This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
++         * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
++         */
++        size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
++        size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
++        assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
++        return maxOffset;
++    }
+ }
+-#endif
+ 
+ size_t
+ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                               void* dst, size_t dstCapacity,
+-                        const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
++                        const void* src, size_t srcSize, const streaming_operation streaming)
+ {   /* blockType == blockCompressed */
+     const BYTE* ip = (const BYTE*)src;
+-    /* isLongOffset must be true if there are long offsets.
+-     * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
+-     * We don't expect that to be the case in 64-bit mode.
+-     * In block mode, window size is not known, so we have to be conservative.
+-     * (note: but it could be evaluated from current-lowLimit)
+-     */
+-    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
+-    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
+-
+-    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
++    DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize);
++
++    /* Note : the wording of the specification
++     * allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx).
++     * This generally does not happen, as it makes little sense,
++     * since an uncompressed block would feature same size and have no decompression cost.
++     * Also, note that the decoder from reference libzstd before v1.5.4
++     * would consider this edge case as an error.
++     * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx)
++     * for broader compatibility with the deployed ecosystem of zstd decoders */
++    RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, "");
+ 
+     /* Decode literals section */
+     {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
+-        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
++        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
+         if (ZSTD_isError(litCSize)) return litCSize;
+         ip += litCSize;
+         srcSize -= litCSize;
+@@ -2001,6 +2097,23 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+ 
+     /* Build Decoding Tables */
+     {
++        /* Compute the maximum block size, which must also work when !frame and fParams are unset.
++         * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
++         */
++        size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx));
++        size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart);
++        /* isLongOffset must be true if there are long offsets.
++         * Offsets are long if they are larger than ZSTD_maxShortOffset().
++         * We don't expect that to be the case in 64-bit mode.
++         *
++         * We check here to see if our history is large enough to allow long offsets.
++         * If it isn't, then we can't possibly have (valid) long offsets. If the offset
++         * is invalid, then it is okay to read it incorrectly.
++         *
++         * If isLongOffset is true, then we will later check our decoding table to see
++         * if it is even possible to generate long offsets.
++         */
++        ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
+         /* These macros control at build-time which decompressor implementation
+          * we use. If neither is defined, we do some inspection and dispatch at
+          * runtime.
+@@ -2008,6 +2121,11 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+ #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+         int usePrefetchDecoder = dctx->ddictIsCold;
++#else
++        /* Set to 1 to avoid computing offset info if we don't need to.
++         * Otherwise this value is ignored.
++         */
++        int usePrefetchDecoder = 1;
+ #endif
+         int nbSeq;
+         size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
+@@ -2015,40 +2133,55 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+         ip += seqHSize;
+         srcSize -= seqHSize;
+ 
+-        RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
++        RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
++        RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
++                "invalid dst");
+ 
+-#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+-    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+-        if ( !usePrefetchDecoder
+-          && (!frame || (dctx->fParams.windowSize > (1<<24)))
+-          && (nbSeq>ADVANCED_SEQS) ) {  /* could probably use a larger nbSeq limit */
+-            U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
+-            U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
+-            usePrefetchDecoder = (shareLongOffsets >= minShare);
++        /* If we could potentially have long offsets, or we might want to use the prefetch decoder,
++         * compute information about the share of long offsets, and the maximum nbAdditionalBits.
++         * NOTE: could probably use a larger nbSeq limit
++         */
++        if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
++            ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
++            if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
++                /* If isLongOffset, but the maximum number of additional bits that we see in our table is small
++                 * enough, then we know it is impossible to have too long an offset in this block, so we can
++                 * use the regular offset decoder.
++                 */
++                isLongOffset = ZSTD_lo_isRegularOffset;
++            }
++            if (!usePrefetchDecoder) {
++                U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
++                usePrefetchDecoder = (info.longOffsetShare >= minShare);
++            }
+         }
+-#endif
+ 
+         dctx->ddictIsCold = 0;
+ 
+ #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+-        if (usePrefetchDecoder)
++        if (usePrefetchDecoder) {
++#else
++        (void)usePrefetchDecoder;
++        {
+ #endif
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+-            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
++            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
+ #endif
++        }
+ 
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+         /* else */
+         if (dctx->litBufferLocation == ZSTD_split)
+-            return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
++            return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
+         else
+-            return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
++            return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
+ #endif
+     }
+ }
+ 
+ 
++ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
+ {
+     if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
+@@ -2060,13 +2193,24 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
+ }
+ 
+ 
+-size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
+-                            void* dst, size_t dstCapacity,
+-                      const void* src, size_t srcSize)
++size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
++                                       void* dst, size_t dstCapacity,
++                                 const void* src, size_t srcSize)
+ {
+     size_t dSize;
++    dctx->isFrameDecompression = 0;
+     ZSTD_checkContinuity(dctx, dst, dstCapacity);
+-    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
++    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming);
++    FORWARD_IF_ERROR(dSize, "");
+     dctx->previousDstEnd = (char*)dst + dSize;
+     return dSize;
+ }
++
++
++/* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
++size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
++                            void* dst, size_t dstCapacity,
++                      const void* src, size_t srcSize)
++{
++    return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
++}
+diff --git a/lib/zstd/decompress/zstd_decompress_block.h b/lib/zstd/decompress/zstd_decompress_block.h
+index 3d2d57a5d25a..becffbd89364 100644
+--- a/lib/zstd/decompress/zstd_decompress_block.h
++++ b/lib/zstd/decompress/zstd_decompress_block.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -47,7 +48,7 @@ typedef enum {
+  */
+ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                                void* dst, size_t dstCapacity,
+-                         const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
++                         const void* src, size_t srcSize, const streaming_operation streaming);
+ 
+ /* ZSTD_buildFSETable() :
+  * generate FSE decoding table for one symbol (ll, ml or off)
+@@ -64,5 +65,10 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+                    unsigned tableLog, void* wksp, size_t wkspSize,
+                    int bmi2);
+ 
++/* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
++size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
++                            void* dst, size_t dstCapacity,
++                      const void* src, size_t srcSize);
++
+ 
+ #endif /* ZSTD_DEC_BLOCK_H */
+diff --git a/lib/zstd/decompress/zstd_decompress_internal.h b/lib/zstd/decompress/zstd_decompress_internal.h
+index 98102edb6a83..0f02526be774 100644
+--- a/lib/zstd/decompress/zstd_decompress_internal.h
++++ b/lib/zstd/decompress/zstd_decompress_internal.h
+@@ -1,5 +1,6 @@
++/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Yann Collet, Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -75,12 +76,13 @@ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
+ 
+ #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
+ #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
++#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
+ 
+ typedef struct {
+     ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];    /* Note : Space reserved for FSE Tables */
+     ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];   /* is also used as temporary workspace while building hufTable during DDict creation */
+     ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];    /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
+-    HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
++    HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)];  /* can accommodate HUF_decompress4X */
+     U32 rep[ZSTD_REP_NUM];
+     U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
+ } ZSTD_entropyDTables_t;
+@@ -152,6 +154,7 @@ struct ZSTD_DCtx_s
+     size_t litSize;
+     size_t rleSize;
+     size_t staticSize;
++    int isFrameDecompression;
+ #if DYNAMIC_BMI2 != 0
+     int bmi2;                     /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
+ #endif
+@@ -164,6 +167,8 @@ struct ZSTD_DCtx_s
+     ZSTD_dictUses_e dictUses;
+     ZSTD_DDictHashSet* ddictSet;                    /* Hash set for multiple ddicts */
+     ZSTD_refMultipleDDicts_e refMultipleDDicts;     /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
++    int disableHufAsm;
++    int maxBlockSizeParam;
+ 
+     /* streaming */
+     ZSTD_dStreamStage streamStage;
+diff --git a/lib/zstd/decompress_sources.h b/lib/zstd/decompress_sources.h
+index a06ca187aab5..8a47eb2a4514 100644
+--- a/lib/zstd/decompress_sources.h
++++ b/lib/zstd/decompress_sources.h
+@@ -1,6 +1,6 @@
+ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+ /*
+- * Copyright (c) Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+diff --git a/lib/zstd/zstd_common_module.c b/lib/zstd/zstd_common_module.c
+index 22686e367e6f..466828e35752 100644
+--- a/lib/zstd/zstd_common_module.c
++++ b/lib/zstd/zstd_common_module.c
+@@ -1,6 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -24,9 +24,6 @@ EXPORT_SYMBOL_GPL(HUF_readStats_wksp);
+ EXPORT_SYMBOL_GPL(ZSTD_isError);
+ EXPORT_SYMBOL_GPL(ZSTD_getErrorName);
+ EXPORT_SYMBOL_GPL(ZSTD_getErrorCode);
+-EXPORT_SYMBOL_GPL(ZSTD_customMalloc);
+-EXPORT_SYMBOL_GPL(ZSTD_customCalloc);
+-EXPORT_SYMBOL_GPL(ZSTD_customFree);
+ 
+ MODULE_LICENSE("Dual BSD/GPL");
+ MODULE_DESCRIPTION("Zstd Common");
+diff --git a/lib/zstd/zstd_compress_module.c b/lib/zstd/zstd_compress_module.c
+index 04e1b5c01d9b..8ecf43226af2 100644
+--- a/lib/zstd/zstd_compress_module.c
++++ b/lib/zstd/zstd_compress_module.c
+@@ -1,6 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+diff --git a/lib/zstd/zstd_decompress_module.c b/lib/zstd/zstd_decompress_module.c
+index f4ed952ed485..7d31518e9d5a 100644
+--- a/lib/zstd/zstd_decompress_module.c
++++ b/lib/zstd/zstd_decompress_module.c
+@@ -1,6 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+ /*
+- * Copyright (c) Facebook, Inc.
++ * Copyright (c) Meta Platforms, Inc. and affiliates.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+@@ -77,7 +77,7 @@ EXPORT_SYMBOL(zstd_init_dstream);
+ 
+ size_t zstd_reset_dstream(zstd_dstream *dstream)
+ {
+-	return ZSTD_resetDStream(dstream);
++	return ZSTD_DCtx_reset(dstream, ZSTD_reset_session_only);
+ }
+ EXPORT_SYMBOL(zstd_reset_dstream);
+ 
+-- 
+2.47.0.rc0
diff --git a/patches/0002-sched-ext.patch b/patches/0002-sched-ext.patch
new file mode 100644
index 0000000..3d1b009
--- /dev/null
+++ b/patches/0002-sched-ext.patch
@@ -0,0 +1,17925 @@
+From c0d9f38dcc2b6bb16e54e7f438c9c449319ebef4 Mon Sep 17 00:00:00 2001
+From: Peter Jung <admin@ptr1337.dev>
+Date: Thu, 10 Oct 2024 12:47:12 +0200
+Subject: [PATCH] sched-ext
+
+Signed-off-by: Peter Jung <admin@ptr1337.dev>
+---
+ Documentation/scheduler/index.rst             |    1 +
+ Documentation/scheduler/sched-ext.rst         |  326 +
+ MAINTAINERS                                   |   13 +
+ drivers/tty/sysrq.c                           |    1 +
+ include/asm-generic/vmlinux.lds.h             |    1 +
+ include/linux/cgroup.h                        |    4 +-
+ include/linux/sched.h                         |    5 +
+ include/linux/sched/ext.h                     |  216 +
+ include/linux/sched/task.h                    |    8 +-
+ include/trace/events/sched_ext.h              |   32 +
+ include/uapi/linux/sched.h                    |    1 +
+ init/Kconfig                                  |   10 +
+ init/init_task.c                              |   12 +
+ kernel/Kconfig.preempt                        |   27 +-
+ kernel/fork.c                                 |   17 +-
+ kernel/sched/build_policy.c                   |   11 +
+ kernel/sched/core.c                           |  288 +-
+ kernel/sched/cpufreq_schedutil.c              |   50 +-
+ kernel/sched/debug.c                          |    3 +
+ kernel/sched/ext.c                            | 7281 +++++++++++++++++
+ kernel/sched/ext.h                            |   91 +
+ kernel/sched/fair.c                           |   21 +-
+ kernel/sched/idle.c                           |    2 +
+ kernel/sched/sched.h                          |  203 +-
+ kernel/sched/syscalls.c                       |   26 +
+ lib/dump_stack.c                              |    1 +
+ tools/Makefile                                |   10 +-
+ tools/sched_ext/.gitignore                    |    2 +
+ tools/sched_ext/Makefile                      |  246 +
+ tools/sched_ext/README.md                     |  270 +
+ .../sched_ext/include/bpf-compat/gnu/stubs.h  |   11 +
+ tools/sched_ext/include/scx/common.bpf.h      |  427 +
+ tools/sched_ext/include/scx/common.h          |   75 +
+ tools/sched_ext/include/scx/compat.bpf.h      |   47 +
+ tools/sched_ext/include/scx/compat.h          |  186 +
+ tools/sched_ext/include/scx/user_exit_info.h  |  115 +
+ tools/sched_ext/scx_central.bpf.c             |  361 +
+ tools/sched_ext/scx_central.c                 |  135 +
+ tools/sched_ext/scx_flatcg.bpf.c              |  957 +++
+ tools/sched_ext/scx_flatcg.c                  |  233 +
+ tools/sched_ext/scx_flatcg.h                  |   51 +
+ tools/sched_ext/scx_qmap.bpf.c                |  813 ++
+ tools/sched_ext/scx_qmap.c                    |  153 +
+ tools/sched_ext/scx_show_state.py             |   40 +
+ tools/sched_ext/scx_simple.bpf.c              |  156 +
+ tools/sched_ext/scx_simple.c                  |  107 +
+ tools/testing/selftests/sched_ext/.gitignore  |    6 +
+ tools/testing/selftests/sched_ext/Makefile    |  218 +
+ tools/testing/selftests/sched_ext/config      |    9 +
+ .../selftests/sched_ext/create_dsq.bpf.c      |   58 +
+ .../testing/selftests/sched_ext/create_dsq.c  |   57 +
+ .../sched_ext/ddsp_bogus_dsq_fail.bpf.c       |   42 +
+ .../selftests/sched_ext/ddsp_bogus_dsq_fail.c |   57 +
+ .../sched_ext/ddsp_vtimelocal_fail.bpf.c      |   39 +
+ .../sched_ext/ddsp_vtimelocal_fail.c          |   56 +
+ .../selftests/sched_ext/dsp_local_on.bpf.c    |   65 +
+ .../selftests/sched_ext/dsp_local_on.c        |   58 +
+ .../sched_ext/enq_last_no_enq_fails.bpf.c     |   21 +
+ .../sched_ext/enq_last_no_enq_fails.c         |   60 +
+ .../sched_ext/enq_select_cpu_fails.bpf.c      |   43 +
+ .../sched_ext/enq_select_cpu_fails.c          |   61 +
+ tools/testing/selftests/sched_ext/exit.bpf.c  |   84 +
+ tools/testing/selftests/sched_ext/exit.c      |   55 +
+ tools/testing/selftests/sched_ext/exit_test.h |   20 +
+ .../testing/selftests/sched_ext/hotplug.bpf.c |   61 +
+ tools/testing/selftests/sched_ext/hotplug.c   |  168 +
+ .../selftests/sched_ext/hotplug_test.h        |   15 +
+ .../sched_ext/init_enable_count.bpf.c         |   53 +
+ .../selftests/sched_ext/init_enable_count.c   |  166 +
+ .../testing/selftests/sched_ext/maximal.bpf.c |  164 +
+ tools/testing/selftests/sched_ext/maximal.c   |   51 +
+ .../selftests/sched_ext/maybe_null.bpf.c      |   36 +
+ .../testing/selftests/sched_ext/maybe_null.c  |   49 +
+ .../sched_ext/maybe_null_fail_dsp.bpf.c       |   25 +
+ .../sched_ext/maybe_null_fail_yld.bpf.c       |   28 +
+ .../testing/selftests/sched_ext/minimal.bpf.c |   21 +
+ tools/testing/selftests/sched_ext/minimal.c   |   58 +
+ .../selftests/sched_ext/prog_run.bpf.c        |   33 +
+ tools/testing/selftests/sched_ext/prog_run.c  |   78 +
+ .../testing/selftests/sched_ext/reload_loop.c |   75 +
+ tools/testing/selftests/sched_ext/runner.c    |  201 +
+ tools/testing/selftests/sched_ext/scx_test.h  |  131 +
+ .../selftests/sched_ext/select_cpu_dfl.bpf.c  |   40 +
+ .../selftests/sched_ext/select_cpu_dfl.c      |   72 +
+ .../sched_ext/select_cpu_dfl_nodispatch.bpf.c |   89 +
+ .../sched_ext/select_cpu_dfl_nodispatch.c     |   72 +
+ .../sched_ext/select_cpu_dispatch.bpf.c       |   41 +
+ .../selftests/sched_ext/select_cpu_dispatch.c |   70 +
+ .../select_cpu_dispatch_bad_dsq.bpf.c         |   37 +
+ .../sched_ext/select_cpu_dispatch_bad_dsq.c   |   56 +
+ .../select_cpu_dispatch_dbl_dsp.bpf.c         |   38 +
+ .../sched_ext/select_cpu_dispatch_dbl_dsp.c   |   56 +
+ .../sched_ext/select_cpu_vtime.bpf.c          |   92 +
+ .../selftests/sched_ext/select_cpu_vtime.c    |   59 +
+ .../selftests/sched_ext/test_example.c        |   49 +
+ tools/testing/selftests/sched_ext/util.c      |   71 +
+ tools/testing/selftests/sched_ext/util.h      |   13 +
+ 97 files changed, 16193 insertions(+), 130 deletions(-)
+ create mode 100644 Documentation/scheduler/sched-ext.rst
+ create mode 100644 include/linux/sched/ext.h
+ create mode 100644 include/trace/events/sched_ext.h
+ create mode 100644 kernel/sched/ext.c
+ create mode 100644 kernel/sched/ext.h
+ create mode 100644 tools/sched_ext/.gitignore
+ create mode 100644 tools/sched_ext/Makefile
+ create mode 100644 tools/sched_ext/README.md
+ create mode 100644 tools/sched_ext/include/bpf-compat/gnu/stubs.h
+ create mode 100644 tools/sched_ext/include/scx/common.bpf.h
+ create mode 100644 tools/sched_ext/include/scx/common.h
+ create mode 100644 tools/sched_ext/include/scx/compat.bpf.h
+ create mode 100644 tools/sched_ext/include/scx/compat.h
+ create mode 100644 tools/sched_ext/include/scx/user_exit_info.h
+ create mode 100644 tools/sched_ext/scx_central.bpf.c
+ create mode 100644 tools/sched_ext/scx_central.c
+ create mode 100644 tools/sched_ext/scx_flatcg.bpf.c
+ create mode 100644 tools/sched_ext/scx_flatcg.c
+ create mode 100644 tools/sched_ext/scx_flatcg.h
+ create mode 100644 tools/sched_ext/scx_qmap.bpf.c
+ create mode 100644 tools/sched_ext/scx_qmap.c
+ create mode 100644 tools/sched_ext/scx_show_state.py
+ create mode 100644 tools/sched_ext/scx_simple.bpf.c
+ create mode 100644 tools/sched_ext/scx_simple.c
+ create mode 100644 tools/testing/selftests/sched_ext/.gitignore
+ create mode 100644 tools/testing/selftests/sched_ext/Makefile
+ create mode 100644 tools/testing/selftests/sched_ext/config
+ create mode 100644 tools/testing/selftests/sched_ext/create_dsq.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/create_dsq.c
+ create mode 100644 tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c
+ create mode 100644 tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c
+ create mode 100644 tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/dsp_local_on.c
+ create mode 100644 tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c
+ create mode 100644 tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/enq_select_cpu_fails.c
+ create mode 100644 tools/testing/selftests/sched_ext/exit.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/exit.c
+ create mode 100644 tools/testing/selftests/sched_ext/exit_test.h
+ create mode 100644 tools/testing/selftests/sched_ext/hotplug.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/hotplug.c
+ create mode 100644 tools/testing/selftests/sched_ext/hotplug_test.h
+ create mode 100644 tools/testing/selftests/sched_ext/init_enable_count.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/init_enable_count.c
+ create mode 100644 tools/testing/selftests/sched_ext/maximal.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/maximal.c
+ create mode 100644 tools/testing/selftests/sched_ext/maybe_null.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/maybe_null.c
+ create mode 100644 tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/minimal.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/minimal.c
+ create mode 100644 tools/testing/selftests/sched_ext/prog_run.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/prog_run.c
+ create mode 100644 tools/testing/selftests/sched_ext/reload_loop.c
+ create mode 100644 tools/testing/selftests/sched_ext/runner.c
+ create mode 100644 tools/testing/selftests/sched_ext/scx_test.h
+ create mode 100644 tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/select_cpu_dfl.c
+ create mode 100644 tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c
+ create mode 100644 tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/select_cpu_dispatch.c
+ create mode 100644 tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c
+ create mode 100644 tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c
+ create mode 100644 tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c
+ create mode 100644 tools/testing/selftests/sched_ext/select_cpu_vtime.c
+ create mode 100644 tools/testing/selftests/sched_ext/test_example.c
+ create mode 100644 tools/testing/selftests/sched_ext/util.c
+ create mode 100644 tools/testing/selftests/sched_ext/util.h
+
+diff --git a/Documentation/scheduler/index.rst b/Documentation/scheduler/index.rst
+index 43bd8a145b7a..0611dc3dda8e 100644
+--- a/Documentation/scheduler/index.rst
++++ b/Documentation/scheduler/index.rst
+@@ -20,6 +20,7 @@ Scheduler
+     sched-nice-design
+     sched-rt-group
+     sched-stats
++    sched-ext
+     sched-debug
+ 
+     text_files
+diff --git a/Documentation/scheduler/sched-ext.rst b/Documentation/scheduler/sched-ext.rst
+new file mode 100644
+index 000000000000..6c0d70e2e27d
+--- /dev/null
++++ b/Documentation/scheduler/sched-ext.rst
+@@ -0,0 +1,326 @@
++==========================
++Extensible Scheduler Class
++==========================
++
++sched_ext is a scheduler class whose behavior can be defined by a set of BPF
++programs - the BPF scheduler.
++
++* sched_ext exports a full scheduling interface so that any scheduling
++  algorithm can be implemented on top.
++
++* The BPF scheduler can group CPUs however it sees fit and schedule them
++  together, as tasks aren't tied to specific CPUs at the time of wakeup.
++
++* The BPF scheduler can be turned on and off dynamically anytime.
++
++* The system integrity is maintained no matter what the BPF scheduler does.
++  The default scheduling behavior is restored anytime an error is detected,
++  a runnable task stalls, or on invoking the SysRq key sequence
++  :kbd:`SysRq-S`.
++
++* When the BPF scheduler triggers an error, debug information is dumped to
++  aid debugging. The debug dump is passed to and printed out by the
++  scheduler binary. The debug dump can also be accessed through the
++  `sched_ext_dump` tracepoint. The SysRq key sequence :kbd:`SysRq-D`
++  triggers a debug dump. This doesn't terminate the BPF scheduler and can
++  only be read through the tracepoint.
++
++Switching to and from sched_ext
++===============================
++
++``CONFIG_SCHED_CLASS_EXT`` is the config option to enable sched_ext and
++``tools/sched_ext`` contains the example schedulers. The following config
++options should be enabled to use sched_ext:
++
++.. code-block:: none
++
++    CONFIG_BPF=y
++    CONFIG_SCHED_CLASS_EXT=y
++    CONFIG_BPF_SYSCALL=y
++    CONFIG_BPF_JIT=y
++    CONFIG_DEBUG_INFO_BTF=y
++    CONFIG_BPF_JIT_ALWAYS_ON=y
++    CONFIG_BPF_JIT_DEFAULT_ON=y
++    CONFIG_PAHOLE_HAS_SPLIT_BTF=y
++    CONFIG_PAHOLE_HAS_BTF_TAG=y
++
++sched_ext is used only when the BPF scheduler is loaded and running.
++
++If a task explicitly sets its scheduling policy to ``SCHED_EXT``, it will be
++treated as ``SCHED_NORMAL`` and scheduled by CFS until the BPF scheduler is
++loaded.
++
++When the BPF scheduler is loaded and ``SCX_OPS_SWITCH_PARTIAL`` is not set
++in ``ops->flags``, all ``SCHED_NORMAL``, ``SCHED_BATCH``, ``SCHED_IDLE``, and
++``SCHED_EXT`` tasks are scheduled by sched_ext.
++
++However, when the BPF scheduler is loaded and ``SCX_OPS_SWITCH_PARTIAL`` is
++set in ``ops->flags``, only tasks with the ``SCHED_EXT`` policy are scheduled
++by sched_ext, while tasks with ``SCHED_NORMAL``, ``SCHED_BATCH`` and
++``SCHED_IDLE`` policies are scheduled by CFS.
++
++Terminating the sched_ext scheduler program, triggering :kbd:`SysRq-S`, or
++detection of any internal error including stalled runnable tasks aborts the
++BPF scheduler and reverts all tasks back to CFS.
++
++.. code-block:: none
++
++    # make -j16 -C tools/sched_ext
++    # tools/sched_ext/scx_simple
++    local=0 global=3
++    local=5 global=24
++    local=9 global=44
++    local=13 global=56
++    local=17 global=72
++    ^CEXIT: BPF scheduler unregistered
++
++The current status of the BPF scheduler can be determined as follows:
++
++.. code-block:: none
++
++    # cat /sys/kernel/sched_ext/state
++    enabled
++    # cat /sys/kernel/sched_ext/root/ops
++    simple
++
++You can check if any BPF scheduler has ever been loaded since boot by examining
++this monotonically incrementing counter (a value of zero indicates that no BPF
++scheduler has been loaded):
++
++.. code-block:: none
++
++    # cat /sys/kernel/sched_ext/enable_seq
++    1
++
++``tools/sched_ext/scx_show_state.py`` is a drgn script which shows more
++detailed information:
++
++.. code-block:: none
++
++    # tools/sched_ext/scx_show_state.py
++    ops           : simple
++    enabled       : 1
++    switching_all : 1
++    switched_all  : 1
++    enable_state  : enabled (2)
++    bypass_depth  : 0
++    nr_rejected   : 0
++    enable_seq    : 1
++
++If ``CONFIG_SCHED_DEBUG`` is set, whether a given task is on sched_ext can
++be determined as follows:
++
++.. code-block:: none
++
++    # grep ext /proc/self/sched
++    ext.enabled                                  :                    1
++
++The Basics
++==========
++
++Userspace can implement an arbitrary BPF scheduler by loading a set of BPF
++programs that implement ``struct sched_ext_ops``. The only mandatory field
++is ``ops.name`` which must be a valid BPF object name. All operations are
++optional. The following modified excerpt is from
++``tools/sched_ext/scx_simple.bpf.c`` showing a minimal global FIFO scheduler.
++
++.. code-block:: c
++
++    /*
++     * Decide which CPU a task should be migrated to before being
++     * enqueued (either at wakeup, fork time, or exec time). If an
++     * idle core is found by the default ops.select_cpu() implementation,
++     * then dispatch the task directly to SCX_DSQ_LOCAL and skip the
++     * ops.enqueue() callback.
++     *
++     * Note that this implementation has exactly the same behavior as the
++     * default ops.select_cpu implementation. The behavior of the scheduler
++     * would be exactly the same if the implementation just didn't define the
++     * simple_select_cpu() struct_ops prog.
++     */
++    s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p,
++                       s32 prev_cpu, u64 wake_flags)
++    {
++            s32 cpu;
++            /* Need to initialize or the BPF verifier will reject the program */
++            bool direct = false;
++
++            cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &direct);
++
++            if (direct)
++                    scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
++
++            return cpu;
++    }
++
++    /*
++     * Do a direct dispatch of a task to the global DSQ. This ops.enqueue()
++     * callback will only be invoked if we failed to find a core to dispatch
++     * to in ops.select_cpu() above.
++     *
++     * Note that this implementation has exactly the same behavior as the
++     * default ops.enqueue implementation, which just dispatches the task
++     * to SCX_DSQ_GLOBAL. The behavior of the scheduler would be exactly the
++     * same if the implementation just didn't define the simple_enqueue
++     * struct_ops prog.
++     */
++    void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags)
++    {
++            scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
++    }
++
++    s32 BPF_STRUCT_OPS_SLEEPABLE(simple_init)
++    {
++            /*
++             * By default, all SCHED_EXT, SCHED_OTHER, SCHED_IDLE, and
++             * SCHED_BATCH tasks should use sched_ext.
++             */
++            return 0;
++    }
++
++    void BPF_STRUCT_OPS(simple_exit, struct scx_exit_info *ei)
++    {
++            exit_type = ei->type;
++    }
++
++    SEC(".struct_ops")
++    struct sched_ext_ops simple_ops = {
++            .select_cpu             = (void *)simple_select_cpu,
++            .enqueue                = (void *)simple_enqueue,
++            .init                   = (void *)simple_init,
++            .exit                   = (void *)simple_exit,
++            .name                   = "simple",
++    };
++
++Dispatch Queues
++---------------
++
++To match the impedance between the scheduler core and the BPF scheduler,
++sched_ext uses DSQs (dispatch queues) which can operate as both a FIFO and a
++priority queue. By default, there is one global FIFO (``SCX_DSQ_GLOBAL``),
++and one local DSQ per CPU (``SCX_DSQ_LOCAL``). The BPF scheduler can manage
++an arbitrary number of DSQs using ``scx_bpf_create_dsq()`` and
++``scx_bpf_destroy_dsq()``.
++
++A CPU always executes a task from its local DSQ. A task is "dispatched" to a
++DSQ. A non-local DSQ is "consumed" to transfer a task to the consuming CPU's
++local DSQ.
++
++When a CPU is looking for the next task to run, if the local DSQ is not
++empty, the first task is picked. Otherwise, the CPU tries to consume the
++global DSQ. If that doesn't yield a runnable task either, ``ops.dispatch()``
++is invoked.
++
++Scheduling Cycle
++----------------
++
++The following briefly shows how a waking task is scheduled and executed.
++
++1. When a task is waking up, ``ops.select_cpu()`` is the first operation
++   invoked. This serves two purposes. First, CPU selection optimization
++   hint. Second, waking up the selected CPU if idle.
++
++   The CPU selected by ``ops.select_cpu()`` is an optimization hint and not
++   binding. The actual decision is made at the last step of scheduling.
++   However, there is a small performance gain if the CPU
++   ``ops.select_cpu()`` returns matches the CPU the task eventually runs on.
++
++   A side-effect of selecting a CPU is waking it up from idle. While a BPF
++   scheduler can wake up any cpu using the ``scx_bpf_kick_cpu()`` helper,
++   using ``ops.select_cpu()`` judiciously can be simpler and more efficient.
++
++   A task can be immediately dispatched to a DSQ from ``ops.select_cpu()`` by
++   calling ``scx_bpf_dispatch()``. If the task is dispatched to
++   ``SCX_DSQ_LOCAL`` from ``ops.select_cpu()``, it will be dispatched to the
++   local DSQ of whichever CPU is returned from ``ops.select_cpu()``.
++   Additionally, dispatching directly from ``ops.select_cpu()`` will cause the
++   ``ops.enqueue()`` callback to be skipped.
++
++   Note that the scheduler core will ignore an invalid CPU selection, for
++   example, if it's outside the allowed cpumask of the task.
++
++2. Once the target CPU is selected, ``ops.enqueue()`` is invoked (unless the
++   task was dispatched directly from ``ops.select_cpu()``). ``ops.enqueue()``
++   can make one of the following decisions:
++
++   * Immediately dispatch the task to either the global or local DSQ by
++     calling ``scx_bpf_dispatch()`` with ``SCX_DSQ_GLOBAL`` or
++     ``SCX_DSQ_LOCAL``, respectively.
++
++   * Immediately dispatch the task to a custom DSQ by calling
++     ``scx_bpf_dispatch()`` with a DSQ ID which is smaller than 2^63.
++
++   * Queue the task on the BPF side.
++
++3. When a CPU is ready to schedule, it first looks at its local DSQ. If
++   empty, it then looks at the global DSQ. If there still isn't a task to
++   run, ``ops.dispatch()`` is invoked which can use the following two
++   functions to populate the local DSQ.
++
++   * ``scx_bpf_dispatch()`` dispatches a task to a DSQ. Any target DSQ can
++     be used - ``SCX_DSQ_LOCAL``, ``SCX_DSQ_LOCAL_ON | cpu``,
++     ``SCX_DSQ_GLOBAL`` or a custom DSQ. While ``scx_bpf_dispatch()``
++     currently can't be called with BPF locks held, this is being worked on
++     and will be supported. ``scx_bpf_dispatch()`` schedules dispatches
++     rather than performing them immediately. There can be up to
++     ``ops.dispatch_max_batch`` pending tasks.
++
++   * ``scx_bpf_consume()`` transfers a task from the specified non-local DSQ
++     to the dispatching CPU's local DSQ. This function cannot be called with
++     any BPF locks held. ``scx_bpf_consume()`` flushes the pending dispatched
++     tasks before trying to consume the specified DSQ.
++
++4. After ``ops.dispatch()`` returns, if there are tasks in the local DSQ,
++   the CPU runs the first one. If empty, the following steps are taken:
++
++   * Try to consume the global DSQ. If successful, run the task.
++
++   * If ``ops.dispatch()`` has dispatched any tasks, retry #3.
++
++   * If the previous task is an SCX task and still runnable, keep executing
++     it (see ``SCX_OPS_ENQ_LAST``).
++
++   * Go idle.
++
++Note that the BPF scheduler can always choose to dispatch tasks immediately
++in ``ops.enqueue()`` as illustrated in the above simple example. If only the
++built-in DSQs are used, there is no need to implement ``ops.dispatch()`` as
++a task is never queued on the BPF scheduler and both the local and global
++DSQs are consumed automatically.
++
++``scx_bpf_dispatch()`` queues the task on the FIFO of the target DSQ. Use
++``scx_bpf_dispatch_vtime()`` for the priority queue. Internal DSQs such as
++``SCX_DSQ_LOCAL`` and ``SCX_DSQ_GLOBAL`` do not support priority-queue
++dispatching, and must be dispatched to with ``scx_bpf_dispatch()``.  See the
++function documentation and usage in ``tools/sched_ext/scx_simple.bpf.c`` for
++more information.
++
++Where to Look
++=============
++
++* ``include/linux/sched/ext.h`` defines the core data structures, ops table
++  and constants.
++
++* ``kernel/sched/ext.c`` contains sched_ext core implementation and helpers.
++  The functions prefixed with ``scx_bpf_`` can be called from the BPF
++  scheduler.
++
++* ``tools/sched_ext/`` hosts example BPF scheduler implementations.
++
++  * ``scx_simple[.bpf].c``: Minimal global FIFO scheduler example using a
++    custom DSQ.
++
++  * ``scx_qmap[.bpf].c``: A multi-level FIFO scheduler supporting five
++    levels of priority implemented with ``BPF_MAP_TYPE_QUEUE``.
++
++ABI Instability
++===============
++
++The APIs provided by sched_ext to BPF scheduler programs have no stability
++guarantees. This includes the ops table callbacks and constants defined in
++``include/linux/sched/ext.h``, as well as the ``scx_bpf_`` kfuncs defined in
++``kernel/sched/ext.c``.
++
++While we will attempt to provide a relatively stable API surface when
++possible, they are subject to change without warning between kernel
++versions.
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 16df466c205d..3345a15afded 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -20353,6 +20353,19 @@ F:	include/linux/wait.h
+ F:	include/uapi/linux/sched.h
+ F:	kernel/sched/
+ 
++SCHEDULER - SCHED_EXT
++R:	Tejun Heo <tj@kernel.org>
++R:	David Vernet <void@manifault.com>
++L:	linux-kernel@vger.kernel.org
++S:	Maintained
++W:	https://github.com/sched-ext/scx
++T:	git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git
++F:	include/linux/sched/ext.h
++F:	kernel/sched/ext.h
++F:	kernel/sched/ext.c
++F:	tools/sched_ext/
++F:	tools/testing/selftests/sched_ext
++
+ SCIOSENSE ENS160 MULTI-GAS SENSOR DRIVER
+ M:	Gustavo Silva <gustavograzs@gmail.com>
+ S:	Maintained
+diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
+index 14f8f00fdcf9..930b04e3d148 100644
+--- a/drivers/tty/sysrq.c
++++ b/drivers/tty/sysrq.c
+@@ -531,6 +531,7 @@ static const struct sysrq_key_op *sysrq_key_table[62] = {
+ 	NULL,				/* P */
+ 	NULL,				/* Q */
+ 	&sysrq_replay_logs_op,		/* R */
++	/* S: May be registered by sched_ext for resetting */
+ 	NULL,				/* S */
+ 	NULL,				/* T */
+ 	NULL,				/* U */
+diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
+index 1ae44793132a..19ec49a9179b 100644
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -133,6 +133,7 @@
+ 	*(__dl_sched_class)			\
+ 	*(__rt_sched_class)			\
+ 	*(__fair_sched_class)			\
++	*(__ext_sched_class)			\
+ 	*(__idle_sched_class)			\
+ 	__sched_class_lowest = .;
+ 
+diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
+index c60ba0ab1462..7139b33cb104 100644
+--- a/include/linux/cgroup.h
++++ b/include/linux/cgroup.h
+@@ -28,8 +28,6 @@
+ 
+ struct kernel_clone_args;
+ 
+-#ifdef CONFIG_CGROUPS
+-
+ /*
+  * All weight knobs on the default hierarchy should use the following min,
+  * default and max values.  The default value is the logarithmic center of
+@@ -39,6 +37,8 @@ struct kernel_clone_args;
+ #define CGROUP_WEIGHT_DFL		100
+ #define CGROUP_WEIGHT_MAX		10000
+ 
++#ifdef CONFIG_CGROUPS
++
+ enum {
+ 	CSS_TASK_ITER_PROCS    = (1U << 0),  /* walk only threadgroup leaders */
+ 	CSS_TASK_ITER_THREADED = (1U << 1),  /* walk all threaded css_sets in the domain */
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 1c771ea4481d..c5a7901b2580 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -82,6 +82,8 @@ struct task_group;
+ struct task_struct;
+ struct user_event_mm;
+ 
++#include <linux/sched/ext.h>
++
+ /*
+  * Task state bitmask. NOTE! These bits are also
+  * encoded in fs/proc/array.c: get_task_state().
+@@ -812,6 +814,9 @@ struct task_struct {
+ 	struct sched_rt_entity		rt;
+ 	struct sched_dl_entity		dl;
+ 	struct sched_dl_entity		*dl_server;
++#ifdef CONFIG_SCHED_CLASS_EXT
++	struct sched_ext_entity		scx;
++#endif
+ 	const struct sched_class	*sched_class;
+ 
+ #ifdef CONFIG_SCHED_CORE
+diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
+new file mode 100644
+index 000000000000..76166d3b14fc
+--- /dev/null
++++ b/include/linux/sched/ext.h
+@@ -0,0 +1,216 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst
++ *
++ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
++ */
++#ifndef _LINUX_SCHED_EXT_H
++#define _LINUX_SCHED_EXT_H
++
++#ifdef CONFIG_SCHED_CLASS_EXT
++
++#include <linux/llist.h>
++#include <linux/rhashtable-types.h>
++
++enum scx_public_consts {
++	SCX_OPS_NAME_LEN	= 128,
++
++	SCX_SLICE_DFL		= 20 * 1000000,	/* 20ms */
++	SCX_SLICE_INF		= U64_MAX,	/* infinite, implies nohz */
++};
++
++/*
++ * DSQ (dispatch queue) IDs are 64bit of the format:
++ *
++ *   Bits: [63] [62 ..  0]
++ *         [ B] [   ID   ]
++ *
++ *    B: 1 for IDs for built-in DSQs, 0 for ops-created user DSQs
++ *   ID: 63 bit ID
++ *
++ * Built-in IDs:
++ *
++ *   Bits: [63] [62] [61..32] [31 ..  0]
++ *         [ 1] [ L] [   R  ] [    V   ]
++ *
++ *    1: 1 for built-in DSQs.
++ *    L: 1 for LOCAL_ON DSQ IDs, 0 for others
++ *    V: For LOCAL_ON DSQ IDs, a CPU number. For others, a pre-defined value.
++ */
++enum scx_dsq_id_flags {
++	SCX_DSQ_FLAG_BUILTIN	= 1LLU << 63,
++	SCX_DSQ_FLAG_LOCAL_ON	= 1LLU << 62,
++
++	SCX_DSQ_INVALID		= SCX_DSQ_FLAG_BUILTIN | 0,
++	SCX_DSQ_GLOBAL		= SCX_DSQ_FLAG_BUILTIN | 1,
++	SCX_DSQ_LOCAL		= SCX_DSQ_FLAG_BUILTIN | 2,
++	SCX_DSQ_LOCAL_ON	= SCX_DSQ_FLAG_BUILTIN | SCX_DSQ_FLAG_LOCAL_ON,
++	SCX_DSQ_LOCAL_CPU_MASK	= 0xffffffffLLU,
++};
++
++/*
++ * A dispatch queue (DSQ) can be either a FIFO or p->scx.dsq_vtime ordered
++ * queue. A built-in DSQ is always a FIFO. The built-in local DSQs are used to
++ * buffer between the scheduler core and the BPF scheduler. See the
++ * documentation for more details.
++ */
++struct scx_dispatch_q {
++	raw_spinlock_t		lock;
++	struct list_head	list;	/* tasks in dispatch order */
++	struct rb_root		priq;	/* used to order by p->scx.dsq_vtime */
++	u32			nr;
++	u32			seq;	/* used by BPF iter */
++	u64			id;
++	struct rhash_head	hash_node;
++	struct llist_node	free_node;
++	struct rcu_head		rcu;
++};
++
++/* sched_ext_entity.flags */
++enum scx_ent_flags {
++	SCX_TASK_QUEUED		= 1 << 0, /* on ext runqueue */
++	SCX_TASK_BAL_KEEP	= 1 << 1, /* balance decided to keep current */
++	SCX_TASK_RESET_RUNNABLE_AT = 1 << 2, /* runnable_at should be reset */
++	SCX_TASK_DEQD_FOR_SLEEP	= 1 << 3, /* last dequeue was for SLEEP */
++
++	SCX_TASK_STATE_SHIFT	= 8,	  /* bit 8 and 9 are used to carry scx_task_state */
++	SCX_TASK_STATE_BITS	= 2,
++	SCX_TASK_STATE_MASK	= ((1 << SCX_TASK_STATE_BITS) - 1) << SCX_TASK_STATE_SHIFT,
++
++	SCX_TASK_CURSOR		= 1 << 31, /* iteration cursor, not a task */
++};
++
++/* sched_ext_entity.flags & SCX_TASK_STATE_MASK */
++enum scx_task_state {
++	SCX_TASK_NONE,		/* ops.init_task() not called yet */
++	SCX_TASK_INIT,		/* ops.init_task() succeeded, but task can be cancelled */
++	SCX_TASK_READY,		/* fully initialized, but not in sched_ext */
++	SCX_TASK_ENABLED,	/* fully initialized and in sched_ext */
++
++	SCX_TASK_NR_STATES,
++};
++
++/* sched_ext_entity.dsq_flags */
++enum scx_ent_dsq_flags {
++	SCX_TASK_DSQ_ON_PRIQ	= 1 << 0, /* task is queued on the priority queue of a dsq */
++};
++
++/*
++ * Mask bits for sched_ext_entity.kf_mask. Not all kfuncs can be called from
++ * everywhere and the following bits track which kfunc sets are currently
++ * allowed for %current. This simple per-task tracking works because SCX ops
++ * nest in a limited way. BPF will likely implement a way to allow and disallow
++ * kfuncs depending on the calling context which will replace this manual
++ * mechanism. See scx_kf_allow().
++ */
++enum scx_kf_mask {
++	SCX_KF_UNLOCKED		= 0,	  /* sleepable and not rq locked */
++	/* ENQUEUE and DISPATCH may be nested inside CPU_RELEASE */
++	SCX_KF_CPU_RELEASE	= 1 << 0, /* ops.cpu_release() */
++	/* ops.dequeue (in REST) may be nested inside DISPATCH */
++	SCX_KF_DISPATCH		= 1 << 1, /* ops.dispatch() */
++	SCX_KF_ENQUEUE		= 1 << 2, /* ops.enqueue() and ops.select_cpu() */
++	SCX_KF_SELECT_CPU	= 1 << 3, /* ops.select_cpu() */
++	SCX_KF_REST		= 1 << 4, /* other rq-locked operations */
++
++	__SCX_KF_RQ_LOCKED	= SCX_KF_CPU_RELEASE | SCX_KF_DISPATCH |
++				  SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST,
++	__SCX_KF_TERMINAL	= SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST,
++};
++
++enum scx_dsq_lnode_flags {
++	SCX_DSQ_LNODE_ITER_CURSOR = 1 << 0,
++
++	/* high 16 bits can be for iter cursor flags */
++	__SCX_DSQ_LNODE_PRIV_SHIFT = 16,
++};
++
++struct scx_dsq_list_node {
++	struct list_head	node;
++	u32			flags;
++	u32			priv;		/* can be used by iter cursor */
++};
++
++/*
++ * The following is embedded in task_struct and contains all fields necessary
++ * for a task to be scheduled by SCX.
++ */
++struct sched_ext_entity {
++	struct scx_dispatch_q	*dsq;
++	struct scx_dsq_list_node dsq_list;	/* dispatch order */
++	struct rb_node		dsq_priq;	/* p->scx.dsq_vtime order */
++	u32			dsq_seq;
++	u32			dsq_flags;	/* protected by DSQ lock */
++	u32			flags;		/* protected by rq lock */
++	u32			weight;
++	s32			sticky_cpu;
++	s32			holding_cpu;
++	u32			kf_mask;	/* see scx_kf_mask above */
++	struct task_struct	*kf_tasks[2];	/* see SCX_CALL_OP_TASK() */
++	atomic_long_t		ops_state;
++
++	struct list_head	runnable_node;	/* rq->scx.runnable_list */
++	unsigned long		runnable_at;
++
++#ifdef CONFIG_SCHED_CORE
++	u64			core_sched_at;	/* see scx_prio_less() */
++#endif
++	u64			ddsp_dsq_id;
++	u64			ddsp_enq_flags;
++
++	/* BPF scheduler modifiable fields */
++
++	/*
++	 * Runtime budget in nsecs. This is usually set through
++	 * scx_bpf_dispatch() but can also be modified directly by the BPF
++	 * scheduler. Automatically decreased by SCX as the task executes. On
++	 * depletion, a scheduling event is triggered.
++	 *
++	 * This value is cleared to zero if the task is preempted by
++	 * %SCX_KICK_PREEMPT and shouldn't be used to determine how long the
++	 * task ran. Use p->se.sum_exec_runtime instead.
++	 */
++	u64			slice;
++
++	/*
++	 * Used to order tasks when dispatching to the vtime-ordered priority
++	 * queue of a dsq. This is usually set through scx_bpf_dispatch_vtime()
++	 * but can also be modified directly by the BPF scheduler. Modifying it
++	 * while a task is queued on a dsq may mangle the ordering and is not
++	 * recommended.
++	 */
++	u64			dsq_vtime;
++
++	/*
++	 * If set, reject future sched_setscheduler(2) calls updating the policy
++	 * to %SCHED_EXT with -%EACCES.
++	 *
++	 * Can be set from ops.init_task() while the BPF scheduler is being
++	 * loaded (!scx_init_task_args->fork). If set and the task's policy is
++	 * already %SCHED_EXT, the task's policy is rejected and forcefully
++	 * reverted to %SCHED_NORMAL. The number of such events is reported
++	 * through /sys/kernel/debug/sched_ext::nr_rejected. Setting this flag
++	 * during fork is not allowed.
++	 */
++	bool			disallow;	/* reject switching into SCX */
++
++	/* cold fields */
++#ifdef CONFIG_EXT_GROUP_SCHED
++	struct cgroup		*cgrp_moving_from;
++#endif
++	/* must be the last field, see init_scx_entity() */
++	struct list_head	tasks_node;
++};
++
++void sched_ext_free(struct task_struct *p);
++void print_scx_info(const char *log_lvl, struct task_struct *p);
++
++#else	/* !CONFIG_SCHED_CLASS_EXT */
++
++static inline void sched_ext_free(struct task_struct *p) {}
++static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {}
++
++#endif	/* CONFIG_SCHED_CLASS_EXT */
++#endif	/* _LINUX_SCHED_EXT_H */
+diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
+index d362aacf9f89..0f2aeb37bbb0 100644
+--- a/include/linux/sched/task.h
++++ b/include/linux/sched/task.h
+@@ -63,7 +63,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
+ extern void init_idle(struct task_struct *idle, int cpu);
+ 
+ extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
+-extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs);
++extern int sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs);
++extern void sched_cancel_fork(struct task_struct *p);
+ extern void sched_post_fork(struct task_struct *p);
+ extern void sched_dead(struct task_struct *p);
+ 
+@@ -119,6 +120,11 @@ static inline struct task_struct *get_task_struct(struct task_struct *t)
+ 	return t;
+ }
+ 
++static inline struct task_struct *tryget_task_struct(struct task_struct *t)
++{
++	return refcount_inc_not_zero(&t->usage) ? t : NULL;
++}
++
+ extern void __put_task_struct(struct task_struct *t);
+ extern void __put_task_struct_rcu_cb(struct rcu_head *rhp);
+ 
+diff --git a/include/trace/events/sched_ext.h b/include/trace/events/sched_ext.h
+new file mode 100644
+index 000000000000..fe19da7315a9
+--- /dev/null
++++ b/include/trace/events/sched_ext.h
+@@ -0,0 +1,32 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#undef TRACE_SYSTEM
++#define TRACE_SYSTEM sched_ext
++
++#if !defined(_TRACE_SCHED_EXT_H) || defined(TRACE_HEADER_MULTI_READ)
++#define _TRACE_SCHED_EXT_H
++
++#include <linux/tracepoint.h>
++
++TRACE_EVENT(sched_ext_dump,
++
++	TP_PROTO(const char *line),
++
++	TP_ARGS(line),
++
++	TP_STRUCT__entry(
++		__string(line, line)
++	),
++
++	TP_fast_assign(
++		__assign_str(line);
++	),
++
++	TP_printk("%s",
++		__get_str(line)
++	)
++);
++
++#endif /* _TRACE_SCHED_EXT_H */
++
++/* This part must be outside protection */
++#include <trace/define_trace.h>
+diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
+index 3bac0a8ceab2..359a14cc76a4 100644
+--- a/include/uapi/linux/sched.h
++++ b/include/uapi/linux/sched.h
+@@ -118,6 +118,7 @@ struct clone_args {
+ /* SCHED_ISO: reserved but not implemented yet */
+ #define SCHED_IDLE		5
+ #define SCHED_DEADLINE		6
++#define SCHED_EXT		7
+ 
+ /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
+ #define SCHED_RESET_ON_FORK     0x40000000
+diff --git a/init/Kconfig b/init/Kconfig
+index 08a0d51afaae..e1a88d48d652 100644
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -1028,9 +1028,13 @@ menuconfig CGROUP_SCHED
+ 	  tasks.
+ 
+ if CGROUP_SCHED
++config GROUP_SCHED_WEIGHT
++	def_bool n
++
+ config FAIR_GROUP_SCHED
+ 	bool "Group scheduling for SCHED_OTHER"
+ 	depends on CGROUP_SCHED
++	select GROUP_SCHED_WEIGHT
+ 	default CGROUP_SCHED
+ 
+ config CFS_BANDWIDTH
+@@ -1055,6 +1059,12 @@ config RT_GROUP_SCHED
+ 	  realtime bandwidth for them.
+ 	  See Documentation/scheduler/sched-rt-group.rst for more information.
+ 
++config EXT_GROUP_SCHED
++	bool
++	depends on SCHED_CLASS_EXT && CGROUP_SCHED
++	select GROUP_SCHED_WEIGHT
++	default y
++
+ endif #CGROUP_SCHED
+ 
+ config SCHED_MM_CID
+diff --git a/init/init_task.c b/init/init_task.c
+index eeb110c65fe2..e222722e790b 100644
+--- a/init/init_task.c
++++ b/init/init_task.c
+@@ -6,6 +6,7 @@
+ #include <linux/sched/sysctl.h>
+ #include <linux/sched/rt.h>
+ #include <linux/sched/task.h>
++#include <linux/sched/ext.h>
+ #include <linux/init.h>
+ #include <linux/fs.h>
+ #include <linux/mm.h>
+@@ -98,6 +99,17 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
+ #endif
+ #ifdef CONFIG_CGROUP_SCHED
+ 	.sched_task_group = &root_task_group,
++#endif
++#ifdef CONFIG_SCHED_CLASS_EXT
++	.scx		= {
++		.dsq_list.node	= LIST_HEAD_INIT(init_task.scx.dsq_list.node),
++		.sticky_cpu	= -1,
++		.holding_cpu	= -1,
++		.runnable_node	= LIST_HEAD_INIT(init_task.scx.runnable_node),
++		.runnable_at	= INITIAL_JIFFIES,
++		.ddsp_dsq_id	= SCX_DSQ_INVALID,
++		.slice		= SCX_SLICE_DFL,
++	},
+ #endif
+ 	.ptraced	= LIST_HEAD_INIT(init_task.ptraced),
+ 	.ptrace_entry	= LIST_HEAD_INIT(init_task.ptrace_entry),
+diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
+index c2f1fd95a821..fe782cd77388 100644
+--- a/kernel/Kconfig.preempt
++++ b/kernel/Kconfig.preempt
+@@ -133,4 +133,29 @@ config SCHED_CORE
+ 	  which is the likely usage by Linux distributions, there should
+ 	  be no measurable impact on performance.
+ 
+-
++config SCHED_CLASS_EXT
++	bool "Extensible Scheduling Class"
++	depends on BPF_SYSCALL && BPF_JIT && DEBUG_INFO_BTF
++	select STACKTRACE if STACKTRACE_SUPPORT
++	help
++	  This option enables a new scheduler class sched_ext (SCX), which
++	  allows scheduling policies to be implemented as BPF programs to
++	  achieve the following:
++
++	  - Ease of experimentation and exploration: Enabling rapid
++	    iteration of new scheduling policies.
++	  - Customization: Building application-specific schedulers which
++	    implement policies that are not applicable to general-purpose
++	    schedulers.
++	  - Rapid scheduler deployments: Non-disruptive swap outs of
++	    scheduling policies in production environments.
++
++	  sched_ext leverages BPF struct_ops feature to define a structure
++	  which exports function callbacks and flags to BPF programs that
++	  wish to implement scheduling policies. The struct_ops structure
++	  exported by sched_ext is struct sched_ext_ops, and is conceptually
++	  similar to struct sched_class.
++
++	  For more information:
++	    Documentation/scheduler/sched-ext.rst
++	    https://github.com/sched-ext/scx
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 003de4829c15..eb290420d926 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -23,6 +23,7 @@
+ #include <linux/sched/task.h>
+ #include <linux/sched/task_stack.h>
+ #include <linux/sched/cputime.h>
++#include <linux/sched/ext.h>
+ #include <linux/seq_file.h>
+ #include <linux/rtmutex.h>
+ #include <linux/init.h>
+@@ -973,6 +974,7 @@ void __put_task_struct(struct task_struct *tsk)
+ 	WARN_ON(refcount_read(&tsk->usage));
+ 	WARN_ON(tsk == current);
+ 
++	sched_ext_free(tsk);
+ 	io_uring_free(tsk);
+ 	cgroup_free(tsk);
+ 	task_numa_free(tsk, true);
+@@ -2352,7 +2354,7 @@ __latent_entropy struct task_struct *copy_process(
+ 
+ 	retval = perf_event_init_task(p, clone_flags);
+ 	if (retval)
+-		goto bad_fork_cleanup_policy;
++		goto bad_fork_sched_cancel_fork;
+ 	retval = audit_alloc(p);
+ 	if (retval)
+ 		goto bad_fork_cleanup_perf;
+@@ -2485,7 +2487,9 @@ __latent_entropy struct task_struct *copy_process(
+ 	 * cgroup specific, it unconditionally needs to place the task on a
+ 	 * runqueue.
+ 	 */
+-	sched_cgroup_fork(p, args);
++	retval = sched_cgroup_fork(p, args);
++	if (retval)
++		goto bad_fork_cancel_cgroup;
+ 
+ 	/*
+ 	 * From this point on we must avoid any synchronous user-space
+@@ -2531,13 +2535,13 @@ __latent_entropy struct task_struct *copy_process(
+ 	/* Don't start children in a dying pid namespace */
+ 	if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) {
+ 		retval = -ENOMEM;
+-		goto bad_fork_cancel_cgroup;
++		goto bad_fork_core_free;
+ 	}
+ 
+ 	/* Let kill terminate clone/fork in the middle */
+ 	if (fatal_signal_pending(current)) {
+ 		retval = -EINTR;
+-		goto bad_fork_cancel_cgroup;
++		goto bad_fork_core_free;
+ 	}
+ 
+ 	/* No more failure paths after this point. */
+@@ -2611,10 +2615,11 @@ __latent_entropy struct task_struct *copy_process(
+ 
+ 	return p;
+ 
+-bad_fork_cancel_cgroup:
++bad_fork_core_free:
+ 	sched_core_free(p);
+ 	spin_unlock(&current->sighand->siglock);
+ 	write_unlock_irq(&tasklist_lock);
++bad_fork_cancel_cgroup:
+ 	cgroup_cancel_fork(p, args);
+ bad_fork_put_pidfd:
+ 	if (clone_flags & CLONE_PIDFD) {
+@@ -2653,6 +2658,8 @@ __latent_entropy struct task_struct *copy_process(
+ 	audit_free(p);
+ bad_fork_cleanup_perf:
+ 	perf_event_free_task(p);
++bad_fork_sched_cancel_fork:
++	sched_cancel_fork(p);
+ bad_fork_cleanup_policy:
+ 	lockdep_free_task(p);
+ #ifdef CONFIG_NUMA
+diff --git a/kernel/sched/build_policy.c b/kernel/sched/build_policy.c
+index 39c315182b35..fae1f5c921eb 100644
+--- a/kernel/sched/build_policy.c
++++ b/kernel/sched/build_policy.c
+@@ -16,18 +16,25 @@
+ #include <linux/sched/clock.h>
+ #include <linux/sched/cputime.h>
+ #include <linux/sched/hotplug.h>
++#include <linux/sched/isolation.h>
+ #include <linux/sched/posix-timers.h>
+ #include <linux/sched/rt.h>
+ 
+ #include <linux/cpuidle.h>
+ #include <linux/jiffies.h>
++#include <linux/kobject.h>
+ #include <linux/livepatch.h>
++#include <linux/pm.h>
+ #include <linux/psi.h>
++#include <linux/rhashtable.h>
++#include <linux/seq_buf.h>
+ #include <linux/seqlock_api.h>
+ #include <linux/slab.h>
+ #include <linux/suspend.h>
+ #include <linux/tsacct_kern.h>
+ #include <linux/vtime.h>
++#include <linux/sysrq.h>
++#include <linux/percpu-rwsem.h>
+ 
+ #include <uapi/linux/sched/types.h>
+ 
+@@ -52,4 +59,8 @@
+ #include "cputime.c"
+ #include "deadline.c"
+ 
++#ifdef CONFIG_SCHED_CLASS_EXT
++# include "ext.c"
++#endif
++
+ #include "syscalls.c"
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 1af59cf714cd..8ae04bd4a5a4 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -169,7 +169,10 @@ static inline int __task_prio(const struct task_struct *p)
+ 	if (p->sched_class == &idle_sched_class)
+ 		return MAX_RT_PRIO + NICE_WIDTH; /* 140 */
+ 
+-	return MAX_RT_PRIO + MAX_NICE; /* 120, squash fair */
++	if (task_on_scx(p))
++		return MAX_RT_PRIO + MAX_NICE + 1; /* 120, squash ext */
++
++	return MAX_RT_PRIO + MAX_NICE; /* 119, squash fair */
+ }
+ 
+ /*
+@@ -198,6 +201,11 @@ static inline bool prio_less(const struct task_struct *a,
+ 	if (pa == MAX_RT_PRIO + MAX_NICE)	/* fair */
+ 		return cfs_prio_less(a, b, in_fi);
+ 
++#ifdef CONFIG_SCHED_CLASS_EXT
++	if (pa == MAX_RT_PRIO + MAX_NICE + 1)	/* ext */
++		return scx_prio_less(a, b, in_fi);
++#endif
++
+ 	return false;
+ }
+ 
+@@ -1255,11 +1263,14 @@ bool sched_can_stop_tick(struct rq *rq)
+ 		return true;
+ 
+ 	/*
+-	 * If there are no DL,RR/FIFO tasks, there must only be CFS tasks left;
+-	 * if there's more than one we need the tick for involuntary
+-	 * preemption.
++	 * If there are no DL,RR/FIFO tasks, there must only be CFS or SCX tasks
++	 * left. For CFS, if there's more than one we need the tick for
++	 * involuntary preemption. For SCX, ask.
+ 	 */
+-	if (rq->nr_running > 1)
++	if (scx_enabled() && !scx_can_stop_tick(rq))
++		return false;
++
++	if (rq->cfs.nr_running > 1)
+ 		return false;
+ 
+ 	/*
+@@ -1341,8 +1352,8 @@ void set_load_weight(struct task_struct *p, bool update_load)
+ 	 * SCHED_OTHER tasks have to update their load when changing their
+ 	 * weight
+ 	 */
+-	if (update_load && p->sched_class == &fair_sched_class)
+-		reweight_task(p, &lw);
++	if (update_load && p->sched_class->reweight_task)
++		p->sched_class->reweight_task(task_rq(p), p, &lw);
+ 	else
+ 		p->se.load = lw;
+ }
+@@ -2031,6 +2042,17 @@ inline int task_curr(const struct task_struct *p)
+ 	return cpu_curr(task_cpu(p)) == p;
+ }
+ 
++/*
++ * ->switching_to() is called with the pi_lock and rq_lock held and must not
++ * mess with locking.
++ */
++void check_class_changing(struct rq *rq, struct task_struct *p,
++			  const struct sched_class *prev_class)
++{
++	if (prev_class != p->sched_class && p->sched_class->switching_to)
++		p->sched_class->switching_to(rq, p);
++}
++
+ /*
+  * switched_from, switched_to and prio_changed must _NOT_ drop rq->lock,
+  * use the balance_callback list if you want balancing.
+@@ -2289,7 +2311,7 @@ static inline bool rq_has_pinned_tasks(struct rq *rq)
+ static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
+ {
+ 	/* When not in the task's cpumask, no point in looking further. */
+-	if (!cpumask_test_cpu(cpu, p->cpus_ptr))
++	if (!task_allowed_on_cpu(p, cpu))
+ 		return false;
+ 
+ 	/* migrate_disabled() must be allowed to finish. */
+@@ -2298,7 +2320,7 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
+ 
+ 	/* Non kernel threads are not allowed during either online or offline. */
+ 	if (!(p->flags & PF_KTHREAD))
+-		return cpu_active(cpu) && task_cpu_possible(cpu, p);
++		return cpu_active(cpu);
+ 
+ 	/* KTHREAD_IS_PER_CPU is always allowed. */
+ 	if (kthread_is_per_cpu(p))
+@@ -3775,6 +3797,15 @@ bool cpus_share_resources(int this_cpu, int that_cpu)
+ 
+ static inline bool ttwu_queue_cond(struct task_struct *p, int cpu)
+ {
++	/*
++	 * The BPF scheduler may depend on select_task_rq() being invoked during
++	 * wakeups. In addition, @p may end up executing on a different CPU
++	 * regardless of what happens in the wakeup path making the ttwu_queue
++	 * optimization less meaningful. Skip if on SCX.
++	 */
++	if (task_on_scx(p))
++		return false;
++
+ 	/*
+ 	 * Do not complicate things with the async wake_list while the CPU is
+ 	 * in hotplug state.
+@@ -4342,6 +4373,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
+ 	p->rt.on_rq		= 0;
+ 	p->rt.on_list		= 0;
+ 
++#ifdef CONFIG_SCHED_CLASS_EXT
++	init_scx_entity(&p->scx);
++#endif
++
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+ 	INIT_HLIST_HEAD(&p->preempt_notifiers);
+ #endif
+@@ -4582,10 +4617,18 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
+ 
+ 	if (dl_prio(p->prio))
+ 		return -EAGAIN;
+-	else if (rt_prio(p->prio))
++
++	scx_pre_fork(p);
++
++	if (rt_prio(p->prio)) {
+ 		p->sched_class = &rt_sched_class;
+-	else
++#ifdef CONFIG_SCHED_CLASS_EXT
++	} else if (task_should_scx(p)) {
++		p->sched_class = &ext_sched_class;
++#endif
++	} else {
+ 		p->sched_class = &fair_sched_class;
++	}
+ 
+ 	init_entity_runnable_average(&p->se);
+ 
+@@ -4605,7 +4648,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
+ 	return 0;
+ }
+ 
+-void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
++int sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
+ {
+ 	unsigned long flags;
+ 
+@@ -4632,11 +4675,19 @@ void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
+ 	if (p->sched_class->task_fork)
+ 		p->sched_class->task_fork(p);
+ 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
++
++	return scx_fork(p);
++}
++
++void sched_cancel_fork(struct task_struct *p)
++{
++	scx_cancel_fork(p);
+ }
+ 
+ void sched_post_fork(struct task_struct *p)
+ {
+ 	uclamp_post_fork(p);
++	scx_post_fork(p);
+ }
+ 
+ unsigned long to_ratio(u64 period, u64 runtime)
+@@ -5469,6 +5520,7 @@ void sched_tick(void)
+ 	calc_global_load_tick(rq);
+ 	sched_core_tick(rq);
+ 	task_tick_mm_cid(rq, curr);
++	scx_tick(rq);
+ 
+ 	rq_unlock(rq, &rf);
+ 
+@@ -5481,8 +5533,10 @@ void sched_tick(void)
+ 		wq_worker_tick(curr);
+ 
+ #ifdef CONFIG_SMP
+-	rq->idle_balance = idle_cpu(cpu);
+-	sched_balance_trigger(rq);
++	if (!scx_switched_all()) {
++		rq->idle_balance = idle_cpu(cpu);
++		sched_balance_trigger(rq);
++	}
+ #endif
+ }
+ 
+@@ -5772,8 +5826,19 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
+ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
+ 				  struct rq_flags *rf)
+ {
+-#ifdef CONFIG_SMP
++	const struct sched_class *start_class = prev->sched_class;
+ 	const struct sched_class *class;
++
++#ifdef CONFIG_SCHED_CLASS_EXT
++	/*
++	 * SCX requires a balance() call before every pick_next_task() including
++	 * when waking up from SCHED_IDLE. If @start_class is below SCX, start
++	 * from SCX instead.
++	 */
++	if (scx_enabled() && sched_class_above(&ext_sched_class, start_class))
++		start_class = &ext_sched_class;
++#endif
++
+ 	/*
+ 	 * We must do the balancing pass before put_prev_task(), such
+ 	 * that when we release the rq->lock the task is in the same
+@@ -5782,11 +5847,10 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
+ 	 * We can terminate the balance pass as soon as we know there is
+ 	 * a runnable task of @class priority or higher.
+ 	 */
+-	for_class_range(class, prev->sched_class, &idle_sched_class) {
+-		if (class->balance(rq, prev, rf))
++	for_active_class_range(class, start_class, &idle_sched_class) {
++		if (class->balance && class->balance(rq, prev, rf))
+ 			break;
+ 	}
+-#endif
+ 
+ 	put_prev_task(rq, prev);
+ 
+@@ -5808,6 +5872,9 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+ 	const struct sched_class *class;
+ 	struct task_struct *p;
+ 
++	if (scx_enabled())
++		goto restart;
++
+ 	/*
+ 	 * Optimization: we know that if all tasks are in the fair class we can
+ 	 * call that function directly, but only if the @prev task wasn't of a
+@@ -5847,10 +5914,15 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+ restart:
+ 	put_prev_task_balance(rq, prev, rf);
+ 
+-	for_each_class(class) {
++	for_each_active_class(class) {
+ 		p = class->pick_next_task(rq);
+-		if (p)
++		if (p) {
++			const struct sched_class *prev_class = prev->sched_class;
++
++			if (class != prev_class && prev_class->switch_class)
++				prev_class->switch_class(rq, p);
+ 			return p;
++		}
+ 	}
+ 
+ 	BUG(); /* The idle class should always have a runnable task. */
+@@ -5880,7 +5952,7 @@ static inline struct task_struct *pick_task(struct rq *rq)
+ 	const struct sched_class *class;
+ 	struct task_struct *p;
+ 
+-	for_each_class(class) {
++	for_each_active_class(class) {
+ 		p = class->pick_task(rq);
+ 		if (p)
+ 			return p;
+@@ -6877,6 +6949,10 @@ void __setscheduler_prio(struct task_struct *p, int prio)
+ 		p->sched_class = &dl_sched_class;
+ 	else if (rt_prio(prio))
+ 		p->sched_class = &rt_sched_class;
++#ifdef CONFIG_SCHED_CLASS_EXT
++	else if (task_should_scx(p))
++		p->sched_class = &ext_sched_class;
++#endif
+ 	else
+ 		p->sched_class = &fair_sched_class;
+ 
+@@ -7022,6 +7098,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
+ 	}
+ 
+ 	__setscheduler_prio(p, prio);
++	check_class_changing(rq, p, prev_class);
+ 
+ 	if (queued)
+ 		enqueue_task(rq, p, queue_flag);
+@@ -7436,6 +7513,7 @@ void sched_show_task(struct task_struct *p)
+ 
+ 	print_worker_info(KERN_INFO, p);
+ 	print_stop_info(KERN_INFO, p);
++	print_scx_info(KERN_INFO, p);
+ 	show_stack(p, NULL, KERN_INFO);
+ 	put_task_stack(p);
+ }
+@@ -7964,6 +8042,8 @@ int sched_cpu_activate(unsigned int cpu)
+ 		cpuset_cpu_active();
+ 	}
+ 
++	scx_rq_activate(rq);
++
+ 	/*
+ 	 * Put the rq online, if not already. This happens:
+ 	 *
+@@ -8013,6 +8093,8 @@ int sched_cpu_deactivate(unsigned int cpu)
+ 
+ 	sched_set_rq_offline(rq, cpu);
+ 
++	scx_rq_deactivate(rq);
++
+ 	/*
+ 	 * When going down, decrement the number of cores with SMT present.
+ 	 */
+@@ -8197,11 +8279,15 @@ void __init sched_init(void)
+ 	int i;
+ 
+ 	/* Make sure the linker didn't screw up */
+-	BUG_ON(&idle_sched_class != &fair_sched_class + 1 ||
+-	       &fair_sched_class != &rt_sched_class + 1 ||
+-	       &rt_sched_class   != &dl_sched_class + 1);
+ #ifdef CONFIG_SMP
+-	BUG_ON(&dl_sched_class != &stop_sched_class + 1);
++	BUG_ON(!sched_class_above(&stop_sched_class, &dl_sched_class));
++#endif
++	BUG_ON(!sched_class_above(&dl_sched_class, &rt_sched_class));
++	BUG_ON(!sched_class_above(&rt_sched_class, &fair_sched_class));
++	BUG_ON(!sched_class_above(&fair_sched_class, &idle_sched_class));
++#ifdef CONFIG_SCHED_CLASS_EXT
++	BUG_ON(!sched_class_above(&fair_sched_class, &ext_sched_class));
++	BUG_ON(!sched_class_above(&ext_sched_class, &idle_sched_class));
+ #endif
+ 
+ 	wait_bit_init();
+@@ -8225,6 +8311,9 @@ void __init sched_init(void)
+ 		root_task_group.shares = ROOT_TASK_GROUP_LOAD;
+ 		init_cfs_bandwidth(&root_task_group.cfs_bandwidth, NULL);
+ #endif /* CONFIG_FAIR_GROUP_SCHED */
++#ifdef CONFIG_EXT_GROUP_SCHED
++		root_task_group.scx_weight = CGROUP_WEIGHT_DFL;
++#endif /* CONFIG_EXT_GROUP_SCHED */
+ #ifdef CONFIG_RT_GROUP_SCHED
+ 		root_task_group.rt_se = (struct sched_rt_entity **)ptr;
+ 		ptr += nr_cpu_ids * sizeof(void **);
+@@ -8370,6 +8459,7 @@ void __init sched_init(void)
+ 	balance_push_set(smp_processor_id(), false);
+ #endif
+ 	init_sched_fair_class();
++	init_sched_ext_class();
+ 
+ 	psi_init();
+ 
+@@ -8655,6 +8745,7 @@ struct task_group *sched_create_group(struct task_group *parent)
+ 	if (!alloc_rt_sched_group(tg, parent))
+ 		goto err;
+ 
++	scx_group_set_weight(tg, CGROUP_WEIGHT_DFL);
+ 	alloc_uclamp_sched_group(tg, parent);
+ 
+ 	return tg;
+@@ -8782,6 +8873,7 @@ void sched_move_task(struct task_struct *tsk)
+ 		put_prev_task(rq, tsk);
+ 
+ 	sched_change_group(tsk, group);
++	scx_move_task(tsk);
+ 
+ 	if (queued)
+ 		enqueue_task(rq, tsk, queue_flags);
+@@ -8796,11 +8888,6 @@ void sched_move_task(struct task_struct *tsk)
+ 	}
+ }
+ 
+-static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
+-{
+-	return css ? container_of(css, struct task_group, css) : NULL;
+-}
+-
+ static struct cgroup_subsys_state *
+ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
+ {
+@@ -8824,6 +8911,11 @@ static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
+ {
+ 	struct task_group *tg = css_tg(css);
+ 	struct task_group *parent = css_tg(css->parent);
++	int ret;
++
++	ret = scx_tg_online(tg);
++	if (ret)
++		return ret;
+ 
+ 	if (parent)
+ 		sched_online_group(tg, parent);
+@@ -8838,6 +8930,13 @@ static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
+ 	return 0;
+ }
+ 
++static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
++{
++	struct task_group *tg = css_tg(css);
++
++	scx_tg_offline(tg);
++}
++
+ static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
+ {
+ 	struct task_group *tg = css_tg(css);
+@@ -8855,9 +8954,9 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
+ 	sched_unregister_group(tg);
+ }
+ 
+-#ifdef CONFIG_RT_GROUP_SCHED
+ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
+ {
++#ifdef CONFIG_RT_GROUP_SCHED
+ 	struct task_struct *task;
+ 	struct cgroup_subsys_state *css;
+ 
+@@ -8865,9 +8964,9 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
+ 		if (!sched_rt_can_attach(css_tg(css), task))
+ 			return -EINVAL;
+ 	}
+-	return 0;
+-}
+ #endif
++	return scx_cgroup_can_attach(tset);
++}
+ 
+ static void cpu_cgroup_attach(struct cgroup_taskset *tset)
+ {
+@@ -8876,6 +8975,13 @@ static void cpu_cgroup_attach(struct cgroup_taskset *tset)
+ 
+ 	cgroup_taskset_for_each(task, css, tset)
+ 		sched_move_task(task);
++
++	scx_cgroup_finish_attach();
++}
++
++static void cpu_cgroup_cancel_attach(struct cgroup_taskset *tset)
++{
++	scx_cgroup_cancel_attach(tset);
+ }
+ 
+ #ifdef CONFIG_UCLAMP_TASK_GROUP
+@@ -9052,22 +9158,36 @@ static int cpu_uclamp_max_show(struct seq_file *sf, void *v)
+ }
+ #endif /* CONFIG_UCLAMP_TASK_GROUP */
+ 
++#ifdef CONFIG_GROUP_SCHED_WEIGHT
++static unsigned long tg_weight(struct task_group *tg)
++{
+ #ifdef CONFIG_FAIR_GROUP_SCHED
++	return scale_load_down(tg->shares);
++#else
++	return sched_weight_from_cgroup(tg->scx_weight);
++#endif
++}
++
+ static int cpu_shares_write_u64(struct cgroup_subsys_state *css,
+ 				struct cftype *cftype, u64 shareval)
+ {
++	int ret;
++
+ 	if (shareval > scale_load_down(ULONG_MAX))
+ 		shareval = MAX_SHARES;
+-	return sched_group_set_shares(css_tg(css), scale_load(shareval));
++	ret = sched_group_set_shares(css_tg(css), scale_load(shareval));
++	if (!ret)
++		scx_group_set_weight(css_tg(css),
++				     sched_weight_to_cgroup(shareval));
++	return ret;
+ }
+ 
+ static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css,
+ 			       struct cftype *cft)
+ {
+-	struct task_group *tg = css_tg(css);
+-
+-	return (u64) scale_load_down(tg->shares);
++	return tg_weight(css_tg(css));
+ }
++#endif /* CONFIG_GROUP_SCHED_WEIGHT */
+ 
+ #ifdef CONFIG_CFS_BANDWIDTH
+ static DEFINE_MUTEX(cfs_constraints_mutex);
+@@ -9413,7 +9533,6 @@ static int cpu_cfs_local_stat_show(struct seq_file *sf, void *v)
+ 	return 0;
+ }
+ #endif /* CONFIG_CFS_BANDWIDTH */
+-#endif /* CONFIG_FAIR_GROUP_SCHED */
+ 
+ #ifdef CONFIG_RT_GROUP_SCHED
+ static int cpu_rt_runtime_write(struct cgroup_subsys_state *css,
+@@ -9441,7 +9560,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
+ }
+ #endif /* CONFIG_RT_GROUP_SCHED */
+ 
+-#ifdef CONFIG_FAIR_GROUP_SCHED
++#ifdef CONFIG_GROUP_SCHED_WEIGHT
+ static s64 cpu_idle_read_s64(struct cgroup_subsys_state *css,
+ 			       struct cftype *cft)
+ {
+@@ -9451,12 +9570,17 @@ static s64 cpu_idle_read_s64(struct cgroup_subsys_state *css,
+ static int cpu_idle_write_s64(struct cgroup_subsys_state *css,
+ 				struct cftype *cft, s64 idle)
+ {
+-	return sched_group_set_idle(css_tg(css), idle);
++	int ret;
++
++	ret = sched_group_set_idle(css_tg(css), idle);
++	if (!ret)
++		scx_group_set_idle(css_tg(css), idle);
++	return ret;
+ }
+ #endif
+ 
+ static struct cftype cpu_legacy_files[] = {
+-#ifdef CONFIG_FAIR_GROUP_SCHED
++#ifdef CONFIG_GROUP_SCHED_WEIGHT
+ 	{
+ 		.name = "shares",
+ 		.read_u64 = cpu_shares_read_u64,
+@@ -9566,38 +9690,35 @@ static int cpu_local_stat_show(struct seq_file *sf,
+ 	return 0;
+ }
+ 
+-#ifdef CONFIG_FAIR_GROUP_SCHED
++#ifdef CONFIG_GROUP_SCHED_WEIGHT
++
+ static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
+ 			       struct cftype *cft)
+ {
+-	struct task_group *tg = css_tg(css);
+-	u64 weight = scale_load_down(tg->shares);
+-
+-	return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024);
++	return sched_weight_to_cgroup(tg_weight(css_tg(css)));
+ }
+ 
+ static int cpu_weight_write_u64(struct cgroup_subsys_state *css,
+-				struct cftype *cft, u64 weight)
++				struct cftype *cft, u64 cgrp_weight)
+ {
+-	/*
+-	 * cgroup weight knobs should use the common MIN, DFL and MAX
+-	 * values which are 1, 100 and 10000 respectively.  While it loses
+-	 * a bit of range on both ends, it maps pretty well onto the shares
+-	 * value used by scheduler and the round-trip conversions preserve
+-	 * the original value over the entire range.
+-	 */
+-	if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
++	unsigned long weight;
++	int ret;
++
++	if (cgrp_weight < CGROUP_WEIGHT_MIN || cgrp_weight > CGROUP_WEIGHT_MAX)
+ 		return -ERANGE;
+ 
+-	weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL);
++	weight = sched_weight_from_cgroup(cgrp_weight);
+ 
+-	return sched_group_set_shares(css_tg(css), scale_load(weight));
++	ret = sched_group_set_shares(css_tg(css), scale_load(weight));
++	if (!ret)
++		scx_group_set_weight(css_tg(css), cgrp_weight);
++	return ret;
+ }
+ 
+ static s64 cpu_weight_nice_read_s64(struct cgroup_subsys_state *css,
+ 				    struct cftype *cft)
+ {
+-	unsigned long weight = scale_load_down(css_tg(css)->shares);
++	unsigned long weight = tg_weight(css_tg(css));
+ 	int last_delta = INT_MAX;
+ 	int prio, delta;
+ 
+@@ -9616,7 +9737,7 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
+ 				     struct cftype *cft, s64 nice)
+ {
+ 	unsigned long weight;
+-	int idx;
++	int idx, ret;
+ 
+ 	if (nice < MIN_NICE || nice > MAX_NICE)
+ 		return -ERANGE;
+@@ -9625,9 +9746,13 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
+ 	idx = array_index_nospec(idx, 40);
+ 	weight = sched_prio_to_weight[idx];
+ 
+-	return sched_group_set_shares(css_tg(css), scale_load(weight));
++	ret = sched_group_set_shares(css_tg(css), scale_load(weight));
++	if (!ret)
++		scx_group_set_weight(css_tg(css),
++				     sched_weight_to_cgroup(weight));
++	return ret;
+ }
+-#endif
++#endif /* CONFIG_GROUP_SCHED_WEIGHT */
+ 
+ static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
+ 						  long period, long quota)
+@@ -9687,7 +9812,7 @@ static ssize_t cpu_max_write(struct kernfs_open_file *of,
+ #endif
+ 
+ static struct cftype cpu_files[] = {
+-#ifdef CONFIG_FAIR_GROUP_SCHED
++#ifdef CONFIG_GROUP_SCHED_WEIGHT
+ 	{
+ 		.name = "weight",
+ 		.flags = CFTYPE_NOT_ON_ROOT,
+@@ -9741,14 +9866,14 @@ static struct cftype cpu_files[] = {
+ struct cgroup_subsys cpu_cgrp_subsys = {
+ 	.css_alloc	= cpu_cgroup_css_alloc,
+ 	.css_online	= cpu_cgroup_css_online,
++	.css_offline	= cpu_cgroup_css_offline,
+ 	.css_released	= cpu_cgroup_css_released,
+ 	.css_free	= cpu_cgroup_css_free,
+ 	.css_extra_stat_show = cpu_extra_stat_show,
+ 	.css_local_stat_show = cpu_local_stat_show,
+-#ifdef CONFIG_RT_GROUP_SCHED
+ 	.can_attach	= cpu_cgroup_can_attach,
+-#endif
+ 	.attach		= cpu_cgroup_attach,
++	.cancel_attach	= cpu_cgroup_cancel_attach,
+ 	.legacy_cftypes	= cpu_legacy_files,
+ 	.dfl_cftypes	= cpu_files,
+ 	.early_init	= true,
+@@ -10338,3 +10463,38 @@ void sched_mm_cid_fork(struct task_struct *t)
+ 	t->mm_cid_active = 1;
+ }
+ #endif
++
++#ifdef CONFIG_SCHED_CLASS_EXT
++void sched_deq_and_put_task(struct task_struct *p, int queue_flags,
++			    struct sched_enq_and_set_ctx *ctx)
++{
++	struct rq *rq = task_rq(p);
++
++	lockdep_assert_rq_held(rq);
++
++	*ctx = (struct sched_enq_and_set_ctx){
++		.p = p,
++		.queue_flags = queue_flags,
++		.queued = task_on_rq_queued(p),
++		.running = task_current(rq, p),
++	};
++
++	update_rq_clock(rq);
++	if (ctx->queued)
++		dequeue_task(rq, p, queue_flags | DEQUEUE_NOCLOCK);
++	if (ctx->running)
++		put_prev_task(rq, p);
++}
++
++void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx)
++{
++	struct rq *rq = task_rq(ctx->p);
++
++	lockdep_assert_rq_held(rq);
++
++	if (ctx->queued)
++		enqueue_task(rq, ctx->p, ctx->queue_flags | ENQUEUE_NOCLOCK);
++	if (ctx->running)
++		set_next_task(rq, ctx->p);
++}
++#endif	/* CONFIG_SCHED_CLASS_EXT */
+diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
+index eece6244f9d2..e683e5d08daa 100644
+--- a/kernel/sched/cpufreq_schedutil.c
++++ b/kernel/sched/cpufreq_schedutil.c
+@@ -197,8 +197,10 @@ unsigned long sugov_effective_cpu_perf(int cpu, unsigned long actual,
+ 
+ static void sugov_get_util(struct sugov_cpu *sg_cpu, unsigned long boost)
+ {
+-	unsigned long min, max, util = cpu_util_cfs_boost(sg_cpu->cpu);
++	unsigned long min, max, util = scx_cpuperf_target(sg_cpu->cpu);
+ 
++	if (!scx_switched_all())
++		util += cpu_util_cfs_boost(sg_cpu->cpu);
+ 	util = effective_cpu_util(sg_cpu->cpu, util, &min, &max);
+ 	util = max(util, boost);
+ 	sg_cpu->bw_min = min;
+@@ -325,16 +327,35 @@ static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
+ }
+ 
+ #ifdef CONFIG_NO_HZ_COMMON
+-static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
++static bool sugov_hold_freq(struct sugov_cpu *sg_cpu)
+ {
+-	unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
+-	bool ret = idle_calls == sg_cpu->saved_idle_calls;
++	unsigned long idle_calls;
++	bool ret;
++
++	/*
++	 * The heuristics in this function are for the fair class. For SCX, the
++	 * performance target comes directly from the BPF scheduler. Let's just
++	 * follow it.
++	 */
++	if (scx_switched_all())
++		return false;
++
++	/* if capped by uclamp_max, always update to be in compliance */
++	if (uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)))
++		return false;
++
++	/*
++	 * Maintain the frequency if the CPU has not been idle recently, as
++	 * reduction is likely to be premature.
++	 */
++	idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
++	ret = idle_calls == sg_cpu->saved_idle_calls;
+ 
+ 	sg_cpu->saved_idle_calls = idle_calls;
+ 	return ret;
+ }
+ #else
+-static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
++static inline bool sugov_hold_freq(struct sugov_cpu *sg_cpu) { return false; }
+ #endif /* CONFIG_NO_HZ_COMMON */
+ 
+ /*
+@@ -382,14 +403,8 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
+ 		return;
+ 
+ 	next_f = get_next_freq(sg_policy, sg_cpu->util, max_cap);
+-	/*
+-	 * Do not reduce the frequency if the CPU has not been idle
+-	 * recently, as the reduction is likely to be premature then.
+-	 *
+-	 * Except when the rq is capped by uclamp_max.
+-	 */
+-	if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
+-	    sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq &&
++
++	if (sugov_hold_freq(sg_cpu) && next_f < sg_policy->next_freq &&
+ 	    !sg_policy->need_freq_update) {
+ 		next_f = sg_policy->next_freq;
+ 
+@@ -436,14 +451,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
+ 	if (!sugov_update_single_common(sg_cpu, time, max_cap, flags))
+ 		return;
+ 
+-	/*
+-	 * Do not reduce the target performance level if the CPU has not been
+-	 * idle recently, as the reduction is likely to be premature then.
+-	 *
+-	 * Except when the rq is capped by uclamp_max.
+-	 */
+-	if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
+-	    sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
++	if (sugov_hold_freq(sg_cpu) && sg_cpu->util < prev_util)
+ 		sg_cpu->util = prev_util;
+ 
+ 	cpufreq_driver_adjust_perf(sg_cpu->cpu, sg_cpu->bw_min,
+diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
+index c1eb9a1afd13..c057ef46c5f8 100644
+--- a/kernel/sched/debug.c
++++ b/kernel/sched/debug.c
+@@ -1090,6 +1090,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
+ 		P(dl.runtime);
+ 		P(dl.deadline);
+ 	}
++#ifdef CONFIG_SCHED_CLASS_EXT
++	__PS("ext.enabled", task_on_scx(p));
++#endif
+ #undef PN_SCHEDSTAT
+ #undef P_SCHEDSTAT
+ 
+diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
+new file mode 100644
+index 000000000000..5fae2292ec29
+--- /dev/null
++++ b/kernel/sched/ext.c
+@@ -0,0 +1,7281 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst
++ *
++ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
++ */
++#define SCX_OP_IDX(op)		(offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void)))
++
++enum scx_consts {
++	SCX_DSP_DFL_MAX_BATCH		= 32,
++	SCX_DSP_MAX_LOOPS		= 32,
++	SCX_WATCHDOG_MAX_TIMEOUT	= 30 * HZ,
++
++	SCX_EXIT_BT_LEN			= 64,
++	SCX_EXIT_MSG_LEN		= 1024,
++	SCX_EXIT_DUMP_DFL_LEN		= 32768,
++
++	SCX_CPUPERF_ONE			= SCHED_CAPACITY_SCALE,
++
++	/*
++	 * Iterating all tasks may take a while. Periodically drop
++	 * scx_tasks_lock to avoid causing e.g. CSD and RCU stalls.
++	 */
++	SCX_OPS_TASK_ITER_BATCH		= 32,
++};
++
++enum scx_exit_kind {
++	SCX_EXIT_NONE,
++	SCX_EXIT_DONE,
++
++	SCX_EXIT_UNREG = 64,	/* user-space initiated unregistration */
++	SCX_EXIT_UNREG_BPF,	/* BPF-initiated unregistration */
++	SCX_EXIT_UNREG_KERN,	/* kernel-initiated unregistration */
++	SCX_EXIT_SYSRQ,		/* requested by 'S' sysrq */
++
++	SCX_EXIT_ERROR = 1024,	/* runtime error, error msg contains details */
++	SCX_EXIT_ERROR_BPF,	/* ERROR but triggered through scx_bpf_error() */
++	SCX_EXIT_ERROR_STALL,	/* watchdog detected stalled runnable tasks */
++};
++
++/*
++ * An exit code can be specified when exiting with scx_bpf_exit() or
++ * scx_ops_exit(), corresponding to exit_kind UNREG_BPF and UNREG_KERN
++ * respectively. The codes are 64bit of the format:
++ *
++ *   Bits: [63  ..  48 47   ..  32 31 .. 0]
++ *         [ SYS ACT ] [ SYS RSN ] [ USR  ]
++ *
++ *   SYS ACT: System-defined exit actions
++ *   SYS RSN: System-defined exit reasons
++ *   USR    : User-defined exit codes and reasons
++ *
++ * Using the above, users may communicate intention and context by ORing system
++ * actions and/or system reasons with a user-defined exit code.
++ */
++enum scx_exit_code {
++	/* Reasons */
++	SCX_ECODE_RSN_HOTPLUG	= 1LLU << 32,
++
++	/* Actions */
++	SCX_ECODE_ACT_RESTART	= 1LLU << 48,
++};
++
++/*
++ * scx_exit_info is passed to ops.exit() to describe why the BPF scheduler is
++ * being disabled.
++ */
++struct scx_exit_info {
++	/* %SCX_EXIT_* - broad category of the exit reason */
++	enum scx_exit_kind	kind;
++
++	/* exit code if gracefully exiting */
++	s64			exit_code;
++
++	/* textual representation of the above */
++	const char		*reason;
++
++	/* backtrace if exiting due to an error */
++	unsigned long		*bt;
++	u32			bt_len;
++
++	/* informational message */
++	char			*msg;
++
++	/* debug dump */
++	char			*dump;
++};
++
++/* sched_ext_ops.flags */
++enum scx_ops_flags {
++	/*
++	 * Keep built-in idle tracking even if ops.update_idle() is implemented.
++	 */
++	SCX_OPS_KEEP_BUILTIN_IDLE = 1LLU << 0,
++
++	/*
++	 * By default, if there are no other tasks to run on the CPU, ext core
++	 * keeps running the current task even after its slice expires. If this
++	 * flag is specified, such tasks are passed to ops.enqueue() with
++	 * %SCX_ENQ_LAST. See the comment above %SCX_ENQ_LAST for more info.
++	 */
++	SCX_OPS_ENQ_LAST	= 1LLU << 1,
++
++	/*
++	 * An exiting task may schedule after PF_EXITING is set. In such cases,
++	 * bpf_task_from_pid() may not be able to find the task and if the BPF
++	 * scheduler depends on pid lookup for dispatching, the task will be
++	 * lost leading to various issues including RCU grace period stalls.
++	 *
++	 * To mask this problem, by default, unhashed tasks are automatically
++	 * dispatched to the local DSQ on enqueue. If the BPF scheduler doesn't
++	 * depend on pid lookups and wants to handle these tasks directly, the
++	 * following flag can be used.
++	 */
++	SCX_OPS_ENQ_EXITING	= 1LLU << 2,
++
++	/*
++	 * If set, only tasks with policy set to SCHED_EXT are attached to
++	 * sched_ext. If clear, SCHED_NORMAL tasks are also included.
++	 */
++	SCX_OPS_SWITCH_PARTIAL	= 1LLU << 3,
++
++	/*
++	 * CPU cgroup support flags
++	 */
++	SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16,	/* cpu.weight */
++
++	SCX_OPS_ALL_FLAGS	= SCX_OPS_KEEP_BUILTIN_IDLE |
++				  SCX_OPS_ENQ_LAST |
++				  SCX_OPS_ENQ_EXITING |
++				  SCX_OPS_SWITCH_PARTIAL |
++				  SCX_OPS_HAS_CGROUP_WEIGHT,
++};
++
++/* argument container for ops.init_task() */
++struct scx_init_task_args {
++	/*
++	 * Set if ops.init_task() is being invoked on the fork path, as opposed
++	 * to the scheduler transition path.
++	 */
++	bool			fork;
++#ifdef CONFIG_EXT_GROUP_SCHED
++	/* the cgroup the task is joining */
++	struct cgroup		*cgroup;
++#endif
++};
++
++/* argument container for ops.exit_task() */
++struct scx_exit_task_args {
++	/* Whether the task exited before running on sched_ext. */
++	bool cancelled;
++};
++
++/* argument container for ops->cgroup_init() */
++struct scx_cgroup_init_args {
++	/* the weight of the cgroup [1..10000] */
++	u32			weight;
++};
++
++enum scx_cpu_preempt_reason {
++	/* next task is being scheduled by &sched_class_rt */
++	SCX_CPU_PREEMPT_RT,
++	/* next task is being scheduled by &sched_class_dl */
++	SCX_CPU_PREEMPT_DL,
++	/* next task is being scheduled by &sched_class_stop */
++	SCX_CPU_PREEMPT_STOP,
++	/* unknown reason for SCX being preempted */
++	SCX_CPU_PREEMPT_UNKNOWN,
++};
++
++/*
++ * Argument container for ops->cpu_acquire(). Currently empty, but may be
++ * expanded in the future.
++ */
++struct scx_cpu_acquire_args {};
++
++/* argument container for ops->cpu_release() */
++struct scx_cpu_release_args {
++	/* the reason the CPU was preempted */
++	enum scx_cpu_preempt_reason reason;
++
++	/* the task that's going to be scheduled on the CPU */
++	struct task_struct	*task;
++};
++
++/*
++ * Informational context provided to dump operations.
++ */
++struct scx_dump_ctx {
++	enum scx_exit_kind	kind;
++	s64			exit_code;
++	const char		*reason;
++	u64			at_ns;
++	u64			at_jiffies;
++};
++
++/**
++ * struct sched_ext_ops - Operation table for BPF scheduler implementation
++ *
++ * Userland can implement an arbitrary scheduling policy by implementing and
++ * loading operations in this table.
++ */
++struct sched_ext_ops {
++	/**
++	 * select_cpu - Pick the target CPU for a task which is being woken up
++	 * @p: task being woken up
++	 * @prev_cpu: the cpu @p was on before sleeping
++	 * @wake_flags: SCX_WAKE_*
++	 *
++	 * Decision made here isn't final. @p may be moved to any CPU while it
++	 * is getting dispatched for execution later. However, as @p is not on
++	 * the rq at this point, getting the eventual execution CPU right here
++	 * saves a small bit of overhead down the line.
++	 *
++	 * If an idle CPU is returned, the CPU is kicked and will try to
++	 * dispatch. While an explicit custom mechanism can be added,
++	 * select_cpu() serves as the default way to wake up idle CPUs.
++	 *
++	 * @p may be dispatched directly by calling scx_bpf_dispatch(). If @p
++	 * is dispatched, the ops.enqueue() callback will be skipped. Finally,
++	 * if @p is dispatched to SCX_DSQ_LOCAL, it will be dispatched to the
++	 * local DSQ of whatever CPU is returned by this callback.
++	 */
++	s32 (*select_cpu)(struct task_struct *p, s32 prev_cpu, u64 wake_flags);
++
++	/**
++	 * enqueue - Enqueue a task on the BPF scheduler
++	 * @p: task being enqueued
++	 * @enq_flags: %SCX_ENQ_*
++	 *
++	 * @p is ready to run. Dispatch directly by calling scx_bpf_dispatch()
++	 * or enqueue on the BPF scheduler. If not directly dispatched, the BPF
++	 * scheduler owns @p and if it fails to dispatch @p, the task will
++	 * stall.
++	 *
++	 * If @p was dispatched from ops.select_cpu(), this callback is
++	 * skipped.
++	 */
++	void (*enqueue)(struct task_struct *p, u64 enq_flags);
++
++	/**
++	 * dequeue - Remove a task from the BPF scheduler
++	 * @p: task being dequeued
++	 * @deq_flags: %SCX_DEQ_*
++	 *
++	 * Remove @p from the BPF scheduler. This is usually called to isolate
++	 * the task while updating its scheduling properties (e.g. priority).
++	 *
++	 * The ext core keeps track of whether the BPF side owns a given task or
++	 * not and can gracefully ignore spurious dispatches from BPF side,
++	 * which makes it safe to not implement this method. However, depending
++	 * on the scheduling logic, this can lead to confusing behaviors - e.g.
++	 * scheduling position not being updated across a priority change.
++	 */
++	void (*dequeue)(struct task_struct *p, u64 deq_flags);
++
++	/**
++	 * dispatch - Dispatch tasks from the BPF scheduler and/or consume DSQs
++	 * @cpu: CPU to dispatch tasks for
++	 * @prev: previous task being switched out
++	 *
++	 * Called when a CPU's local dsq is empty. The operation should dispatch
++	 * one or more tasks from the BPF scheduler into the DSQs using
++	 * scx_bpf_dispatch() and/or consume user DSQs into the local DSQ using
++	 * scx_bpf_consume().
++	 *
++	 * The maximum number of times scx_bpf_dispatch() can be called without
++	 * an intervening scx_bpf_consume() is specified by
++	 * ops.dispatch_max_batch. See the comments on top of the two functions
++	 * for more details.
++	 *
++	 * When not %NULL, @prev is an SCX task with its slice depleted. If
++	 * @prev is still runnable as indicated by set %SCX_TASK_QUEUED in
++	 * @prev->scx.flags, it is not enqueued yet and will be enqueued after
++	 * ops.dispatch() returns. To keep executing @prev, return without
++	 * dispatching or consuming any tasks. Also see %SCX_OPS_ENQ_LAST.
++	 */
++	void (*dispatch)(s32 cpu, struct task_struct *prev);
++
++	/**
++	 * tick - Periodic tick
++	 * @p: task running currently
++	 *
++	 * This operation is called every 1/HZ seconds on CPUs which are
++	 * executing an SCX task. Setting @p->scx.slice to 0 will trigger an
++	 * immediate dispatch cycle on the CPU.
++	 */
++	void (*tick)(struct task_struct *p);
++
++	/**
++	 * runnable - A task is becoming runnable on its associated CPU
++	 * @p: task becoming runnable
++	 * @enq_flags: %SCX_ENQ_*
++	 *
++	 * This and the following three functions can be used to track a task's
++	 * execution state transitions. A task becomes ->runnable() on a CPU,
++	 * and then goes through one or more ->running() and ->stopping() pairs
++	 * as it runs on the CPU, and eventually becomes ->quiescent() when it's
++	 * done running on the CPU.
++	 *
++	 * @p is becoming runnable on the CPU because it's
++	 *
++	 * - waking up (%SCX_ENQ_WAKEUP)
++	 * - being moved from another CPU
++	 * - being restored after temporarily taken off the queue for an
++	 *   attribute change.
++	 *
++	 * This and ->enqueue() are related but not coupled. This operation
++	 * notifies @p's state transition and may not be followed by ->enqueue()
++	 * e.g. when @p is being dispatched to a remote CPU, or when @p is
++	 * being enqueued on a CPU experiencing a hotplug event. Likewise, a
++	 * task may be ->enqueue()'d without being preceded by this operation
++	 * e.g. after exhausting its slice.
++	 */
++	void (*runnable)(struct task_struct *p, u64 enq_flags);
++
++	/**
++	 * running - A task is starting to run on its associated CPU
++	 * @p: task starting to run
++	 *
++	 * See ->runnable() for explanation on the task state notifiers.
++	 */
++	void (*running)(struct task_struct *p);
++
++	/**
++	 * stopping - A task is stopping execution
++	 * @p: task stopping to run
++	 * @runnable: is task @p still runnable?
++	 *
++	 * See ->runnable() for explanation on the task state notifiers. If
++	 * !@runnable, ->quiescent() will be invoked after this operation
++	 * returns.
++	 */
++	void (*stopping)(struct task_struct *p, bool runnable);
++
++	/**
++	 * quiescent - A task is becoming not runnable on its associated CPU
++	 * @p: task becoming not runnable
++	 * @deq_flags: %SCX_DEQ_*
++	 *
++	 * See ->runnable() for explanation on the task state notifiers.
++	 *
++	 * @p is becoming quiescent on the CPU because it's
++	 *
++	 * - sleeping (%SCX_DEQ_SLEEP)
++	 * - being moved to another CPU
++	 * - being temporarily taken off the queue for an attribute change
++	 *   (%SCX_DEQ_SAVE)
++	 *
++	 * This and ->dequeue() are related but not coupled. This operation
++	 * notifies @p's state transition and may not be preceded by ->dequeue()
++	 * e.g. when @p is being dispatched to a remote CPU.
++	 */
++	void (*quiescent)(struct task_struct *p, u64 deq_flags);
++
++	/**
++	 * yield - Yield CPU
++	 * @from: yielding task
++	 * @to: optional yield target task
++	 *
++	 * If @to is NULL, @from is yielding the CPU to other runnable tasks.
++	 * The BPF scheduler should ensure that other available tasks are
++	 * dispatched before the yielding task. Return value is ignored in this
++	 * case.
++	 *
++	 * If @to is not-NULL, @from wants to yield the CPU to @to. If the bpf
++	 * scheduler can implement the request, return %true; otherwise, %false.
++	 */
++	bool (*yield)(struct task_struct *from, struct task_struct *to);
++
++	/**
++	 * core_sched_before - Task ordering for core-sched
++	 * @a: task A
++	 * @b: task B
++	 *
++	 * Used by core-sched to determine the ordering between two tasks. See
++	 * Documentation/admin-guide/hw-vuln/core-scheduling.rst for details on
++	 * core-sched.
++	 *
++	 * Both @a and @b are runnable and may or may not currently be queued on
++	 * the BPF scheduler. Should return %true if @a should run before @b.
++	 * %false if there's no required ordering or @b should run before @a.
++	 *
++	 * If not specified, the default is ordering them according to when they
++	 * became runnable.
++	 */
++	bool (*core_sched_before)(struct task_struct *a, struct task_struct *b);
++
++	/**
++	 * set_weight - Set task weight
++	 * @p: task to set weight for
++	 * @weight: new weight [1..10000]
++	 *
++	 * Update @p's weight to @weight.
++	 */
++	void (*set_weight)(struct task_struct *p, u32 weight);
++
++	/**
++	 * set_cpumask - Set CPU affinity
++	 * @p: task to set CPU affinity for
++	 * @cpumask: cpumask of cpus that @p can run on
++	 *
++	 * Update @p's CPU affinity to @cpumask.
++	 */
++	void (*set_cpumask)(struct task_struct *p,
++			    const struct cpumask *cpumask);
++
++	/**
++	 * update_idle - Update the idle state of a CPU
++	 * @cpu: CPU to update the idle state for
++	 * @idle: whether entering or exiting the idle state
++	 *
++	 * This operation is called when @cpu enters or leaves the idle
++	 * state. By default, implementing this operation disables the built-in
++	 * idle CPU tracking and the following helpers become unavailable:
++	 *
++	 * - scx_bpf_select_cpu_dfl()
++	 * - scx_bpf_test_and_clear_cpu_idle()
++	 * - scx_bpf_pick_idle_cpu()
++	 *
++	 * The user also must implement ops.select_cpu() as the default
++	 * implementation relies on scx_bpf_select_cpu_dfl().
++	 *
++	 * Specify the %SCX_OPS_KEEP_BUILTIN_IDLE flag to keep the built-in idle
++	 * tracking.
++	 */
++	void (*update_idle)(s32 cpu, bool idle);
++
++	/**
++	 * cpu_acquire - A CPU is becoming available to the BPF scheduler
++	 * @cpu: The CPU being acquired by the BPF scheduler.
++	 * @args: Acquire arguments, see the struct definition.
++	 *
++	 * A CPU that was previously released from the BPF scheduler is now once
++	 * again under its control.
++	 */
++	void (*cpu_acquire)(s32 cpu, struct scx_cpu_acquire_args *args);
++
++	/**
++	 * cpu_release - A CPU is taken away from the BPF scheduler
++	 * @cpu: The CPU being released by the BPF scheduler.
++	 * @args: Release arguments, see the struct definition.
++	 *
++	 * The specified CPU is no longer under the control of the BPF
++	 * scheduler. This could be because it was preempted by a higher
++	 * priority sched_class, though there may be other reasons as well. The
++	 * caller should consult @args->reason to determine the cause.
++	 */
++	void (*cpu_release)(s32 cpu, struct scx_cpu_release_args *args);
++
++	/**
++	 * init_task - Initialize a task to run in a BPF scheduler
++	 * @p: task to initialize for BPF scheduling
++	 * @args: init arguments, see the struct definition
++	 *
++	 * Either we're loading a BPF scheduler or a new task is being forked.
++	 * Initialize @p for BPF scheduling. This operation may block and can
++	 * be used for allocations, and is called exactly once for a task.
++	 *
++	 * Return 0 for success, -errno for failure. An error return while
++	 * loading will abort loading of the BPF scheduler. During a fork, it
++	 * will abort that specific fork.
++	 */
++	s32 (*init_task)(struct task_struct *p, struct scx_init_task_args *args);
++
++	/**
++	 * exit_task - Exit a previously-running task from the system
++	 * @p: task to exit
++	 * @args: exit arguments, see the struct definition
++	 *
++	 * @p is exiting or the BPF scheduler is being unloaded. Clean up @p.
++	 */
++	void (*exit_task)(struct task_struct *p, struct scx_exit_task_args *args);
++
++	/**
++	 * enable - Enable BPF scheduling for a task
++	 * @p: task to enable BPF scheduling for
++	 *
++	 * Enable @p for BPF scheduling. enable() is called on @p any time it
++	 * enters SCX, and is always paired with a matching disable().
++	 */
++	void (*enable)(struct task_struct *p);
++
++	/**
++	 * disable - Disable BPF scheduling for a task
++	 * @p: task to disable BPF scheduling for
++	 *
++	 * @p is exiting, leaving SCX or the BPF scheduler is being unloaded.
++	 * Disable BPF scheduling for @p. A disable() call is always matched
++	 * with a prior enable() call.
++	 */
++	void (*disable)(struct task_struct *p);
++
++	/**
++	 * dump - Dump BPF scheduler state on error
++	 * @ctx: debug dump context
++	 *
++	 * Use scx_bpf_dump() to generate BPF scheduler specific debug dump.
++	 */
++	void (*dump)(struct scx_dump_ctx *ctx);
++
++	/**
++	 * dump_cpu - Dump BPF scheduler state for a CPU on error
++	 * @ctx: debug dump context
++	 * @cpu: CPU to generate debug dump for
++	 * @idle: @cpu is currently idle without any runnable tasks
++	 *
++	 * Use scx_bpf_dump() to generate BPF scheduler specific debug dump for
++	 * @cpu. If @idle is %true and this operation doesn't produce any
++	 * output, @cpu is skipped for dump.
++	 */
++	void (*dump_cpu)(struct scx_dump_ctx *ctx, s32 cpu, bool idle);
++
++	/**
++	 * dump_task - Dump BPF scheduler state for a runnable task on error
++	 * @ctx: debug dump context
++	 * @p: runnable task to generate debug dump for
++	 *
++	 * Use scx_bpf_dump() to generate BPF scheduler specific debug dump for
++	 * @p.
++	 */
++	void (*dump_task)(struct scx_dump_ctx *ctx, struct task_struct *p);
++
++#ifdef CONFIG_EXT_GROUP_SCHED
++	/**
++	 * cgroup_init - Initialize a cgroup
++	 * @cgrp: cgroup being initialized
++	 * @args: init arguments, see the struct definition
++	 *
++	 * Either the BPF scheduler is being loaded or @cgrp created, initialize
++	 * @cgrp for sched_ext. This operation may block.
++	 *
++	 * Return 0 for success, -errno for failure. An error return while
++	 * loading will abort loading of the BPF scheduler. During cgroup
++	 * creation, it will abort the specific cgroup creation.
++	 */
++	s32 (*cgroup_init)(struct cgroup *cgrp,
++			   struct scx_cgroup_init_args *args);
++
++	/**
++	 * cgroup_exit - Exit a cgroup
++	 * @cgrp: cgroup being exited
++	 *
++	 * Either the BPF scheduler is being unloaded or @cgrp destroyed, exit
++	 * @cgrp for sched_ext. This operation may block.
++	 */
++	void (*cgroup_exit)(struct cgroup *cgrp);
++
++	/**
++	 * cgroup_prep_move - Prepare a task to be moved to a different cgroup
++	 * @p: task being moved
++	 * @from: cgroup @p is being moved from
++	 * @to: cgroup @p is being moved to
++	 *
++	 * Prepare @p for move from cgroup @from to @to. This operation may
++	 * block and can be used for allocations.
++	 *
++	 * Return 0 for success, -errno for failure. An error return aborts the
++	 * migration.
++	 */
++	s32 (*cgroup_prep_move)(struct task_struct *p,
++				struct cgroup *from, struct cgroup *to);
++
++	/**
++	 * cgroup_move - Commit cgroup move
++	 * @p: task being moved
++	 * @from: cgroup @p is being moved from
++	 * @to: cgroup @p is being moved to
++	 *
++	 * Commit the move. @p is dequeued during this operation.
++	 */
++	void (*cgroup_move)(struct task_struct *p,
++			    struct cgroup *from, struct cgroup *to);
++
++	/**
++	 * cgroup_cancel_move - Cancel cgroup move
++	 * @p: task whose cgroup move is being canceled
++	 * @from: cgroup @p was being moved from
++	 * @to: cgroup @p was being moved to
++	 *
++	 * @p was cgroup_prep_move()'d but failed before reaching cgroup_move().
++	 * Undo the preparation.
++	 */
++	void (*cgroup_cancel_move)(struct task_struct *p,
++				   struct cgroup *from, struct cgroup *to);
++
++	/**
++	 * cgroup_set_weight - A cgroup's weight is being changed
++	 * @cgrp: cgroup whose weight is being updated
++	 * @weight: new weight [1..10000]
++	 *
++	 * Update @cgrp's weight to @weight.
++	 */
++	void (*cgroup_set_weight)(struct cgroup *cgrp, u32 weight);
++#endif	/* CONFIG_EXT_GROUP_SCHED */
++
++	/*
++	 * All online ops must come before ops.cpu_online().
++	 */
++
++	/**
++	 * cpu_online - A CPU became online
++	 * @cpu: CPU which just came up
++	 *
++	 * @cpu just came online. @cpu will not call ops.enqueue() or
++	 * ops.dispatch(), nor run tasks associated with other CPUs beforehand.
++	 */
++	void (*cpu_online)(s32 cpu);
++
++	/**
++	 * cpu_offline - A CPU is going offline
++	 * @cpu: CPU which is going offline
++	 *
++	 * @cpu is going offline. @cpu will not call ops.enqueue() or
++	 * ops.dispatch(), nor run tasks associated with other CPUs afterwards.
++	 */
++	void (*cpu_offline)(s32 cpu);
++
++	/*
++	 * All CPU hotplug ops must come before ops.init().
++	 */
++
++	/**
++	 * init - Initialize the BPF scheduler
++	 */
++	s32 (*init)(void);
++
++	/**
++	 * exit - Clean up after the BPF scheduler
++	 * @info: Exit info
++	 */
++	void (*exit)(struct scx_exit_info *info);
++
++	/**
++	 * dispatch_max_batch - Max nr of tasks that dispatch() can dispatch
++	 */
++	u32 dispatch_max_batch;
++
++	/**
++	 * flags - %SCX_OPS_* flags
++	 */
++	u64 flags;
++
++	/**
++	 * timeout_ms - The maximum amount of time, in milliseconds, that a
++	 * runnable task should be able to wait before being scheduled. The
++	 * maximum timeout may not exceed the default timeout of 30 seconds.
++	 *
++	 * Defaults to the maximum allowed timeout value of 30 seconds.
++	 */
++	u32 timeout_ms;
++
++	/**
++	 * exit_dump_len - scx_exit_info.dump buffer length. If 0, the default
++	 * value of 32768 is used.
++	 */
++	u32 exit_dump_len;
++
++	/**
++	 * hotplug_seq - A sequence number that may be set by the scheduler to
++	 * detect when a hotplug event has occurred during the loading process.
++	 * If 0, no detection occurs. Otherwise, the scheduler will fail to
++	 * load if the sequence number does not match @scx_hotplug_seq on the
++	 * enable path.
++	 */
++	u64 hotplug_seq;
++
++	/**
++	 * name - BPF scheduler's name
++	 *
++	 * Must be a non-zero valid BPF object name including only isalnum(),
++	 * '_' and '.' chars. Shows up in kernel.sched_ext_ops sysctl while the
++	 * BPF scheduler is enabled.
++	 */
++	char name[SCX_OPS_NAME_LEN];
++};
++
++enum scx_opi {
++	SCX_OPI_BEGIN			= 0,
++	SCX_OPI_NORMAL_BEGIN		= 0,
++	SCX_OPI_NORMAL_END		= SCX_OP_IDX(cpu_online),
++	SCX_OPI_CPU_HOTPLUG_BEGIN	= SCX_OP_IDX(cpu_online),
++	SCX_OPI_CPU_HOTPLUG_END		= SCX_OP_IDX(init),
++	SCX_OPI_END			= SCX_OP_IDX(init),
++};
++
++enum scx_wake_flags {
++	/* expose select WF_* flags as enums */
++	SCX_WAKE_FORK		= WF_FORK,
++	SCX_WAKE_TTWU		= WF_TTWU,
++	SCX_WAKE_SYNC		= WF_SYNC,
++};
++
++enum scx_enq_flags {
++	/* expose select ENQUEUE_* flags as enums */
++	SCX_ENQ_WAKEUP		= ENQUEUE_WAKEUP,
++	SCX_ENQ_HEAD		= ENQUEUE_HEAD,
++
++	/* high 32bits are SCX specific */
++
++	/*
++	 * Set the following to trigger preemption when calling
++	 * scx_bpf_dispatch() with a local dsq as the target. The slice of the
++	 * current task is cleared to zero and the CPU is kicked into the
++	 * scheduling path. Implies %SCX_ENQ_HEAD.
++	 */
++	SCX_ENQ_PREEMPT		= 1LLU << 32,
++
++	/*
++	 * The task being enqueued was previously enqueued on the current CPU's
++	 * %SCX_DSQ_LOCAL, but was removed from it in a call to the
++	 * scx_bpf_reenqueue_local() kfunc. If scx_bpf_reenqueue_local() was
++	 * invoked in a ->cpu_release() callback, and the task is again
++	 * dispatched back to %SCX_DSQ_LOCAL by this current ->enqueue(), the
++	 * task will not be scheduled on the CPU until at least the next invocation
++	 * of the ->cpu_acquire() callback.
++	 */
++	SCX_ENQ_REENQ		= 1LLU << 40,
++
++	/*
++	 * The task being enqueued is the only task available for the cpu. By
++	 * default, ext core keeps executing such tasks but when
++	 * %SCX_OPS_ENQ_LAST is specified, they're ops.enqueue()'d with the
++	 * %SCX_ENQ_LAST flag set.
++	 *
++	 * If the BPF scheduler wants to continue executing the task,
++	 * ops.enqueue() should dispatch the task to %SCX_DSQ_LOCAL immediately.
++	 * If the task gets queued on a different dsq or the BPF side, the BPF
++	 * scheduler is responsible for triggering a follow-up scheduling event.
++	 * Otherwise, execution may stall.
++	 */
++	SCX_ENQ_LAST		= 1LLU << 41,
++
++	/* high 8 bits are internal */
++	__SCX_ENQ_INTERNAL_MASK	= 0xffLLU << 56,
++
++	SCX_ENQ_CLEAR_OPSS	= 1LLU << 56,
++	SCX_ENQ_DSQ_PRIQ	= 1LLU << 57,
++};
++
++enum scx_deq_flags {
++	/* expose select DEQUEUE_* flags as enums */
++	SCX_DEQ_SLEEP		= DEQUEUE_SLEEP,
++
++	/* high 32bits are SCX specific */
++
++	/*
++	 * The generic core-sched layer decided to execute the task even though
++	 * it hasn't been dispatched yet. Dequeue from the BPF side.
++	 */
++	SCX_DEQ_CORE_SCHED_EXEC	= 1LLU << 32,
++};
++
++enum scx_pick_idle_cpu_flags {
++	SCX_PICK_IDLE_CORE	= 1LLU << 0,	/* pick a CPU whose SMT siblings are also idle */
++};
++
++enum scx_kick_flags {
++	/*
++	 * Kick the target CPU if idle. Guarantees that the target CPU goes
++	 * through at least one full scheduling cycle before going idle. If the
++	 * target CPU can be determined to be currently not idle and going to go
++	 * through a scheduling cycle before going idle, noop.
++	 */
++	SCX_KICK_IDLE		= 1LLU << 0,
++
++	/*
++	 * Preempt the current task and execute the dispatch path. If the
++	 * current task of the target CPU is an SCX task, its ->scx.slice is
++	 * cleared to zero before the scheduling path is invoked so that the
++	 * task expires and the dispatch path is invoked.
++	 */
++	SCX_KICK_PREEMPT	= 1LLU << 1,
++
++	/*
++	 * Wait for the CPU to be rescheduled. The scx_bpf_kick_cpu() call will
++	 * return after the target CPU finishes picking the next task.
++	 */
++	SCX_KICK_WAIT		= 1LLU << 2,
++};
++
++enum scx_tg_flags {
++	SCX_TG_ONLINE		= 1U << 0,
++	SCX_TG_INITED		= 1U << 1,
++};
++
++enum scx_ops_enable_state {
++	SCX_OPS_ENABLING,
++	SCX_OPS_ENABLED,
++	SCX_OPS_DISABLING,
++	SCX_OPS_DISABLED,
++};
++
++static const char *scx_ops_enable_state_str[] = {
++	[SCX_OPS_ENABLING]	= "enabling",
++	[SCX_OPS_ENABLED]	= "enabled",
++	[SCX_OPS_DISABLING]	= "disabling",
++	[SCX_OPS_DISABLED]	= "disabled",
++};
++
++/*
++ * sched_ext_entity->ops_state
++ *
++ * Used to track the task ownership between the SCX core and the BPF scheduler.
++ * State transitions look as follows:
++ *
++ * NONE -> QUEUEING -> QUEUED -> DISPATCHING
++ *   ^              |                 |
++ *   |              v                 v
++ *   \-------------------------------/
++ *
++ * QUEUEING and DISPATCHING states can be waited upon. See wait_ops_state() call
++ * sites for explanations on the conditions being waited upon and why they are
++ * safe. Transitions out of them into NONE or QUEUED must store_release and the
++ * waiters should load_acquire.
++ *
++ * Tracking scx_ops_state enables sched_ext core to reliably determine whether
++ * any given task can be dispatched by the BPF scheduler at all times and thus
++ * relaxes the requirements on the BPF scheduler. This allows the BPF scheduler
++ * to try to dispatch any task anytime regardless of its state as the SCX core
++ * can safely reject invalid dispatches.
++ */
++enum scx_ops_state {
++	SCX_OPSS_NONE,		/* owned by the SCX core */
++	SCX_OPSS_QUEUEING,	/* in transit to the BPF scheduler */
++	SCX_OPSS_QUEUED,	/* owned by the BPF scheduler */
++	SCX_OPSS_DISPATCHING,	/* in transit back to the SCX core */
++
++	/*
++	 * QSEQ brands each QUEUED instance so that, when dispatch races
++	 * dequeue/requeue, the dispatcher can tell whether it still has a claim
++	 * on the task being dispatched.
++	 *
++	 * As some 32bit archs can't do 64bit store_release/load_acquire,
++	 * p->scx.ops_state is atomic_long_t which leaves 30 bits for QSEQ on
++	 * 32bit machines. The dispatch race window QSEQ protects is very narrow
++	 * and runs with IRQ disabled. 30 bits should be sufficient.
++	 */
++	SCX_OPSS_QSEQ_SHIFT	= 2,
++};
++
++/* Use macros to ensure that the type is unsigned long for the masks */
++#define SCX_OPSS_STATE_MASK	((1LU << SCX_OPSS_QSEQ_SHIFT) - 1)
++#define SCX_OPSS_QSEQ_MASK	(~SCX_OPSS_STATE_MASK)
++
++/*
++ * During exit, a task may schedule after losing its PIDs. When disabling the
++ * BPF scheduler, we need to be able to iterate tasks in every state to
++ * guarantee system safety. Maintain a dedicated task list which contains every
++ * task between its fork and eventual free.
++ */
++static DEFINE_SPINLOCK(scx_tasks_lock);
++static LIST_HEAD(scx_tasks);
++
++/* ops enable/disable */
++static struct kthread_worker *scx_ops_helper;
++static DEFINE_MUTEX(scx_ops_enable_mutex);
++DEFINE_STATIC_KEY_FALSE(__scx_ops_enabled);
++DEFINE_STATIC_PERCPU_RWSEM(scx_fork_rwsem);
++static atomic_t scx_ops_enable_state_var = ATOMIC_INIT(SCX_OPS_DISABLED);
++static atomic_t scx_ops_bypass_depth = ATOMIC_INIT(0);
++static bool scx_ops_init_task_enabled;
++static bool scx_switching_all;
++DEFINE_STATIC_KEY_FALSE(__scx_switched_all);
++
++static struct sched_ext_ops scx_ops;
++static bool scx_warned_zero_slice;
++
++static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_last);
++static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_exiting);
++static DEFINE_STATIC_KEY_FALSE(scx_ops_cpu_preempt);
++static DEFINE_STATIC_KEY_FALSE(scx_builtin_idle_enabled);
++
++static struct static_key_false scx_has_op[SCX_OPI_END] =
++	{ [0 ... SCX_OPI_END-1] = STATIC_KEY_FALSE_INIT };
++
++static atomic_t scx_exit_kind = ATOMIC_INIT(SCX_EXIT_DONE);
++static struct scx_exit_info *scx_exit_info;
++
++static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0);
++static atomic_long_t scx_hotplug_seq = ATOMIC_LONG_INIT(0);
++
++/*
++ * A monotonically increasing sequence number that is incremented every time a
++ * scheduler is enabled. This can be used to check if any custom sched_ext
++ * scheduler has ever been used in the system.
++ */
++static atomic_long_t scx_enable_seq = ATOMIC_LONG_INIT(0);
++
++/*
++ * The maximum amount of time in jiffies that a task may be runnable without
++ * being scheduled on a CPU. If this timeout is exceeded, it will trigger
++ * scx_ops_error().
++ */
++static unsigned long scx_watchdog_timeout;
++
++/*
++ * The last time the delayed work was run. This delayed work relies on
++ * ksoftirqd being able to run to service timer interrupts, so it's possible
++ * that this work itself could get wedged. To account for this, we check that
++ * it's not stalled in the timer tick, and trigger an error if it is.
++ */
++static unsigned long scx_watchdog_timestamp = INITIAL_JIFFIES;
++
++static struct delayed_work scx_watchdog_work;
++
++/* idle tracking */
++#ifdef CONFIG_SMP
++#ifdef CONFIG_CPUMASK_OFFSTACK
++#define CL_ALIGNED_IF_ONSTACK
++#else
++#define CL_ALIGNED_IF_ONSTACK __cacheline_aligned_in_smp
++#endif
++
++static struct {
++	cpumask_var_t cpu;
++	cpumask_var_t smt;
++} idle_masks CL_ALIGNED_IF_ONSTACK;
++
++#endif	/* CONFIG_SMP */
++
++/* for %SCX_KICK_WAIT */
++static unsigned long __percpu *scx_kick_cpus_pnt_seqs;
++
++/*
++ * Direct dispatch marker.
++ *
++ * Non-NULL values are used for direct dispatch from enqueue path. A valid
++ * pointer points to the task currently being enqueued. An ERR_PTR value is used
++ * to indicate that direct dispatch has already happened.
++ */
++static DEFINE_PER_CPU(struct task_struct *, direct_dispatch_task);
++
++/*
++ * Dispatch queues.
++ *
++ * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability. This is
++ * to avoid live-locking in bypass mode where all tasks are dispatched to
++ * %SCX_DSQ_GLOBAL and all CPUs consume from it. If per-node split isn't
++ * sufficient, it can be further split.
++ */
++static struct scx_dispatch_q **global_dsqs;
++
++static const struct rhashtable_params dsq_hash_params = {
++	.key_len		= 8,
++	.key_offset		= offsetof(struct scx_dispatch_q, id),
++	.head_offset		= offsetof(struct scx_dispatch_q, hash_node),
++};
++
++static struct rhashtable dsq_hash;
++static LLIST_HEAD(dsqs_to_free);
++
++/* dispatch buf */
++struct scx_dsp_buf_ent {
++	struct task_struct	*task;
++	unsigned long		qseq;
++	u64			dsq_id;
++	u64			enq_flags;
++};
++
++static u32 scx_dsp_max_batch;
++
++struct scx_dsp_ctx {
++	struct rq		*rq;
++	u32			cursor;
++	u32			nr_tasks;
++	struct scx_dsp_buf_ent	buf[];
++};
++
++static struct scx_dsp_ctx __percpu *scx_dsp_ctx;
++
++/* string formatting from BPF */
++struct scx_bstr_buf {
++	u64			data[MAX_BPRINTF_VARARGS];
++	char			line[SCX_EXIT_MSG_LEN];
++};
++
++static DEFINE_RAW_SPINLOCK(scx_exit_bstr_buf_lock);
++static struct scx_bstr_buf scx_exit_bstr_buf;
++
++/* ops debug dump */
++struct scx_dump_data {
++	s32			cpu;
++	bool			first;
++	s32			cursor;
++	struct seq_buf		*s;
++	const char		*prefix;
++	struct scx_bstr_buf	buf;
++};
++
++static struct scx_dump_data scx_dump_data = {
++	.cpu			= -1,
++};
++
++/* /sys/kernel/sched_ext interface */
++static struct kset *scx_kset;
++static struct kobject *scx_root_kobj;
++
++#define CREATE_TRACE_POINTS
++#include <trace/events/sched_ext.h>
++
++static void process_ddsp_deferred_locals(struct rq *rq);
++static void scx_bpf_kick_cpu(s32 cpu, u64 flags);
++static __printf(3, 4) void scx_ops_exit_kind(enum scx_exit_kind kind,
++					     s64 exit_code,
++					     const char *fmt, ...);
++
++#define scx_ops_error_kind(err, fmt, args...)					\
++	scx_ops_exit_kind((err), 0, fmt, ##args)
++
++#define scx_ops_exit(code, fmt, args...)					\
++	scx_ops_exit_kind(SCX_EXIT_UNREG_KERN, (code), fmt, ##args)
++
++#define scx_ops_error(fmt, args...)						\
++	scx_ops_error_kind(SCX_EXIT_ERROR, fmt, ##args)
++
++#define SCX_HAS_OP(op)	static_branch_likely(&scx_has_op[SCX_OP_IDX(op)])
++
++static long jiffies_delta_msecs(unsigned long at, unsigned long now)
++{
++	if (time_after(at, now))
++		return jiffies_to_msecs(at - now);
++	else
++		return -(long)jiffies_to_msecs(now - at);
++}
++
++/* if the highest set bit is N, return a mask with bits [N+1, 31] set */
++static u32 higher_bits(u32 flags)
++{
++	return ~(u32)(((u64)1 << fls(flags)) - 1); /* fls() can return 32 */
++}
++
++/* return the mask with only the highest bit set */
++static u32 highest_bit(u32 flags)
++{
++	int bit = fls(flags);
++	return ((u64)1 << bit) >> 1;
++}
++
++static bool u32_before(u32 a, u32 b)
++{
++	return (s32)(a - b) < 0;
++}
++
++static struct scx_dispatch_q *find_global_dsq(struct task_struct *p)
++{
++	return global_dsqs[cpu_to_node(task_cpu(p))];
++}
++
++static struct scx_dispatch_q *find_user_dsq(u64 dsq_id)
++{
++	return rhashtable_lookup_fast(&dsq_hash, &dsq_id, dsq_hash_params);
++}
++
++/*
++ * scx_kf_mask enforcement. Some kfuncs can only be called from specific SCX
++ * ops. When invoking SCX ops, SCX_CALL_OP[_RET]() should be used to indicate
++ * the allowed kfuncs and those kfuncs should use scx_kf_allowed() to check
++ * whether it's running from an allowed context.
++ *
++ * @mask is constant, always inline to cull the mask calculations.
++ */
++static __always_inline void scx_kf_allow(u32 mask)
++{
++	/* nesting is allowed only in increasing scx_kf_mask order */
++	WARN_ONCE((mask | higher_bits(mask)) & current->scx.kf_mask,
++		  "invalid nesting current->scx.kf_mask=0x%x mask=0x%x\n",
++		  current->scx.kf_mask, mask);
++	current->scx.kf_mask |= mask;
++	barrier();
++}
++
++static void scx_kf_disallow(u32 mask)
++{
++	barrier();
++	current->scx.kf_mask &= ~mask;
++}
++
++#define SCX_CALL_OP(mask, op, args...)						\
++do {										\
++	if (mask) {								\
++		scx_kf_allow(mask);						\
++		scx_ops.op(args);						\
++		scx_kf_disallow(mask);						\
++	} else {								\
++		scx_ops.op(args);						\
++	}									\
++} while (0)
++
++#define SCX_CALL_OP_RET(mask, op, args...)					\
++({										\
++	__typeof__(scx_ops.op(args)) __ret;					\
++	if (mask) {								\
++		scx_kf_allow(mask);						\
++		__ret = scx_ops.op(args);					\
++		scx_kf_disallow(mask);						\
++	} else {								\
++		__ret = scx_ops.op(args);					\
++	}									\
++	__ret;									\
++})
++
++/*
++ * Some kfuncs are allowed only on the tasks that are subjects of the
++ * in-progress scx_ops operation for, e.g., locking guarantees. To enforce such
++ * restrictions, the following SCX_CALL_OP_*() variants should be used when
++ * invoking scx_ops operations that take task arguments. These can only be used
++ * for non-nesting operations due to the way the tasks are tracked.
++ *
++ * kfuncs which can only operate on such tasks can in turn use
++ * scx_kf_allowed_on_arg_tasks() to test whether the invocation is allowed on
++ * the specific task.
++ */
++#define SCX_CALL_OP_TASK(mask, op, task, args...)				\
++do {										\
++	BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL);				\
++	current->scx.kf_tasks[0] = task;					\
++	SCX_CALL_OP(mask, op, task, ##args);					\
++	current->scx.kf_tasks[0] = NULL;					\
++} while (0)
++
++#define SCX_CALL_OP_TASK_RET(mask, op, task, args...)				\
++({										\
++	__typeof__(scx_ops.op(task, ##args)) __ret;				\
++	BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL);				\
++	current->scx.kf_tasks[0] = task;					\
++	__ret = SCX_CALL_OP_RET(mask, op, task, ##args);			\
++	current->scx.kf_tasks[0] = NULL;					\
++	__ret;									\
++})
++
++#define SCX_CALL_OP_2TASKS_RET(mask, op, task0, task1, args...)			\
++({										\
++	__typeof__(scx_ops.op(task0, task1, ##args)) __ret;			\
++	BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL);				\
++	current->scx.kf_tasks[0] = task0;					\
++	current->scx.kf_tasks[1] = task1;					\
++	__ret = SCX_CALL_OP_RET(mask, op, task0, task1, ##args);		\
++	current->scx.kf_tasks[0] = NULL;					\
++	current->scx.kf_tasks[1] = NULL;					\
++	__ret;									\
++})
++
++/* @mask is constant, always inline to cull unnecessary branches */
++static __always_inline bool scx_kf_allowed(u32 mask)
++{
++	if (unlikely(!(current->scx.kf_mask & mask))) {
++		scx_ops_error("kfunc with mask 0x%x called from an operation only allowing 0x%x",
++			      mask, current->scx.kf_mask);
++		return false;
++	}
++
++	/*
++	 * Enforce nesting boundaries. e.g. A kfunc which can be called from
++	 * DISPATCH must not be called if we're running DEQUEUE which is nested
++	 * inside ops.dispatch(). We don't need to check boundaries for any
++	 * blocking kfuncs as the verifier ensures they're only called from
++	 * sleepable progs.
++	 */
++	if (unlikely(highest_bit(mask) == SCX_KF_CPU_RELEASE &&
++		     (current->scx.kf_mask & higher_bits(SCX_KF_CPU_RELEASE)))) {
++		scx_ops_error("cpu_release kfunc called from a nested operation");
++		return false;
++	}
++
++	if (unlikely(highest_bit(mask) == SCX_KF_DISPATCH &&
++		     (current->scx.kf_mask & higher_bits(SCX_KF_DISPATCH)))) {
++		scx_ops_error("dispatch kfunc called from a nested operation");
++		return false;
++	}
++
++	return true;
++}
++
++/* see SCX_CALL_OP_TASK() */
++static __always_inline bool scx_kf_allowed_on_arg_tasks(u32 mask,
++							struct task_struct *p)
++{
++	if (!scx_kf_allowed(mask))
++		return false;
++
++	if (unlikely((p != current->scx.kf_tasks[0] &&
++		      p != current->scx.kf_tasks[1]))) {
++		scx_ops_error("called on a task not being operated on");
++		return false;
++	}
++
++	return true;
++}
++
++static bool scx_kf_allowed_if_unlocked(void)
++{
++	return !current->scx.kf_mask;
++}
++
++/**
++ * nldsq_next_task - Iterate to the next task in a non-local DSQ
++ * @dsq: user dsq being iterated
++ * @cur: current position, %NULL to start iteration
++ * @rev: walk backwards
++ *
++ * Returns %NULL when iteration is finished.
++ */
++static struct task_struct *nldsq_next_task(struct scx_dispatch_q *dsq,
++					   struct task_struct *cur, bool rev)
++{
++	struct list_head *list_node;
++	struct scx_dsq_list_node *dsq_lnode;
++
++	lockdep_assert_held(&dsq->lock);
++
++	if (cur)
++		list_node = &cur->scx.dsq_list.node;
++	else
++		list_node = &dsq->list;
++
++	/* find the next task, need to skip BPF iteration cursors */
++	do {
++		if (rev)
++			list_node = list_node->prev;
++		else
++			list_node = list_node->next;
++
++		if (list_node == &dsq->list)
++			return NULL;
++
++		dsq_lnode = container_of(list_node, struct scx_dsq_list_node,
++					 node);
++	} while (dsq_lnode->flags & SCX_DSQ_LNODE_ITER_CURSOR);
++
++	return container_of(dsq_lnode, struct task_struct, scx.dsq_list);
++}
++
++#define nldsq_for_each_task(p, dsq)						\
++	for ((p) = nldsq_next_task((dsq), NULL, false); (p);			\
++	     (p) = nldsq_next_task((dsq), (p), false))
++
++
++/*
++ * BPF DSQ iterator. Tasks in a non-local DSQ can be iterated in [reverse]
++ * dispatch order. BPF-visible iterator is opaque and larger to allow future
++ * changes without breaking backward compatibility. Can be used with
++ * bpf_for_each(). See bpf_iter_scx_dsq_*().
++ */
++enum scx_dsq_iter_flags {
++	/* iterate in the reverse dispatch order */
++	SCX_DSQ_ITER_REV		= 1U << 16,
++
++	__SCX_DSQ_ITER_HAS_SLICE	= 1U << 30,
++	__SCX_DSQ_ITER_HAS_VTIME	= 1U << 31,
++
++	__SCX_DSQ_ITER_USER_FLAGS	= SCX_DSQ_ITER_REV,
++	__SCX_DSQ_ITER_ALL_FLAGS	= __SCX_DSQ_ITER_USER_FLAGS |
++					  __SCX_DSQ_ITER_HAS_SLICE |
++					  __SCX_DSQ_ITER_HAS_VTIME,
++};
++
++struct bpf_iter_scx_dsq_kern {
++	struct scx_dsq_list_node	cursor;
++	struct scx_dispatch_q		*dsq;
++	u64				slice;
++	u64				vtime;
++} __attribute__((aligned(8)));
++
++struct bpf_iter_scx_dsq {
++	u64				__opaque[6];
++} __attribute__((aligned(8)));
++
++
++/*
++ * SCX task iterator.
++ */
++struct scx_task_iter {
++	struct sched_ext_entity		cursor;
++	struct task_struct		*locked;
++	struct rq			*rq;
++	struct rq_flags			rf;
++	u32				cnt;
++};
++
++/**
++ * scx_task_iter_start - Lock scx_tasks_lock and start a task iteration
++ * @iter: iterator to init
++ *
++ * Initialize @iter and return with scx_tasks_lock held. Once initialized, @iter
++ * must eventually be stopped with scx_task_iter_stop().
++ *
++ * scx_tasks_lock and the rq lock may be released using scx_task_iter_unlock()
++ * between this and the first next() call or between any two next() calls. If
++ * the locks are released between two next() calls, the caller is responsible
++ * for ensuring that the task being iterated remains accessible either through
++ * RCU read lock or obtaining a reference count.
++ *
++ * All tasks which existed when the iteration started are guaranteed to be
++ * visited as long as they still exist.
++ */
++static void scx_task_iter_start(struct scx_task_iter *iter)
++{
++	BUILD_BUG_ON(__SCX_DSQ_ITER_ALL_FLAGS &
++		     ((1U << __SCX_DSQ_LNODE_PRIV_SHIFT) - 1));
++
++	spin_lock_irq(&scx_tasks_lock);
++
++	iter->cursor = (struct sched_ext_entity){ .flags = SCX_TASK_CURSOR };
++	list_add(&iter->cursor.tasks_node, &scx_tasks);
++	iter->locked = NULL;
++	iter->cnt = 0;
++}
++
++static void __scx_task_iter_rq_unlock(struct scx_task_iter *iter)
++{
++	if (iter->locked) {
++		task_rq_unlock(iter->rq, iter->locked, &iter->rf);
++		iter->locked = NULL;
++	}
++}
++
++/**
++ * scx_task_iter_unlock - Unlock rq and scx_tasks_lock held by a task iterator
++ * @iter: iterator to unlock
++ *
++ * If @iter is in the middle of a locked iteration, it may be locking the rq of
++ * the task currently being visited in addition to scx_tasks_lock. Unlock both.
++ * This function can be safely called anytime during an iteration.
++ */
++static void scx_task_iter_unlock(struct scx_task_iter *iter)
++{
++	__scx_task_iter_rq_unlock(iter);
++	spin_unlock_irq(&scx_tasks_lock);
++}
++
++/**
++ * scx_task_iter_relock - Lock scx_tasks_lock released by scx_task_iter_unlock()
++ * @iter: iterator to re-lock
++ *
++ * Re-lock scx_tasks_lock unlocked by scx_task_iter_unlock(). Note that it
++ * doesn't re-lock the rq lock. Must be called before other iterator operations.
++ */
++static void scx_task_iter_relock(struct scx_task_iter *iter)
++{
++	spin_lock_irq(&scx_tasks_lock);
++}
++
++/**
++ * scx_task_iter_stop - Stop a task iteration and unlock scx_tasks_lock
++ * @iter: iterator to exit
++ *
++ * Exit a previously initialized @iter. Must be called with scx_tasks_lock held
++ * which is released on return. If the iterator holds a task's rq lock, that rq
++ * lock is also released. See scx_task_iter_start() for details.
++ */
++static void scx_task_iter_stop(struct scx_task_iter *iter)
++{
++	list_del_init(&iter->cursor.tasks_node);
++	scx_task_iter_unlock(iter);
++}
++
++/**
++ * scx_task_iter_next - Next task
++ * @iter: iterator to walk
++ *
++ * Visit the next task. See scx_task_iter_start() for details. Locks are dropped
++ * and re-acquired every %SCX_OPS_TASK_ITER_BATCH iterations to avoid causing
++ * stalls by holding scx_tasks_lock for too long.
++ */
++static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
++{
++	struct list_head *cursor = &iter->cursor.tasks_node;
++	struct sched_ext_entity *pos;
++
++	if (!(++iter->cnt % SCX_OPS_TASK_ITER_BATCH)) {
++		scx_task_iter_unlock(iter);
++		cpu_relax();
++		cond_resched();
++		scx_task_iter_relock(iter);
++	}
++
++	list_for_each_entry(pos, cursor, tasks_node) {
++		if (&pos->tasks_node == &scx_tasks)
++			return NULL;
++		if (!(pos->flags & SCX_TASK_CURSOR)) {
++			list_move(cursor, &pos->tasks_node);
++			return container_of(pos, struct task_struct, scx);
++		}
++	}
++
++	/* can't happen, should always terminate at scx_tasks above */
++	BUG();
++}
++
++/**
++ * scx_task_iter_next_locked - Next non-idle task with its rq locked
++ * @iter: iterator to walk
++ *
++ * Visit the next non-idle task with its rq lock held. The rq lock held for the
++ * previously visited task, if any, is released first. Returns %NULL when the
++ * iteration is finished with no rq lock held. See scx_task_iter_start() for
++ * details.
++ */
++static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter)
++{
++	struct task_struct *p;
++
++	__scx_task_iter_rq_unlock(iter);
++
++	while ((p = scx_task_iter_next(iter))) {
++		/*
++		 * scx_task_iter is used to prepare and move tasks into SCX
++		 * while loading the BPF scheduler and vice-versa while
++		 * unloading. The init_tasks ("swappers") should be excluded
++		 * from the iteration because:
++		 *
++		 * - It's unsafe to use __setscheduler_prio() on an init_task to
++		 *   determine the sched_class to use as it won't preserve its
++		 *   idle_sched_class.
++		 *
++		 * - ops.init/exit_task() can easily be confused if called with
++		 *   init_tasks as they, e.g., share PID 0.
++		 *
++		 * As init_tasks are never scheduled through SCX, they can be
++		 * skipped safely. Note that is_idle_task() which tests %PF_IDLE
++		 * doesn't work here:
++		 *
++		 * - %PF_IDLE may not be set for an init_task whose CPU hasn't
++		 *   yet been onlined.
++		 *
++		 * - %PF_IDLE can be set on tasks that are not init_tasks. See
++		 *   play_idle_precise() used by CONFIG_IDLE_INJECT.
++		 *
++		 * Test for idle_sched_class as only init_tasks are on it.
++		 */
++		if (p->sched_class != &idle_sched_class)
++			break;
++	}
++	if (!p)
++		return NULL;
++
++	iter->rq = task_rq_lock(p, &iter->rf);
++	iter->locked = p;
++
++	return p;
++}
++
++static enum scx_ops_enable_state scx_ops_enable_state(void)
++{
++	return atomic_read(&scx_ops_enable_state_var);
++}
++
++static enum scx_ops_enable_state
++scx_ops_set_enable_state(enum scx_ops_enable_state to)
++{
++	return atomic_xchg(&scx_ops_enable_state_var, to);
++}
++
++static bool scx_ops_tryset_enable_state(enum scx_ops_enable_state to,
++					enum scx_ops_enable_state from)
++{
++	int from_v = from;
++
++	return atomic_try_cmpxchg(&scx_ops_enable_state_var, &from_v, to);
++}
++
++static bool scx_rq_bypassing(struct rq *rq)
++{
++	return unlikely(rq->scx.flags & SCX_RQ_BYPASSING);
++}
++
++/**
++ * wait_ops_state - Busy-wait the specified ops state to end
++ * @p: target task
++ * @opss: state to wait the end of
++ *
++ * Busy-wait for @p to transition out of @opss. This can only be used when the
++ * state part of @opss is %SCX_OPSS_QUEUEING or %SCX_OPSS_DISPATCHING. This
++ * function also has load_acquire semantics to ensure that the caller can see
++ * the updates made in the enqueueing and dispatching paths.
++ */
++static void wait_ops_state(struct task_struct *p, unsigned long opss)
++{
++	do {
++		cpu_relax();
++	} while (atomic_long_read_acquire(&p->scx.ops_state) == opss);
++}
++
++/**
++ * ops_cpu_valid - Verify a cpu number
++ * @cpu: cpu number which came from a BPF ops
++ * @where: extra information reported on error
++ *
++ * @cpu is a cpu number which came from the BPF scheduler and can be any value.
++ * Verify that it is in range and one of the possible cpus. If invalid, trigger
++ * an ops error.
++ */
++static bool ops_cpu_valid(s32 cpu, const char *where)
++{
++	if (likely(cpu >= 0 && cpu < nr_cpu_ids && cpu_possible(cpu))) {
++		return true;
++	} else {
++		scx_ops_error("invalid CPU %d%s%s", cpu,
++			      where ? " " : "", where ?: "");
++		return false;
++	}
++}
++
++/**
++ * ops_sanitize_err - Sanitize a -errno value
++ * @ops_name: operation to blame on failure
++ * @err: -errno value to sanitize
++ *
++ * Verify @err is a valid -errno. If not, trigger scx_ops_error() and return
++ * -%EPROTO. This is necessary because returning a rogue -errno up the chain can
++ * cause misbehaviors. For an example, a large negative return from
++ * ops.init_task() triggers an oops when passed up the call chain because the
++ * value fails IS_ERR() test after being encoded with ERR_PTR() and then is
++ * handled as a pointer.
++ */
++static int ops_sanitize_err(const char *ops_name, s32 err)
++{
++	if (err < 0 && err >= -MAX_ERRNO)
++		return err;
++
++	scx_ops_error("ops.%s() returned an invalid errno %d", ops_name, err);
++	return -EPROTO;
++}
++
++static void run_deferred(struct rq *rq)
++{
++	process_ddsp_deferred_locals(rq);
++}
++
++#ifdef CONFIG_SMP
++static void deferred_bal_cb_workfn(struct rq *rq)
++{
++	run_deferred(rq);
++}
++#endif
++
++static void deferred_irq_workfn(struct irq_work *irq_work)
++{
++	struct rq *rq = container_of(irq_work, struct rq, scx.deferred_irq_work);
++
++	raw_spin_rq_lock(rq);
++	run_deferred(rq);
++	raw_spin_rq_unlock(rq);
++}
++
++/**
++ * schedule_deferred - Schedule execution of deferred actions on an rq
++ * @rq: target rq
++ *
++ * Schedule execution of deferred actions on @rq. Must be called with @rq
++ * locked. Deferred actions are executed with @rq locked but unpinned, and thus
++ * can unlock @rq to e.g. migrate tasks to other rqs.
++ */
++static void schedule_deferred(struct rq *rq)
++{
++	lockdep_assert_rq_held(rq);
++
++#ifdef CONFIG_SMP
++	/*
++	 * If in the middle of waking up a task, task_woken_scx() will be called
++	 * afterwards which will then run the deferred actions, no need to
++	 * schedule anything.
++	 */
++	if (rq->scx.flags & SCX_RQ_IN_WAKEUP)
++		return;
++
++	/*
++	 * If in balance, the balance callbacks will be called before rq lock is
++	 * released. Schedule one.
++	 */
++	if (rq->scx.flags & SCX_RQ_IN_BALANCE) {
++		queue_balance_callback(rq, &rq->scx.deferred_bal_cb,
++				       deferred_bal_cb_workfn);
++		return;
++	}
++#endif
++	/*
++	 * No scheduler hooks available. Queue an irq work. They are executed on
++	 * IRQ re-enable which may take a bit longer than the scheduler hooks.
++	 * The above WAKEUP and BALANCE paths should cover most of the cases and
++	 * the time to IRQ re-enable shouldn't be long.
++	 */
++	irq_work_queue(&rq->scx.deferred_irq_work);
++}
++
++/**
++ * touch_core_sched - Update timestamp used for core-sched task ordering
++ * @rq: rq to read clock from, must be locked
++ * @p: task to update the timestamp for
++ *
++ * Update @p->scx.core_sched_at timestamp. This is used by scx_prio_less() to
++ * implement global or local-DSQ FIFO ordering for core-sched. Should be called
++ * when a task becomes runnable and its turn on the CPU ends (e.g. slice
++ * exhaustion).
++ */
++static void touch_core_sched(struct rq *rq, struct task_struct *p)
++{
++	lockdep_assert_rq_held(rq);
++
++#ifdef CONFIG_SCHED_CORE
++	/*
++	 * It's okay to update the timestamp spuriously. Use
++	 * sched_core_disabled() which is cheaper than enabled().
++	 *
++	 * As this is used to determine ordering between tasks of sibling CPUs,
++	 * it may be better to use per-core dispatch sequence instead.
++	 */
++	if (!sched_core_disabled())
++		p->scx.core_sched_at = sched_clock_cpu(cpu_of(rq));
++#endif
++}
++
++/**
++ * touch_core_sched_dispatch - Update core-sched timestamp on dispatch
++ * @rq: rq to read clock from, must be locked
++ * @p: task being dispatched
++ *
++ * If the BPF scheduler implements custom core-sched ordering via
++ * ops.core_sched_before(), @p->scx.core_sched_at is used to implement FIFO
++ * ordering within each local DSQ. This function is called from dispatch paths
++ * and updates @p->scx.core_sched_at if custom core-sched ordering is in effect.
++ */
++static void touch_core_sched_dispatch(struct rq *rq, struct task_struct *p)
++{
++	lockdep_assert_rq_held(rq);
++
++#ifdef CONFIG_SCHED_CORE
++	if (SCX_HAS_OP(core_sched_before))
++		touch_core_sched(rq, p);
++#endif
++}
++
++static void update_curr_scx(struct rq *rq)
++{
++	struct task_struct *curr = rq->curr;
++	s64 delta_exec;
++
++	delta_exec = update_curr_common(rq);
++	if (unlikely(delta_exec <= 0))
++		return;
++
++	if (curr->scx.slice != SCX_SLICE_INF) {
++		curr->scx.slice -= min_t(u64, curr->scx.slice, delta_exec);
++		if (!curr->scx.slice)
++			touch_core_sched(rq, curr);
++	}
++}
++
++static bool scx_dsq_priq_less(struct rb_node *node_a,
++			      const struct rb_node *node_b)
++{
++	const struct task_struct *a =
++		container_of(node_a, struct task_struct, scx.dsq_priq);
++	const struct task_struct *b =
++		container_of(node_b, struct task_struct, scx.dsq_priq);
++
++	return time_before64(a->scx.dsq_vtime, b->scx.dsq_vtime);
++}
++
++static void dsq_mod_nr(struct scx_dispatch_q *dsq, s32 delta)
++{
++	/* scx_bpf_dsq_nr_queued() reads ->nr without locking, use WRITE_ONCE() */
++	WRITE_ONCE(dsq->nr, dsq->nr + delta);
++}
++
++static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p,
++			     u64 enq_flags)
++{
++	bool is_local = dsq->id == SCX_DSQ_LOCAL;
++
++	WARN_ON_ONCE(p->scx.dsq || !list_empty(&p->scx.dsq_list.node));
++	WARN_ON_ONCE((p->scx.dsq_flags & SCX_TASK_DSQ_ON_PRIQ) ||
++		     !RB_EMPTY_NODE(&p->scx.dsq_priq));
++
++	if (!is_local) {
++		raw_spin_lock(&dsq->lock);
++		if (unlikely(dsq->id == SCX_DSQ_INVALID)) {
++			scx_ops_error("attempting to dispatch to a destroyed dsq");
++			/* fall back to the global dsq */
++			raw_spin_unlock(&dsq->lock);
++			dsq = find_global_dsq(p);
++			raw_spin_lock(&dsq->lock);
++		}
++	}
++
++	if (unlikely((dsq->id & SCX_DSQ_FLAG_BUILTIN) &&
++		     (enq_flags & SCX_ENQ_DSQ_PRIQ))) {
++		/*
++		 * SCX_DSQ_LOCAL and SCX_DSQ_GLOBAL DSQs always consume from
++		 * their FIFO queues. To avoid confusion and accidentally
++		 * starving vtime-dispatched tasks by FIFO-dispatched tasks, we
++		 * disallow any internal DSQ from doing vtime ordering of
++		 * tasks.
++		 */
++		scx_ops_error("cannot use vtime ordering for built-in DSQs");
++		enq_flags &= ~SCX_ENQ_DSQ_PRIQ;
++	}
++
++	if (enq_flags & SCX_ENQ_DSQ_PRIQ) {
++		struct rb_node *rbp;
++
++		/*
++		 * A PRIQ DSQ shouldn't be using FIFO enqueueing. As tasks are
++		 * linked to both the rbtree and list on PRIQs, this can only be
++		 * tested easily when adding the first task.
++		 */
++		if (unlikely(RB_EMPTY_ROOT(&dsq->priq) &&
++			     nldsq_next_task(dsq, NULL, false)))
++			scx_ops_error("DSQ ID 0x%016llx already had FIFO-enqueued tasks",
++				      dsq->id);
++
++		p->scx.dsq_flags |= SCX_TASK_DSQ_ON_PRIQ;
++		rb_add(&p->scx.dsq_priq, &dsq->priq, scx_dsq_priq_less);
++
++		/*
++		 * Find the previous task and insert after it on the list so
++		 * that @dsq->list is vtime ordered.
++		 */
++		rbp = rb_prev(&p->scx.dsq_priq);
++		if (rbp) {
++			struct task_struct *prev =
++				container_of(rbp, struct task_struct,
++					     scx.dsq_priq);
++			list_add(&p->scx.dsq_list.node, &prev->scx.dsq_list.node);
++		} else {
++			list_add(&p->scx.dsq_list.node, &dsq->list);
++		}
++	} else {
++		/* a FIFO DSQ shouldn't be using PRIQ enqueuing */
++		if (unlikely(!RB_EMPTY_ROOT(&dsq->priq)))
++			scx_ops_error("DSQ ID 0x%016llx already had PRIQ-enqueued tasks",
++				      dsq->id);
++
++		if (enq_flags & (SCX_ENQ_HEAD | SCX_ENQ_PREEMPT))
++			list_add(&p->scx.dsq_list.node, &dsq->list);
++		else
++			list_add_tail(&p->scx.dsq_list.node, &dsq->list);
++	}
++
++	/* seq records the order tasks are queued, used by BPF DSQ iterator */
++	dsq->seq++;
++	p->scx.dsq_seq = dsq->seq;
++
++	dsq_mod_nr(dsq, 1);
++	p->scx.dsq = dsq;
++
++	/*
++	 * scx.ddsp_dsq_id and scx.ddsp_enq_flags are only relevant on the
++	 * direct dispatch path, but we clear them here because the direct
++	 * dispatch verdict may be overridden on the enqueue path during e.g.
++	 * bypass.
++	 */
++	p->scx.ddsp_dsq_id = SCX_DSQ_INVALID;
++	p->scx.ddsp_enq_flags = 0;
++
++	/*
++	 * We're transitioning out of QUEUEING or DISPATCHING. store_release to
++	 * match waiters' load_acquire.
++	 */
++	if (enq_flags & SCX_ENQ_CLEAR_OPSS)
++		atomic_long_set_release(&p->scx.ops_state, SCX_OPSS_NONE);
++
++	if (is_local) {
++		struct rq *rq = container_of(dsq, struct rq, scx.local_dsq);
++		bool preempt = false;
++
++		if ((enq_flags & SCX_ENQ_PREEMPT) && p != rq->curr &&
++		    rq->curr->sched_class == &ext_sched_class) {
++			rq->curr->scx.slice = 0;
++			preempt = true;
++		}
++
++		if (preempt || sched_class_above(&ext_sched_class,
++						 rq->curr->sched_class))
++			resched_curr(rq);
++	} else {
++		raw_spin_unlock(&dsq->lock);
++	}
++}
++
++static void task_unlink_from_dsq(struct task_struct *p,
++				 struct scx_dispatch_q *dsq)
++{
++	WARN_ON_ONCE(list_empty(&p->scx.dsq_list.node));
++
++	if (p->scx.dsq_flags & SCX_TASK_DSQ_ON_PRIQ) {
++		rb_erase(&p->scx.dsq_priq, &dsq->priq);
++		RB_CLEAR_NODE(&p->scx.dsq_priq);
++		p->scx.dsq_flags &= ~SCX_TASK_DSQ_ON_PRIQ;
++	}
++
++	list_del_init(&p->scx.dsq_list.node);
++	dsq_mod_nr(dsq, -1);
++}
++
++static void dispatch_dequeue(struct rq *rq, struct task_struct *p)
++{
++	struct scx_dispatch_q *dsq = p->scx.dsq;
++	bool is_local = dsq == &rq->scx.local_dsq;
++
++	if (!dsq) {
++		/*
++		 * If !dsq && on-list, @p is on @rq's ddsp_deferred_locals.
++		 * Unlinking is all that's needed to cancel.
++		 */
++		if (unlikely(!list_empty(&p->scx.dsq_list.node)))
++			list_del_init(&p->scx.dsq_list.node);
++
++		/*
++		 * When dispatching directly from the BPF scheduler to a local
++		 * DSQ, the task isn't associated with any DSQ but
++		 * @p->scx.holding_cpu may be set under the protection of
++		 * %SCX_OPSS_DISPATCHING.
++		 */
++		if (p->scx.holding_cpu >= 0)
++			p->scx.holding_cpu = -1;
++
++		return;
++	}
++
++	if (!is_local)
++		raw_spin_lock(&dsq->lock);
++
++	/*
++	 * Now that we hold @dsq->lock, @p->scx.holding_cpu and @p->scx.dsq_*
++	 * can't change underneath us.
++	 */
++	if (p->scx.holding_cpu < 0) {
++		/* @p must still be on @dsq, dequeue */
++		task_unlink_from_dsq(p, dsq);
++	} else {
++		/*
++		 * We're racing against dispatch_to_local_dsq() which already
++		 * removed @p from @dsq and set @p->scx.holding_cpu. Clear the
++		 * holding_cpu which tells dispatch_to_local_dsq() that it lost
++		 * the race.
++		 */
++		WARN_ON_ONCE(!list_empty(&p->scx.dsq_list.node));
++		p->scx.holding_cpu = -1;
++	}
++	p->scx.dsq = NULL;
++
++	if (!is_local)
++		raw_spin_unlock(&dsq->lock);
++}
++
++static struct scx_dispatch_q *find_dsq_for_dispatch(struct rq *rq, u64 dsq_id,
++						    struct task_struct *p)
++{
++	struct scx_dispatch_q *dsq;
++
++	if (dsq_id == SCX_DSQ_LOCAL)
++		return &rq->scx.local_dsq;
++
++	if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) {
++		s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK;
++
++		if (!ops_cpu_valid(cpu, "in SCX_DSQ_LOCAL_ON dispatch verdict"))
++			return find_global_dsq(p);
++
++		return &cpu_rq(cpu)->scx.local_dsq;
++	}
++
++	if (dsq_id == SCX_DSQ_GLOBAL)
++		dsq = find_global_dsq(p);
++	else
++		dsq = find_user_dsq(dsq_id);
++
++	if (unlikely(!dsq)) {
++		scx_ops_error("non-existent DSQ 0x%llx for %s[%d]",
++			      dsq_id, p->comm, p->pid);
++		return find_global_dsq(p);
++	}
++
++	return dsq;
++}
++
++static void mark_direct_dispatch(struct task_struct *ddsp_task,
++				 struct task_struct *p, u64 dsq_id,
++				 u64 enq_flags)
++{
++	/*
++	 * Mark that dispatch already happened from ops.select_cpu() or
++	 * ops.enqueue() by spoiling direct_dispatch_task with a non-NULL value
++	 * which can never match a valid task pointer.
++	 */
++	__this_cpu_write(direct_dispatch_task, ERR_PTR(-ESRCH));
++
++	/* @p must match the task on the enqueue path */
++	if (unlikely(p != ddsp_task)) {
++		if (IS_ERR(ddsp_task))
++			scx_ops_error("%s[%d] already direct-dispatched",
++				      p->comm, p->pid);
++		else
++			scx_ops_error("scheduling for %s[%d] but trying to direct-dispatch %s[%d]",
++				      ddsp_task->comm, ddsp_task->pid,
++				      p->comm, p->pid);
++		return;
++	}
++
++	WARN_ON_ONCE(p->scx.ddsp_dsq_id != SCX_DSQ_INVALID);
++	WARN_ON_ONCE(p->scx.ddsp_enq_flags);
++
++	p->scx.ddsp_dsq_id = dsq_id;
++	p->scx.ddsp_enq_flags = enq_flags;
++}
++
++static void direct_dispatch(struct task_struct *p, u64 enq_flags)
++{
++	struct rq *rq = task_rq(p);
++	struct scx_dispatch_q *dsq =
++		find_dsq_for_dispatch(rq, p->scx.ddsp_dsq_id, p);
++
++	touch_core_sched_dispatch(rq, p);
++
++	p->scx.ddsp_enq_flags |= enq_flags;
++
++	/*
++	 * We are in the enqueue path with @rq locked and pinned, and thus can't
++	 * double lock a remote rq and enqueue to its local DSQ. For
++	 * DSQ_LOCAL_ON verdicts targeting the local DSQ of a remote CPU, defer
++	 * the enqueue so that it's executed when @rq can be unlocked.
++	 */
++	if (dsq->id == SCX_DSQ_LOCAL && dsq != &rq->scx.local_dsq) {
++		unsigned long opss;
++
++		opss = atomic_long_read(&p->scx.ops_state) & SCX_OPSS_STATE_MASK;
++
++		switch (opss & SCX_OPSS_STATE_MASK) {
++		case SCX_OPSS_NONE:
++			break;
++		case SCX_OPSS_QUEUEING:
++			/*
++			 * As @p was never passed to the BPF side, _release is
++			 * not strictly necessary. Still do it for consistency.
++			 */
++			atomic_long_set_release(&p->scx.ops_state, SCX_OPSS_NONE);
++			break;
++		default:
++			WARN_ONCE(true, "sched_ext: %s[%d] has invalid ops state 0x%lx in direct_dispatch()",
++				  p->comm, p->pid, opss);
++			atomic_long_set_release(&p->scx.ops_state, SCX_OPSS_NONE);
++			break;
++		}
++
++		WARN_ON_ONCE(p->scx.dsq || !list_empty(&p->scx.dsq_list.node));
++		list_add_tail(&p->scx.dsq_list.node,
++			      &rq->scx.ddsp_deferred_locals);
++		schedule_deferred(rq);
++		return;
++	}
++
++	dispatch_enqueue(dsq, p, p->scx.ddsp_enq_flags | SCX_ENQ_CLEAR_OPSS);
++}
++
++static bool scx_rq_online(struct rq *rq)
++{
++	/*
++	 * Test both cpu_active() and %SCX_RQ_ONLINE. %SCX_RQ_ONLINE indicates
++	 * the online state as seen from the BPF scheduler. cpu_active() test
++	 * guarantees that, if this function returns %true, %SCX_RQ_ONLINE will
++	 * stay set until the current scheduling operation is complete even if
++	 * we aren't locking @rq.
++	 */
++	return likely((rq->scx.flags & SCX_RQ_ONLINE) && cpu_active(cpu_of(rq)));
++}
++
++static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
++			    int sticky_cpu)
++{
++	struct task_struct **ddsp_taskp;
++	unsigned long qseq;
++
++	WARN_ON_ONCE(!(p->scx.flags & SCX_TASK_QUEUED));
++
++	/* rq migration */
++	if (sticky_cpu == cpu_of(rq))
++		goto local_norefill;
++
++	/*
++	 * If !scx_rq_online(), we already told the BPF scheduler that the CPU
++	 * is offline and are just running the hotplug path. Don't bother the
++	 * BPF scheduler.
++	 */
++	if (!scx_rq_online(rq))
++		goto local;
++
++	if (scx_rq_bypassing(rq)) {
++		if (enq_flags & SCX_ENQ_LAST)
++			goto local;
++		else
++			goto global;
++	}
++
++	if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
++		goto direct;
++
++	/* see %SCX_OPS_ENQ_EXITING */
++	if (!static_branch_unlikely(&scx_ops_enq_exiting) &&
++	    unlikely(p->flags & PF_EXITING))
++		goto local;
++
++	/* see %SCX_OPS_ENQ_LAST */
++	if (!static_branch_unlikely(&scx_ops_enq_last) &&
++	    (enq_flags & SCX_ENQ_LAST))
++		goto local;
++
++	if (!SCX_HAS_OP(enqueue))
++		goto global;
++
++	/* DSQ bypass didn't trigger, enqueue on the BPF scheduler */
++	qseq = rq->scx.ops_qseq++ << SCX_OPSS_QSEQ_SHIFT;
++
++	WARN_ON_ONCE(atomic_long_read(&p->scx.ops_state) != SCX_OPSS_NONE);
++	atomic_long_set(&p->scx.ops_state, SCX_OPSS_QUEUEING | qseq);
++
++	ddsp_taskp = this_cpu_ptr(&direct_dispatch_task);
++	WARN_ON_ONCE(*ddsp_taskp);
++	*ddsp_taskp = p;
++
++	SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, p, enq_flags);
++
++	*ddsp_taskp = NULL;
++	if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
++		goto direct;
++
++	/*
++	 * If not directly dispatched, QUEUEING isn't clear yet and dispatch or
++	 * dequeue may be waiting. The store_release matches their load_acquire.
++	 */
++	atomic_long_set_release(&p->scx.ops_state, SCX_OPSS_QUEUED | qseq);
++	return;
++
++direct:
++	direct_dispatch(p, enq_flags);
++	return;
++
++local:
++	/*
++	 * For task-ordering, slice refill must be treated as implying the end
++	 * of the current slice. Otherwise, the longer @p stays on the CPU, the
++	 * higher priority it becomes from scx_prio_less()'s POV.
++	 */
++	touch_core_sched(rq, p);
++	p->scx.slice = SCX_SLICE_DFL;
++local_norefill:
++	dispatch_enqueue(&rq->scx.local_dsq, p, enq_flags);
++	return;
++
++global:
++	touch_core_sched(rq, p);	/* see the comment in local: */
++	p->scx.slice = SCX_SLICE_DFL;
++	dispatch_enqueue(find_global_dsq(p), p, enq_flags);
++}
++
++static bool task_runnable(const struct task_struct *p)
++{
++	return !list_empty(&p->scx.runnable_node);
++}
++
++static void set_task_runnable(struct rq *rq, struct task_struct *p)
++{
++	lockdep_assert_rq_held(rq);
++
++	if (p->scx.flags & SCX_TASK_RESET_RUNNABLE_AT) {
++		p->scx.runnable_at = jiffies;
++		p->scx.flags &= ~SCX_TASK_RESET_RUNNABLE_AT;
++	}
++
++	/*
++	 * list_add_tail() must be used. scx_ops_bypass() depends on tasks being
++	 * appended to the runnable_list.
++	 */
++	list_add_tail(&p->scx.runnable_node, &rq->scx.runnable_list);
++}
++
++static void clr_task_runnable(struct task_struct *p, bool reset_runnable_at)
++{
++	list_del_init(&p->scx.runnable_node);
++	if (reset_runnable_at)
++		p->scx.flags |= SCX_TASK_RESET_RUNNABLE_AT;
++}
++
++static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int core_enq_flags)
++{
++	int sticky_cpu = p->scx.sticky_cpu;
++	/* u64: SCX flags above bit 31 in extra_enq_flags must not be truncated */
++	u64 enq_flags = core_enq_flags | rq->scx.extra_enq_flags;
++
++	if (core_enq_flags & ENQUEUE_WAKEUP)
++		rq->scx.flags |= SCX_RQ_IN_WAKEUP;
++
++	if (sticky_cpu >= 0)
++		p->scx.sticky_cpu = -1;
++
++	/*
++	 * Restoring a running task will be immediately followed by
++	 * set_next_task_scx() which expects the task to not be on the BPF
++	 * scheduler as tasks can only start running through local DSQs. Force
++	 * direct-dispatch into the local DSQ by setting the sticky_cpu.
++	 */
++	if (unlikely(enq_flags & ENQUEUE_RESTORE) && task_current(rq, p))
++		sticky_cpu = cpu_of(rq);
++
++	if (p->scx.flags & SCX_TASK_QUEUED) {
++		WARN_ON_ONCE(!task_runnable(p));
++		goto out;
++	}
++
++	set_task_runnable(rq, p);
++	p->scx.flags |= SCX_TASK_QUEUED;
++	rq->scx.nr_running++;
++	add_nr_running(rq, 1);
++
++	if (SCX_HAS_OP(runnable) && !task_on_rq_migrating(p))
++		SCX_CALL_OP_TASK(SCX_KF_REST, runnable, p, enq_flags);
++
++	if (enq_flags & SCX_ENQ_WAKEUP)
++		touch_core_sched(rq, p);
++
++	do_enqueue_task(rq, p, enq_flags, sticky_cpu);
++out:
++	rq->scx.flags &= ~SCX_RQ_IN_WAKEUP;
++}
++
++static void ops_dequeue(struct task_struct *p, u64 deq_flags)
++{
++	unsigned long opss;
++
++	/* dequeue is always temporary, don't reset runnable_at */
++	clr_task_runnable(p, false);
++
++	/* acquire ensures that we see the preceding updates on QUEUED */
++	opss = atomic_long_read_acquire(&p->scx.ops_state);
++
++	switch (opss & SCX_OPSS_STATE_MASK) {
++	case SCX_OPSS_NONE:
++		break;
++	case SCX_OPSS_QUEUEING:
++		/*
++		 * QUEUEING is started and finished while holding @p's rq lock.
++		 * As we're holding the rq lock now, we shouldn't see QUEUEING.
++		 */
++		BUG();
++	case SCX_OPSS_QUEUED:
++		if (SCX_HAS_OP(dequeue))
++			SCX_CALL_OP_TASK(SCX_KF_REST, dequeue, p, deq_flags);
++
++		if (atomic_long_try_cmpxchg(&p->scx.ops_state, &opss,
++					    SCX_OPSS_NONE))
++			break;
++		fallthrough;
++	case SCX_OPSS_DISPATCHING:
++		/*
++		 * If @p is being dispatched from the BPF scheduler to a DSQ,
++		 * wait for the transfer to complete so that @p doesn't get
++		 * added to its DSQ after dequeueing is complete.
++		 *
++		 * As we're waiting on DISPATCHING with the rq locked, the
++		 * dispatching side shouldn't try to lock the rq while
++		 * DISPATCHING is set. See dispatch_to_local_dsq().
++		 *
++		 * DISPATCHING shouldn't have qseq set and control can reach
++		 * here with NONE @opss from the above QUEUED case block.
++		 * Explicitly wait on %SCX_OPSS_DISPATCHING instead of @opss.
++		 */
++		wait_ops_state(p, SCX_OPSS_DISPATCHING);
++		BUG_ON(atomic_long_read(&p->scx.ops_state) != SCX_OPSS_NONE);
++		break;
++	}
++}
++
++static void dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags)
++{
++	if (!(p->scx.flags & SCX_TASK_QUEUED)) {
++		WARN_ON_ONCE(task_runnable(p));
++		return;
++	}
++
++	ops_dequeue(p, deq_flags);
++
++	/*
++	 * A currently running task which is going off @rq first gets dequeued
++	 * and then stops running. As we want running <-> stopping transitions
++	 * to be contained within runnable <-> quiescent transitions, trigger
++	 * ->stopping() early here instead of in put_prev_task_scx().
++	 *
++	 * @p may go through multiple stopping <-> running transitions between
++	 * here and put_prev_task_scx() if task attribute changes occur while
++	 * balance_scx() leaves @rq unlocked. However, they don't contain any
++	 * information meaningful to the BPF scheduler and can be suppressed by
++	 * skipping the callbacks if the task is !QUEUED.
++	 */
++	if (SCX_HAS_OP(stopping) && task_current(rq, p)) {
++		update_curr_scx(rq);
++		SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, false);
++	}
++
++	if (SCX_HAS_OP(quiescent) && !task_on_rq_migrating(p))
++		SCX_CALL_OP_TASK(SCX_KF_REST, quiescent, p, deq_flags);
++
++	if (deq_flags & SCX_DEQ_SLEEP)
++		p->scx.flags |= SCX_TASK_DEQD_FOR_SLEEP;
++	else
++		p->scx.flags &= ~SCX_TASK_DEQD_FOR_SLEEP;
++
++	p->scx.flags &= ~SCX_TASK_QUEUED;
++	rq->scx.nr_running--;
++	sub_nr_running(rq, 1);
++
++	dispatch_dequeue(rq, p);
++}
++
++static void yield_task_scx(struct rq *rq)
++{
++	struct task_struct *p = rq->curr;
++
++	if (SCX_HAS_OP(yield))
++		SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, p, NULL);
++	else
++		p->scx.slice = 0;
++}
++
++static bool yield_to_task_scx(struct rq *rq, struct task_struct *to)
++{
++	struct task_struct *from = rq->curr;
++
++	if (SCX_HAS_OP(yield))
++		return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, from, to);
++	else
++		return false;
++}
++
++static void move_local_task_to_local_dsq(struct task_struct *p, u64 enq_flags,
++					 struct scx_dispatch_q *src_dsq,
++					 struct rq *dst_rq)
++{
++	struct scx_dispatch_q *dst_dsq = &dst_rq->scx.local_dsq;
++
++	/* @src_dsq is locked and @p is on @dst_rq */
++	lockdep_assert_held(&src_dsq->lock);
++	lockdep_assert_rq_held(dst_rq);
++
++	WARN_ON_ONCE(p->scx.holding_cpu >= 0);
++
++	if (enq_flags & (SCX_ENQ_HEAD | SCX_ENQ_PREEMPT))
++		list_add(&p->scx.dsq_list.node, &dst_dsq->list);
++	else
++		list_add_tail(&p->scx.dsq_list.node, &dst_dsq->list);
++
++	dsq_mod_nr(dst_dsq, 1);
++	p->scx.dsq = dst_dsq;
++}
++
++#ifdef CONFIG_SMP
++/**
++ * move_remote_task_to_local_dsq - Move a task from a foreign rq to a local DSQ
++ * @p: task to move
++ * @enq_flags: %SCX_ENQ_*
++ * @src_rq: rq to move the task from, locked on entry, released on return
++ * @dst_rq: rq to move the task into, locked on return
++ *
++ * Move @p which is currently on @src_rq to @dst_rq's local DSQ.
++ */
++static void move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags,
++					  struct rq *src_rq, struct rq *dst_rq)
++{
++	lockdep_assert_rq_held(src_rq);
++
++	/* the following marks @p MIGRATING which excludes dequeue */
++	deactivate_task(src_rq, p, 0);
++	set_task_cpu(p, cpu_of(dst_rq));
++	p->scx.sticky_cpu = cpu_of(dst_rq);
++
++	raw_spin_rq_unlock(src_rq);
++	raw_spin_rq_lock(dst_rq);
++
++	/*
++	 * We want to pass scx-specific enq_flags but activate_task() will
++	 * truncate the upper 32 bit. As we own @rq, we can pass them through
++	 * @rq->scx.extra_enq_flags instead.
++	 */
++	WARN_ON_ONCE(!cpumask_test_cpu(cpu_of(dst_rq), p->cpus_ptr));
++	WARN_ON_ONCE(dst_rq->scx.extra_enq_flags);
++	dst_rq->scx.extra_enq_flags = enq_flags;
++	activate_task(dst_rq, p, 0);
++	dst_rq->scx.extra_enq_flags = 0;
++}
++
++/*
++ * Similar to kernel/sched/core.c::is_cpu_allowed(). However, there are two
++ * differences:
++ *
++ * - is_cpu_allowed() asks "Can this task run on this CPU?" while
++ *   task_can_run_on_remote_rq() asks "Can the BPF scheduler migrate the task to
++ *   this CPU?".
++ *
++ *   While migration is disabled, is_cpu_allowed() has to say "yes" as the task
++ *   must be allowed to finish on the CPU that it's currently on regardless of
++ *   the CPU state. However, task_can_run_on_remote_rq() must say "no" as the
++ *   BPF scheduler shouldn't attempt to migrate a task which has migration
++ *   disabled.
++ *
++ * - The BPF scheduler is bypassed while the rq is offline and we can always say
++ *   no to the BPF scheduler initiated migrations while offline.
++ */
++static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq,
++				      bool trigger_error)
++{
++	int cpu = cpu_of(rq);
++
++	/*
++	 * We don't require the BPF scheduler to avoid dispatching to offline
++	 * CPUs mostly for convenience but also because CPUs can go offline
++	 * between scx_bpf_dispatch() calls and here. Trigger error iff the
++	 * picked CPU is outside the allowed mask.
++	 */
++	if (!task_allowed_on_cpu(p, cpu)) {
++		if (trigger_error)
++			scx_ops_error("SCX_DSQ_LOCAL[_ON] verdict target cpu %d not allowed for %s[%d]",
++				      cpu_of(rq), p->comm, p->pid);
++		return false;
++	}
++
++	if (unlikely(is_migration_disabled(p)))
++		return false;
++
++	if (!scx_rq_online(rq))
++		return false;
++
++	return true;
++}
++
++/**
++ * unlink_dsq_and_lock_src_rq() - Unlink task from its DSQ and lock its task_rq
++ * @p: target task
++ * @dsq: locked DSQ @p is currently on
++ * @src_rq: rq @p is currently on, stable with @dsq locked
++ *
++ * Called with @dsq locked but no rq's locked. We want to move @p to a different
++ * DSQ, including any local DSQ, but are not locking @src_rq. Locking @src_rq is
++ * required when transferring into a local DSQ. Even when transferring into a
++ * non-local DSQ, it's better to use the same mechanism to protect against
++ * dequeues and maintain the invariant that @p->scx.dsq can only change while
++ * @src_rq is locked, which e.g. scx_dump_task() depends on.
++ *
++ * We want to grab @src_rq but that can deadlock if we try while locking @dsq,
++ * so we want to unlink @p from @dsq, drop its lock and then lock @src_rq. As
++ * this may race with dequeue, which can't drop the rq lock or fail, do a little
++ * dancing from our side.
++ *
++ * @p->scx.holding_cpu is set to this CPU before @dsq is unlocked. If @p gets
++ * dequeued after we unlock @dsq but before locking @src_rq, the holding_cpu
++ * would be cleared to -1. While other cpus may have updated it to different
++ * values afterwards, as this operation can't be preempted or recurse, the
++ * holding_cpu can never become this CPU again before we're done. Thus, we can
++ * tell whether we lost to dequeue by testing whether the holding_cpu still
++ * points to this CPU. See dispatch_dequeue() for the counterpart.
++ *
++ * On return, @dsq is unlocked and @src_rq is locked. Returns %true if @p is
++ * still valid. %false if lost to dequeue.
++ */
++static bool unlink_dsq_and_lock_src_rq(struct task_struct *p,
++				       struct scx_dispatch_q *dsq,
++				       struct rq *src_rq)
++{
++	s32 cpu = raw_smp_processor_id();
++
++	lockdep_assert_held(&dsq->lock);
++
++	WARN_ON_ONCE(p->scx.holding_cpu >= 0);
++	task_unlink_from_dsq(p, dsq);
++	p->scx.holding_cpu = cpu;
++
++	raw_spin_unlock(&dsq->lock);
++	raw_spin_rq_lock(src_rq);
++
++	/* task_rq couldn't have changed if we're still the holding cpu */
++	return likely(p->scx.holding_cpu == cpu) &&
++		!WARN_ON_ONCE(src_rq != task_rq(p));
++}
++
++static bool consume_remote_task(struct rq *this_rq, struct task_struct *p,
++				struct scx_dispatch_q *dsq, struct rq *src_rq)
++{
++	raw_spin_rq_unlock(this_rq);
++
++	if (unlink_dsq_and_lock_src_rq(p, dsq, src_rq)) {
++		move_remote_task_to_local_dsq(p, 0, src_rq, this_rq);
++		return true;
++	} else {
++		raw_spin_rq_unlock(src_rq);
++		raw_spin_rq_lock(this_rq);
++		return false;
++	}
++}
++#else	/* CONFIG_SMP */
++static inline void move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags, struct rq *src_rq, struct rq *dst_rq) { WARN_ON_ONCE(1); }
++static inline bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq, bool trigger_error) { return false; }
++static inline bool consume_remote_task(struct rq *this_rq, struct task_struct *p, struct scx_dispatch_q *dsq, struct rq *task_rq) { return false; }
++#endif	/* CONFIG_SMP */
++
++static bool consume_dispatch_q(struct rq *rq, struct scx_dispatch_q *dsq)
++{
++	struct task_struct *p;
++retry:
++	/*
++	 * The caller can't expect to successfully consume a task if the task's
++	 * addition to @dsq isn't guaranteed to be visible somehow. Test
++	 * @dsq->list without locking and skip if it seems empty.
++	 */
++	if (list_empty(&dsq->list))
++		return false;
++
++	raw_spin_lock(&dsq->lock);
++
++	nldsq_for_each_task(p, dsq) {
++		struct rq *task_rq = task_rq(p);
++
++		if (rq == task_rq) {
++			task_unlink_from_dsq(p, dsq);
++			move_local_task_to_local_dsq(p, 0, dsq, rq);
++			raw_spin_unlock(&dsq->lock);
++			return true;
++		}
++
++		if (task_can_run_on_remote_rq(p, rq, false)) {
++			if (likely(consume_remote_task(rq, p, dsq, task_rq)))
++				return true;
++			goto retry;
++		}
++	}
++
++	raw_spin_unlock(&dsq->lock);
++	return false;
++}
++
++static bool consume_global_dsq(struct rq *rq)
++{
++	int node = cpu_to_node(cpu_of(rq));
++
++	return consume_dispatch_q(rq, global_dsqs[node]);
++}
++
++/**
++ * dispatch_to_local_dsq - Dispatch a task to a local dsq
++ * @rq: current rq which is locked
++ * @dst_dsq: destination DSQ
++ * @p: task to dispatch
++ * @enq_flags: %SCX_ENQ_*
++ *
++ * We're holding @rq lock and want to dispatch @p to @dst_dsq which is a local
++ * DSQ. This function performs all the synchronization dancing needed because
++ * local DSQs are protected with rq locks. @rq may be unlocked and relocked on
++ * the way but is always the only rq locked on return.
++ *
++ * The caller must exclusively own @p (e.g. through %SCX_OPSS_DISPATCHING).
++ */
++static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
++				  struct task_struct *p, u64 enq_flags)
++{
++	struct rq *src_rq = task_rq(p);
++	struct rq *dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq);
++#ifdef CONFIG_SMP
++	struct rq *locked_rq = rq;	/* rq lock currently held */
++#endif
++
++	/*
++	 * We're synchronized against dequeue through DISPATCHING. As @p can't
++	 * be dequeued, its task_rq and cpus_allowed are stable too.
++	 *
++	 * If dispatching to @rq that @p is already on, no lock dancing needed.
++	 */
++	if (rq == src_rq && rq == dst_rq) {
++		dispatch_enqueue(dst_dsq, p, enq_flags | SCX_ENQ_CLEAR_OPSS);
++		return;
++	}
++
++#ifdef CONFIG_SMP
++	if (unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) {
++		dispatch_enqueue(find_global_dsq(p), p,
++				 enq_flags | SCX_ENQ_CLEAR_OPSS);
++		return;
++	}
++
++	/*
++	 * @p is on a possibly remote @src_rq which we need to lock to move the
++	 * task. If dequeue is in progress, it'd be locking @src_rq and waiting
++	 * on DISPATCHING, so we can't grab @src_rq lock while holding
++	 * DISPATCHING. As DISPATCHING guarantees that @p is wholly ours, we can
++	 * pretend that we're moving from a DSQ and use the same mechanism -
++	 * mark the task under transfer with holding_cpu, release DISPATCHING
++	 * and then follow the same protocol. See unlink_dsq_and_lock_src_rq().
++	 */
++	p->scx.holding_cpu = raw_smp_processor_id();
++
++	/* store_release ensures that dequeue sees the above */
++	atomic_long_set_release(&p->scx.ops_state, SCX_OPSS_NONE);
++
++	/* switch to @src_rq lock */
++	if (locked_rq != src_rq) {
++		raw_spin_rq_unlock(locked_rq);
++		locked_rq = src_rq;
++		raw_spin_rq_lock(src_rq);
++	}
++
++	/* task_rq couldn't have changed if we're still the holding cpu */
++	if (likely(p->scx.holding_cpu == raw_smp_processor_id()) &&
++	    !WARN_ON_ONCE(src_rq != task_rq(p))) {
++		/* staying on the same rq needs no deactivate/activate cycle */
++		if (src_rq == dst_rq) {
++			p->scx.holding_cpu = -1;
++			dispatch_enqueue(&dst_rq->scx.local_dsq, p, enq_flags);
++		} else {
++			move_remote_task_to_local_dsq(p, enq_flags,
++						      src_rq, dst_rq);
++			/* @src_rq was released and @dst_rq is now locked */
++			locked_rq = dst_rq;
++		}
++
++		/* if the destination CPU is idle, wake it up */
++		if (sched_class_above(p->sched_class, dst_rq->curr->sched_class))
++			resched_curr(dst_rq);
++	}
++
++	/* switch back to @rq; if we lost to dequeue, @src_rq is still held */
++	if (locked_rq != rq) {
++		raw_spin_rq_unlock(locked_rq);
++		raw_spin_rq_lock(rq);
++	}
++#else	/* CONFIG_SMP */
++	BUG();	/* control can not reach here on UP */
++#endif	/* CONFIG_SMP */
++}
++
++/**
++ * finish_dispatch - Asynchronously finish dispatching a task
++ * @rq: current rq which is locked
++ * @p: task to finish dispatching
++ * @qseq_at_dispatch: qseq when @p started getting dispatched
++ * @dsq_id: destination DSQ ID
++ * @enq_flags: %SCX_ENQ_*
++ *
++ * Dispatching to local DSQs may need to wait for queueing to complete or
++ * require rq lock dancing. As we don't wanna do either while inside
++ * ops.dispatch() to avoid locking order inversion, we split dispatching into
++ * two parts. scx_bpf_dispatch() which is called by ops.dispatch() records the
++ * task and its qseq. Once ops.dispatch() returns, this function is called to
++ * finish up.
++ *
++ * There is no guarantee that @p is still valid for dispatching or even that it
++ * was valid in the first place. Make sure that the task is still owned by the
++ * BPF scheduler and claim the ownership before dispatching.
++ */
++static void finish_dispatch(struct rq *rq, struct task_struct *p,
++			    unsigned long qseq_at_dispatch,
++			    u64 dsq_id, u64 enq_flags)
++{
++	struct scx_dispatch_q *dsq;
++	unsigned long opss;
++
++	touch_core_sched_dispatch(rq, p);
++retry:
++	/*
++	 * No need for _acquire here. @p is accessed only after a successful
++	 * try_cmpxchg to DISPATCHING.
++	 */
++	opss = atomic_long_read(&p->scx.ops_state);
++
++	switch (opss & SCX_OPSS_STATE_MASK) {
++	case SCX_OPSS_DISPATCHING:
++	case SCX_OPSS_NONE:
++		/* someone else already got to it */
++		return;
++	case SCX_OPSS_QUEUED:
++		/*
++		 * If qseq doesn't match, @p has gone through at least one
++		 * dispatch/dequeue and re-enqueue cycle between
++		 * scx_bpf_dispatch() and here and we have no claim on it.
++		 */
++		if ((opss & SCX_OPSS_QSEQ_MASK) != qseq_at_dispatch)
++			return;
++
++		/*
++		 * While we know @p is accessible, we don't yet have a claim on
++		 * it - the BPF scheduler is allowed to dispatch tasks
++		 * spuriously and there can be a racing dequeue attempt. Let's
++		 * claim @p by atomically transitioning it from QUEUED to
++		 * DISPATCHING.
++		 */
++		if (likely(atomic_long_try_cmpxchg(&p->scx.ops_state, &opss,
++						   SCX_OPSS_DISPATCHING)))
++			break;
++		goto retry;
++	case SCX_OPSS_QUEUEING:
++		/*
++		 * do_enqueue_task() is in the process of transferring the task
++		 * to the BPF scheduler while holding @p's rq lock. As we aren't
++		 * holding any kernel or BPF resource that the enqueue path may
++		 * depend upon, it's safe to wait.
++		 */
++		wait_ops_state(p, opss);
++		goto retry;
++	}
++
++	BUG_ON(!(p->scx.flags & SCX_TASK_QUEUED));
++
++	dsq = find_dsq_for_dispatch(this_rq(), dsq_id, p);
++
++	if (dsq->id == SCX_DSQ_LOCAL)
++		dispatch_to_local_dsq(rq, dsq, p, enq_flags);
++	else
++		dispatch_enqueue(dsq, p, enq_flags | SCX_ENQ_CLEAR_OPSS);
++}
++
++static void flush_dispatch_buf(struct rq *rq)
++{
++	struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
++	u32 u;
++
++	for (u = 0; u < dspc->cursor; u++) {
++		struct scx_dsp_buf_ent *ent = &dspc->buf[u];
++
++		finish_dispatch(rq, ent->task, ent->qseq, ent->dsq_id,
++				ent->enq_flags);
++	}
++
++	dspc->nr_tasks += dspc->cursor;
++	dspc->cursor = 0;
++}
++
++static int balance_one(struct rq *rq, struct task_struct *prev, bool local)
++{
++	struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
++	bool prev_on_scx = prev->sched_class == &ext_sched_class;
++	int nr_loops = SCX_DSP_MAX_LOOPS;
++	bool has_tasks = false;
++
++	lockdep_assert_rq_held(rq);
++	rq->scx.flags |= SCX_RQ_IN_BALANCE;
++
++	if (static_branch_unlikely(&scx_ops_cpu_preempt) &&
++	    unlikely(rq->scx.cpu_released)) {
++		/*
++		 * If the previous sched_class for the current CPU was not SCX,
++		 * notify the BPF scheduler that it again has control of the
++		 * core. This callback complements ->cpu_release(), which is
++		 * emitted in switch_class_scx().
++		 */
++		if (SCX_HAS_OP(cpu_acquire))
++			SCX_CALL_OP(0, cpu_acquire, cpu_of(rq), NULL);
++		rq->scx.cpu_released = false;
++	}
++
++	if (prev_on_scx) {
++		WARN_ON_ONCE(local && (prev->scx.flags & SCX_TASK_BAL_KEEP));
++		update_curr_scx(rq);
++
++		/*
++		 * If @prev is runnable & has slice left, it has priority and
++		 * fetching more just increases latency for the fetched tasks.
++		 * Tell put_prev_task_scx() to put @prev on local_dsq. If the
++		 * BPF scheduler wants to handle this explicitly, it should
++		 * implement ->cpu_release().
++		 *
++		 * See scx_ops_disable_workfn() for the explanation on the
++		 * bypassing test.
++		 *
++		 * When balancing a remote CPU for core-sched, there won't be a
++		 * following put_prev_task_scx() call and we don't own
++		 * %SCX_TASK_BAL_KEEP. Instead, pick_task_scx() will test the
++		 * same conditions later and pick @rq->curr accordingly.
++		 */
++		if ((prev->scx.flags & SCX_TASK_QUEUED) &&
++		    prev->scx.slice && !scx_rq_bypassing(rq)) {
++			if (local)
++				prev->scx.flags |= SCX_TASK_BAL_KEEP;
++			goto has_tasks;
++		}
++	}
++
++	/* if there already are tasks to run, nothing to do */
++	if (rq->scx.local_dsq.nr)
++		goto has_tasks;
++
++	if (consume_global_dsq(rq))
++		goto has_tasks;
++
++	if (!SCX_HAS_OP(dispatch) || scx_rq_bypassing(rq) || !scx_rq_online(rq))
++		goto out;
++
++	dspc->rq = rq;
++
++	/*
++	 * The dispatch loop. Because flush_dispatch_buf() may drop the rq lock,
++	 * the local DSQ might still end up empty after a successful
++	 * ops.dispatch(). If the local DSQ is empty even after ops.dispatch()
++	 * produced some tasks, retry. The BPF scheduler may depend on this
++	 * looping behavior to simplify its implementation.
++	 */
++	do {
++		dspc->nr_tasks = 0;
++
++		SCX_CALL_OP(SCX_KF_DISPATCH, dispatch, cpu_of(rq),
++			    prev_on_scx ? prev : NULL);
++
++		flush_dispatch_buf(rq);
++
++		if (rq->scx.local_dsq.nr)
++			goto has_tasks;
++		if (consume_global_dsq(rq))
++			goto has_tasks;
++
++		/*
++		 * ops.dispatch() can trap us in this loop by repeatedly
++		 * dispatching ineligible tasks. Break out once in a while to
++		 * allow the watchdog to run. As IRQ can't be enabled in
++		 * balance(), we want to complete this scheduling cycle and then
++		 * start a new one. IOW, we want to call resched_curr() on the
++		 * next, most likely idle, task, not the current one. Use
++		 * scx_bpf_kick_cpu() for deferred kicking.
++		 */
++		if (unlikely(!--nr_loops)) {
++			scx_bpf_kick_cpu(cpu_of(rq), 0);
++			break;
++		}
++	} while (dspc->nr_tasks);
++
++	goto out;
++
++has_tasks:
++	has_tasks = true;
++out:
++	rq->scx.flags &= ~SCX_RQ_IN_BALANCE;
++	return has_tasks;
++}
++
++static int balance_scx(struct rq *rq, struct task_struct *prev,
++		       struct rq_flags *rf)
++{
++	int ret;
++
++	rq_unpin_lock(rq, rf);
++
++	ret = balance_one(rq, prev, true);
++
++#ifdef CONFIG_SCHED_SMT
++	/*
++	 * When core-sched is enabled, this ops.balance() call will be followed
++	 * by put_prev_task_scx() and pick_task_scx() on this CPU, and by
++	 * pick_task_scx() on the SMT siblings. Balance the siblings too.
++	 */
++	if (sched_core_enabled(rq)) {
++		const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq));
++		int scpu;
++
++		for_each_cpu_andnot(scpu, smt_mask, cpumask_of(cpu_of(rq))) {
++			struct rq *srq = cpu_rq(scpu);
++			struct task_struct *sprev = srq->curr;
++
++			WARN_ON_ONCE(__rq_lockp(rq) != __rq_lockp(srq));
++			update_rq_clock(srq);
++			balance_one(srq, sprev, false);
++		}
++	}
++#endif
++	rq_repin_lock(rq, rf);
++
++	return ret;
++}
++
++static void process_ddsp_deferred_locals(struct rq *rq)
++{
++	struct task_struct *p;
++
++	lockdep_assert_rq_held(rq);
++
++	/*
++	 * Now that @rq can be unlocked, execute the deferred enqueueing of
++	 * tasks directly dispatched to the local DSQs of other CPUs. See
++	 * direct_dispatch(). Keep popping from the head instead of using
++	 * list_for_each_entry_safe() as dispatch_to_local_dsq() may unlock @rq
++	 * temporarily.
++	 */
++	while ((p = list_first_entry_or_null(&rq->scx.ddsp_deferred_locals,
++				struct task_struct, scx.dsq_list.node))) {
++		struct scx_dispatch_q *dsq;
++
++		list_del_init(&p->scx.dsq_list.node);
++
++		dsq = find_dsq_for_dispatch(rq, p->scx.ddsp_dsq_id, p);
++		if (!WARN_ON_ONCE(dsq->id != SCX_DSQ_LOCAL))
++			dispatch_to_local_dsq(rq, dsq, p, p->scx.ddsp_enq_flags);
++	}
++}
++
++static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first)
++{
++	if (p->scx.flags & SCX_TASK_QUEUED) {
++		/*
++		 * Core-sched might decide to execute @p before it is
++		 * dispatched. Call ops_dequeue() to notify the BPF scheduler.
++		 */
++		ops_dequeue(p, SCX_DEQ_CORE_SCHED_EXEC);
++		dispatch_dequeue(rq, p);
++	}
++
++	p->se.exec_start = rq_clock_task(rq);
++
++	/* see dequeue_task_scx() on why we skip when !QUEUED */
++	if (SCX_HAS_OP(running) && (p->scx.flags & SCX_TASK_QUEUED))
++		SCX_CALL_OP_TASK(SCX_KF_REST, running, p);
++
++	clr_task_runnable(p, true);
++
++	/*
++	 * @p is getting newly scheduled or got kicked after someone updated its
++	 * slice. Refresh whether tick can be stopped. See scx_can_stop_tick().
++	 */
++	if ((p->scx.slice == SCX_SLICE_INF) !=
++	    (bool)(rq->scx.flags & SCX_RQ_CAN_STOP_TICK)) {
++		if (p->scx.slice == SCX_SLICE_INF)
++			rq->scx.flags |= SCX_RQ_CAN_STOP_TICK;
++		else
++			rq->scx.flags &= ~SCX_RQ_CAN_STOP_TICK;
++
++		sched_update_tick_dependency(rq);
++
++		/*
++		 * For now, let's refresh the load_avgs just when transitioning
++		 * in and out of nohz. In the future, we might want to add a
++		 * mechanism which calls the following periodically on
++		 * tick-stopped CPUs.
++		 */
++		update_other_load_avgs(rq);
++	}
++}
++
++static enum scx_cpu_preempt_reason
++preempt_reason_from_class(const struct sched_class *class)
++{
++#ifdef CONFIG_SMP
++	if (class == &stop_sched_class)
++		return SCX_CPU_PREEMPT_STOP;
++#endif
++	if (class == &dl_sched_class)
++		return SCX_CPU_PREEMPT_DL;
++	if (class == &rt_sched_class)
++		return SCX_CPU_PREEMPT_RT;
++	return SCX_CPU_PREEMPT_UNKNOWN;
++}
++
++static void switch_class_scx(struct rq *rq, struct task_struct *next)
++{
++	const struct sched_class *next_class = next->sched_class;
++
++	if (!scx_enabled())
++		return;
++#ifdef CONFIG_SMP
++	/*
++	 * Pairs with the smp_load_acquire() issued by a CPU in
++	 * kick_cpus_irq_workfn() who is waiting for this CPU to perform a
++	 * resched.
++	 */
++	smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
++#endif
++	if (!static_branch_unlikely(&scx_ops_cpu_preempt))
++		return;
++
++	/*
++	 * The callback is conceptually meant to convey that the CPU is no
++	 * longer under the control of SCX. Therefore, don't invoke the callback
++	 * if the next class is below SCX (in which case the BPF scheduler has
++	 * actively decided not to schedule any tasks on the CPU).
++	 */
++	if (sched_class_above(&ext_sched_class, next_class))
++		return;
++
++	/*
++	 * At this point we know that SCX was preempted by a higher priority
++	 * sched_class, so invoke the ->cpu_release() callback if we have not
++	 * done so already. We only send the callback once between SCX being
++	 * preempted, and it regaining control of the CPU.
++	 *
++	 * ->cpu_release() complements ->cpu_acquire(), which is emitted the
++	 *  next time that balance_scx() is invoked.
++	 */
++	if (!rq->scx.cpu_released) {
++		if (SCX_HAS_OP(cpu_release)) {
++			struct scx_cpu_release_args args = {
++				.reason = preempt_reason_from_class(next_class),
++				.task = next,
++			};
++
++			SCX_CALL_OP(SCX_KF_CPU_RELEASE,
++				    cpu_release, cpu_of(rq), &args);
++		}
++		rq->scx.cpu_released = true;
++	}
++}
++
++static void put_prev_task_scx(struct rq *rq, struct task_struct *p)
++{
++	update_curr_scx(rq);
++
++	/* see dequeue_task_scx() on why we skip when !QUEUED */
++	if (SCX_HAS_OP(stopping) && (p->scx.flags & SCX_TASK_QUEUED))
++		SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, true);
++
++	/*
++	 * If we're being called from put_prev_task_balance(), balance_scx() may
++	 * have decided that @p should keep running.
++	 */
++	if (p->scx.flags & SCX_TASK_BAL_KEEP) {
++		p->scx.flags &= ~SCX_TASK_BAL_KEEP;
++		set_task_runnable(rq, p);
++		dispatch_enqueue(&rq->scx.local_dsq, p, SCX_ENQ_HEAD);
++		return;
++	}
++
++	if (p->scx.flags & SCX_TASK_QUEUED) {
++		set_task_runnable(rq, p);
++
++		/*
++		 * If @p has slice left and balance_scx() didn't tag it for
++		 * keeping, @p is getting preempted by a higher priority
++		 * scheduler class or core-sched forcing a different task. Leave
++		 * it at the head of the local DSQ.
++		 */
++		if (p->scx.slice && !scx_rq_bypassing(rq)) {
++			dispatch_enqueue(&rq->scx.local_dsq, p, SCX_ENQ_HEAD);
++			return;
++		}
++
++		/*
++		 * If we're in the pick_next_task path, balance_scx() should
++		 * have already populated the local DSQ if there are any other
++		 * available tasks. If empty, tell ops.enqueue() that @p is the
++		 * only one available for this cpu. ops.enqueue() should put it
++		 * on the local DSQ so that the subsequent pick_next_task_scx()
++		 * can find the task unless it wants to trigger a separate
++		 * follow-up scheduling event.
++		 */
++		if (list_empty(&rq->scx.local_dsq.list))
++			do_enqueue_task(rq, p, SCX_ENQ_LAST, -1);
++		else
++			do_enqueue_task(rq, p, 0, -1);
++	}
++}
++
++static struct task_struct *first_local_task(struct rq *rq)
++{
++	return list_first_entry_or_null(&rq->scx.local_dsq.list,
++					struct task_struct, scx.dsq_list.node);
++}
++
++static struct task_struct *pick_next_task_scx(struct rq *rq)
++{
++	struct task_struct *p;
++
++	p = first_local_task(rq);
++	if (!p)
++		return NULL;
++
++	set_next_task_scx(rq, p, true);
++
++	if (unlikely(!p->scx.slice)) {
++		if (!scx_rq_bypassing(rq) && !scx_warned_zero_slice) {
++			printk_deferred(KERN_WARNING "sched_ext: %s[%d] has zero slice in pick_next_task_scx()\n",
++					p->comm, p->pid);
++			scx_warned_zero_slice = true;
++		}
++		p->scx.slice = SCX_SLICE_DFL;
++	}
++
++	return p;
++}
++
++#ifdef CONFIG_SCHED_CORE
++/**
++ * scx_prio_less - Task ordering for core-sched
++ * @a: task A
++ * @b: task B
++ *
++ * Core-sched is implemented as an additional scheduling layer on top of the
++ * usual sched_class'es and needs to find out the expected task ordering. For
++ * SCX, core-sched calls this function to interrogate the task ordering.
++ *
++ * Unless overridden by ops.core_sched_before(), @p->scx.core_sched_at is used
++ * to implement the default task ordering. The older the timestamp, the higher
++ * priority the task - the global FIFO ordering matching the default scheduling
++ * behavior.
++ *
++ * When ops.core_sched_before() is enabled, @p->scx.core_sched_at is used to
++ * implement FIFO ordering within each local DSQ. See pick_task_scx().
++ */
++bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
++		   bool in_fi)
++{
++	/*
++	 * The const qualifiers are dropped from task_struct pointers when
++	 * calling ops.core_sched_before(). Accesses are controlled by the
++	 * verifier.
++	 */
++	if (SCX_HAS_OP(core_sched_before) && !scx_rq_bypassing(task_rq(a)))
++		return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, core_sched_before,
++					      (struct task_struct *)a,
++					      (struct task_struct *)b);
++	else
++		return time_after64(a->scx.core_sched_at, b->scx.core_sched_at);
++}
++
++/**
++ * pick_task_scx - Pick a candidate task for core-sched
++ * @rq: rq to pick the candidate task from
++ *
++ * Core-sched calls this function on each SMT sibling to determine the next
++ * tasks to run on the SMT siblings. balance_one() has been called on all
++ * siblings and put_prev_task_scx() has been called only for the current CPU.
++ *
++ * As put_prev_task_scx() hasn't been called on remote CPUs, we can't just look
++ * at the first task in the local dsq. @rq->curr has to be considered explicitly
++ * to mimic %SCX_TASK_BAL_KEEP.
++ */
++static struct task_struct *pick_task_scx(struct rq *rq)
++{
++	struct task_struct *curr = rq->curr;
++	struct task_struct *first = first_local_task(rq);
++
++	if (curr->scx.flags & SCX_TASK_QUEUED) {
++		/* is curr the only runnable task? */
++		if (!first)
++			return curr;
++
++		/*
++		 * Does curr trump first? We can always go by core_sched_at for
++		 * this comparison as it represents global FIFO ordering when
++		 * the default core-sched ordering is used and local-DSQ FIFO
++		 * ordering otherwise.
++		 *
++		 * We can have a task with an earlier timestamp on the DSQ. For
++		 * example, when a current task is preempted by a sibling
++		 * picking a different cookie, the task would be requeued at the
++		 * head of the local DSQ with an earlier timestamp than the
++		 * core-sched picked next task. Besides, the BPF scheduler may
++		 * dispatch any tasks to the local DSQ anytime.
++		 */
++		if (curr->scx.slice && time_before64(curr->scx.core_sched_at,
++						     first->scx.core_sched_at))
++			return curr;
++	}
++
++	return first;	/* this may be %NULL */
++}
++#endif	/* CONFIG_SCHED_CORE */
++
++#ifdef CONFIG_SMP
++
++static bool test_and_clear_cpu_idle(int cpu)
++{
++#ifdef CONFIG_SCHED_SMT
++	/*
++	 * SMT mask should be cleared whether we can claim @cpu or not. The SMT
++	 * cluster is not wholly idle either way. This also prevents
++	 * scx_pick_idle_cpu() from getting caught in an infinite loop.
++	 */
++	if (sched_smt_active()) {
++		const struct cpumask *smt = cpu_smt_mask(cpu);
++
++		/*
++		 * If offline, @cpu is not its own sibling and
++		 * scx_pick_idle_cpu() can get caught in an infinite loop as
++		 * @cpu is never cleared from idle_masks.smt. Ensure that @cpu
++		 * is eventually cleared.
++		 */
++		if (cpumask_intersects(smt, idle_masks.smt))
++			cpumask_andnot(idle_masks.smt, idle_masks.smt, smt);
++		else if (cpumask_test_cpu(cpu, idle_masks.smt))
++			__cpumask_clear_cpu(cpu, idle_masks.smt);
++	}
++#endif
++	return cpumask_test_and_clear_cpu(cpu, idle_masks.cpu);
++}
++
++static s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, u64 flags)
++{
++	int cpu;
++
++retry:
++	if (sched_smt_active()) {
++		cpu = cpumask_any_and_distribute(idle_masks.smt, cpus_allowed);
++		if (cpu < nr_cpu_ids)
++			goto found;
++
++		if (flags & SCX_PICK_IDLE_CORE)
++			return -EBUSY;
++	}
++
++	cpu = cpumask_any_and_distribute(idle_masks.cpu, cpus_allowed);
++	if (cpu >= nr_cpu_ids)
++		return -EBUSY;
++
++found:
++	if (test_and_clear_cpu_idle(cpu))
++		return cpu;
++	else
++		goto retry;
++}
++
++static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
++			      u64 wake_flags, bool *found)
++{
++	s32 cpu;
++
++	*found = false;
++
++	/*
++	 * If WAKE_SYNC, the waker's local DSQ is empty, and the system is
++	 * under utilized, wake up @p to the local DSQ of the waker. Checking
++	 * only for an empty local DSQ is insufficient as it could give the
++	 * wakee an unfair advantage when the system is oversaturated.
++	 * Checking only for the presence of idle CPUs is also insufficient as
++	 * the local DSQ of the waker could have tasks piled up on it even if
++	 * there is an idle core elsewhere on the system.
++	 */
++	cpu = smp_processor_id();
++	if ((wake_flags & SCX_WAKE_SYNC) && p->nr_cpus_allowed > 1 &&
++	    !cpumask_empty(idle_masks.cpu) && !(current->flags & PF_EXITING) &&
++	    cpu_rq(cpu)->scx.local_dsq.nr == 0) {
++		if (cpumask_test_cpu(cpu, p->cpus_ptr))
++			goto cpu_found;
++	}
++
++	if (p->nr_cpus_allowed == 1) {
++		if (test_and_clear_cpu_idle(prev_cpu)) {
++			cpu = prev_cpu;
++			goto cpu_found;
++		} else {
++			return prev_cpu;
++		}
++	}
++
++	/*
++	 * If CPU has SMT, any wholly idle CPU is likely a better pick than
++	 * partially idle @prev_cpu.
++	 */
++	if (sched_smt_active()) {
++		if (cpumask_test_cpu(prev_cpu, idle_masks.smt) &&
++		    test_and_clear_cpu_idle(prev_cpu)) {
++			cpu = prev_cpu;
++			goto cpu_found;
++		}
++
++		cpu = scx_pick_idle_cpu(p->cpus_ptr, SCX_PICK_IDLE_CORE);
++		if (cpu >= 0)
++			goto cpu_found;
++	}
++
++	if (test_and_clear_cpu_idle(prev_cpu)) {
++		cpu = prev_cpu;
++		goto cpu_found;
++	}
++
++	cpu = scx_pick_idle_cpu(p->cpus_ptr, 0);
++	if (cpu >= 0)
++		goto cpu_found;
++
++	return prev_cpu;
++
++cpu_found:
++	*found = true;
++	return cpu;
++}
++
++static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flags)
++{
++	/*
++	 * sched_exec() calls with %WF_EXEC when @p is about to exec(2) as it
++	 * can be a good migration opportunity with low cache and memory
++	 * footprint. Returning a CPU different than @prev_cpu triggers
++	 * immediate rq migration. However, for SCX, as the current rq
++	 * association doesn't dictate where the task is going to run, this
++	 * doesn't fit well. If necessary, we can later add a dedicated method
++	 * which can decide to preempt self to force it through the regular
++	 * scheduling path.
++	 */
++	if (unlikely(wake_flags & WF_EXEC))
++		return prev_cpu;
++
++	if (SCX_HAS_OP(select_cpu) && !scx_rq_bypassing(task_rq(p))) {
++		s32 cpu;
++		struct task_struct **ddsp_taskp;
++
++		ddsp_taskp = this_cpu_ptr(&direct_dispatch_task);
++		WARN_ON_ONCE(*ddsp_taskp);
++		*ddsp_taskp = p;
++
++		cpu = SCX_CALL_OP_TASK_RET(SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU,
++					   select_cpu, p, prev_cpu, wake_flags);
++		*ddsp_taskp = NULL;
++		if (ops_cpu_valid(cpu, "from ops.select_cpu()"))
++			return cpu;
++		else
++			return prev_cpu;
++	} else {
++		bool found;
++		s32 cpu;
++
++		cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, &found);
++		if (found) {
++			p->scx.slice = SCX_SLICE_DFL;
++			p->scx.ddsp_dsq_id = SCX_DSQ_LOCAL;
++		}
++		return cpu;
++	}
++}
++
++static void task_woken_scx(struct rq *rq, struct task_struct *p)
++{
++	run_deferred(rq);
++}
++
++static void set_cpus_allowed_scx(struct task_struct *p,
++				 struct affinity_context *ac)
++{
++	set_cpus_allowed_common(p, ac);
++
++	/*
++	 * The effective cpumask is stored in @p->cpus_ptr which may temporarily
++	 * differ from the configured one in @p->cpus_mask. Always tell the bpf
++	 * scheduler the effective one.
++	 *
++	 * Fine-grained memory write control is enforced by BPF making the const
++	 * designation pointless. Cast it away when calling the operation.
++	 */
++	if (SCX_HAS_OP(set_cpumask))
++		SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p,
++				 (struct cpumask *)p->cpus_ptr);
++}
++
++static void reset_idle_masks(void)
++{
++	/*
++	 * Consider all online cpus idle. Should converge to the actual state
++	 * quickly.
++	 */
++	cpumask_copy(idle_masks.cpu, cpu_online_mask);
++	cpumask_copy(idle_masks.smt, cpu_online_mask);
++}
++
++void __scx_update_idle(struct rq *rq, bool idle)
++{
++	int cpu = cpu_of(rq);
++
++	if (SCX_HAS_OP(update_idle) && !scx_rq_bypassing(rq)) {
++		SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle);
++		if (!static_branch_unlikely(&scx_builtin_idle_enabled))
++			return;	/* built-in idle masks are off, nothing to update */
++	}
++
++	if (idle)
++		cpumask_set_cpu(cpu, idle_masks.cpu);
++	else
++		cpumask_clear_cpu(cpu, idle_masks.cpu);
++
++#ifdef CONFIG_SCHED_SMT
++	if (sched_smt_active()) {
++		const struct cpumask *smt = cpu_smt_mask(cpu);
++
++		if (idle) {
++			/*
++			 * idle_masks.smt handling is racy but that's fine as
++			 * it's only for optimization and self-correcting.
++			 */
++			for_each_cpu(cpu, smt) {	/* NB: reuses @cpu as the iterator */
++				if (!cpumask_test_cpu(cpu, idle_masks.cpu))
++					return;	/* a sibling is not idle */
++			}
++			cpumask_or(idle_masks.smt, idle_masks.smt, smt);
++		} else {
++			cpumask_andnot(idle_masks.smt, idle_masks.smt, smt);
++		}
++	}
++#endif
++}
++
++static void handle_hotplug(struct rq *rq, bool online)
++{
++	int cpu = cpu_of(rq);
++
++	atomic_long_inc(&scx_hotplug_seq);	/* bumped on every hotplug event */
++
++	if (online && SCX_HAS_OP(cpu_online))
++		SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_online, cpu);
++	else if (!online && SCX_HAS_OP(cpu_offline))
++		SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_offline, cpu);
++	else	/* matching callback not implemented: exit with a restart code */
++		scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
++			     "cpu %d going %s, exiting scheduler", cpu,
++			     online ? "online" : "offline");
++}
++
++void scx_rq_activate(struct rq *rq)
++{
++	handle_hotplug(rq, true);	/* true: @rq's CPU is going online */
++}
++
++void scx_rq_deactivate(struct rq *rq)
++{
++	handle_hotplug(rq, false);	/* false: @rq's CPU is going offline */
++}
++
++static void rq_online_scx(struct rq *rq)
++{
++	rq->scx.flags |= SCX_RQ_ONLINE;	/* ->rq_online hook: set the online flag */
++}
++
++static void rq_offline_scx(struct rq *rq)
++{
++	rq->scx.flags &= ~SCX_RQ_ONLINE;	/* ->rq_offline hook: clear the online flag */
++}
++
++#else	/* CONFIG_SMP */
++
++static bool test_and_clear_cpu_idle(int cpu) { return false; }	/* !SMP stub */
++static s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, u64 flags) { return -EBUSY; }
++static void reset_idle_masks(void) {}	/* !SMP stub */
++
++#endif	/* CONFIG_SMP */
++
++static bool check_rq_for_timeouts(struct rq *rq)
++{
++	struct task_struct *p;
++	struct rq_flags rf;
++	bool timed_out = false;
++
++	rq_lock_irqsave(rq, &rf);	/* runnable_list is walked under the rq lock */
++	list_for_each_entry(p, &rq->scx.runnable_list, scx.runnable_node) {
++		unsigned long last_runnable = p->scx.runnable_at;
++
++		if (unlikely(time_after(jiffies,
++					last_runnable + scx_watchdog_timeout))) {
++			u32 dur_ms = jiffies_to_msecs(jiffies - last_runnable);
++
++			scx_ops_error_kind(SCX_EXIT_ERROR_STALL,
++					   "%s[%d] failed to run for %u.%03us",
++					   p->comm, p->pid,
++					   dur_ms / 1000, dur_ms % 1000);
++			timed_out = true;
++			break;	/* only the first stalled task found is reported */
++		}
++	}
++	rq_unlock_irqrestore(rq, &rf);
++
++	return timed_out;	/* true iff a stall was reported for @rq */
++}
++
++static void scx_watchdog_workfn(struct work_struct *work)
++{
++	int cpu;
++
++	WRITE_ONCE(scx_watchdog_timestamp, jiffies);	/* checked by scx_tick() */
++
++	for_each_online_cpu(cpu) {
++		if (unlikely(check_rq_for_timeouts(cpu_rq(cpu))))
++			break;	/* a stall was reported, skip the remaining CPUs */
++
++		cond_resched();
++	}
++	queue_delayed_work(system_unbound_wq, to_delayed_work(work),
++			   scx_watchdog_timeout / 2);	/* re-arm at half the timeout */
++}
++
++void scx_tick(struct rq *rq)
++{
++	unsigned long last_check;
++
++	if (!scx_enabled())
++		return;
++
++	last_check = READ_ONCE(scx_watchdog_timestamp);	/* written by the watchdog work */
++	if (unlikely(time_after(jiffies,
++				last_check + READ_ONCE(scx_watchdog_timeout)))) {
++		u32 dur_ms = jiffies_to_msecs(jiffies - last_check);
++
++		scx_ops_error_kind(SCX_EXIT_ERROR_STALL,
++				   "watchdog failed to check in for %u.%03us",
++				   dur_ms / 1000, dur_ms % 1000);
++	}
++
++	update_other_load_avgs(rq);
++}
++
++static void task_tick_scx(struct rq *rq, struct task_struct *curr, int queued)
++{
++	update_curr_scx(rq);
++
++	/*
++	 * While bypassing, always resched and refresh core-sched timestamp as
++	 * we can't trust the slice management or ops.core_sched_before().
++	 */
++	if (scx_rq_bypassing(rq)) {
++		curr->scx.slice = 0;
++		touch_core_sched(rq, curr);
++	} else if (SCX_HAS_OP(tick)) {
++		SCX_CALL_OP(SCX_KF_REST, tick, curr);
++	}
++
++	if (!curr->scx.slice)
++		resched_curr(rq);	/* slice is zero: request a reschedule */
++}
++
++#ifdef CONFIG_EXT_GROUP_SCHED
++static struct cgroup *tg_cgrp(struct task_group *tg)
++{
++	/*
++	 * A NULL @tg (CGROUP_SCHED disabled) and a @tg without a cgroup
++	 * attached (autogroup) both map to the default hierarchy's root.
++	 */
++	if (!tg || !tg->css.cgroup)
++		return &cgrp_dfl_root.cgrp;
++
++	/* regular task group backed by a real cgroup */
++	return tg->css.cgroup;
++}
++
++#define SCX_INIT_TASK_ARGS_CGROUP(tg)		.cgroup = tg_cgrp(tg),
++
++#else	/* CONFIG_EXT_GROUP_SCHED */
++
++#define SCX_INIT_TASK_ARGS_CGROUP(tg)
++
++#endif	/* CONFIG_EXT_GROUP_SCHED */
++
++static enum scx_task_state scx_get_task_state(const struct task_struct *p)
++{	/* the state is a masked and shifted field inside @p->scx.flags */
++	return (p->scx.flags & SCX_TASK_STATE_MASK) >> SCX_TASK_STATE_SHIFT;
++}
++
++static void scx_set_task_state(struct task_struct *p, enum scx_task_state state)
++{
++	enum scx_task_state prev_state = scx_get_task_state(p);
++	bool warn = false;
++
++	BUILD_BUG_ON(SCX_TASK_NR_STATES > (1 << SCX_TASK_STATE_BITS));
++
++	switch (state) {	/* validate the prev_state -> state transition */
++	case SCX_TASK_NONE:	/* any state may drop back to NONE */
++		break;
++	case SCX_TASK_INIT:
++		warn = prev_state != SCX_TASK_NONE;
++		break;
++	case SCX_TASK_READY:
++		warn = prev_state == SCX_TASK_NONE;
++		break;
++	case SCX_TASK_ENABLED:
++		warn = prev_state != SCX_TASK_READY;
++		break;
++	default:	/* unknown target state: warn and leave @p->scx.flags untouched */
++		WARN_ONCE(1, "sched_ext: Invalid task state %d for %s[%d]", state, p->comm, p->pid);
++		return;
++	}
++
++	WARN_ONCE(warn, "sched_ext: Invalid task state transition %d -> %d for %s[%d]",
++		  prev_state, state, p->comm, p->pid);
++
++	p->scx.flags &= ~SCX_TASK_STATE_MASK;
++	p->scx.flags |= state << SCX_TASK_STATE_SHIFT;
++}
++
++static int scx_ops_init_task(struct task_struct *p, struct task_group *tg, bool fork)
++{
++	int ret;
++
++	p->scx.disallow = false;	/* ops.init_task() may set this; checked below */
++
++	if (SCX_HAS_OP(init_task)) {
++		struct scx_init_task_args args = {
++			SCX_INIT_TASK_ARGS_CGROUP(tg)
++			.fork = fork,
++		};
++
++		ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init_task, p, &args);
++		if (unlikely(ret)) {
++			ret = ops_sanitize_err("init_task", ret);
++			return ret;	/* task state is not advanced to INIT on failure */
++		}
++	}
++
++	scx_set_task_state(p, SCX_TASK_INIT);
++
++	if (p->scx.disallow) {
++		if (!fork) {
++			struct rq *rq;
++			struct rq_flags rf;
++
++			rq = task_rq_lock(p, &rf);
++
++			/*
++			 * We're in the load path and @p->policy will be applied
++			 * right after. Reverting @p->policy here and rejecting
++			 * %SCHED_EXT transitions from scx_check_setscheduler()
++			 * guarantees that if ops.init_task() sets @p->disallow,
++			 * @p can never be in SCX.
++			 */
++			if (p->policy == SCHED_EXT) {
++				p->policy = SCHED_NORMAL;
++				atomic_long_inc(&scx_nr_rejected);	/* shown in sysfs nr_rejected */
++			}
++
++			task_rq_unlock(rq, p, &rf);
++		} else if (p->policy == SCHED_EXT) {
++			scx_ops_error("ops.init_task() set task->scx.disallow for %s[%d] during fork",
++				      p->comm, p->pid);
++		}
++	}
++
++	p->scx.flags |= SCX_TASK_RESET_RUNNABLE_AT;
++	return 0;
++}
++
++static void scx_ops_enable_task(struct task_struct *p)
++{
++	u32 weight;
++
++	lockdep_assert_rq_held(task_rq(p));
++
++	/*
++	 * Set the weight before calling ops.enable() so that the scheduler
++	 * doesn't see a stale value if it inspects the task struct.
++	 */
++	if (task_has_idle_policy(p))
++		weight = WEIGHT_IDLEPRIO;
++	else
++		weight = sched_prio_to_weight[p->static_prio - MAX_RT_PRIO];
++
++	p->scx.weight = sched_weight_to_cgroup(weight);
++
++	if (SCX_HAS_OP(enable))
++		SCX_CALL_OP_TASK(SCX_KF_REST, enable, p);
++	scx_set_task_state(p, SCX_TASK_ENABLED);	/* set even without ops.enable() */
++
++	if (SCX_HAS_OP(set_weight))
++		SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight);
++}
++
++static void scx_ops_disable_task(struct task_struct *p)
++{
++	lockdep_assert_rq_held(task_rq(p));
++	WARN_ON_ONCE(scx_get_task_state(p) != SCX_TASK_ENABLED);
++
++	if (SCX_HAS_OP(disable))
++		SCX_CALL_OP(SCX_KF_REST, disable, p);
++	scx_set_task_state(p, SCX_TASK_READY);	/* ENABLED -> READY, the reverse of enable */
++}
++
++static void scx_ops_exit_task(struct task_struct *p)
++{
++	struct scx_exit_task_args args = {
++		.cancelled = false,
++	};
++
++	lockdep_assert_rq_held(task_rq(p));
++
++	switch (scx_get_task_state(p)) {
++	case SCX_TASK_NONE:
++		return;	/* never initialized, nothing to undo */
++	case SCX_TASK_INIT:
++		args.cancelled = true;	/* exiting straight from INIT */
++		break;
++	case SCX_TASK_READY:
++		break;
++	case SCX_TASK_ENABLED:
++		scx_ops_disable_task(p);	/* brings @p back to READY first */
++		break;
++	default:
++		WARN_ON_ONCE(true);
++		return;
++	}
++
++	if (SCX_HAS_OP(exit_task))
++		SCX_CALL_OP(SCX_KF_REST, exit_task, p, &args);
++	scx_set_task_state(p, SCX_TASK_NONE);
++}
++
++void init_scx_entity(struct sched_ext_entity *scx)
++{
++	/*
++	 * init_idle() calls this function again after fork sequence is
++	 * complete. Don't touch ->tasks_node as it's already linked; only
++	 * the fields laid out before it are zeroed.
++	 */
++	memset(scx, 0, offsetof(struct sched_ext_entity, tasks_node));
++
++	INIT_LIST_HEAD(&scx->dsq_list.node);
++	RB_CLEAR_NODE(&scx->dsq_priq);
++	scx->sticky_cpu = -1;
++	scx->holding_cpu = -1;
++	INIT_LIST_HEAD(&scx->runnable_node);
++	scx->runnable_at = jiffies;
++	scx->ddsp_dsq_id = SCX_DSQ_INVALID;
++	scx->slice = SCX_SLICE_DFL;
++}
++
++void scx_pre_fork(struct task_struct *p)
++{
++	/*
++	 * BPF scheduler enable/disable paths want to be able to iterate and
++	 * update all tasks which can become complex when racing forks. As
++	 * enable/disable are very cold paths, use a percpu_rwsem to exclude
++	 * forks. Released by scx_post_fork() or scx_cancel_fork().
++	 */
++	percpu_down_read(&scx_fork_rwsem);
++}
++
++int scx_fork(struct task_struct *p)
++{
++	percpu_rwsem_assert_held(&scx_fork_rwsem);
++
++	/* nothing to initialize unless task init is currently enabled */
++	if (!scx_ops_init_task_enabled)
++		return 0;
++	return scx_ops_init_task(p, task_group(p), true);
++}
++
++void scx_post_fork(struct task_struct *p)
++{
++	if (scx_ops_init_task_enabled) {
++		scx_set_task_state(p, SCX_TASK_READY);
++
++		/*
++		 * Enable the task immediately if it's running on sched_ext.
++		 * Otherwise, it'll be enabled in switching_to_scx() if and
++		 * when it's ever configured to run with a SCHED_EXT policy.
++		 */
++		if (p->sched_class == &ext_sched_class) {
++			struct rq_flags rf;
++			struct rq *rq;
++
++			rq = task_rq_lock(p, &rf);
++			scx_ops_enable_task(p);
++			task_rq_unlock(rq, p, &rf);
++		}
++	}
++
++	spin_lock_irq(&scx_tasks_lock);
++	list_add_tail(&p->scx.tasks_node, &scx_tasks);	/* unlinked in sched_ext_free() */
++	spin_unlock_irq(&scx_tasks_lock);
++
++	percpu_up_read(&scx_fork_rwsem);	/* pairs with scx_pre_fork() */
++}
++
++void scx_cancel_fork(struct task_struct *p)
++{
++	if (scx_enabled()) {
++		struct rq *rq;
++		struct rq_flags rf;
++
++		rq = task_rq_lock(p, &rf);
++		WARN_ON_ONCE(scx_get_task_state(p) >= SCX_TASK_READY);
++		scx_ops_exit_task(p);	/* no-op when @p is still in NONE */
++		task_rq_unlock(rq, p, &rf);
++	}
++
++	percpu_up_read(&scx_fork_rwsem);	/* pairs with scx_pre_fork() */
++}
++
++void sched_ext_free(struct task_struct *p)
++{
++	unsigned long flags;
++
++	spin_lock_irqsave(&scx_tasks_lock, flags);
++	list_del_init(&p->scx.tasks_node);	/* linked in scx_post_fork() */
++	spin_unlock_irqrestore(&scx_tasks_lock, flags);
++
++	/*
++	 * @p is off scx_tasks and wholly ours. scx_ops_enable()'s READY ->
++	 * ENABLED transitions can't race us. Disable ops for @p.
++	 */
++	if (scx_get_task_state(p) != SCX_TASK_NONE) {
++		struct rq_flags rf;
++		struct rq *rq;
++
++		rq = task_rq_lock(p, &rf);
++		scx_ops_exit_task(p);
++		task_rq_unlock(rq, p, &rf);
++	}
++}
++
++static void reweight_task_scx(struct rq *rq, struct task_struct *p,
++			      const struct load_weight *lw)
++{
++	lockdep_assert_rq_held(task_rq(p));
++
++	p->scx.weight = sched_weight_to_cgroup(scale_load_down(lw->weight));
++	if (SCX_HAS_OP(set_weight))	/* cached weight is updated even without the op */
++		SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight);
++}
++
++static void prio_changed_scx(struct rq *rq, struct task_struct *p, int oldprio)
++{	/* ->prio_changed hook: empty */
++}
++
++static void switching_to_scx(struct rq *rq, struct task_struct *p)
++{
++	scx_ops_enable_task(p);	/* asserts @p's rq lock is held */
++
++	/*
++	 * set_cpus_allowed_scx() is not called while @p is associated with a
++	 * different scheduler class. Keep the BPF scheduler up-to-date.
++	 */
++	if (SCX_HAS_OP(set_cpumask))
++		SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p,
++				 (struct cpumask *)p->cpus_ptr);
++}
++
++static void switched_from_scx(struct rq *rq, struct task_struct *p)
++{
++	scx_ops_disable_task(p);	/* ->switched_from hook: ENABLED -> READY */
++}
++
++static void wakeup_preempt_scx(struct rq *rq, struct task_struct *p,int wake_flags) {}	/* no-op */
++static void switched_to_scx(struct rq *rq, struct task_struct *p) {}	/* no-op */
++
++int scx_check_setscheduler(struct task_struct *p, int policy)
++{
++	lockdep_assert_rq_held(task_rq(p));
++
++	/* only an actual switch into SCHED_EXT can be refused */
++	if (!scx_enabled() || policy != SCHED_EXT || p->policy == policy)
++		return 0;
++
++	/* refuse when @p->scx.disallow is set */
++	return READ_ONCE(p->scx.disallow) ? -EACCES : 0;
++}
++
++#ifdef CONFIG_NO_HZ_FULL
++bool scx_can_stop_tick(struct rq *rq)
++{
++	struct task_struct *p = rq->curr;
++
++	if (scx_rq_bypassing(rq))
++		return false;	/* never stop the tick while bypassing */
++
++	if (p->sched_class != &ext_sched_class)
++		return true;	/* current task isn't an SCX task */
++
++	/*
++	 * @rq can dispatch from different DSQs, so we can't tell whether it
++	 * needs the tick or not by looking at nr_running. Allow stopping ticks
++	 * iff the BPF scheduler indicated so. See set_next_task_scx().
++	 */
++	return rq->scx.flags & SCX_RQ_CAN_STOP_TICK;
++}
++#endif
++
++#ifdef CONFIG_EXT_GROUP_SCHED
++
++DEFINE_STATIC_PERCPU_RWSEM(scx_cgroup_rwsem);
++static bool scx_cgroup_enabled;
++static bool cgroup_warned_missing_weight;
++static bool cgroup_warned_missing_idle;
++
++static void scx_cgroup_warn_missing_weight(struct task_group *tg)
++{
++	/* stay quiet while ops are disabled or once the warning was issued */
++	if (scx_ops_enable_state() == SCX_OPS_DISABLED ||
++	    cgroup_warned_missing_weight)
++		return;
++	/* a parentless @tg or a weight-aware scheduler needs no warning */
++	if (!tg->css.parent || (scx_ops.flags & SCX_OPS_HAS_CGROUP_WEIGHT))
++		return;
++	cgroup_warned_missing_weight = true;
++	pr_warn("sched_ext: \"%s\" does not implement cgroup cpu.weight\n",
++		scx_ops.name);
++}
++
++static void scx_cgroup_warn_missing_idle(struct task_group *tg)
++{
++	/*
++	 * Complain only while cgroup support is enabled, only for a group
++	 * with ->idle set, and only until the warned flag is set.
++	 */
++	if (scx_cgroup_enabled && !cgroup_warned_missing_idle && tg->idle) {
++		pr_warn("sched_ext: \"%s\" does not implement cgroup cpu.idle\n",
++			scx_ops.name);
++		cgroup_warned_missing_idle = true;
++	}
++}
++
++int scx_tg_online(struct task_group *tg)
++{
++	int ret = 0;
++
++	WARN_ON_ONCE(tg->scx_flags & (SCX_TG_ONLINE | SCX_TG_INITED));
++
++	percpu_down_read(&scx_cgroup_rwsem);
++
++	scx_cgroup_warn_missing_weight(tg);
++
++	if (scx_cgroup_enabled) {
++		if (SCX_HAS_OP(cgroup_init)) {
++			struct scx_cgroup_init_args args =
++				{ .weight = tg->scx_weight };
++
++			ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init,
++					      tg->css.cgroup, &args);
++			if (ret)
++				ret = ops_sanitize_err("cgroup_init", ret);
++		}
++		if (ret == 0)	/* on failure @tg is marked neither ONLINE nor INITED */
++			tg->scx_flags |= SCX_TG_ONLINE | SCX_TG_INITED;
++	} else {
++		tg->scx_flags |= SCX_TG_ONLINE;	/* scx_cgroup_init() inits it later */
++	}
++
++	percpu_up_read(&scx_cgroup_rwsem);
++	return ret;
++}
++
++void scx_tg_offline(struct task_group *tg)
++{
++	WARN_ON_ONCE(!(tg->scx_flags & SCX_TG_ONLINE));
++
++	percpu_down_read(&scx_cgroup_rwsem);
++
++	if (SCX_HAS_OP(cgroup_exit) && (tg->scx_flags & SCX_TG_INITED))
++		SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, tg->css.cgroup);
++	tg->scx_flags &= ~(SCX_TG_ONLINE | SCX_TG_INITED);	/* cleared unconditionally */
++
++	percpu_up_read(&scx_cgroup_rwsem);
++}
++
++int scx_cgroup_can_attach(struct cgroup_taskset *tset)
++{
++	struct cgroup_subsys_state *css;
++	struct task_struct *p;
++	int ret;
++
++	/* released in scx_finish/cancel_attach() */
++	percpu_down_read(&scx_cgroup_rwsem);
++
++	if (!scx_cgroup_enabled)
++		return 0;	/* returns with the rwsem still held, see above */
++
++	cgroup_taskset_for_each(p, css, tset) {
++		struct cgroup *from = tg_cgrp(task_group(p));
++		struct cgroup *to = tg_cgrp(css_tg(css));
++
++		WARN_ON_ONCE(p->scx.cgrp_moving_from);
++
++		/*
++		 * sched_move_task() omits identity migrations. Let's match the
++		 * behavior so that ops.cgroup_prep_move() and ops.cgroup_move()
++		 * always match one-to-one.
++		 */
++		if (from == to)
++			continue;
++
++		if (SCX_HAS_OP(cgroup_prep_move)) {
++			ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_prep_move,
++					      p, from, css->cgroup);
++			if (ret)
++				goto err;
++		}
++
++		p->scx.cgrp_moving_from = from;	/* marks @p as prepared for the move */
++	}
++
++	return 0;	/* success also returns with the rwsem held */
++
++err:
++	cgroup_taskset_for_each(p, css, tset) {	/* undo tasks prepared so far */
++		if (SCX_HAS_OP(cgroup_cancel_move) && p->scx.cgrp_moving_from)
++			SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, p,
++				    p->scx.cgrp_moving_from, css->cgroup);
++		p->scx.cgrp_moving_from = NULL;
++	}
++
++	percpu_up_read(&scx_cgroup_rwsem);	/* only the error path unlocks here */
++	return ops_sanitize_err("cgroup_prep_move", ret);
++}
++
++void scx_move_task(struct task_struct *p)
++{
++	if (!scx_cgroup_enabled)
++		return;
++
++	/*
++	 * We're called from sched_move_task() which handles both cgroup and
++	 * autogroup moves. Ignore the latter.
++	 *
++	 * Also ignore exiting tasks, because in the exit path tasks transition
++	 * from the autogroup to the root group, so task_group_is_autogroup()
++	 * alone isn't able to catch exiting autogroup tasks. This is safe for
++	 * cgroup_move(), because cgroup migrations never happen for PF_EXITING
++	 * tasks.
++	 */
++	if (task_group_is_autogroup(task_group(p)) || (p->flags & PF_EXITING))
++		return;
++
++	/*
++	 * @p must have ops.cgroup_prep_move() called on it and thus
++	 * cgrp_moving_from set.
++	 */
++	if (SCX_HAS_OP(cgroup_move) && !WARN_ON_ONCE(!p->scx.cgrp_moving_from))
++		SCX_CALL_OP_TASK(SCX_KF_UNLOCKED, cgroup_move, p,
++			p->scx.cgrp_moving_from, tg_cgrp(task_group(p)));
++	p->scx.cgrp_moving_from = NULL;	/* cleared whether or not the op ran */
++}
++
++void scx_cgroup_finish_attach(void)
++{
++	percpu_up_read(&scx_cgroup_rwsem);	/* taken in scx_cgroup_can_attach() */
++}
++
++void scx_cgroup_cancel_attach(struct cgroup_taskset *tset)
++{
++	struct cgroup_subsys_state *css;
++	struct task_struct *p;
++
++	if (!scx_cgroup_enabled)
++		goto out_unlock;
++
++	cgroup_taskset_for_each(p, css, tset) {	/* only prepared tasks get the op */
++		if (SCX_HAS_OP(cgroup_cancel_move) && p->scx.cgrp_moving_from)
++			SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, p,
++				    p->scx.cgrp_moving_from, css->cgroup);
++		p->scx.cgrp_moving_from = NULL;
++	}
++out_unlock:
++	percpu_up_read(&scx_cgroup_rwsem);	/* taken in scx_cgroup_can_attach() */
++}
++
++void scx_group_set_weight(struct task_group *tg, unsigned long weight)
++{
++	percpu_down_read(&scx_cgroup_rwsem);
++
++	if (scx_cgroup_enabled && tg->scx_weight != weight) {
++		if (SCX_HAS_OP(cgroup_set_weight))
++			SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_set_weight,
++				    tg_cgrp(tg), weight);
++		tg->scx_weight = weight;	/* recorded only while cgroup support is enabled */
++	}
++
++	percpu_up_read(&scx_cgroup_rwsem);
++}
++
++void scx_group_set_idle(struct task_group *tg, bool idle)
++{
++	percpu_down_read(&scx_cgroup_rwsem);
++	scx_cgroup_warn_missing_idle(tg);	/* @idle itself is unused here */
++	percpu_up_read(&scx_cgroup_rwsem);
++}
++
++static void scx_cgroup_lock(void)
++{
++	percpu_down_write(&scx_cgroup_rwsem);	/* write side; readers are the cgroup hooks above */
++}
++
++static void scx_cgroup_unlock(void)
++{
++	percpu_up_write(&scx_cgroup_rwsem);	/* pairs with scx_cgroup_lock() */
++}
++
++#else	/* CONFIG_EXT_GROUP_SCHED */
++
++static inline void scx_cgroup_lock(void) {}	/* !CONFIG_EXT_GROUP_SCHED stub */
++static inline void scx_cgroup_unlock(void) {}	/* !CONFIG_EXT_GROUP_SCHED stub */
++
++#endif	/* CONFIG_EXT_GROUP_SCHED */
++
++/*
++ * Omitted operations:
++ *
++ * - wakeup_preempt: NOOP as it isn't useful in the wakeup path because the task
++ *   isn't tied to the CPU at that point. Preemption is implemented by resetting
++ *   the victim task's slice to 0 and triggering reschedule on the target CPU.
++ *
++ * - migrate_task_rq: Unnecessary as task to cpu mapping is transient.
++ *
++ * - task_fork/dead: We need fork/dead notifications for all tasks regardless of
++ *   their current sched_class. Call them directly from sched core instead.
++ */
++DEFINE_SCHED_CLASS(ext) = {
++	.enqueue_task		= enqueue_task_scx,
++	.dequeue_task		= dequeue_task_scx,
++	.yield_task		= yield_task_scx,
++	.yield_to_task		= yield_to_task_scx,
++
++	.wakeup_preempt		= wakeup_preempt_scx,	/* empty stub, see above */
++
++	.balance		= balance_scx,
++	.pick_next_task		= pick_next_task_scx,
++
++	.put_prev_task		= put_prev_task_scx,
++	.set_next_task		= set_next_task_scx,
++
++	.switch_class		= switch_class_scx,
++
++#ifdef CONFIG_SMP
++	.select_task_rq		= select_task_rq_scx,
++	.task_woken		= task_woken_scx,
++	.set_cpus_allowed	= set_cpus_allowed_scx,
++
++	.rq_online		= rq_online_scx,
++	.rq_offline		= rq_offline_scx,
++#endif
++
++#ifdef CONFIG_SCHED_CORE
++	.pick_task		= pick_task_scx,
++#endif
++
++	.task_tick		= task_tick_scx,
++
++	.switching_to		= switching_to_scx,
++	.switched_from		= switched_from_scx,
++	.switched_to		= switched_to_scx,	/* empty stub */
++	.reweight_task		= reweight_task_scx,
++	.prio_changed		= prio_changed_scx,	/* empty stub */
++
++	.update_curr		= update_curr_scx,
++
++#ifdef CONFIG_UCLAMP_TASK
++	.uclamp_enabled		= 1,
++#endif
++};
++
++static void init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id)
++{
++	memset(dsq, 0, sizeof(*dsq));	/* start from all-zero, then set up the non-zero fields */
++
++	raw_spin_lock_init(&dsq->lock);
++	INIT_LIST_HEAD(&dsq->list);
++	dsq->id = dsq_id;
++}
++
++static struct scx_dispatch_q *create_dsq(u64 dsq_id, int node)
++{
++	struct scx_dispatch_q *dsq;
++	int ret;
++
++	if (dsq_id & SCX_DSQ_FLAG_BUILTIN)
++		return ERR_PTR(-EINVAL);	/* IDs with the built-in flag are rejected */
++
++	dsq = kmalloc_node(sizeof(*dsq), GFP_KERNEL, node);
++	if (!dsq)
++		return ERR_PTR(-ENOMEM);
++
++	init_dsq(dsq, dsq_id);
++
++	ret = rhashtable_insert_fast(&dsq_hash, &dsq->hash_node,
++				     dsq_hash_params);
++	if (ret) {
++		kfree(dsq);	/* insertion failed, @dsq is not in dsq_hash */
++		return ERR_PTR(ret);
++	}
++	return dsq;	/* callers must test with IS_ERR(), never NULL */
++}
++
++static void free_dsq_irq_workfn(struct irq_work *irq_work)
++{
++	struct llist_node *to_free = llist_del_all(&dsqs_to_free);	/* grab the whole list */
++	struct scx_dispatch_q *dsq, *tmp_dsq;
++
++	llist_for_each_entry_safe(dsq, tmp_dsq, to_free, free_node)
++		kfree_rcu(dsq, rcu);	/* queued by destroy_dsq() */
++}
++
++static DEFINE_IRQ_WORK(free_dsq_irq_work, free_dsq_irq_workfn);
++
++static void destroy_dsq(u64 dsq_id)
++{
++	struct scx_dispatch_q *dsq;
++	unsigned long flags;
++
++	rcu_read_lock();
++
++	dsq = find_user_dsq(dsq_id);
++	if (!dsq)
++		goto out_unlock_rcu;	/* unknown ID: silently ignored */
++
++	raw_spin_lock_irqsave(&dsq->lock, flags);
++
++	if (dsq->nr) {
++		scx_ops_error("attempting to destroy in-use dsq 0x%016llx (nr=%u)",
++			      dsq->id, dsq->nr);
++		goto out_unlock_dsq;
++	}
++
++	if (rhashtable_remove_fast(&dsq_hash, &dsq->hash_node, dsq_hash_params))
++		goto out_unlock_dsq;	/* removal failed: do not queue it for freeing */
++
++	/*
++	 * Mark dead by invalidating ->id to prevent dispatch_enqueue() from
++	 * queueing more tasks. As this function can be called from anywhere,
++	 * freeing is bounced through an irq work to avoid nesting RCU
++	 * operations inside scheduler locks.
++	 */
++	dsq->id = SCX_DSQ_INVALID;
++	llist_add(&dsq->free_node, &dsqs_to_free);
++	irq_work_queue(&free_dsq_irq_work);
++
++out_unlock_dsq:
++	raw_spin_unlock_irqrestore(&dsq->lock, flags);
++out_unlock_rcu:
++	rcu_read_unlock();
++}
++
++#ifdef CONFIG_EXT_GROUP_SCHED
++static void scx_cgroup_exit(void)
++{
++	struct cgroup_subsys_state *css;
++
++	percpu_rwsem_assert_held(&scx_cgroup_rwsem);
++
++	WARN_ON_ONCE(!scx_cgroup_enabled);
++	scx_cgroup_enabled = false;
++
++	/*
++	 * scx_tg_on/offline() are excluded through scx_cgroup_rwsem. If we walk
++	 * cgroups and exit all the inited ones, all online cgroups are exited.
++	 */
++	rcu_read_lock();
++	css_for_each_descendant_post(css, &root_task_group.css) {
++		struct task_group *tg = css_tg(css);
++
++		if (!(tg->scx_flags & SCX_TG_INITED))
++			continue;
++		tg->scx_flags &= ~SCX_TG_INITED;	/* cleared even without ops.cgroup_exit() */
++
++		if (!scx_ops.cgroup_exit)
++			continue;
++
++		if (WARN_ON_ONCE(!css_tryget(css)))
++			continue;
++		rcu_read_unlock();	/* the op is called outside RCU; the css ref pins @css */
++
++		SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, css->cgroup);
++
++		rcu_read_lock();
++		css_put(css);
++	}
++	rcu_read_unlock();
++}
++
++static int scx_cgroup_init(void)
++{
++	struct cgroup_subsys_state *css;
++	int ret;
++
++	percpu_rwsem_assert_held(&scx_cgroup_rwsem);
++
++	cgroup_warned_missing_weight = false;	/* re-arm the one-shot warnings */
++	cgroup_warned_missing_idle = false;
++
++	/*
++	 * scx_tg_on/offline() are excluded through scx_cgroup_rwsem. If we walk
++	 * cgroups and init, all online cgroups are initialized.
++	 */
++	rcu_read_lock();
++	css_for_each_descendant_pre(css, &root_task_group.css) {
++		struct task_group *tg = css_tg(css);
++		struct scx_cgroup_init_args args = { .weight = tg->scx_weight };
++
++		scx_cgroup_warn_missing_weight(tg);
++		scx_cgroup_warn_missing_idle(tg);
++
++		if ((tg->scx_flags &
++		     (SCX_TG_ONLINE | SCX_TG_INITED)) != SCX_TG_ONLINE)
++			continue;	/* only online, not-yet-inited groups proceed */
++
++		if (!scx_ops.cgroup_init) {
++			tg->scx_flags |= SCX_TG_INITED;
++			continue;
++		}
++
++		if (WARN_ON_ONCE(!css_tryget(css)))
++			continue;
++		rcu_read_unlock();	/* the op is called outside RCU; the css ref pins @css */
++
++		ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init,
++				      css->cgroup, &args);
++		if (ret) {
++			css_put(css);
++			return ret;	/* RCU already dropped; scx_cgroup_enabled stays false */
++		}
++		tg->scx_flags |= SCX_TG_INITED;
++
++		rcu_read_lock();
++		css_put(css);
++	}
++	rcu_read_unlock();
++
++	WARN_ON_ONCE(scx_cgroup_enabled);
++	scx_cgroup_enabled = true;
++
++	return 0;
++}
++
++#else
++static void scx_cgroup_exit(void) {}	/* !CONFIG_EXT_GROUP_SCHED stub */
++static int scx_cgroup_init(void) { return 0; }	/* !CONFIG_EXT_GROUP_SCHED stub */
++#endif
++
++
++/********************************************************************************
++ * Sysfs interface and ops enable/disable.
++ */
++
++#define SCX_ATTR(_name)	/* defines scx_attr_<_name>; needs scx_attr_<_name>_show() */	\
++	static struct kobj_attribute scx_attr_##_name = {			\
++		.attr = { .name = __stringify(_name), .mode = 0444 },		\
++		.show = scx_attr_##_name##_show,				\
++	}
++
++static ssize_t scx_attr_state_show(struct kobject *kobj,
++				   struct kobj_attribute *ka, char *buf)
++{
++	return sysfs_emit(buf, "%s\n",
++			  scx_ops_enable_state_str[scx_ops_enable_state()]);
++}
++SCX_ATTR(state);	/* mode 0444 attribute "state": current enable state as a string */
++
++static ssize_t scx_attr_switch_all_show(struct kobject *kobj,
++					struct kobj_attribute *ka, char *buf)
++{
++	return sysfs_emit(buf, "%d\n", READ_ONCE(scx_switching_all));
++}
++SCX_ATTR(switch_all);	/* mode 0444 attribute "switch_all": scx_switching_all as 0/1 */
++
++static ssize_t scx_attr_nr_rejected_show(struct kobject *kobj,
++					 struct kobj_attribute *ka, char *buf)
++{
++	return sysfs_emit(buf, "%ld\n", atomic_long_read(&scx_nr_rejected));
++}
++SCX_ATTR(nr_rejected);	/* counter bumped in scx_ops_init_task() on disallow */
++
++static ssize_t scx_attr_hotplug_seq_show(struct kobject *kobj,
++					 struct kobj_attribute *ka, char *buf)
++{
++	return sysfs_emit(buf, "%ld\n", atomic_long_read(&scx_hotplug_seq));
++}
++SCX_ATTR(hotplug_seq);	/* counter bumped in handle_hotplug() */
++
++static ssize_t scx_attr_enable_seq_show(struct kobject *kobj,
++					struct kobj_attribute *ka, char *buf)
++{
++	return sysfs_emit(buf, "%ld\n", atomic_long_read(&scx_enable_seq));
++}
++SCX_ATTR(enable_seq);	/* mode 0444 attribute "enable_seq": value of scx_enable_seq */
++
++static struct attribute *scx_global_attrs[] = {
++	&scx_attr_state.attr,
++	&scx_attr_switch_all.attr,
++	&scx_attr_nr_rejected.attr,
++	&scx_attr_hotplug_seq.attr,
++	&scx_attr_enable_seq.attr,
++	NULL,	/* terminator */
++};
++
++static const struct attribute_group scx_global_attr_group = {
++	.attrs = scx_global_attrs,	/* the five read-only attributes above */
++};
++
++static void scx_kobj_release(struct kobject *kobj)
++{
++	kfree(kobj);	/* ->release of scx_ktype: frees the kobject itself */
++}
++
++static ssize_t scx_attr_ops_show(struct kobject *kobj,
++				 struct kobj_attribute *ka, char *buf)
++{
++	return sysfs_emit(buf, "%s\n", scx_ops.name);
++}
++SCX_ATTR(ops);	/* mode 0444 attribute "ops": name of the current scx_ops */
++
++static struct attribute *scx_sched_attrs[] = {
++	&scx_attr_ops.attr,
++	NULL,	/* terminator */
++};
++ATTRIBUTE_GROUPS(scx_sched);	/* generates scx_sched_groups used below */
++
++static const struct kobj_type scx_ktype = {
++	.release = scx_kobj_release,
++	.sysfs_ops = &kobj_sysfs_ops,
++	.default_groups = scx_sched_groups,
++};
++
++static int scx_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
++{
++	return add_uevent_var(env, "SCXOPS=%s", scx_ops.name);	/* @kobj is unused */
++}
++
++static const struct kset_uevent_ops scx_uevent_ops = {
++	.uevent = scx_uevent,	/* adds SCXOPS=<name> to the uevent environment */
++};
++
++/*
++ * Used by sched_fork() and __setscheduler_prio() to pick the matching
++ * sched_class. dl/rt are already handled.
++ */
++bool task_should_scx(struct task_struct *p)
++{
++	/* never pick SCX while it's off or in the middle of being disabled */
++	if (!scx_enabled())
++		return false;
++	if (unlikely(scx_ops_enable_state() == SCX_OPS_DISABLING))
++		return false;
++	return READ_ONCE(scx_switching_all) || p->policy == SCHED_EXT;
++}
++
++/**
++ * scx_ops_bypass - [Un]bypass scx_ops and guarantee forward progress
++ *
++ * Bypassing guarantees that all runnable tasks make forward progress without
++ * trusting the BPF scheduler. We can't grab any mutexes or rwsems as they might
++ * be held by tasks that the BPF scheduler is forgetting to run, which
++ * unfortunately also excludes toggling the static branches.
++ *
++ * Let's work around by overriding a couple ops and modifying behaviors based on
++ * the DISABLING state and then cycling the queued tasks through dequeue/enqueue
++ * to force global FIFO scheduling.
++ *
++ * - ops.select_cpu() is ignored and the default select_cpu() is used.
++ *
++ * - ops.enqueue() is ignored and tasks are queued in simple global FIFO order.
++ *
++ * - ops.dispatch() is ignored.
++ *
++ * - balance_scx() does not set %SCX_RQ_BAL_KEEP on non-zero slice as slice
++ *   can't be trusted. Whenever a tick triggers, the running task is rotated to
++ *   the tail of the queue with core_sched_at touched.
++ *
++ * - pick_next_task() suppresses zero slice warning.
++ *
++ * - scx_bpf_kick_cpu() is disabled to avoid irq_work malfunction during PM
++ *   operations.
++ *
++ * - scx_prio_less() reverts to the default core_sched_at order.
++ */
++static void scx_ops_bypass(bool bypass)
++{
++	int depth, cpu;
++
++	if (bypass) {
++		depth = atomic_inc_return(&scx_ops_bypass_depth);
++		WARN_ON_ONCE(depth <= 0);
++		if (depth != 1)
++			return;	/* nested request, bypass is already in effect */
++	} else {
++		depth = atomic_dec_return(&scx_ops_bypass_depth);
++		WARN_ON_ONCE(depth < 0);
++		if (depth != 0)
++			return;	/* other bypass requests are still outstanding */
++	}
++
++	/*
++	 * No task property is changing. We just need to make sure all currently
++	 * queued tasks are re-queued according to the new scx_rq_bypassing()
++	 * state. As an optimization, walk each rq's runnable_list instead of
++	 * the scx_tasks list.
++	 *
++	 * This function can't trust the scheduler and thus can't use
++	 * cpus_read_lock(). Walk all possible CPUs instead of online.
++	 */
++	for_each_possible_cpu(cpu) {
++		struct rq *rq = cpu_rq(cpu);
++		struct rq_flags rf;
++		struct task_struct *p, *n;
++
++		rq_lock_irqsave(rq, &rf);
++
++		if (bypass) {
++			WARN_ON_ONCE(rq->scx.flags & SCX_RQ_BYPASSING);
++			rq->scx.flags |= SCX_RQ_BYPASSING;
++		} else {
++			WARN_ON_ONCE(!(rq->scx.flags & SCX_RQ_BYPASSING));
++			rq->scx.flags &= ~SCX_RQ_BYPASSING;
++		}
++
++		/*
++		 * We need to guarantee that no tasks are on the BPF scheduler
++		 * while bypassing. Either we see enabled or the enable path
++		 * sees scx_rq_bypassing() before moving tasks to SCX.
++		 */
++		if (!scx_enabled()) {
++			rq_unlock_irqrestore(rq, &rf);
++			continue;	/* flag is updated, but there is nothing to requeue */
++		}
++
++		/*
++		 * The use of list_for_each_entry_safe_reverse() is required
++		 * because each task is going to be removed from and added back
++		 * to the runnable_list during iteration. Because they're added
++		 * to the tail of the list, safe reverse iteration can still
++		 * visit all nodes.
++		 */
++		list_for_each_entry_safe_reverse(p, n, &rq->scx.runnable_list,
++						 scx.runnable_node) {
++			struct sched_enq_and_set_ctx ctx;
++
++			/* cycling deq/enq is enough, see the function comment */
++			sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
++			sched_enq_and_set_task(&ctx);
++		}
++
++		rq_unlock_irqrestore(rq, &rf);
++
++		/* resched to restore ticks and idle state */
++		resched_cpu(cpu);
++	}
++}
++
++static void free_exit_info(struct scx_exit_info *ei)
++{
++	kfree(ei->bt);		/* the three side buffers go first ... */
++	kfree(ei->msg);
++	kfree(ei->dump);
++	kfree(ei);		/* ... then the container itself */
++}
++
++static struct scx_exit_info *alloc_exit_info(size_t exit_dump_len)
++{
++	struct scx_exit_info *info = kzalloc(sizeof(*info), GFP_KERNEL);
++
++	if (!info)
++		return NULL;
++
++	/* allocate all three buffers first, then validate them in one go */
++	info->bt = kcalloc(SCX_EXIT_BT_LEN, sizeof(info->bt[0]), GFP_KERNEL);
++	info->msg = kzalloc(SCX_EXIT_MSG_LEN, GFP_KERNEL);
++	info->dump = kzalloc(exit_dump_len, GFP_KERNEL);
++
++	if (info->bt && info->msg && info->dump)
++		return info;
++
++	/* partial failure: free_exit_info() releases whatever was allocated */
++	free_exit_info(info);
++	return NULL;
++}
++
++static const char *scx_exit_reason(enum scx_exit_kind kind)
++{
++	switch (kind) {	/* maps an exit kind to a static human-readable string */
++	case SCX_EXIT_UNREG:
++		return "unregistered from user space";
++	case SCX_EXIT_UNREG_BPF:
++		return "unregistered from BPF";
++	case SCX_EXIT_UNREG_KERN:
++		return "unregistered from the main kernel";
++	case SCX_EXIT_SYSRQ:
++		return "disabled by sysrq-S";
++	case SCX_EXIT_ERROR:
++		return "runtime error";
++	case SCX_EXIT_ERROR_BPF:
++		return "scx_bpf_error";
++	case SCX_EXIT_ERROR_STALL:
++		return "runnable task stall";
++	default:	/* any kind not listed above */
++		return "<UNKNOWN>";
++	}
++}
++
++static void scx_ops_disable_workfn(struct kthread_work *work)
++{
++	struct scx_exit_info *ei = scx_exit_info;
++	struct scx_task_iter sti;
++	struct task_struct *p;
++	struct rhashtable_iter rht_iter;
++	struct scx_dispatch_q *dsq;
++	int i, kind;
++
++	kind = atomic_read(&scx_exit_kind);
++	while (true) {
++		/*
++		 * NONE indicates that a new scx_ops has been registered since
++		 * disable was scheduled - don't kill the new ops. DONE
++		 * indicates that the ops has already been disabled.
++		 */
++		if (kind == SCX_EXIT_NONE || kind == SCX_EXIT_DONE)
++			return;
++		if (atomic_try_cmpxchg(&scx_exit_kind, &kind, SCX_EXIT_DONE))
++			break;
++	}
++	ei->kind = kind;	/* the kind the cmpxchg above replaced with DONE */
++	ei->reason = scx_exit_reason(ei->kind);
++
++	/* guarantee forward progress by bypassing scx_ops */
++	scx_ops_bypass(true);
++
++	switch (scx_ops_set_enable_state(SCX_OPS_DISABLING)) {
++	case SCX_OPS_DISABLING:
++		WARN_ONCE(true, "sched_ext: duplicate disabling instance?");
++		break;
++	case SCX_OPS_DISABLED:
++		pr_warn("sched_ext: ops error detected without ops (%s)\n",
++			scx_exit_info->msg);
++		WARN_ON_ONCE(scx_ops_set_enable_state(SCX_OPS_DISABLED) !=
++			     SCX_OPS_DISABLING);
++		goto done;	/* skip the teardown, only leave bypass mode */
++	default:
++		break;
++	}
++
++	/*
++	 * Here, every runnable task is guaranteed to make forward progress and
++	 * we can safely use blocking synchronization constructs. Actually
++	 * disable ops.
++	 */
++	mutex_lock(&scx_ops_enable_mutex);
++
++	static_branch_disable(&__scx_switched_all);
++	WRITE_ONCE(scx_switching_all, false);
++
++	/*
++	 * Shut down cgroup support before tasks so that the cgroup attach path
++	 * doesn't race against scx_ops_exit_task().
++	 */
++	scx_cgroup_lock();
++	scx_cgroup_exit();
++	scx_cgroup_unlock();
++
++	/*
++	 * The BPF scheduler is going away. All tasks including %TASK_DEAD ones
++	 * must be switched out and exited synchronously.
++	 */
++	percpu_down_write(&scx_fork_rwsem);
++
++	scx_ops_init_task_enabled = false;
++
++	scx_task_iter_start(&sti);
++	while ((p = scx_task_iter_next_locked(&sti))) {
++		const struct sched_class *old_class = p->sched_class;
++		struct sched_enq_and_set_ctx ctx;
++
++		sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
++
++		__setscheduler_prio(p, p->prio);
++		check_class_changing(task_rq(p), p, old_class);
++
++		sched_enq_and_set_task(&ctx);
++
++		check_class_changed(task_rq(p), p, old_class, p->prio);
++		scx_ops_exit_task(p);
++	}
++	scx_task_iter_stop(&sti);
++	percpu_up_write(&scx_fork_rwsem);
++
++	/* no task is on scx, turn off all the switches and flush in-progress calls */
++	static_branch_disable(&__scx_ops_enabled);
++	for (i = SCX_OPI_BEGIN; i < SCX_OPI_END; i++)
++		static_branch_disable(&scx_has_op[i]);
++	static_branch_disable(&scx_ops_enq_last);
++	static_branch_disable(&scx_ops_enq_exiting);
++	static_branch_disable(&scx_ops_cpu_preempt);
++	static_branch_disable(&scx_builtin_idle_enabled);
++	synchronize_rcu();
++
++	if (ei->kind >= SCX_EXIT_ERROR) {	/* error exits log at pr_err level */
++		pr_err("sched_ext: BPF scheduler \"%s\" disabled (%s)\n",
++		       scx_ops.name, ei->reason);
++
++		if (ei->msg[0] != '\0')
++			pr_err("sched_ext: %s: %s\n", scx_ops.name, ei->msg);
++#ifdef CONFIG_STACKTRACE
++		stack_trace_print(ei->bt, ei->bt_len, 2);
++#endif
++	} else {
++		pr_info("sched_ext: BPF scheduler \"%s\" disabled (%s)\n",
++			scx_ops.name, ei->reason);
++	}
++
++	if (scx_ops.exit)	/* hand the exit info to the BPF scheduler */
++		SCX_CALL_OP(SCX_KF_UNLOCKED, exit, ei);
++
++	cancel_delayed_work_sync(&scx_watchdog_work);
++
++	/*
++	 * Delete the kobject from the hierarchy eagerly in addition to just
++	 * dropping a reference. Otherwise, if the object is deleted
++	 * asynchronously, sysfs could observe an object of the same name still
++	 * in the hierarchy when another scheduler is loaded.
++	 */
++	kobject_del(scx_root_kobj);
++	kobject_put(scx_root_kobj);
++	scx_root_kobj = NULL;
++
++	memset(&scx_ops, 0, sizeof(scx_ops));	/* drop the old callbacks */
++
++	rhashtable_walk_enter(&dsq_hash, &rht_iter);
++	do {
++		rhashtable_walk_start(&rht_iter);
++
++		while ((dsq = rhashtable_walk_next(&rht_iter)) && !IS_ERR(dsq))
++			destroy_dsq(dsq->id);
++
++		rhashtable_walk_stop(&rht_iter);
++	} while (dsq == ERR_PTR(-EAGAIN));	/* walk again on -EAGAIN */
++	rhashtable_walk_exit(&rht_iter);
++
++	free_percpu(scx_dsp_ctx);
++	scx_dsp_ctx = NULL;
++	scx_dsp_max_batch = 0;
++
++	free_exit_info(scx_exit_info);	/* @ei is stale from here on */
++	scx_exit_info = NULL;
++
++	mutex_unlock(&scx_ops_enable_mutex);
++
++	WARN_ON_ONCE(scx_ops_set_enable_state(SCX_OPS_DISABLED) !=
++		     SCX_OPS_DISABLING);
++done:
++	scx_ops_bypass(false);	/* pairs with scx_ops_bypass(true) above */
++}
++
++static DEFINE_KTHREAD_WORK(scx_ops_disable_work, scx_ops_disable_workfn);
++
++static void schedule_scx_ops_disable_work(void)
++{
++	struct kthread_worker *helper = READ_ONCE(scx_ops_helper);
++
++	/*
++	 * We may be called spuriously before the first bpf_scx_reg(). The
++	 * helper is created by scx_ops_enable(); until then, do nothing.
++	 */
++	if (helper)
++		kthread_queue_work(helper, &scx_ops_disable_work);
++}
++
++static void scx_ops_disable(enum scx_exit_kind kind)
++{
++	int none = SCX_EXIT_NONE;
++
++	if (WARN_ON_ONCE(kind == SCX_EXIT_NONE || kind == SCX_EXIT_DONE))
++		kind = SCX_EXIT_ERROR;	/* NONE/DONE are not valid requests */
++
++	atomic_try_cmpxchg(&scx_exit_kind, &none, kind); /* no-op unless NONE */
++
++	schedule_scx_ops_disable_work();	/* asynchronous; callers flush */
++}
++
++static void dump_newline(struct seq_buf *s)
++{
++	trace_sched_ext_dump("");	/* the tracepoint gets an empty line */
++
++	/* @s may be zero sized and seq_buf triggers WARN if so */
++	if (s->size)
++		seq_buf_putc(s, '\n');
++}
++
++static __printf(2, 3) void dump_line(struct seq_buf *s, const char *fmt, ...)
++{
++	va_list args;
++
++#ifdef CONFIG_TRACEPOINTS
++	if (trace_sched_ext_dump_enabled()) {
++		/* protected by scx_dump_state()::dump_lock */
++		static char line_buf[SCX_EXIT_MSG_LEN];
++
++		va_start(args, fmt);
++		vscnprintf(line_buf, sizeof(line_buf), fmt, args);
++		va_end(args);
++
++		trace_sched_ext_dump(line_buf);
++	}
++#endif	/* CONFIG_TRACEPOINTS */
++	/* @s may be zero sized and seq_buf triggers WARN if so */
++	if (s->size) {
++		va_start(args, fmt);	/* fresh va_list for the second sink */
++		seq_buf_vprintf(s, fmt, args);
++		va_end(args);
++
++		seq_buf_putc(s, '\n');	/* the line terminator is added here */
++	}
++}
++
++static void dump_stack_trace(struct seq_buf *s, const char *prefix,
++			     const unsigned long *bt, unsigned int len)
++{
++	unsigned int i;
++
++	for (i = 0; i < len; i++)	/* one @prefix + "%pS" line per entry */
++		dump_line(s, "%s%pS", prefix, (void *)bt[i]);
++}
++
++static void ops_dump_init(struct seq_buf *s, const char *prefix)
++{
++	struct scx_dump_data *dd = &scx_dump_data;
++
++	lockdep_assert_irqs_disabled();
++
++	dd->cpu = smp_processor_id();		/* allow scx_bpf_dump() */
++	dd->first = true;			/* cleared by ops_dump_flush() */
++	dd->cursor = 0;				/* start with an empty buffer */
++	dd->s = s;				/* where flushed lines go */
++	dd->prefix = prefix;			/* prepended to each line */
++}
++
++static void ops_dump_flush(void)
++{
++	struct scx_dump_data *dd = &scx_dump_data;
++	char *line = dd->buf.line;
++
++	if (!dd->cursor)	/* nothing to flush */
++		return;
++
++	/*
++	 * There's something to flush and this is the first line. Insert a blank
++	 * line to distinguish ops dump.
++	 */
++	if (dd->first) {
++		dump_newline(dd->s);
++		dd->first = false;
++	}
++
++	/*
++	 * There may be multiple lines in $line. Scan and emit each line
++	 * separately.
++	 */
++	while (true) {
++		char *end = line;
++		char c;
++
++		while (*end != '\n' && *end != '\0')
++			end++;
++
++		/*
++		 * If $line overflowed, it may not have newline at the end.
++		 * Always emit with a newline.
++		 */
++		c = *end;	/* remember which terminator ended this line */
++		*end = '\0';
++		dump_line(dd->s, "%s%s", dd->prefix, line);
++		if (c == '\0')
++			break;
++
++		/* move to the next line */
++		end++;
++		if (*end == '\0')	/* the text ended with that newline */
++			break;
++		line = end;
++	}
++
++	dd->cursor = 0;		/* everything has been emitted */
++}
++
++static void ops_dump_exit(void)
++{
++	ops_dump_flush();		/* emit whatever is still buffered */
++	scx_dump_data.cpu = -1;		/* reverts what ops_dump_init() set */
++}
++
++static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx,
++			  struct task_struct *p, char marker)
++{
++	static unsigned long bt[SCX_EXIT_BT_LEN]; /* shared; caller holds dump_lock */
++	char dsq_id_buf[19] = "(n/a)";	/* fits "0x" + 16 hex digits + NUL */
++	unsigned long ops_state = atomic_long_read(&p->scx.ops_state);
++	unsigned int bt_len = 0;
++
++	if (p->scx.dsq)
++		scnprintf(dsq_id_buf, sizeof(dsq_id_buf), "0x%llx",
++			  (unsigned long long)p->scx.dsq->id);
++
++	dump_newline(s);
++	dump_line(s, " %c%c %s[%d] %+ldms",
++		  marker, task_state_to_char(p), p->comm, p->pid,
++		  jiffies_delta_msecs(p->scx.runnable_at, dctx->at_jiffies));
++	dump_line(s, "      scx_state/flags=%u/0x%x dsq_flags=0x%x ops_state/qseq=%lu/%lu",
++		  scx_get_task_state(p), p->scx.flags & ~SCX_TASK_STATE_MASK,
++		  p->scx.dsq_flags, ops_state & SCX_OPSS_STATE_MASK,
++		  ops_state >> SCX_OPSS_QSEQ_SHIFT);
++	dump_line(s, "      sticky/holding_cpu=%d/%d dsq_id=%s dsq_vtime=%llu",
++		  p->scx.sticky_cpu, p->scx.holding_cpu, dsq_id_buf,
++		  p->scx.dsq_vtime);
++	dump_line(s, "      cpus=%*pb", cpumask_pr_args(p->cpus_ptr));
++
++	if (SCX_HAS_OP(dump_task)) {	/* let the BPF scheduler add lines */
++		ops_dump_init(s, "    ");
++		SCX_CALL_OP(SCX_KF_REST, dump_task, dctx, p);
++		ops_dump_exit();
++	}
++
++#ifdef CONFIG_STACKTRACE
++	bt_len = stack_trace_save_tsk(p, bt, SCX_EXIT_BT_LEN, 1);
++#endif
++	if (bt_len) {	/* stays 0 when CONFIG_STACKTRACE is off */
++		dump_newline(s);
++		dump_stack_trace(s, "    ", bt, bt_len);
++	}
++}
++
++static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
++{
++	static DEFINE_SPINLOCK(dump_lock);
++	static const char trunc_marker[] = "\n\n~~~~ TRUNCATED ~~~~\n";
++	struct scx_dump_ctx dctx = {
++		.kind = ei->kind,
++		.exit_code = ei->exit_code,
++		.reason = ei->reason,
++		.at_ns = ktime_get_ns(),
++		.at_jiffies = jiffies,
++	};
++	struct seq_buf s;
++	unsigned long flags;
++	char *buf;
++	int cpu;
++
++	spin_lock_irqsave(&dump_lock, flags);	/* one dump at a time */
++
++	seq_buf_init(&s, ei->dump, dump_len);	/* the dump goes to ei->dump */
++
++	if (ei->kind == SCX_EXIT_NONE) {
++		dump_line(&s, "Debug dump triggered by %s", ei->reason);
++	} else {
++		dump_line(&s, "%s[%d] triggered exit kind %d:",
++			  current->comm, current->pid, ei->kind);
++		dump_line(&s, "  %s (%s)", ei->reason, ei->msg);
++		dump_newline(&s);
++		dump_line(&s, "Backtrace:");
++		dump_stack_trace(&s, "  ", ei->bt, ei->bt_len);
++	}
++
++	if (SCX_HAS_OP(dump)) {		/* scheduler-wide BPF dump output */
++		ops_dump_init(&s, "");
++		SCX_CALL_OP(SCX_KF_UNLOCKED, dump, &dctx);
++		ops_dump_exit();
++	}
++
++	dump_newline(&s);
++	dump_line(&s, "CPU states");
++	dump_line(&s, "----------");
++
++	for_each_possible_cpu(cpu) {
++		struct rq *rq = cpu_rq(cpu);
++		struct rq_flags rf;
++		struct task_struct *p;
++		struct seq_buf ns;
++		size_t avail, used;
++		bool idle;
++
++		rq_lock(rq, &rf);
++
++		idle = list_empty(&rq->scx.runnable_list) &&
++			rq->curr->sched_class == &idle_sched_class;
++
++		if (idle && !SCX_HAS_OP(dump_cpu))
++			goto next;	/* idle, and no op that could add output */
++
++		/*
++		 * We don't yet know whether ops.dump_cpu() will produce output
++		 * and we may want to skip the default CPU dump if it doesn't.
++		 * Use a nested seq_buf to generate the standard dump so that we
++		 * can decide whether to commit later.
++		 */
++		avail = seq_buf_get_buf(&s, &buf);
++		seq_buf_init(&ns, buf, avail);
++
++		dump_newline(&ns);
++		dump_line(&ns, "CPU %-4d: nr_run=%u flags=0x%x cpu_rel=%d ops_qseq=%lu pnt_seq=%lu",
++			  cpu, rq->scx.nr_running, rq->scx.flags,
++			  rq->scx.cpu_released, rq->scx.ops_qseq,
++			  rq->scx.pnt_seq);
++		dump_line(&ns, "          curr=%s[%d] class=%ps",
++			  rq->curr->comm, rq->curr->pid,
++			  rq->curr->sched_class);
++		if (!cpumask_empty(rq->scx.cpus_to_kick))
++			dump_line(&ns, "  cpus_to_kick   : %*pb",
++				  cpumask_pr_args(rq->scx.cpus_to_kick));
++		if (!cpumask_empty(rq->scx.cpus_to_kick_if_idle))
++			dump_line(&ns, "  idle_to_kick   : %*pb",
++				  cpumask_pr_args(rq->scx.cpus_to_kick_if_idle));
++		if (!cpumask_empty(rq->scx.cpus_to_preempt))
++			dump_line(&ns, "  cpus_to_preempt: %*pb",
++				  cpumask_pr_args(rq->scx.cpus_to_preempt));
++		if (!cpumask_empty(rq->scx.cpus_to_wait))
++			dump_line(&ns, "  cpus_to_wait   : %*pb",
++				  cpumask_pr_args(rq->scx.cpus_to_wait));
++
++		used = seq_buf_used(&ns);	/* size of the default part only */
++		if (SCX_HAS_OP(dump_cpu)) {
++			ops_dump_init(&ns, "  ");
++			SCX_CALL_OP(SCX_KF_REST, dump_cpu, &dctx, cpu, idle);
++			ops_dump_exit();
++		}
++
++		/*
++		 * If idle && nothing generated by ops.dump_cpu(), there's
++		 * nothing interesting. Skip.
++		 */
++		if (idle && used == seq_buf_used(&ns))
++			goto next;	/* $ns is dropped without committing */
++
++		/*
++		 * $s may already have overflowed when $ns was created. If so,
++		 * calling commit on it will trigger BUG.
++		 */
++		if (avail) {
++			seq_buf_commit(&s, seq_buf_used(&ns));
++			if (seq_buf_has_overflowed(&ns))
++				seq_buf_set_overflow(&s);
++		}
++
++		if (rq->curr->sched_class == &ext_sched_class)
++			scx_dump_task(&s, &dctx, rq->curr, '*'); /* '*' marks curr */
++
++		list_for_each_entry(p, &rq->scx.runnable_list, scx.runnable_node)
++			scx_dump_task(&s, &dctx, p, ' ');
++	next:
++		rq_unlock(rq, &rf);
++	}
++
++	if (seq_buf_has_overflowed(&s) && dump_len >= sizeof(trunc_marker))
++		memcpy(ei->dump + dump_len - sizeof(trunc_marker),
++		       trunc_marker, sizeof(trunc_marker)); /* tail, NUL included */
++
++	spin_unlock_irqrestore(&dump_lock, flags);
++}
++
++static void scx_ops_error_irq_workfn(struct irq_work *irq_work)
++{
++	struct scx_exit_info *ei = scx_exit_info;
++
++	if (ei->kind >= SCX_EXIT_ERROR)	/* only error exits get a dump */
++		scx_dump_state(ei, scx_ops.exit_dump_len);
++
++	schedule_scx_ops_disable_work();	/* then queue the teardown */
++}
++
++static DEFINE_IRQ_WORK(scx_ops_error_irq_work, scx_ops_error_irq_workfn);
++
++static __printf(3, 4) void scx_ops_exit_kind(enum scx_exit_kind kind,
++					     s64 exit_code,
++					     const char *fmt, ...)
++{
++	struct scx_exit_info *ei = scx_exit_info;
++	int none = SCX_EXIT_NONE;
++	va_list args;
++
++	if (!atomic_try_cmpxchg(&scx_exit_kind, &none, kind))
++		return;		/* a kind is already recorded; keep its info */
++
++	ei->exit_code = exit_code;
++#ifdef CONFIG_STACKTRACE
++	if (kind >= SCX_EXIT_ERROR)
++		ei->bt_len = stack_trace_save(ei->bt, SCX_EXIT_BT_LEN, 1);
++#endif
++	va_start(args, fmt);
++	vscnprintf(ei->msg, SCX_EXIT_MSG_LEN, fmt, args);
++	va_end(args);
++
++	/*
++	 * Set ei->kind and ->reason for scx_dump_state(). They'll be set again
++	 * in scx_ops_disable_workfn().
++	 */
++	ei->kind = kind;
++	ei->reason = scx_exit_reason(ei->kind);
++
++	irq_work_queue(&scx_ops_error_irq_work);	/* dump + disable later */
++}
++
++static struct kthread_worker *scx_create_rt_helper(const char *name)
++{
++	struct kthread_worker *helper = kthread_create_worker(0, name);
++
++	if (IS_ERR(helper))	/* failure is an ERR_PTR(), never NULL */
++		return NULL;	/* callers test the result against NULL */
++	sched_set_fifo(helper->task);
++	return helper;
++}
++
++static void check_hotplug_seq(const struct sched_ext_ops *ops)
++{
++	unsigned long long global_hotplug_seq;
++
++	/*
++	 * If a hotplug event has occurred between when a scheduler was
++	 * initialized, and when we were able to attach, exit and notify user
++	 * space about it.
++	 */
++	if (ops->hotplug_seq) {		/* a zero hotplug_seq skips the check */
++		global_hotplug_seq = atomic_long_read(&scx_hotplug_seq);
++		if (ops->hotplug_seq != global_hotplug_seq) {
++			scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
++				     "expected hotplug seq %llu did not match actual %llu",
++				     ops->hotplug_seq, global_hotplug_seq);
++		}
++	}
++}
++
++static int validate_ops(const struct sched_ext_ops *ops)
++{
++	/*
++	 * It doesn't make sense to specify the SCX_OPS_ENQ_LAST flag if the
++	 * ops.enqueue() callback isn't implemented.
++	 */
++	if ((ops->flags & SCX_OPS_ENQ_LAST) && !ops->enqueue) {
++		scx_ops_error("SCX_OPS_ENQ_LAST requires ops.enqueue() to be implemented");
++		return -EINVAL;
++	}
++
++	return 0;	/* the flag check above is the only rule enforced here */
++}
++
++static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
++{
++	struct scx_task_iter sti;
++	struct task_struct *p;
++	unsigned long timeout;
++	int i, cpu, node, ret;
++
++	if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
++			   cpu_possible_mask)) {
++		pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n");
++		return -EINVAL;
++	}
++
++	mutex_lock(&scx_ops_enable_mutex);
++
++	if (!scx_ops_helper) {	/* created once, on the first enable */
++		WRITE_ONCE(scx_ops_helper,
++			   scx_create_rt_helper("sched_ext_ops_helper"));
++		if (!scx_ops_helper) {
++			ret = -ENOMEM;
++			goto err_unlock;
++		}
++	}
++
++	if (!global_dsqs) {	/* one global DSQ per possible node, set up once */
++		struct scx_dispatch_q **dsqs;
++
++		dsqs = kcalloc(nr_node_ids, sizeof(dsqs[0]), GFP_KERNEL);
++		if (!dsqs) {
++			ret = -ENOMEM;
++			goto err_unlock;
++		}
++
++		for_each_node_state(node, N_POSSIBLE) {
++			struct scx_dispatch_q *dsq;
++
++			dsq = kzalloc_node(sizeof(*dsq), GFP_KERNEL, node);
++			if (!dsq) {
++				for_each_node_state(node, N_POSSIBLE)
++					kfree(dsqs[node]);
++				kfree(dsqs);
++				ret = -ENOMEM;
++				goto err_unlock;
++			}
++
++			init_dsq(dsq, SCX_DSQ_GLOBAL);
++			dsqs[node] = dsq;
++		}
++
++		global_dsqs = dsqs;
++	}
++
++	if (scx_ops_enable_state() != SCX_OPS_DISABLED) {
++		ret = -EBUSY;
++		goto err_unlock;
++	}
++
++	scx_root_kobj = kzalloc(sizeof(*scx_root_kobj), GFP_KERNEL);
++	if (!scx_root_kobj) {
++		ret = -ENOMEM;
++		goto err_unlock;
++	}
++
++	scx_root_kobj->kset = scx_kset;
++	ret = kobject_init_and_add(scx_root_kobj, &scx_ktype, NULL, "root");
++	if (ret < 0)
++		goto err;
++
++	scx_exit_info = alloc_exit_info(ops->exit_dump_len);
++	if (!scx_exit_info) {
++		ret = -ENOMEM;
++		goto err_del;
++	}
++
++	/*
++	 * Set scx_ops, transition to ENABLING and clear exit info to arm the
++	 * disable path. Failure triggers full disabling from here on.
++	 */
++	scx_ops = *ops;
++
++	WARN_ON_ONCE(scx_ops_set_enable_state(SCX_OPS_ENABLING) !=
++		     SCX_OPS_DISABLED);
++
++	atomic_set(&scx_exit_kind, SCX_EXIT_NONE);
++	scx_warned_zero_slice = false;
++
++	atomic_long_set(&scx_nr_rejected, 0);
++
++	for_each_possible_cpu(cpu)
++		cpu_rq(cpu)->scx.cpuperf_target = SCX_CPUPERF_ONE;
++
++	/*
++	 * Keep CPUs stable during enable so that the BPF scheduler can track
++	 * online CPUs by watching ->on/offline_cpu() after ->init().
++	 */
++	cpus_read_lock();
++
++	if (scx_ops.init) {
++		ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init);
++		if (ret) {
++			ret = ops_sanitize_err("init", ret);
++			cpus_read_unlock();
++			goto err_disable;
++		}
++	}
++
++	for (i = SCX_OPI_CPU_HOTPLUG_BEGIN; i < SCX_OPI_CPU_HOTPLUG_END; i++)
++		if (((void (**)(void))ops)[i])
++			static_branch_enable_cpuslocked(&scx_has_op[i]);
++
++	check_hotplug_seq(ops);
++	cpus_read_unlock();
++
++	ret = validate_ops(ops);
++	if (ret)
++		goto err_disable;
++
++	WARN_ON_ONCE(scx_dsp_ctx);
++	scx_dsp_max_batch = ops->dispatch_max_batch ?: SCX_DSP_DFL_MAX_BATCH;
++	scx_dsp_ctx = __alloc_percpu(struct_size_t(struct scx_dsp_ctx, buf,
++						   scx_dsp_max_batch),
++				     __alignof__(struct scx_dsp_ctx));
++	if (!scx_dsp_ctx) {
++		ret = -ENOMEM;
++		goto err_disable;
++	}
++
++	if (ops->timeout_ms)
++		timeout = msecs_to_jiffies(ops->timeout_ms);
++	else
++		timeout = SCX_WATCHDOG_MAX_TIMEOUT;
++
++	WRITE_ONCE(scx_watchdog_timeout, timeout);
++	WRITE_ONCE(scx_watchdog_timestamp, jiffies);
++	queue_delayed_work(system_unbound_wq, &scx_watchdog_work,
++			   scx_watchdog_timeout / 2);
++
++	/*
++	 * Once __scx_ops_enabled is set, %current can be switched to SCX
++	 * anytime. This can lead to stalls as some BPF schedulers (e.g.
++	 * userspace scheduling) may not function correctly before all tasks are
++	 * switched. Init in bypass mode to guarantee forward progress.
++	 */
++	scx_ops_bypass(true);
++
++	for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++)
++		if (((void (**)(void))ops)[i])
++			static_branch_enable(&scx_has_op[i]);
++
++	if (ops->flags & SCX_OPS_ENQ_LAST)
++		static_branch_enable(&scx_ops_enq_last);
++
++	if (ops->flags & SCX_OPS_ENQ_EXITING)
++		static_branch_enable(&scx_ops_enq_exiting);
++	if (scx_ops.cpu_acquire || scx_ops.cpu_release)
++		static_branch_enable(&scx_ops_cpu_preempt);
++
++	if (!ops->update_idle || (ops->flags & SCX_OPS_KEEP_BUILTIN_IDLE)) {
++		reset_idle_masks();
++		static_branch_enable(&scx_builtin_idle_enabled);
++	} else {
++		static_branch_disable(&scx_builtin_idle_enabled);
++	}
++
++	/*
++	 * Lock out forks, cgroup on/offlining and moves before opening the
++	 * floodgate so that they don't wander into the operations prematurely.
++	 */
++	percpu_down_write(&scx_fork_rwsem);
++
++	WARN_ON_ONCE(scx_ops_init_task_enabled);
++	scx_ops_init_task_enabled = true;
++
++	/*
++	 * Enable ops for every task. Fork is excluded by scx_fork_rwsem
++	 * preventing new tasks from being added. No need to exclude tasks
++	 * leaving as sched_ext_free() can handle both prepped and enabled
++	 * tasks. Prep all tasks first and then enable them with preemption
++	 * disabled.
++	 *
++	 * All cgroups should be initialized before scx_ops_init_task() so that
++	 * the BPF scheduler can reliably track each task's cgroup membership
++	 * from scx_ops_init_task(). Lock out cgroup on/offlining and task
++	 * migrations while tasks are being initialized so that
++	 * scx_cgroup_can_attach() never sees uninitialized tasks.
++	 */
++	scx_cgroup_lock();
++	ret = scx_cgroup_init();
++	if (ret)
++		goto err_disable_unlock_all;
++
++	scx_task_iter_start(&sti);
++	while ((p = scx_task_iter_next_locked(&sti))) {
++		/*
++		 * @p may already be dead, have lost all its usage counts and
++		 * be waiting for RCU grace period before being freed. @p can't
++		 * be initialized for SCX in such cases and should be ignored.
++		 */
++		if (!tryget_task_struct(p))
++			continue;
++
++		scx_task_iter_unlock(&sti);
++
++		ret = scx_ops_init_task(p, task_group(p), false);
++		if (ret) {
++			pr_err("sched_ext: ops.init_task() failed (%d) for %s[%d] while loading\n",
++			       ret, p->comm, p->pid);	/* @p is still pinned here */
++			put_task_struct(p);	/* may free @p; don't touch it after */
++			scx_task_iter_relock(&sti);
++			scx_task_iter_stop(&sti);
++			goto err_disable_unlock_all;
++		}
++
++		scx_set_task_state(p, SCX_TASK_READY);
++
++		put_task_struct(p);
++		scx_task_iter_relock(&sti);
++	}
++	scx_task_iter_stop(&sti);
++	scx_cgroup_unlock();
++	percpu_up_write(&scx_fork_rwsem);
++
++	/*
++	 * All tasks are READY. It's safe to turn on scx_enabled() and switch
++	 * all eligible tasks.
++	 */
++	WRITE_ONCE(scx_switching_all, !(ops->flags & SCX_OPS_SWITCH_PARTIAL));
++	static_branch_enable(&__scx_ops_enabled);
++
++	/*
++	 * We're fully committed and can't fail. The task READY -> ENABLED
++	 * transitions here are synchronized against sched_ext_free() through
++	 * scx_tasks_lock.
++	 */
++	percpu_down_write(&scx_fork_rwsem);
++	scx_task_iter_start(&sti);
++	while ((p = scx_task_iter_next_locked(&sti))) {
++		const struct sched_class *old_class = p->sched_class;
++		struct sched_enq_and_set_ctx ctx;
++
++		sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
++
++		p->scx.slice = SCX_SLICE_DFL;
++		__setscheduler_prio(p, p->prio);
++		check_class_changing(task_rq(p), p, old_class);
++
++		sched_enq_and_set_task(&ctx);
++
++		check_class_changed(task_rq(p), p, old_class, p->prio);
++	}
++	scx_task_iter_stop(&sti);
++	percpu_up_write(&scx_fork_rwsem);
++
++	scx_ops_bypass(false);
++
++	/*
++	 * Returning an error code here would lose the recorded error
++	 * information. Exit indicating success so that the error is notified
++	 * through ops.exit() with all the details.
++	 */
++	if (!scx_ops_tryset_enable_state(SCX_OPS_ENABLED, SCX_OPS_ENABLING)) {
++		WARN_ON_ONCE(atomic_read(&scx_exit_kind) == SCX_EXIT_NONE);
++		ret = 0;
++		goto err_disable;
++	}
++
++	if (!(ops->flags & SCX_OPS_SWITCH_PARTIAL))
++		static_branch_enable(&__scx_switched_all);
++
++	pr_info("sched_ext: BPF scheduler \"%s\" enabled%s\n",
++		scx_ops.name, scx_switched_all() ? "" : " (partial)");
++	kobject_uevent(scx_root_kobj, KOBJ_ADD);
++	mutex_unlock(&scx_ops_enable_mutex);
++
++	atomic_long_inc(&scx_enable_seq);
++
++	return 0;
++
++err_del:
++	kobject_del(scx_root_kobj);
++err:
++	kobject_put(scx_root_kobj);
++	scx_root_kobj = NULL;
++	if (scx_exit_info) {
++		free_exit_info(scx_exit_info);
++		scx_exit_info = NULL;
++	}
++err_unlock:
++	mutex_unlock(&scx_ops_enable_mutex);
++	return ret;
++
++err_disable_unlock_all:
++	scx_cgroup_unlock();
++	percpu_up_write(&scx_fork_rwsem);
++	scx_ops_bypass(false);
++err_disable:
++	mutex_unlock(&scx_ops_enable_mutex);
++	/* must be fully disabled before returning */
++	scx_ops_disable(SCX_EXIT_ERROR);
++	kthread_flush_work(&scx_ops_disable_work);
++	return ret;
++}
++
++
++/********************************************************************************
++ * bpf_struct_ops plumbing.
++ */
++#include <linux/bpf_verifier.h>
++#include <linux/bpf.h>
++#include <linux/btf.h>
++
++extern struct btf *btf_vmlinux;
++static const struct btf_type *task_struct_type;	/* set by bpf_scx_init() */
++static u32 task_struct_type_id;			/* set by bpf_scx_init() */
++
++static bool set_arg_maybe_null(const char *op, int arg_n, int off, int size,
++			       enum bpf_access_type type,
++			       const struct bpf_prog *prog,
++			       struct bpf_insn_access_aux *info)
++{
++	struct btf *btf = bpf_get_btf_vmlinux();
++	const struct bpf_struct_ops_desc *st_ops_desc;
++	const struct btf_member *member;
++	const struct btf_type *t;
++	u32 btf_id, member_idx;
++	const char *mname;
++
++	/* struct_ops op args are all sequential, 64-bit numbers */
++	if (off != arg_n * sizeof(__u64))	/* not argument @arg_n */
++		return false;
++
++	/* btf_id should be the type id of struct sched_ext_ops */
++	btf_id = prog->aux->attach_btf_id;
++	st_ops_desc = bpf_struct_ops_find(btf, btf_id);
++	if (!st_ops_desc)
++		return false;
++
++	/* BTF type of struct sched_ext_ops */
++	t = st_ops_desc->type;
++
++	member_idx = prog->expected_attach_type; /* used as the member index */
++	if (member_idx >= btf_type_vlen(t))
++		return false;
++
++	/*
++	 * Get the member name of this struct_ops program, which corresponds to
++	 * a field in struct sched_ext_ops. For example, the member name of the
++	 * dispatch struct_ops program (callback) is "dispatch".
++	 */
++	member = &btf_type_member(t)[member_idx];
++	mname = btf_name_by_offset(btf_vmlinux, member->name_off);
++
++	if (!strcmp(mname, op)) {
++		/*
++		 * The value is a pointer to a type (struct task_struct) given
++		 * by a BTF ID (PTR_TO_BTF_ID). It is trusted (PTR_TRUSTED),
++		 * however, can be a NULL (PTR_MAYBE_NULL). The BPF program
++		 * should check the pointer to make sure it is not NULL before
++		 * using it, or the verifier will reject the program.
++		 *
++		 * Longer term, this is something that should be addressed by
++		 * BTF, and be fully contained within the verifier.
++		 */
++		info->reg_type = PTR_MAYBE_NULL | PTR_TO_BTF_ID | PTR_TRUSTED;
++		info->btf = btf_vmlinux;
++		info->btf_id = task_struct_type_id;
++
++		return true;	/* @info now describes the argument */
++	}
++
++	return false;	/* @prog implements an op other than @op */
++}
++
++static bool bpf_scx_is_valid_access(int off, int size,
++				    enum bpf_access_type type,
++				    const struct bpf_prog *prog,
++				    struct bpf_insn_access_aux *info)
++{
++	if (type != BPF_READ)	/* anything but a read is refused */
++		return false;
++	if (set_arg_maybe_null("dispatch", 1, off, size, type, prog, info) ||
++	    set_arg_maybe_null("yield", 1, off, size, type, prog, info))
++		return true;	/* argument 1 of these two ops may be NULL */
++	if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
++		return false;	/* outside the argument slots */
++	if (off % size != 0)	/* misaligned for this access size */
++		return false;
++
++	return btf_ctx_access(off, size, type, prog, info);
++}
++
++static int bpf_scx_btf_struct_access(struct bpf_verifier_log *log,
++				     const struct bpf_reg_state *reg, int off,
++				     int size)
++{
++	const struct btf_type *t;
++
++	t = btf_type_by_id(reg->btf, reg->btf_id);
++	if (t == task_struct_type) {	/* only task_struct has allowed fields */
++		if (off >= offsetof(struct task_struct, scx.slice) &&
++		    off + size <= offsetofend(struct task_struct, scx.slice))
++			return SCALAR_VALUE;	/* access lies within scx.slice */
++		if (off >= offsetof(struct task_struct, scx.dsq_vtime) &&
++		    off + size <= offsetofend(struct task_struct, scx.dsq_vtime))
++			return SCALAR_VALUE;	/* within scx.dsq_vtime */
++		if (off >= offsetof(struct task_struct, scx.disallow) &&
++		    off + size <= offsetofend(struct task_struct, scx.disallow))
++			return SCALAR_VALUE;	/* within scx.disallow */
++	}
++
++	return -EACCES;	/* any other type or field range */
++}
++
++static const struct bpf_func_proto *
++bpf_scx_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
++{
++	switch (func_id) {	/* two task-storage helpers handled explicitly */
++	case BPF_FUNC_task_storage_get:
++		return &bpf_task_storage_get_proto;
++	case BPF_FUNC_task_storage_delete:
++		return &bpf_task_storage_delete_proto;
++	default:		/* everything else: defer to the base lookup */
++		return bpf_base_func_proto(func_id, prog);
++	}
++}
++
++static const struct bpf_verifier_ops bpf_scx_verifier_ops = {
++	.get_func_proto = bpf_scx_get_func_proto,	/* helper lookup */
++	.is_valid_access = bpf_scx_is_valid_access,	/* argument reads */
++	.btf_struct_access = bpf_scx_btf_struct_access,	/* task_struct fields */
++};
++
++static int bpf_scx_init_member(const struct btf_type *t,
++			       const struct btf_member *member,
++			       void *kdata, const void *udata)
++{
++	const struct sched_ext_ops *uops = udata;
++	struct sched_ext_ops *ops = kdata;
++	u32 moff = __btf_member_bit_offset(t, member) / 8; /* byte offset */
++	int ret;
++
++	switch (moff) {		/* dispatch on which member is being set up */
++	case offsetof(struct sched_ext_ops, dispatch_max_batch):
++		if (*(u32 *)(udata + moff) > INT_MAX)
++			return -E2BIG;
++		ops->dispatch_max_batch = *(u32 *)(udata + moff);
++		return 1;	/* copied here; every handled member returns 1 */
++	case offsetof(struct sched_ext_ops, flags):
++		if (*(u64 *)(udata + moff) & ~SCX_OPS_ALL_FLAGS)
++			return -EINVAL;	/* a bit outside SCX_OPS_ALL_FLAGS */
++		ops->flags = *(u64 *)(udata + moff);
++		return 1;
++	case offsetof(struct sched_ext_ops, name):
++		ret = bpf_obj_name_cpy(ops->name, uops->name,
++				       sizeof(ops->name));
++		if (ret < 0)
++			return ret;
++		if (ret == 0)	/* presumably an empty name — TODO confirm */
++			return -EINVAL;
++		return 1;
++	case offsetof(struct sched_ext_ops, timeout_ms):
++		if (msecs_to_jiffies(*(u32 *)(udata + moff)) >
++		    SCX_WATCHDOG_MAX_TIMEOUT)
++			return -E2BIG;
++		ops->timeout_ms = *(u32 *)(udata + moff);
++		return 1;
++	case offsetof(struct sched_ext_ops, exit_dump_len):
++		ops->exit_dump_len =	/* 0 selects the default length */
++			*(u32 *)(udata + moff) ?: SCX_EXIT_DUMP_DFL_LEN;
++		return 1;
++	case offsetof(struct sched_ext_ops, hotplug_seq):
++		ops->hotplug_seq = *(u64 *)(udata + moff);
++		return 1;
++	}
++
++	return 0;	/* a member this function does not handle */
++}
++
++static int bpf_scx_check_member(const struct btf_type *t,
++				const struct btf_member *member,
++				const struct bpf_prog *prog)
++{
++	u32 moff = __btf_member_bit_offset(t, member) / 8; /* byte offset */
++
++	switch (moff) {
++	case offsetof(struct sched_ext_ops, init_task):
++#ifdef CONFIG_EXT_GROUP_SCHED
++	case offsetof(struct sched_ext_ops, cgroup_init):
++	case offsetof(struct sched_ext_ops, cgroup_exit):
++	case offsetof(struct sched_ext_ops, cgroup_prep_move):
++#endif	/* CONFIG_EXT_GROUP_SCHED */
++	case offsetof(struct sched_ext_ops, cpu_online):
++	case offsetof(struct sched_ext_ops, cpu_offline):
++	case offsetof(struct sched_ext_ops, init):
++	case offsetof(struct sched_ext_ops, exit):
++		break;		/* the ops above accept sleepable programs */
++	default:		/* every other member rejects them */
++		if (prog->sleepable)
++			return -EINVAL;
++	}
++
++	return 0;
++}
++
++static int bpf_scx_reg(void *kdata, struct bpf_link *link)
++{
++	return scx_ops_enable(kdata, link);	/* @kdata is the sched_ext_ops */
++}
++
++static void bpf_scx_unreg(void *kdata, struct bpf_link *link)
++{
++	scx_ops_disable(SCX_EXIT_UNREG);	/* only queues the disable work */
++	kthread_flush_work(&scx_ops_disable_work); /* wait until it has run */
++}
++
++static int bpf_scx_init(struct btf *btf)
++{
++	s32 type_id;
++
++	type_id = btf_find_by_name_kind(btf, "task_struct", BTF_KIND_STRUCT);
++	if (type_id < 0)	/* lookup of struct task_struct failed */
++		return -EINVAL;
++	task_struct_type = btf_type_by_id(btf, type_id);
++	task_struct_type_id = type_id;	/* cached for the verifier callbacks */
++
++	return 0;
++}
++
++static int bpf_scx_update(void *kdata, void *old_kdata, struct bpf_link *link)
++{
++	/*
++	 * sched_ext does not support updating the actively-loaded BPF
++	 * scheduler, as registering a BPF scheduler can always fail if the
++	 * scheduler returns an error code for e.g. ops.init(), ops.init_task(),
++	 * etc. Similarly, we can always race with unregistration happening
++	 * elsewhere, such as with sysrq.
++	 */
++	return -EOPNOTSUPP;	/* unconditional: all arguments are ignored */
++}
++
++static int bpf_scx_validate(void *kdata)
++{
++	return 0;	/* no checks here; scx_ops_enable() runs validate_ops() */
++}
++
++static s32 select_cpu_stub(struct task_struct *p, s32 prev_cpu, u64 wake_flags) { return -EINVAL; }
++static void enqueue_stub(struct task_struct *p, u64 enq_flags) {}
++static void dequeue_stub(struct task_struct *p, u64 enq_flags) {}
++static void dispatch_stub(s32 prev_cpu, struct task_struct *p) {}
++static void tick_stub(struct task_struct *p) {}
++static void runnable_stub(struct task_struct *p, u64 enq_flags) {}
++static void running_stub(struct task_struct *p) {}
++static void stopping_stub(struct task_struct *p, bool runnable) {}
++static void quiescent_stub(struct task_struct *p, u64 deq_flags) {}
++static bool yield_stub(struct task_struct *from, struct task_struct *to) { return false; }
++static bool core_sched_before_stub(struct task_struct *a, struct task_struct *b) { return false; }
++static void set_weight_stub(struct task_struct *p, u32 weight) {}
++static void set_cpumask_stub(struct task_struct *p, const struct cpumask *mask) {}
++static void update_idle_stub(s32 cpu, bool idle) {}
++static void cpu_acquire_stub(s32 cpu, struct scx_cpu_acquire_args *args) {}
++static void cpu_release_stub(s32 cpu, struct scx_cpu_release_args *args) {}
++static s32 init_task_stub(struct task_struct *p, struct scx_init_task_args *args) { return -EINVAL; }
++static void exit_task_stub(struct task_struct *p, struct scx_exit_task_args *args) {}
++static void enable_stub(struct task_struct *p) {}
++static void disable_stub(struct task_struct *p) {}
++#ifdef CONFIG_EXT_GROUP_SCHED	/* cgroup stubs exist only with this option */
++static s32 cgroup_init_stub(struct cgroup *cgrp, struct scx_cgroup_init_args *args) { return -EINVAL; }
++static void cgroup_exit_stub(struct cgroup *cgrp) {}
++static s32 cgroup_prep_move_stub(struct task_struct *p, struct cgroup *from, struct cgroup *to) { return -EINVAL; }
++static void cgroup_move_stub(struct task_struct *p, struct cgroup *from, struct cgroup *to) {}
++static void cgroup_cancel_move_stub(struct task_struct *p, struct cgroup *from, struct cgroup *to) {}
++static void cgroup_set_weight_stub(struct cgroup *cgrp, u32 weight) {}
++#endif	/* CONFIG_EXT_GROUP_SCHED */
++static void cpu_online_stub(s32 cpu) {}
++static void cpu_offline_stub(s32 cpu) {}
++static s32 init_stub(void) { return -EINVAL; }	/* as does every s32 stub */
++static void exit_stub(struct scx_exit_info *info) {}
++static void dump_stub(struct scx_dump_ctx *ctx) {}
++static void dump_cpu_stub(struct scx_dump_ctx *ctx, s32 cpu, bool idle) {}
++static void dump_task_stub(struct scx_dump_ctx *ctx, struct task_struct *p) {}
++
++static struct sched_ext_ops __bpf_ops_sched_ext_ops = {
++	.select_cpu = select_cpu_stub,
++	.enqueue = enqueue_stub,
++	.dequeue = dequeue_stub,
++	.dispatch = dispatch_stub,
++	.tick = tick_stub,
++	.runnable = runnable_stub,
++	.running = running_stub,
++	.stopping = stopping_stub,
++	.quiescent = quiescent_stub,
++	.yield = yield_stub,
++	.core_sched_before = core_sched_before_stub,
++	.set_weight = set_weight_stub,
++	.set_cpumask = set_cpumask_stub,
++	.update_idle = update_idle_stub,
++	.cpu_acquire = cpu_acquire_stub,
++	.cpu_release = cpu_release_stub,
++	.init_task = init_task_stub,
++	.exit_task = exit_task_stub,
++	.enable = enable_stub,
++	.disable = disable_stub,
++#ifdef CONFIG_EXT_GROUP_SCHED
++	.cgroup_init = cgroup_init_stub,
++	.cgroup_exit = cgroup_exit_stub,
++	.cgroup_prep_move = cgroup_prep_move_stub,
++	.cgroup_move = cgroup_move_stub,
++	.cgroup_cancel_move = cgroup_cancel_move_stub,
++	.cgroup_set_weight = cgroup_set_weight_stub,
++#endif
++	.cpu_online = cpu_online_stub,
++	.cpu_offline = cpu_offline_stub,
++	.init = init_stub,
++	.exit = exit_stub,
++	.dump = dump_stub,
++	.dump_cpu = dump_cpu_stub,
++	.dump_task = dump_task_stub,
++};
++
++static struct bpf_struct_ops bpf_sched_ext_ops = {
++	.verifier_ops = &bpf_scx_verifier_ops,
++	.reg = bpf_scx_reg,
++	.unreg = bpf_scx_unreg,
++	.check_member = bpf_scx_check_member,
++	.init_member = bpf_scx_init_member,
++	.init = bpf_scx_init,
++	.update = bpf_scx_update,
++	.validate = bpf_scx_validate,
++	.name = "sched_ext_ops",
++	.owner = THIS_MODULE,
++	.cfi_stubs = &__bpf_ops_sched_ext_ops
++};
++
++
++/********************************************************************************
++ * System integration and init.
++ */
++
++static void sysrq_handle_sched_ext_reset(u8 key)
++{
++	if (scx_ops_helper)
++		scx_ops_disable(SCX_EXIT_SYSRQ);
++	else
++		pr_info("sched_ext: BPF scheduler not yet used\n");
++}
++
++static const struct sysrq_key_op sysrq_sched_ext_reset_op = {
++	.handler	= sysrq_handle_sched_ext_reset,
++	.help_msg	= "reset-sched-ext(S)",
++	.action_msg	= "Disable sched_ext and revert all tasks to CFS",
++	.enable_mask	= SYSRQ_ENABLE_RTNICE,
++};
++
++static void sysrq_handle_sched_ext_dump(u8 key)
++{
++	struct scx_exit_info ei = { .kind = SCX_EXIT_NONE, .reason = "SysRq-D" };
++
++	if (scx_enabled())
++		scx_dump_state(&ei, 0);
++}
++
++static const struct sysrq_key_op sysrq_sched_ext_dump_op = {
++	.handler	= sysrq_handle_sched_ext_dump,
++	.help_msg	= "dump-sched-ext(D)",
++	.action_msg	= "Trigger sched_ext debug dump",
++	.enable_mask	= SYSRQ_ENABLE_RTNICE,
++};
++
++static bool can_skip_idle_kick(struct rq *rq)
++{
++	lockdep_assert_rq_held(rq);
++
++	/*
++	 * We can skip idle kicking if @rq is going to go through at least one
++	 * full SCX scheduling cycle before going idle. Just checking whether
++	 * curr is not idle is insufficient because we could be racing
++	 * balance_one() trying to pull the next task from a remote rq, which
++	 * may fail, and @rq may become idle afterwards.
++	 *
++	 * The race window is small and we don't and can't guarantee that @rq is
++	 * only kicked while idle anyway. Skip only when sure.
++	 */
++	return !is_idle_task(rq->curr) && !(rq->scx.flags & SCX_RQ_IN_BALANCE);
++}
++
++static bool kick_one_cpu(s32 cpu, struct rq *this_rq, unsigned long *pseqs)
++{
++	struct rq *rq = cpu_rq(cpu);
++	struct scx_rq *this_scx = &this_rq->scx;
++	bool should_wait = false;
++	unsigned long flags;
++
++	raw_spin_rq_lock_irqsave(rq, flags);
++
++	/*
++	 * During CPU hotplug, a CPU may depend on kicking itself to make
++	 * forward progress. Allow kicking self regardless of online state.
++	 */
++	if (cpu_online(cpu) || cpu == cpu_of(this_rq)) {
++		if (cpumask_test_cpu(cpu, this_scx->cpus_to_preempt)) {
++			if (rq->curr->sched_class == &ext_sched_class)
++				rq->curr->scx.slice = 0;
++			cpumask_clear_cpu(cpu, this_scx->cpus_to_preempt);
++		}
++
++		if (cpumask_test_cpu(cpu, this_scx->cpus_to_wait)) {
++			pseqs[cpu] = rq->scx.pnt_seq;
++			should_wait = true;
++		}
++
++		resched_curr(rq);
++	} else {
++		cpumask_clear_cpu(cpu, this_scx->cpus_to_preempt);
++		cpumask_clear_cpu(cpu, this_scx->cpus_to_wait);
++	}
++
++	raw_spin_rq_unlock_irqrestore(rq, flags);
++
++	return should_wait;
++}
++
++static void kick_one_cpu_if_idle(s32 cpu, struct rq *this_rq)
++{
++	struct rq *rq = cpu_rq(cpu);
++	unsigned long flags;
++
++	raw_spin_rq_lock_irqsave(rq, flags);
++
++	if (!can_skip_idle_kick(rq) &&
++	    (cpu_online(cpu) || cpu == cpu_of(this_rq)))
++		resched_curr(rq);
++
++	raw_spin_rq_unlock_irqrestore(rq, flags);
++}
++
++static void kick_cpus_irq_workfn(struct irq_work *irq_work)
++{
++	struct rq *this_rq = this_rq();
++	struct scx_rq *this_scx = &this_rq->scx;
++	unsigned long *pseqs = this_cpu_ptr(scx_kick_cpus_pnt_seqs);
++	bool should_wait = false;
++	s32 cpu;
++
++	for_each_cpu(cpu, this_scx->cpus_to_kick) {
++		should_wait |= kick_one_cpu(cpu, this_rq, pseqs);
++		cpumask_clear_cpu(cpu, this_scx->cpus_to_kick);
++		cpumask_clear_cpu(cpu, this_scx->cpus_to_kick_if_idle);
++	}
++
++	for_each_cpu(cpu, this_scx->cpus_to_kick_if_idle) {
++		kick_one_cpu_if_idle(cpu, this_rq);
++		cpumask_clear_cpu(cpu, this_scx->cpus_to_kick_if_idle);
++	}
++
++	if (!should_wait)
++		return;
++
++	for_each_cpu(cpu, this_scx->cpus_to_wait) {
++		unsigned long *wait_pnt_seq = &cpu_rq(cpu)->scx.pnt_seq;
++
++		if (cpu != cpu_of(this_rq)) {
++			/*
++			 * Pairs with smp_store_release() issued by this CPU in
++			 * scx_next_task_picked() on the resched path.
++			 *
++			 * We busy-wait here to guarantee that no other task can
++			 * be scheduled on our core before the target CPU has
++			 * entered the resched path.
++			 */
++			while (smp_load_acquire(wait_pnt_seq) == pseqs[cpu])
++				cpu_relax();
++		}
++
++		cpumask_clear_cpu(cpu, this_scx->cpus_to_wait);
++	}
++}
++
++/**
++ * print_scx_info - print out sched_ext scheduler state
++ * @log_lvl: the log level to use when printing
++ * @p: target task
++ *
++ * If a sched_ext scheduler is enabled, print the name and state of the
++ * scheduler. If @p is on sched_ext, print further information about the task.
++ *
++ * This function can be safely called on any task as long as the task_struct
++ * itself is accessible. While safe, this function isn't synchronized and may
++ * print out mixups or garbage of limited length.
++ */
++void print_scx_info(const char *log_lvl, struct task_struct *p)
++{
++	enum scx_ops_enable_state state = scx_ops_enable_state();
++	const char *all = READ_ONCE(scx_switching_all) ? "+all" : "";
++	char runnable_at_buf[22] = "?";
++	struct sched_class *class;
++	unsigned long runnable_at;
++
++	if (state == SCX_OPS_DISABLED)
++		return;
++
++	/*
++	 * Carefully check if the task was running on sched_ext, and then
++	 * carefully copy the time it's been runnable, and its state.
++	 */
++	if (copy_from_kernel_nofault(&class, &p->sched_class, sizeof(class)) ||
++	    class != &ext_sched_class) {
++		printk("%sSched_ext: %s (%s%s)", log_lvl, scx_ops.name,
++		       scx_ops_enable_state_str[state], all);
++		return;
++	}
++
++	if (!copy_from_kernel_nofault(&runnable_at, &p->scx.runnable_at,
++				      sizeof(runnable_at)))
++		scnprintf(runnable_at_buf, sizeof(runnable_at_buf), "%+ldms",
++			  jiffies_delta_msecs(runnable_at, jiffies));
++
++	/* print everything onto one line to conserve console space */
++	printk("%sSched_ext: %s (%s%s), task: runnable_at=%s",
++	       log_lvl, scx_ops.name, scx_ops_enable_state_str[state], all,
++	       runnable_at_buf);
++}
++
++static int scx_pm_handler(struct notifier_block *nb, unsigned long event, void *ptr)
++{
++	/*
++	 * SCX schedulers often have userspace components which are sometimes
++	 * involved in critical scheduling paths. PM operations involve freezing
++	 * userspace which can lead to scheduling misbehaviors including stalls.
++	 * Let's bypass while PM operations are in progress.
++	 */
++	switch (event) {
++	case PM_HIBERNATION_PREPARE:
++	case PM_SUSPEND_PREPARE:
++	case PM_RESTORE_PREPARE:
++		scx_ops_bypass(true);
++		break;
++	case PM_POST_HIBERNATION:
++	case PM_POST_SUSPEND:
++	case PM_POST_RESTORE:
++		scx_ops_bypass(false);
++		break;
++	}
++
++	return NOTIFY_OK;
++}
++
++static struct notifier_block scx_pm_notifier = {
++	.notifier_call = scx_pm_handler,
++};
++
++void __init init_sched_ext_class(void)
++{
++	s32 cpu, v;
++
++	/*
++	 * The following is to prevent the compiler from optimizing out the enum
++	 * definitions so that BPF scheduler implementations can use them
++	 * through the generated vmlinux.h.
++	 */
++	WRITE_ONCE(v, SCX_ENQ_WAKEUP | SCX_DEQ_SLEEP | SCX_KICK_PREEMPT |
++		   SCX_TG_ONLINE);
++
++	BUG_ON(rhashtable_init(&dsq_hash, &dsq_hash_params));
++#ifdef CONFIG_SMP
++	BUG_ON(!alloc_cpumask_var(&idle_masks.cpu, GFP_KERNEL));
++	BUG_ON(!alloc_cpumask_var(&idle_masks.smt, GFP_KERNEL));
++#endif
++	scx_kick_cpus_pnt_seqs =
++		__alloc_percpu(sizeof(scx_kick_cpus_pnt_seqs[0]) * nr_cpu_ids,
++			       __alignof__(scx_kick_cpus_pnt_seqs[0]));
++	BUG_ON(!scx_kick_cpus_pnt_seqs);
++
++	for_each_possible_cpu(cpu) {
++		struct rq *rq = cpu_rq(cpu);
++
++		init_dsq(&rq->scx.local_dsq, SCX_DSQ_LOCAL);
++		INIT_LIST_HEAD(&rq->scx.runnable_list);
++		INIT_LIST_HEAD(&rq->scx.ddsp_deferred_locals);
++
++		BUG_ON(!zalloc_cpumask_var(&rq->scx.cpus_to_kick, GFP_KERNEL));
++		BUG_ON(!zalloc_cpumask_var(&rq->scx.cpus_to_kick_if_idle, GFP_KERNEL));
++		BUG_ON(!zalloc_cpumask_var(&rq->scx.cpus_to_preempt, GFP_KERNEL));
++		BUG_ON(!zalloc_cpumask_var(&rq->scx.cpus_to_wait, GFP_KERNEL));
++		init_irq_work(&rq->scx.deferred_irq_work, deferred_irq_workfn);
++		init_irq_work(&rq->scx.kick_cpus_irq_work, kick_cpus_irq_workfn);
++
++		if (cpu_online(cpu))
++			cpu_rq(cpu)->scx.flags |= SCX_RQ_ONLINE;
++	}
++
++	register_sysrq_key('S', &sysrq_sched_ext_reset_op);
++	register_sysrq_key('D', &sysrq_sched_ext_dump_op);
++	INIT_DELAYED_WORK(&scx_watchdog_work, scx_watchdog_workfn);
++}
++
++
++/********************************************************************************
++ * Helpers that can be called from the BPF scheduler.
++ */
++#include <linux/btf_ids.h>
++
++__bpf_kfunc_start_defs();
++
++/**
++ * scx_bpf_select_cpu_dfl - The default implementation of ops.select_cpu()
++ * @p: task_struct to select a CPU for
++ * @prev_cpu: CPU @p was on previously
++ * @wake_flags: %SCX_WAKE_* flags
++ * @is_idle: out parameter indicating whether the returned CPU is idle
++ *
++ * Can only be called from ops.select_cpu() if the built-in CPU selection is
++ * enabled - ops.update_idle() is missing or %SCX_OPS_KEEP_BUILTIN_IDLE is set.
++ * @p, @prev_cpu and @wake_flags match ops.select_cpu().
++ *
++ * Returns the picked CPU with *@is_idle indicating whether the picked CPU is
++ * currently idle and thus a good candidate for direct dispatching.
++ */
++__bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
++				       u64 wake_flags, bool *is_idle)
++{
++	if (!static_branch_likely(&scx_builtin_idle_enabled)) {
++		scx_ops_error("built-in idle tracking is disabled");
++		goto prev_cpu;
++	}
++
++	if (!scx_kf_allowed(SCX_KF_SELECT_CPU))
++		goto prev_cpu;
++
++#ifdef CONFIG_SMP
++	return scx_select_cpu_dfl(p, prev_cpu, wake_flags, is_idle);
++#endif
++
++prev_cpu:
++	*is_idle = false;
++	return prev_cpu;
++}
++
++__bpf_kfunc_end_defs();
++
++BTF_KFUNCS_START(scx_kfunc_ids_select_cpu)
++BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU)
++BTF_KFUNCS_END(scx_kfunc_ids_select_cpu)
++
++static const struct btf_kfunc_id_set scx_kfunc_set_select_cpu = {
++	.owner			= THIS_MODULE,
++	.set			= &scx_kfunc_ids_select_cpu,
++};
++
++static bool scx_dispatch_preamble(struct task_struct *p, u64 enq_flags)
++{
++	if (!scx_kf_allowed(SCX_KF_ENQUEUE | SCX_KF_DISPATCH))
++		return false;
++
++	lockdep_assert_irqs_disabled();
++
++	if (unlikely(!p)) {
++		scx_ops_error("called with NULL task");
++		return false;
++	}
++
++	if (unlikely(enq_flags & __SCX_ENQ_INTERNAL_MASK)) {
++		scx_ops_error("invalid enq_flags 0x%llx", enq_flags);
++		return false;
++	}
++
++	return true;
++}
++
++static void scx_dispatch_commit(struct task_struct *p, u64 dsq_id, u64 enq_flags)
++{
++	struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
++	struct task_struct *ddsp_task;
++
++	ddsp_task = __this_cpu_read(direct_dispatch_task);
++	if (ddsp_task) {
++		mark_direct_dispatch(ddsp_task, p, dsq_id, enq_flags);
++		return;
++	}
++
++	if (unlikely(dspc->cursor >= scx_dsp_max_batch)) {
++		scx_ops_error("dispatch buffer overflow");
++		return;
++	}
++
++	dspc->buf[dspc->cursor++] = (struct scx_dsp_buf_ent){
++		.task = p,
++		.qseq = atomic_long_read(&p->scx.ops_state) & SCX_OPSS_QSEQ_MASK,
++		.dsq_id = dsq_id,
++		.enq_flags = enq_flags,
++	};
++}
++
++__bpf_kfunc_start_defs();
++
++/**
++ * scx_bpf_dispatch - Dispatch a task into the FIFO queue of a DSQ
++ * @p: task_struct to dispatch
++ * @dsq_id: DSQ to dispatch to
++ * @slice: duration @p can run for in nsecs, 0 to keep the current value
++ * @enq_flags: SCX_ENQ_*
++ *
++ * Dispatch @p into the FIFO queue of the DSQ identified by @dsq_id. It is safe
++ * to call this function spuriously. Can be called from ops.enqueue(),
++ * ops.select_cpu(), and ops.dispatch().
++ *
++ * When called from ops.select_cpu() or ops.enqueue(), it's for direct dispatch
++ * and @p must match the task being enqueued. Also, %SCX_DSQ_LOCAL_ON can't be
++ * used to target the local DSQ of a CPU other than the enqueueing one. Use
++ * ops.select_cpu() to be on the target CPU in the first place.
++ *
++ * When called from ops.select_cpu(), @enq_flags and @dsq_id are stored, and @p
++ * will be directly dispatched to the corresponding dispatch queue after
++ * ops.select_cpu() returns. If @p is dispatched to SCX_DSQ_LOCAL, it will be
++ * dispatched to the local DSQ of the CPU returned by ops.select_cpu().
++ * @enq_flags are OR'd with the enqueue flags on the enqueue path before the
++ * task is dispatched.
++ *
++ * When called from ops.dispatch(), there are no restrictions on @p or @dsq_id
++ * and this function can be called up to ops.dispatch_max_batch times to dispatch
++ * multiple tasks. scx_bpf_dispatch_nr_slots() returns the number of the
++ * remaining slots. scx_bpf_consume() flushes the batch and resets the counter.
++ *
++ * This function doesn't have any locking restrictions and may be called under
++ * BPF locks (in the future when BPF introduces more flexible locking).
++ *
++ * @p is allowed to run for @slice. The scheduling path is triggered on slice
++ * exhaustion. If zero, the current residual slice is maintained. If
++ * %SCX_SLICE_INF, @p never expires and the BPF scheduler must kick the CPU with
++ * scx_bpf_kick_cpu() to trigger scheduling.
++ */
++__bpf_kfunc void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice,
++				  u64 enq_flags)
++{
++	if (!scx_dispatch_preamble(p, enq_flags))
++		return;
++
++	if (slice)
++		p->scx.slice = slice;
++	else
++		p->scx.slice = p->scx.slice ?: 1;
++
++	scx_dispatch_commit(p, dsq_id, enq_flags);
++}
++
++/**
++ * scx_bpf_dispatch_vtime - Dispatch a task into the vtime priority queue of a DSQ
++ * @p: task_struct to dispatch
++ * @dsq_id: DSQ to dispatch to
++ * @slice: duration @p can run for in nsecs, 0 to keep the current value
++ * @vtime: @p's ordering inside the vtime-sorted queue of the target DSQ
++ * @enq_flags: SCX_ENQ_*
++ *
++ * Dispatch @p into the vtime priority queue of the DSQ identified by @dsq_id.
++ * Tasks queued into the priority queue are ordered by @vtime and always
++ * consumed after the tasks in the FIFO queue. All other aspects are identical
++ * to scx_bpf_dispatch().
++ *
++ * @vtime ordering is according to time_before64() which considers wrapping. A
++ * numerically larger vtime may indicate an earlier position in the ordering and
++ * vice-versa.
++ */
++__bpf_kfunc void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id,
++					u64 slice, u64 vtime, u64 enq_flags)
++{
++	if (!scx_dispatch_preamble(p, enq_flags))
++		return;
++
++	if (slice)
++		p->scx.slice = slice;
++	else
++		p->scx.slice = p->scx.slice ?: 1;
++
++	p->scx.dsq_vtime = vtime;
++
++	scx_dispatch_commit(p, dsq_id, enq_flags | SCX_ENQ_DSQ_PRIQ);
++}
++
++__bpf_kfunc_end_defs();
++
++BTF_KFUNCS_START(scx_kfunc_ids_enqueue_dispatch)
++BTF_ID_FLAGS(func, scx_bpf_dispatch, KF_RCU)
++BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime, KF_RCU)
++BTF_KFUNCS_END(scx_kfunc_ids_enqueue_dispatch)
++
++static const struct btf_kfunc_id_set scx_kfunc_set_enqueue_dispatch = {
++	.owner			= THIS_MODULE,
++	.set			= &scx_kfunc_ids_enqueue_dispatch,
++};
++
++static bool scx_dispatch_from_dsq(struct bpf_iter_scx_dsq_kern *kit,
++				  struct task_struct *p, u64 dsq_id,
++				  u64 enq_flags)
++{
++	struct scx_dispatch_q *src_dsq = kit->dsq, *dst_dsq;
++	struct rq *this_rq, *src_rq, *dst_rq, *locked_rq;
++	bool dispatched = false;
++	bool in_balance;
++	unsigned long flags;
++
++	if (!scx_kf_allowed_if_unlocked() && !scx_kf_allowed(SCX_KF_DISPATCH))
++		return false;
++
++	/*
++	 * Can be called from either ops.dispatch() locking this_rq() or any
++	 * context where no rq lock is held. If latter, lock @p's task_rq which
++	 * we'll likely need anyway.
++	 */
++	src_rq = task_rq(p);
++
++	local_irq_save(flags);
++	this_rq = this_rq();
++	in_balance = this_rq->scx.flags & SCX_RQ_IN_BALANCE;
++
++	if (in_balance) {
++		if (this_rq != src_rq) {
++			raw_spin_rq_unlock(this_rq);
++			raw_spin_rq_lock(src_rq);
++		}
++	} else {
++		raw_spin_rq_lock(src_rq);
++	}
++
++	locked_rq = src_rq;
++	raw_spin_lock(&src_dsq->lock);
++
++	/*
++	 * Did someone else get to it? @p could have already left $src_dsq, got
++	 * re-enqueued, or be in the process of being consumed by someone else.
++	 */
++	if (unlikely(p->scx.dsq != src_dsq ||
++		     u32_before(kit->cursor.priv, p->scx.dsq_seq) ||
++		     p->scx.holding_cpu >= 0) ||
++	    WARN_ON_ONCE(src_rq != task_rq(p))) {
++		raw_spin_unlock(&src_dsq->lock);
++		goto out;
++	}
++
++	/* @p is still on $src_dsq and stable, determine the destination */
++	dst_dsq = find_dsq_for_dispatch(this_rq, dsq_id, p);
++
++	if (dst_dsq->id == SCX_DSQ_LOCAL) {
++		dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq);
++		if (!task_can_run_on_remote_rq(p, dst_rq, true)) {
++			dst_dsq = find_global_dsq(p);
++			dst_rq = src_rq;
++		}
++	} else {
++		/* no need to migrate if destination is a non-local DSQ */
++		dst_rq = src_rq;
++	}
++
++	/*
++	 * Move @p into $dst_dsq. If $dst_dsq is the local DSQ of a different
++	 * CPU, @p will be migrated.
++	 */
++	if (dst_dsq->id == SCX_DSQ_LOCAL) {
++		/* @p is going from a non-local DSQ to a local DSQ */
++		if (src_rq == dst_rq) {
++			task_unlink_from_dsq(p, src_dsq);
++			move_local_task_to_local_dsq(p, enq_flags,
++						     src_dsq, dst_rq);
++			raw_spin_unlock(&src_dsq->lock);
++		} else {
++			raw_spin_unlock(&src_dsq->lock);
++			move_remote_task_to_local_dsq(p, enq_flags,
++						      src_rq, dst_rq);
++			locked_rq = dst_rq;
++		}
++	} else {
++		/*
++		 * @p is going from a non-local DSQ to a non-local DSQ. As
++		 * $src_dsq is already locked, do an abbreviated dequeue.
++		 */
++		task_unlink_from_dsq(p, src_dsq);
++		p->scx.dsq = NULL;
++		raw_spin_unlock(&src_dsq->lock);
++
++		if (kit->cursor.flags & __SCX_DSQ_ITER_HAS_VTIME)
++			p->scx.dsq_vtime = kit->vtime;
++		dispatch_enqueue(dst_dsq, p, enq_flags);
++	}
++
++	if (kit->cursor.flags & __SCX_DSQ_ITER_HAS_SLICE)
++		p->scx.slice = kit->slice;
++
++	dispatched = true;
++out:
++	if (in_balance) {
++		if (this_rq != locked_rq) {
++			raw_spin_rq_unlock(locked_rq);
++			raw_spin_rq_lock(this_rq);
++		}
++	} else {
++		raw_spin_rq_unlock_irqrestore(locked_rq, flags);
++	}
++
++	kit->cursor.flags &= ~(__SCX_DSQ_ITER_HAS_SLICE |
++			       __SCX_DSQ_ITER_HAS_VTIME);
++	return dispatched;
++}
++
++__bpf_kfunc_start_defs();
++
++/**
++ * scx_bpf_dispatch_nr_slots - Return the number of remaining dispatch slots
++ *
++ * Can only be called from ops.dispatch().
++ */
++__bpf_kfunc u32 scx_bpf_dispatch_nr_slots(void)
++{
++	if (!scx_kf_allowed(SCX_KF_DISPATCH))
++		return 0;
++
++	return scx_dsp_max_batch - __this_cpu_read(scx_dsp_ctx->cursor);
++}
++
++/**
++ * scx_bpf_dispatch_cancel - Cancel the latest dispatch
++ *
++ * Cancel the latest dispatch. Can be called multiple times to cancel further
++ * dispatches. Can only be called from ops.dispatch().
++ */
++__bpf_kfunc void scx_bpf_dispatch_cancel(void)
++{
++	struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
++
++	if (!scx_kf_allowed(SCX_KF_DISPATCH))
++		return;
++
++	if (dspc->cursor > 0)
++		dspc->cursor--;
++	else
++		scx_ops_error("dispatch buffer underflow");
++}
++
++/**
++ * scx_bpf_consume - Transfer a task from a DSQ to the current CPU's local DSQ
++ * @dsq_id: DSQ to consume
++ *
++ * Consume a task from the non-local DSQ identified by @dsq_id and transfer it
++ * to the current CPU's local DSQ for execution. Can only be called from
++ * ops.dispatch().
++ *
++ * This function flushes the in-flight dispatches from scx_bpf_dispatch() before
++ * trying to consume the specified DSQ. It may also grab rq locks and thus can't
++ * be called under any BPF locks.
++ *
++ * Returns %true if a task has been consumed, %false if there isn't any task to
++ * consume.
++ */
++__bpf_kfunc bool scx_bpf_consume(u64 dsq_id)
++{
++	struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
++	struct scx_dispatch_q *dsq;
++
++	if (!scx_kf_allowed(SCX_KF_DISPATCH))
++		return false;
++
++	flush_dispatch_buf(dspc->rq);
++
++	dsq = find_user_dsq(dsq_id);
++	if (unlikely(!dsq)) {
++		scx_ops_error("invalid DSQ ID 0x%016llx", dsq_id);
++		return false;
++	}
++
++	if (consume_dispatch_q(dspc->rq, dsq)) {
++		/*
++		 * A successfully consumed task can be dequeued before it starts
++		 * running while the CPU is trying to migrate other dispatched
++		 * tasks. Bump nr_tasks to tell balance_scx() to retry on empty
++		 * local DSQ.
++		 */
++		dspc->nr_tasks++;
++		return true;
++	} else {
++		return false;
++	}
++}
++
++/**
++ * scx_bpf_dispatch_from_dsq_set_slice - Override slice when dispatching from DSQ
++ * @it__iter: DSQ iterator in progress
++ * @slice: duration the dispatched task can run for in nsecs
++ *
++ * Override the slice of the next task that will be dispatched from @it__iter
++ * using scx_bpf_dispatch[_vtime]_from_dsq(). If this function is not called,
++ * the previous slice duration is kept.
++ */
++__bpf_kfunc void scx_bpf_dispatch_from_dsq_set_slice(
++				struct bpf_iter_scx_dsq *it__iter, u64 slice)
++{
++	struct bpf_iter_scx_dsq_kern *kit = (void *)it__iter;
++
++	kit->slice = slice;
++	kit->cursor.flags |= __SCX_DSQ_ITER_HAS_SLICE;
++}
++
++/**
++ * scx_bpf_dispatch_from_dsq_set_vtime - Override vtime when dispatching from DSQ
++ * @it__iter: DSQ iterator in progress
++ * @vtime: task's ordering inside the vtime-sorted queue of the target DSQ
++ *
++ * Override the vtime of the next task that will be dispatched from @it__iter
++ * using scx_bpf_dispatch_vtime_from_dsq(). If this function is not called, the
++ * previous vtime is kept. If scx_bpf_dispatch_from_dsq() is used to
++ * dispatch the next task, the override is ignored and cleared.
++ */
++__bpf_kfunc void scx_bpf_dispatch_from_dsq_set_vtime(
++				struct bpf_iter_scx_dsq *it__iter, u64 vtime)
++{
++	struct bpf_iter_scx_dsq_kern *kit = (void *)it__iter;
++
++	kit->vtime = vtime;
++	kit->cursor.flags |= __SCX_DSQ_ITER_HAS_VTIME;
++}
++
++/**
++ * scx_bpf_dispatch_from_dsq - Move a task from DSQ iteration to a DSQ
++ * @it__iter: DSQ iterator in progress
++ * @p: task to transfer
++ * @dsq_id: DSQ to move @p to
++ * @enq_flags: SCX_ENQ_*
++ *
++ * Transfer @p which is on the DSQ currently iterated by @it__iter to the DSQ
++ * specified by @dsq_id. All DSQs - local DSQs, global DSQ and user DSQs - can
++ * be the destination.
++ *
++ * For the transfer to be successful, @p must still be on the DSQ and have been
++ * queued before the DSQ iteration started. This function doesn't care whether
++ * @p was obtained from the DSQ iteration. @p just has to be on the DSQ and have
++ * been queued before the iteration started.
++ *
++ * @p's slice is kept by default. Use scx_bpf_dispatch_from_dsq_set_slice() to
++ * update.
++ *
++ * Can be called from ops.dispatch() or any BPF context which doesn't hold a rq
++ * lock (e.g. BPF timers or SYSCALL programs).
++ *
++ * Returns %true if @p has been consumed, %false if @p had already been consumed
++ * or dequeued.
++ */
++__bpf_kfunc bool scx_bpf_dispatch_from_dsq(struct bpf_iter_scx_dsq *it__iter,
++					   struct task_struct *p, u64 dsq_id,
++					   u64 enq_flags)
++{
++	return scx_dispatch_from_dsq((struct bpf_iter_scx_dsq_kern *)it__iter,
++				     p, dsq_id, enq_flags);
++}
++
++/**
++ * scx_bpf_dispatch_vtime_from_dsq - Move a task from DSQ iteration to a PRIQ DSQ
++ * @it__iter: DSQ iterator in progress
++ * @p: task to transfer
++ * @dsq_id: DSQ to move @p to
++ * @enq_flags: SCX_ENQ_*
++ *
++ * Transfer @p which is on the DSQ currently iterated by @it__iter to the
++ * priority queue of the DSQ specified by @dsq_id. The destination must be a
++ * user DSQ as only user DSQs support priority queue.
++ *
++ * @p's slice and vtime are kept by default. Use
++ * scx_bpf_dispatch_from_dsq_set_slice() and
++ * scx_bpf_dispatch_from_dsq_set_vtime() to update.
++ *
++ * All other aspects are identical to scx_bpf_dispatch_from_dsq(). See
++ * scx_bpf_dispatch_vtime() for more information on @vtime.
++ */
++__bpf_kfunc bool scx_bpf_dispatch_vtime_from_dsq(struct bpf_iter_scx_dsq *it__iter,
++						 struct task_struct *p, u64 dsq_id,
++						 u64 enq_flags)
++{
++	return scx_dispatch_from_dsq((struct bpf_iter_scx_dsq_kern *)it__iter,
++				     p, dsq_id, enq_flags | SCX_ENQ_DSQ_PRIQ);
++}
++
++__bpf_kfunc_end_defs();
++
++BTF_KFUNCS_START(scx_kfunc_ids_dispatch)
++BTF_ID_FLAGS(func, scx_bpf_dispatch_nr_slots)
++BTF_ID_FLAGS(func, scx_bpf_dispatch_cancel)
++BTF_ID_FLAGS(func, scx_bpf_consume)
++BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_slice)
++BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_vtime)
++BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU)
++BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU)
++BTF_KFUNCS_END(scx_kfunc_ids_dispatch)
++
++static const struct btf_kfunc_id_set scx_kfunc_set_dispatch = {
++	.owner			= THIS_MODULE,
++	.set			= &scx_kfunc_ids_dispatch,
++};
++
++__bpf_kfunc_start_defs();
++
++/**
++ * scx_bpf_reenqueue_local - Re-enqueue tasks on a local DSQ
++ *
++ * Iterate over all of the tasks currently enqueued on the local DSQ of the
++ * caller's CPU, and re-enqueue them in the BPF scheduler. Returns the number of
++ * processed tasks. Can only be called from ops.cpu_release().
++ */
++__bpf_kfunc u32 scx_bpf_reenqueue_local(void)
++{
++	LIST_HEAD(tasks);
++	u32 nr_enqueued = 0;
++	struct rq *rq;
++	struct task_struct *p, *n;
++
++	if (!scx_kf_allowed(SCX_KF_CPU_RELEASE))
++		return 0;
++
++	rq = cpu_rq(smp_processor_id());
++	lockdep_assert_rq_held(rq);
++
++	/*
++	 * The BPF scheduler may choose to dispatch tasks back to
++	 * @rq->scx.local_dsq. Move all candidate tasks off to a private list
++	 * first to avoid processing the same tasks repeatedly.
++	 */
++	list_for_each_entry_safe(p, n, &rq->scx.local_dsq.list,
++				 scx.dsq_list.node) {
++		/*
++		 * If @p is being migrated, @p's current CPU may not agree with
++		 * its allowed CPUs and the migration_cpu_stop is about to
++		 * deactivate and re-activate @p anyway. Skip re-enqueueing.
++		 *
++		 * While racing sched property changes may also dequeue and
++		 * re-enqueue a migrating task while its current CPU and allowed
++		 * CPUs disagree, they use %ENQUEUE_RESTORE which is bypassed to
++		 * the current local DSQ for running tasks and thus are not
++		 * visible to the BPF scheduler.
++		 */
++		if (p->migration_pending)
++			continue;
++
++		dispatch_dequeue(rq, p);
++		list_add_tail(&p->scx.dsq_list.node, &tasks);
++	}
++
++	list_for_each_entry_safe(p, n, &tasks, scx.dsq_list.node) {
++		list_del_init(&p->scx.dsq_list.node);
++		do_enqueue_task(rq, p, SCX_ENQ_REENQ, -1);
++		nr_enqueued++;
++	}
++
++	return nr_enqueued;
++}
++
++__bpf_kfunc_end_defs();
++
++BTF_KFUNCS_START(scx_kfunc_ids_cpu_release)
++BTF_ID_FLAGS(func, scx_bpf_reenqueue_local)
++BTF_KFUNCS_END(scx_kfunc_ids_cpu_release)
++
++static const struct btf_kfunc_id_set scx_kfunc_set_cpu_release = {
++	.owner			= THIS_MODULE,
++	.set			= &scx_kfunc_ids_cpu_release,
++};
++
++__bpf_kfunc_start_defs();
++
++/**
++ * scx_bpf_create_dsq - Create a custom DSQ
++ * @dsq_id: DSQ to create
++ * @node: NUMA node to allocate from
++ *
++ * Create a custom DSQ identified by @dsq_id. Can be called from any sleepable
++ * scx callback, and any BPF_PROG_TYPE_SYSCALL prog.
++ */
++__bpf_kfunc s32 scx_bpf_create_dsq(u64 dsq_id, s32 node)
++{
++	if (unlikely(node >= (int)nr_node_ids ||
++		     (node < 0 && node != NUMA_NO_NODE)))
++		return -EINVAL;
++	return PTR_ERR_OR_ZERO(create_dsq(dsq_id, node));
++}
++
++__bpf_kfunc_end_defs();
++
++BTF_KFUNCS_START(scx_kfunc_ids_unlocked)
++BTF_ID_FLAGS(func, scx_bpf_create_dsq, KF_SLEEPABLE)
++BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU)
++BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU)
++BTF_KFUNCS_END(scx_kfunc_ids_unlocked)
++
++static const struct btf_kfunc_id_set scx_kfunc_set_unlocked = {
++	.owner			= THIS_MODULE,
++	.set			= &scx_kfunc_ids_unlocked,
++};
++
++__bpf_kfunc_start_defs();
++
++/**
++ * scx_bpf_kick_cpu - Trigger reschedule on a CPU
++ * @cpu: cpu to kick
++ * @flags: %SCX_KICK_* flags
++ *
++ * Kick @cpu into rescheduling. This can be used to wake up an idle CPU or
++ * trigger rescheduling on a busy CPU. This can be called from any online
++ * scx_ops operation and the actual kicking is performed asynchronously through
++ * an irq work.
++ */
++__bpf_kfunc void scx_bpf_kick_cpu(s32 cpu, u64 flags)
++{
++	struct rq *this_rq;
++	unsigned long irq_flags;
++
++	if (!ops_cpu_valid(cpu, NULL))
++		return;
++
++	local_irq_save(irq_flags);
++
++	this_rq = this_rq();
++
++	/*
++	 * While bypassing for PM ops, IRQ handling may not be online which can
++	 * lead to irq_work_queue() malfunction such as infinite busy wait for
++	 * IRQ status update. Suppress kicking.
++	 */
++	if (scx_rq_bypassing(this_rq))
++		goto out;
++
++	/*
++	 * Actual kicking is bounced to kick_cpus_irq_workfn() to avoid nesting
++	 * rq locks. We can probably be smarter and avoid bouncing if called
++	 * from ops which don't hold a rq lock.
++	 */
++	if (flags & SCX_KICK_IDLE) {
++		struct rq *target_rq = cpu_rq(cpu);
++
++		if (unlikely(flags & (SCX_KICK_PREEMPT | SCX_KICK_WAIT)))
++			scx_ops_error("PREEMPT/WAIT cannot be used with SCX_KICK_IDLE");
++
++		if (raw_spin_rq_trylock(target_rq)) {
++			if (can_skip_idle_kick(target_rq)) {
++				raw_spin_rq_unlock(target_rq);
++				goto out;
++			}
++			raw_spin_rq_unlock(target_rq);
++		}
++		cpumask_set_cpu(cpu, this_rq->scx.cpus_to_kick_if_idle);
++	} else {
++		cpumask_set_cpu(cpu, this_rq->scx.cpus_to_kick);
++
++		if (flags & SCX_KICK_PREEMPT)
++			cpumask_set_cpu(cpu, this_rq->scx.cpus_to_preempt);
++		if (flags & SCX_KICK_WAIT)
++			cpumask_set_cpu(cpu, this_rq->scx.cpus_to_wait);
++	}
++
++	irq_work_queue(&this_rq->scx.kick_cpus_irq_work);
++out:
++	local_irq_restore(irq_flags);
++}
++
++/**
++ * scx_bpf_dsq_nr_queued - Return the number of queued tasks
++ * @dsq_id: id of the DSQ
++ *
++ * Return the number of tasks in the DSQ matching @dsq_id. If not found,
++ * -%ENOENT is returned.
++ */
++__bpf_kfunc s32 scx_bpf_dsq_nr_queued(u64 dsq_id)
++{
++	struct scx_dispatch_q *dsq;
++	s32 ret;
++
++	preempt_disable();
++
++	if (dsq_id == SCX_DSQ_LOCAL) {
++		ret = READ_ONCE(this_rq()->scx.local_dsq.nr);
++		goto out;
++	} else if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) {
++		s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK;
++
++		if (ops_cpu_valid(cpu, NULL)) {
++			ret = READ_ONCE(cpu_rq(cpu)->scx.local_dsq.nr);
++			goto out;
++		}
++	} else {
++		dsq = find_user_dsq(dsq_id);
++		if (dsq) {
++			ret = READ_ONCE(dsq->nr);
++			goto out;
++		}
++	}
++	ret = -ENOENT;
++out:
++	preempt_enable();
++	return ret;
++}
++
++/**
++ * scx_bpf_destroy_dsq - Destroy a custom DSQ
++ * @dsq_id: DSQ to destroy
++ *
++ * Destroy the custom DSQ identified by @dsq_id. Only DSQs created with
++ * scx_bpf_create_dsq() can be destroyed. The caller must ensure that the DSQ is
++ * empty and no further tasks are dispatched to it. Ignored if called on a DSQ
++ * which doesn't exist. Can be called from any online scx_ops operations.
++ */
++__bpf_kfunc void scx_bpf_destroy_dsq(u64 dsq_id)
++{
++	destroy_dsq(dsq_id);
++}
++
++/**
++ * bpf_iter_scx_dsq_new - Create a DSQ iterator
++ * @it: iterator to initialize
++ * @dsq_id: DSQ to iterate
++ * @flags: %SCX_DSQ_ITER_*
++ *
++ * Initialize BPF iterator @it which can be used with bpf_for_each() to walk
++ * the tasks already queued on DSQ @dsq_id when this is invoked. Returns 0,
++ * -%EINVAL on unknown @flags or -%ENOENT if find_user_dsq() finds no DSQ.
++ */
++__bpf_kfunc int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id,
++				     u64 flags)
++{
++	struct bpf_iter_scx_dsq_kern *kit = (void *)it;
++
++	BUILD_BUG_ON(sizeof(struct bpf_iter_scx_dsq_kern) >
++		     sizeof(struct bpf_iter_scx_dsq));
++	BUILD_BUG_ON(__alignof__(struct bpf_iter_scx_dsq_kern) !=
++		     __alignof__(struct bpf_iter_scx_dsq));
++
++	/* @it may be stack garbage: always set ->dsq, assign (not OR) flags */
++	kit->dsq = NULL;
++	if (flags & ~__SCX_DSQ_ITER_USER_FLAGS)
++		return -EINVAL;
++	kit->dsq = find_user_dsq(dsq_id);
++	if (!kit->dsq)
++		return -ENOENT;
++
++	INIT_LIST_HEAD(&kit->cursor.node);
++	kit->cursor.flags = SCX_DSQ_LNODE_ITER_CURSOR | flags;
++	kit->cursor.priv = READ_ONCE(kit->dsq->seq);
++	return 0;
++}
++
++/**
++ * bpf_iter_scx_dsq_next - Progress a DSQ iterator
++ * @it: iterator to progress
++ *
++ * Return the next task. See bpf_iter_scx_dsq_new().
++ */
++__bpf_kfunc struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it)
++{
++	struct bpf_iter_scx_dsq_kern *kit = (void *)it;
++	bool rev = kit->cursor.flags & SCX_DSQ_ITER_REV;
++	struct task_struct *p;
++	unsigned long flags;
++
++	if (!kit->dsq)
++		return NULL;
++
++	raw_spin_lock_irqsave(&kit->dsq->lock, flags);
++
++	if (list_empty(&kit->cursor.node))
++		p = NULL;
++	else
++		p = container_of(&kit->cursor, struct task_struct, scx.dsq_list);
++
++	/*
++	 * Only tasks which were queued before the iteration started are
++	 * visible. This bounds BPF iterations and guarantees that vtime never
++	 * jumps in the other direction while iterating.
++	 */
++	do {
++		p = nldsq_next_task(kit->dsq, p, rev);
++	} while (p && unlikely(u32_before(kit->cursor.priv, p->scx.dsq_seq)));
++
++	if (p) {
++		if (rev)
++			list_move_tail(&kit->cursor.node, &p->scx.dsq_list.node);
++		else
++			list_move(&kit->cursor.node, &p->scx.dsq_list.node);
++	} else {
++		list_del_init(&kit->cursor.node);
++	}
++
++	raw_spin_unlock_irqrestore(&kit->dsq->lock, flags);
++
++	return p;
++}
++
++/**
++ * bpf_iter_scx_dsq_destroy - Destroy a DSQ iterator
++ * @it: iterator to destroy
++ *
++ * Undo bpf_iter_scx_dsq_new().
++ */
++__bpf_kfunc void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it)
++{
++	struct bpf_iter_scx_dsq_kern *kit = (void *)it;
++
++	if (!kit->dsq)
++		return;
++
++	if (!list_empty(&kit->cursor.node)) {
++		unsigned long flags;
++
++		raw_spin_lock_irqsave(&kit->dsq->lock, flags);
++		list_del_init(&kit->cursor.node);
++		raw_spin_unlock_irqrestore(&kit->dsq->lock, flags);
++	}
++	kit->dsq = NULL;
++}
++
++__bpf_kfunc_end_defs();
++
++static s32 __bstr_format(u64 *data_buf, char *line_buf, size_t line_size,
++			 char *fmt, unsigned long long *data, u32 data__sz)
++{
++	struct bpf_bprintf_data bprintf_data = { .get_bin_args = true };
++	s32 ret;
++
++	if (data__sz % 8 || data__sz > MAX_BPRINTF_VARARGS * 8 ||
++	    (data__sz && !data)) {
++		scx_ops_error("invalid data=%p and data__sz=%u",
++			      (void *)data, data__sz);
++		return -EINVAL;
++	}
++
++	ret = copy_from_kernel_nofault(data_buf, data, data__sz);
++	if (ret < 0) {
++		scx_ops_error("failed to read data fields (%d)", ret);
++		return ret;
++	}
++
++	ret = bpf_bprintf_prepare(fmt, UINT_MAX, data_buf, data__sz / 8,
++				  &bprintf_data);
++	if (ret < 0) {
++		scx_ops_error("format preparation failed (%d)", ret);
++		return ret;
++	}
++
++	ret = bstr_printf(line_buf, line_size, fmt,
++			  bprintf_data.bin_args);
++	bpf_bprintf_cleanup(&bprintf_data);
++	if (ret < 0) {
++		scx_ops_error("(\"%s\", %p, %u) failed to format",
++			      fmt, data, data__sz);
++		return ret;
++	}
++
++	return ret;
++}
++
++static s32 bstr_format(struct scx_bstr_buf *buf,
++		       char *fmt, unsigned long long *data, u32 data__sz)
++{
++	return __bstr_format(buf->data, buf->line, sizeof(buf->line),
++			     fmt, data, data__sz);
++}
++
++__bpf_kfunc_start_defs();
++
++/**
++ * scx_bpf_exit_bstr - Gracefully exit the BPF scheduler.
++ * @exit_code: Exit value to pass to user space via struct scx_exit_info.
++ * @fmt: error message format string
++ * @data: format string parameters packaged using ___bpf_fill() macro
++ * @data__sz: @data len, must end in '__sz' for the verifier
++ *
++ * Indicate that the BPF scheduler wants to exit gracefully, and initiate ops
++ * disabling.
++ */
++__bpf_kfunc void scx_bpf_exit_bstr(s64 exit_code, char *fmt,
++				   unsigned long long *data, u32 data__sz)
++{
++	unsigned long flags;
++
++	raw_spin_lock_irqsave(&scx_exit_bstr_buf_lock, flags);
++	if (bstr_format(&scx_exit_bstr_buf, fmt, data, data__sz) >= 0)
++		scx_ops_exit_kind(SCX_EXIT_UNREG_BPF, exit_code, "%s",
++				  scx_exit_bstr_buf.line);
++	raw_spin_unlock_irqrestore(&scx_exit_bstr_buf_lock, flags);
++}
++
++/**
++ * scx_bpf_error_bstr - Indicate fatal error
++ * @fmt: error message format string
++ * @data: format string parameters packaged using ___bpf_fill() macro
++ * @data__sz: @data len, must end in '__sz' for the verifier
++ *
++ * Indicate that the BPF scheduler encountered a fatal error and initiate ops
++ * disabling.
++ */
++__bpf_kfunc void scx_bpf_error_bstr(char *fmt, unsigned long long *data,
++				    u32 data__sz)
++{
++	unsigned long flags;
++
++	raw_spin_lock_irqsave(&scx_exit_bstr_buf_lock, flags);
++	if (bstr_format(&scx_exit_bstr_buf, fmt, data, data__sz) >= 0)
++		scx_ops_exit_kind(SCX_EXIT_ERROR_BPF, 0, "%s",
++				  scx_exit_bstr_buf.line);
++	raw_spin_unlock_irqrestore(&scx_exit_bstr_buf_lock, flags);
++}
++
++/**
++ * scx_bpf_dump_bstr - Generate extra debug dump specific to the BPF scheduler
++ * @fmt: format string
++ * @data: format string parameters packaged using ___bpf_fill() macro
++ * @data__sz: @data len, must end in '__sz' for the verifier
++ *
++ * To be called through scx_bpf_dump() helper from ops.dump(), dump_cpu() and
++ * dump_task() to generate extra debug dump specific to the BPF scheduler.
++ *
++ * The extra dump may be multiple lines. A single line may be split over
++ * multiple calls. The last line is automatically terminated.
++ */
++__bpf_kfunc void scx_bpf_dump_bstr(char *fmt, unsigned long long *data,
++				   u32 data__sz)
++{
++	struct scx_dump_data *dd = &scx_dump_data;
++	struct scx_bstr_buf *buf = &dd->buf;
++	s32 ret;
++
++	if (raw_smp_processor_id() != dd->cpu) {
++		scx_ops_error("scx_bpf_dump() must only be called from ops.dump() and friends");
++		return;
++	}
++
++	/* append the formatted string to the line buf */
++	ret = __bstr_format(buf->data, buf->line + dd->cursor,
++			    sizeof(buf->line) - dd->cursor, fmt, data, data__sz);
++	if (ret < 0) {
++		dump_line(dd->s, "%s[!] (\"%s\", %p, %u) failed to format (%d)",
++			  dd->prefix, fmt, data, data__sz, ret);
++		return;
++	}
++
++	dd->cursor += ret;
++	dd->cursor = min_t(s32, dd->cursor, sizeof(buf->line));
++
++	if (!dd->cursor)
++		return;
++
++	/*
++	 * If the line buf overflowed or ends in a newline, flush it into the
++	 * dump. This is to allow the caller to generate a single line over
++	 * multiple calls. As ops_dump_flush() can also handle multiple lines in
++	 * the line buf, the only case which can lead to an unexpected
++	 * truncation is when the caller keeps generating newlines in the middle
++	 * instead of the end consecutively. Don't do that.
++	 */
++	if (dd->cursor >= sizeof(buf->line) || buf->line[dd->cursor - 1] == '\n')
++		ops_dump_flush();
++}
++
++/**
++ * scx_bpf_cpuperf_cap - Query the maximum relative capacity of a CPU
++ * @cpu: CPU of interest
++ *
++ * Return the maximum relative capacity of @cpu in relation to the most
++ * performant CPU in the system. The return value is in the range [1,
++ * %SCX_CPUPERF_ONE]. See scx_bpf_cpuperf_cur().
++ */
++__bpf_kfunc u32 scx_bpf_cpuperf_cap(s32 cpu)
++{
++	if (ops_cpu_valid(cpu, NULL))
++		return arch_scale_cpu_capacity(cpu);
++	else
++		return SCX_CPUPERF_ONE;
++}
++
++/**
++ * scx_bpf_cpuperf_cur - Query the current relative performance of a CPU
++ * @cpu: CPU of interest
++ *
++ * Return the current relative performance of @cpu in relation to its maximum.
++ * The return value is in the range [1, %SCX_CPUPERF_ONE].
++ *
++ * The current performance level of a CPU in relation to the maximum performance
++ * available in the system can be calculated as follows:
++ *
++ *   scx_bpf_cpuperf_cap() * scx_bpf_cpuperf_cur() / %SCX_CPUPERF_ONE
++ *
++ * The result is in the range [1, %SCX_CPUPERF_ONE].
++ */
++__bpf_kfunc u32 scx_bpf_cpuperf_cur(s32 cpu)
++{
++	if (ops_cpu_valid(cpu, NULL))
++		return arch_scale_freq_capacity(cpu);
++	else
++		return SCX_CPUPERF_ONE;
++}
++
++/**
++ * scx_bpf_cpuperf_set - Set the relative performance target of a CPU
++ * @cpu: CPU of interest
++ * @perf: target performance level [0, %SCX_CPUPERF_ONE]; larger values are
++ *        reported through scx_ops_error() and otherwise ignored
++ *
++ * Set the target performance level of @cpu to @perf. @perf is in linear
++ * relative scale between 0 and %SCX_CPUPERF_ONE. This determines how the
++ * schedutil cpufreq governor chooses the target frequency.
++ *
++ * The actual performance level chosen, CPU grouping, and the overhead and
++ * latency of the operations are dependent on the hardware and cpufreq driver in
++ * use. Consult hardware and cpufreq documentation for more information. The
++ * current performance level can be monitored using scx_bpf_cpuperf_cur().
++ */
++__bpf_kfunc void scx_bpf_cpuperf_set(s32 cpu, u32 perf)
++{
++	if (unlikely(perf > SCX_CPUPERF_ONE)) {
++		scx_ops_error("Invalid cpuperf target %u for CPU %d", perf, cpu);
++		return;
++	}
++
++	if (ops_cpu_valid(cpu, NULL)) {
++		struct rq *rq = cpu_rq(cpu);
++
++		rq->scx.cpuperf_target = perf;
++
++		rcu_read_lock_sched_notrace();
++		cpufreq_update_util(cpu_rq(cpu), 0);
++		rcu_read_unlock_sched_notrace();
++	}
++}
++
++/**
++ * scx_bpf_nr_cpu_ids - Return the number of possible CPU IDs
++ *
++ * All valid CPU IDs in the system are smaller than the returned value.
++ */
++__bpf_kfunc u32 scx_bpf_nr_cpu_ids(void)
++{
++	return nr_cpu_ids;
++}
++
++/**
++ * scx_bpf_get_possible_cpumask - Get a referenced kptr to cpu_possible_mask
++ */
++__bpf_kfunc const struct cpumask *scx_bpf_get_possible_cpumask(void)
++{
++	return cpu_possible_mask;
++}
++
++/**
++ * scx_bpf_get_online_cpumask - Get a referenced kptr to cpu_online_mask
++ */
++__bpf_kfunc const struct cpumask *scx_bpf_get_online_cpumask(void)
++{
++	return cpu_online_mask;
++}
++
++/**
++ * scx_bpf_put_cpumask - Release a possible/online cpumask
++ * @cpumask: cpumask to release
++ */
++__bpf_kfunc void scx_bpf_put_cpumask(const struct cpumask *cpumask)
++{
++	/*
++	 * Empty function body because we aren't actually acquiring or releasing
++	 * a reference to a global cpumask, which is read-only in the caller and
++	 * is never released. The acquire / release semantics here are just used
++	 * to make the cpumask a trusted pointer in the caller.
++	 */
++}
++
++/**
++ * scx_bpf_get_idle_cpumask - Get a referenced kptr to the idle-tracking
++ * per-CPU cpumask.
++ *
++ * Returns an empty mask if idle tracking is disabled or on a UP kernel.
++ */
++__bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask(void)
++{
++	if (!static_branch_likely(&scx_builtin_idle_enabled)) {
++		scx_ops_error("built-in idle tracking is disabled");
++		return cpu_none_mask;
++	}
++
++#ifdef CONFIG_SMP
++	return idle_masks.cpu;
++#else
++	return cpu_none_mask;
++#endif
++}
++
++/**
++ * scx_bpf_get_idle_smtmask - Get a referenced kptr to the idle-tracking,
++ * per-physical-core cpumask. Can be used to determine if an entire physical
++ * core is free.
++ *
++ * Returns an empty mask if idle tracking is disabled or on a UP kernel.
++ */
++__bpf_kfunc const struct cpumask *scx_bpf_get_idle_smtmask(void)
++{
++	if (!static_branch_likely(&scx_builtin_idle_enabled)) {
++		scx_ops_error("built-in idle tracking is disabled");
++		return cpu_none_mask;
++	}
++
++#ifdef CONFIG_SMP
++	if (sched_smt_active())
++		return idle_masks.smt;
++	else
++		return idle_masks.cpu;
++#else
++	return cpu_none_mask;
++#endif
++}
++
++/**
++ * scx_bpf_put_idle_cpumask - Release a previously acquired referenced kptr to
++ * either the percpu, or SMT idle-tracking cpumask.
++ */
++__bpf_kfunc void scx_bpf_put_idle_cpumask(const struct cpumask *idle_mask)
++{
++	/*
++	 * Empty function body because we aren't actually acquiring or releasing
++	 * a reference to a global idle cpumask, which is read-only in the
++	 * caller and is never released. The acquire / release semantics here
++	 * are just used to make the cpumask a trusted pointer in the caller.
++	 */
++}
++
++/**
++ * scx_bpf_test_and_clear_cpu_idle - Test and clear @cpu's idle state
++ * @cpu: cpu to test and clear idle for
++ *
++ * Returns %true if @cpu was idle and its idle state was successfully cleared.
++ * %false otherwise.
++ *
++ * Unavailable if ops.update_idle() is implemented and
++ * %SCX_OPS_KEEP_BUILTIN_IDLE is not set.
++ */
++__bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu)
++{
++	if (!static_branch_likely(&scx_builtin_idle_enabled)) {
++		scx_ops_error("built-in idle tracking is disabled");
++		return false;
++	}
++
++	if (ops_cpu_valid(cpu, NULL))
++		return test_and_clear_cpu_idle(cpu);
++	else
++		return false;
++}
++
++/**
++ * scx_bpf_pick_idle_cpu - Pick and claim an idle cpu
++ * @cpus_allowed: Allowed cpumask
++ * @flags: %SCX_PICK_IDLE_CPU_* flags
++ *
++ * Pick and claim an idle cpu in @cpus_allowed. Returns the picked idle cpu
++ * number on success. -%EBUSY if no matching cpu was found.
++ *
++ * Idle CPU tracking may race against CPU scheduling state transitions. For
++ * example, this function may return -%EBUSY as CPUs are transitioning into the
++ * idle state. If the caller then assumes that there will be dispatch events on
++ * the CPUs as they were all busy, the scheduler may end up stalling with CPUs
++ * idling while there are pending tasks. Use scx_bpf_pick_any_cpu() and
++ * scx_bpf_kick_cpu() to guarantee that there will be at least one dispatch
++ * event in the near future.
++ *
++ * Unavailable if ops.update_idle() is implemented and
++ * %SCX_OPS_KEEP_BUILTIN_IDLE is not set.
++ */
++__bpf_kfunc s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed,
++				      u64 flags)
++{
++	if (!static_branch_likely(&scx_builtin_idle_enabled)) {
++		scx_ops_error("built-in idle tracking is disabled");
++		return -EBUSY;
++	}
++
++	return scx_pick_idle_cpu(cpus_allowed, flags);
++}
++
++/**
++ * scx_bpf_pick_any_cpu - Pick and claim an idle cpu if available or pick any CPU
++ * @cpus_allowed: Allowed cpumask
++ * @flags: %SCX_PICK_IDLE_CPU_* flags
++ *
++ * Pick and claim an idle cpu in @cpus_allowed. If none is available, pick any
++ * CPU in @cpus_allowed. Guaranteed to succeed and returns the picked idle cpu
++ * number if @cpus_allowed is not empty. -%EBUSY is returned if @cpus_allowed is
++ * empty.
++ *
++ * If ops.update_idle() is implemented and %SCX_OPS_KEEP_BUILTIN_IDLE is not
++ * set, this function can't tell which CPUs are idle and will always pick any
++ * CPU.
++ */
++__bpf_kfunc s32 scx_bpf_pick_any_cpu(const struct cpumask *cpus_allowed,
++				     u64 flags)
++{
++	s32 cpu;
++
++	if (static_branch_likely(&scx_builtin_idle_enabled)) {
++		cpu = scx_pick_idle_cpu(cpus_allowed, flags);
++		if (cpu >= 0)
++			return cpu;
++	}
++
++	cpu = cpumask_any_distribute(cpus_allowed);
++	if (cpu < nr_cpu_ids)
++		return cpu;
++	else
++		return -EBUSY;
++}
++
++/**
++ * scx_bpf_task_running - Is task currently running?
++ * @p: task of interest
++ */
++__bpf_kfunc bool scx_bpf_task_running(const struct task_struct *p)
++{
++	return task_rq(p)->curr == p;
++}
++
++/**
++ * scx_bpf_task_cpu - CPU a task is currently associated with
++ * @p: task of interest
++ */
++__bpf_kfunc s32 scx_bpf_task_cpu(const struct task_struct *p)
++{
++	return task_cpu(p);
++}
++
++/**
++ * scx_bpf_cpu_rq - Fetch the rq of a CPU
++ * @cpu: CPU of the rq
++ */
++__bpf_kfunc struct rq *scx_bpf_cpu_rq(s32 cpu)
++{
++	if (!ops_cpu_valid(cpu, NULL))
++		return NULL;
++
++	return cpu_rq(cpu);
++}
++
++/**
++ * scx_bpf_task_cgroup - Return the sched cgroup of a task
++ * @p: task of interest
++ *
++ * @p->sched_task_group->css.cgroup represents the cgroup @p is associated with
++ * from the scheduler's POV. SCX operations should use this function to
++ * determine @p's current cgroup as, unlike following @p->cgroups,
++ * @p->sched_task_group is protected by @p's rq lock and thus atomic w.r.t. all
++ * rq-locked operations. Can be called on the parameter tasks of rq-locked
++ * operations. The restriction guarantees that @p's rq is locked by the caller.
++ */
++#ifdef CONFIG_CGROUP_SCHED
++__bpf_kfunc struct cgroup *scx_bpf_task_cgroup(struct task_struct *p)
++{
++	struct task_group *tg = p->sched_task_group;
++	struct cgroup *cgrp = &cgrp_dfl_root.cgrp;
++
++	if (!scx_kf_allowed_on_arg_tasks(__SCX_KF_RQ_LOCKED, p))
++		goto out;
++
++	/*
++	 * A task_group may either be a cgroup or an autogroup. In the latter
++	 * case, @tg->css.cgroup is %NULL. A task_group can't become the other
++	 * kind once created.
++	 */
++	if (tg && tg->css.cgroup)
++		cgrp = tg->css.cgroup;
++	else
++		cgrp = &cgrp_dfl_root.cgrp;
++out:
++	cgroup_get(cgrp);
++	return cgrp;
++}
++#endif
++
++__bpf_kfunc_end_defs();
++
++BTF_KFUNCS_START(scx_kfunc_ids_any)
++BTF_ID_FLAGS(func, scx_bpf_kick_cpu)
++BTF_ID_FLAGS(func, scx_bpf_dsq_nr_queued)
++BTF_ID_FLAGS(func, scx_bpf_destroy_dsq)
++BTF_ID_FLAGS(func, bpf_iter_scx_dsq_new, KF_ITER_NEW | KF_RCU_PROTECTED)
++BTF_ID_FLAGS(func, bpf_iter_scx_dsq_next, KF_ITER_NEXT | KF_RET_NULL)
++BTF_ID_FLAGS(func, bpf_iter_scx_dsq_destroy, KF_ITER_DESTROY)
++BTF_ID_FLAGS(func, scx_bpf_exit_bstr, KF_TRUSTED_ARGS)
++BTF_ID_FLAGS(func, scx_bpf_error_bstr, KF_TRUSTED_ARGS)
++BTF_ID_FLAGS(func, scx_bpf_dump_bstr, KF_TRUSTED_ARGS)
++BTF_ID_FLAGS(func, scx_bpf_cpuperf_cap)
++BTF_ID_FLAGS(func, scx_bpf_cpuperf_cur)
++BTF_ID_FLAGS(func, scx_bpf_cpuperf_set)
++BTF_ID_FLAGS(func, scx_bpf_nr_cpu_ids)
++BTF_ID_FLAGS(func, scx_bpf_get_possible_cpumask, KF_ACQUIRE)
++BTF_ID_FLAGS(func, scx_bpf_get_online_cpumask, KF_ACQUIRE)
++BTF_ID_FLAGS(func, scx_bpf_put_cpumask, KF_RELEASE)
++BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask, KF_ACQUIRE)
++BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask, KF_ACQUIRE)
++BTF_ID_FLAGS(func, scx_bpf_put_idle_cpumask, KF_RELEASE)
++BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle)
++BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU)
++BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU)
++BTF_ID_FLAGS(func, scx_bpf_task_running, KF_RCU)
++BTF_ID_FLAGS(func, scx_bpf_task_cpu, KF_RCU)
++BTF_ID_FLAGS(func, scx_bpf_cpu_rq)
++#ifdef CONFIG_CGROUP_SCHED
++BTF_ID_FLAGS(func, scx_bpf_task_cgroup, KF_RCU | KF_ACQUIRE)
++#endif
++BTF_KFUNCS_END(scx_kfunc_ids_any)
++
++static const struct btf_kfunc_id_set scx_kfunc_set_any = {
++	.owner			= THIS_MODULE,
++	.set			= &scx_kfunc_ids_any,
++};
++
++static int __init scx_init(void)
++{
++	int ret;
++
++	/*
++	 * kfunc registration can't be done from init_sched_ext_class() as
++	 * register_btf_kfunc_id_set() needs most of the system to be up.
++	 *
++	 * Some kfuncs are context-sensitive and can only be called from
++	 * specific SCX ops. They are grouped into BTF sets accordingly.
++	 * Unfortunately, BPF currently doesn't have a way of enforcing such
++	 * restrictions. Eventually, the verifier should be able to enforce
++	 * them. For now, register them the same and make each kfunc explicitly
++	 * check using scx_kf_allowed().
++	 */
++	if ((ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
++					     &scx_kfunc_set_select_cpu)) ||
++	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
++					     &scx_kfunc_set_enqueue_dispatch)) ||
++	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
++					     &scx_kfunc_set_dispatch)) ||
++	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
++					     &scx_kfunc_set_cpu_release)) ||
++	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
++					     &scx_kfunc_set_unlocked)) ||
++	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL,
++					     &scx_kfunc_set_unlocked)) ||
++	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
++					     &scx_kfunc_set_any)) ||
++	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
++					     &scx_kfunc_set_any)) ||
++	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL,
++					     &scx_kfunc_set_any))) {
++		pr_err("sched_ext: Failed to register kfunc sets (%d)\n", ret);
++		return ret;
++	}
++
++	ret = register_bpf_struct_ops(&bpf_sched_ext_ops, sched_ext_ops);
++	if (ret) {
++		pr_err("sched_ext: Failed to register struct_ops (%d)\n", ret);
++		return ret;
++	}
++
++	ret = register_pm_notifier(&scx_pm_notifier);
++	if (ret) {
++		pr_err("sched_ext: Failed to register PM notifier (%d)\n", ret);
++		return ret;
++	}
++
++	scx_kset = kset_create_and_add("sched_ext", &scx_uevent_ops, kernel_kobj);
++	if (!scx_kset) {
++		pr_err("sched_ext: Failed to create /sys/kernel/sched_ext\n");
++		return -ENOMEM;
++	}
++
++	ret = sysfs_create_group(&scx_kset->kobj, &scx_global_attr_group);
++	if (ret < 0) {
++		pr_err("sched_ext: Failed to add global attributes\n");
++		return ret;
++	}
++
++	return 0;
++}
++__initcall(scx_init);
+diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h
+new file mode 100644
+index 000000000000..246019519231
+--- /dev/null
++++ b/kernel/sched/ext.h
+@@ -0,0 +1,91 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst
++ *
++ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
++ */
++#ifdef CONFIG_SCHED_CLASS_EXT
++
++void scx_tick(struct rq *rq);
++void init_scx_entity(struct sched_ext_entity *scx);
++void scx_pre_fork(struct task_struct *p);
++int scx_fork(struct task_struct *p);
++void scx_post_fork(struct task_struct *p);
++void scx_cancel_fork(struct task_struct *p);
++bool scx_can_stop_tick(struct rq *rq);
++void scx_rq_activate(struct rq *rq);
++void scx_rq_deactivate(struct rq *rq);
++int scx_check_setscheduler(struct task_struct *p, int policy);
++bool task_should_scx(struct task_struct *p);
++void init_sched_ext_class(void);
++
++static inline u32 scx_cpuperf_target(s32 cpu)
++{
++	if (scx_enabled())
++		return cpu_rq(cpu)->scx.cpuperf_target;
++	else
++		return 0;
++}
++
++static inline bool task_on_scx(const struct task_struct *p)
++{
++	return scx_enabled() && p->sched_class == &ext_sched_class;
++}
++
++#ifdef CONFIG_SCHED_CORE
++bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
++		   bool in_fi);
++#endif
++
++#else	/* CONFIG_SCHED_CLASS_EXT */
++
++static inline void scx_tick(struct rq *rq) {}
++static inline void scx_pre_fork(struct task_struct *p) {}
++static inline int scx_fork(struct task_struct *p) { return 0; }
++static inline void scx_post_fork(struct task_struct *p) {}
++static inline void scx_cancel_fork(struct task_struct *p) {}
++static inline u32 scx_cpuperf_target(s32 cpu) { return 0; }
++static inline bool scx_can_stop_tick(struct rq *rq) { return true; }
++static inline void scx_rq_activate(struct rq *rq) {}
++static inline void scx_rq_deactivate(struct rq *rq) {}
++static inline int scx_check_setscheduler(struct task_struct *p, int policy) { return 0; }
++static inline bool task_on_scx(const struct task_struct *p) { return false; }
++static inline void init_sched_ext_class(void) {}
++
++#endif	/* CONFIG_SCHED_CLASS_EXT */
++
++#if defined(CONFIG_SCHED_CLASS_EXT) && defined(CONFIG_SMP)
++void __scx_update_idle(struct rq *rq, bool idle);
++
++static inline void scx_update_idle(struct rq *rq, bool idle)
++{
++	if (scx_enabled())
++		__scx_update_idle(rq, idle);
++}
++#else
++static inline void scx_update_idle(struct rq *rq, bool idle) {}
++#endif
++
++#ifdef CONFIG_CGROUP_SCHED
++#ifdef CONFIG_EXT_GROUP_SCHED
++int scx_tg_online(struct task_group *tg);
++void scx_tg_offline(struct task_group *tg);
++int scx_cgroup_can_attach(struct cgroup_taskset *tset);
++void scx_move_task(struct task_struct *p);
++void scx_cgroup_finish_attach(void);
++void scx_cgroup_cancel_attach(struct cgroup_taskset *tset);
++void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight);
++void scx_group_set_idle(struct task_group *tg, bool idle);
++#else	/* CONFIG_EXT_GROUP_SCHED */
++static inline int scx_tg_online(struct task_group *tg) { return 0; }
++static inline void scx_tg_offline(struct task_group *tg) {}
++static inline int scx_cgroup_can_attach(struct cgroup_taskset *tset) { return 0; }
++static inline void scx_move_task(struct task_struct *p) {}
++static inline void scx_cgroup_finish_attach(void) {}
++static inline void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) {}
++static inline void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight) {}
++static inline void scx_group_set_idle(struct task_group *tg, bool idle) {}
++#endif	/* CONFIG_EXT_GROUP_SCHED */
++#endif	/* CONFIG_CGROUP_SCHED */
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 91b242e47db7..a36e37a674e8 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -3857,7 +3857,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
+ 	}
+ }
+ 
+-void reweight_task(struct task_struct *p, const struct load_weight *lw)
++static void reweight_task_fair(struct rq *rq, struct task_struct *p,
++			       const struct load_weight *lw)
+ {
+ 	struct sched_entity *se = &p->se;
+ 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+@@ -9365,29 +9366,18 @@ static inline void update_blocked_load_status(struct rq *rq, bool has_blocked) {
+ 
+ static bool __update_blocked_others(struct rq *rq, bool *done)
+ {
+-	const struct sched_class *curr_class;
+-	u64 now = rq_clock_pelt(rq);
+-	unsigned long hw_pressure;
+-	bool decayed;
++	bool updated;
+ 
+ 	/*
+ 	 * update_load_avg() can call cpufreq_update_util(). Make sure that RT,
+ 	 * DL and IRQ signals have been updated before updating CFS.
+ 	 */
+-	curr_class = rq->curr->sched_class;
+-
+-	hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
+-
+-	/* hw_pressure doesn't care about invariance */
+-	decayed = update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) |
+-		  update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) |
+-		  update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure) |
+-		  update_irq_load_avg(rq, 0);
++	updated = update_other_load_avgs(rq);
+ 
+ 	if (others_have_blocked(rq))
+ 		*done = false;
+ 
+-	return decayed;
++	return updated;
+ }
+ 
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+@@ -13233,6 +13223,7 @@ DEFINE_SCHED_CLASS(fair) = {
+ 	.task_tick		= task_tick_fair,
+ 	.task_fork		= task_fork_fair,
+ 
++	.reweight_task		= reweight_task_fair,
+ 	.prio_changed		= prio_changed_fair,
+ 	.switched_from		= switched_from_fair,
+ 	.switched_to		= switched_to_fair,
+diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
+index 6e78d071beb5..c7a218123b7a 100644
+--- a/kernel/sched/idle.c
++++ b/kernel/sched/idle.c
+@@ -452,11 +452,13 @@ static void wakeup_preempt_idle(struct rq *rq, struct task_struct *p, int flags)
+ 
+ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
+ {
++	scx_update_idle(rq, false);
+ }
+ 
+ static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first)
+ {
+ 	update_idle_core(rq);
++	scx_update_idle(rq, true);
+ 	schedstat_inc(rq->sched_goidle);
+ }
+ 
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 432b43aa091c..48d893de632b 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -192,9 +192,18 @@ static inline int idle_policy(int policy)
+ 	return policy == SCHED_IDLE;
+ }
+ 
++static inline int normal_policy(int policy)
++{
++#ifdef CONFIG_SCHED_CLASS_EXT
++	if (policy == SCHED_EXT)
++		return true;
++#endif
++	return policy == SCHED_NORMAL;
++}
++
+ static inline int fair_policy(int policy)
+ {
+-	return policy == SCHED_NORMAL || policy == SCHED_BATCH;
++	return normal_policy(policy) || policy == SCHED_BATCH;
+ }
+ 
+ static inline int rt_policy(int policy)
+@@ -244,6 +253,24 @@ static inline void update_avg(u64 *avg, u64 sample)
+ #define shr_bound(val, shift)							\
+ 	(val >> min_t(typeof(shift), shift, BITS_PER_TYPE(typeof(val)) - 1))
+ 
++/*
++ * cgroup weight knobs should use the common MIN, DFL and MAX values which are
++ * 1, 100 and 10000 respectively. While it loses a bit of range on both ends, it
++ * maps pretty well onto the shares value used by scheduler and the round-trip
++ * conversions preserve the original value over the entire range.
++ */
++static inline unsigned long sched_weight_from_cgroup(unsigned long cgrp_weight)
++{
++	return DIV_ROUND_CLOSEST_ULL(cgrp_weight * 1024, CGROUP_WEIGHT_DFL);
++}
++
++static inline unsigned long sched_weight_to_cgroup(unsigned long weight)
++{
++	return clamp_t(unsigned long,
++		       DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024),
++		       CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
++}
++
+ /*
+  * !! For sched_setattr_nocheck() (kernel) only !!
+  *
+@@ -397,16 +424,17 @@ struct cfs_bandwidth {
+ struct task_group {
+ 	struct cgroup_subsys_state css;
+ 
++#ifdef CONFIG_GROUP_SCHED_WEIGHT
++	/* A positive value indicates that this is a SCHED_IDLE group. */
++	int			idle;
++#endif
++
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ 	/* schedulable entities of this group on each CPU */
+ 	struct sched_entity	**se;
+ 	/* runqueue "owned" by this group on each CPU */
+ 	struct cfs_rq		**cfs_rq;
+ 	unsigned long		shares;
+-
+-	/* A positive value indicates that this is a SCHED_IDLE group. */
+-	int			idle;
+-
+ #ifdef	CONFIG_SMP
+ 	/*
+ 	 * load_avg can be heavily contended at clock tick time, so put
+@@ -424,6 +452,11 @@ struct task_group {
+ 	struct rt_bandwidth	rt_bandwidth;
+ #endif
+ 
++#ifdef CONFIG_EXT_GROUP_SCHED
++	u32			scx_flags;	/* SCX_TG_* */
++	u32			scx_weight;
++#endif
++
+ 	struct rcu_head		rcu;
+ 	struct list_head	list;
+ 
+@@ -448,7 +481,7 @@ struct task_group {
+ 
+ };
+ 
+-#ifdef CONFIG_FAIR_GROUP_SCHED
++#ifdef CONFIG_GROUP_SCHED_WEIGHT
+ #define ROOT_TASK_GROUP_LOAD	NICE_0_LOAD
+ 
+ /*
+@@ -479,6 +512,11 @@ static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
+ 	return walk_tg_tree_from(&root_task_group, down, up, data);
+ }
+ 
++static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
++{
++	return css ? container_of(css, struct task_group, css) : NULL;
++}
++
+ extern int tg_nop(struct task_group *tg, void *data);
+ 
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+@@ -535,6 +573,9 @@ extern void set_task_rq_fair(struct sched_entity *se,
+ static inline void set_task_rq_fair(struct sched_entity *se,
+ 			     struct cfs_rq *prev, struct cfs_rq *next) { }
+ #endif /* CONFIG_SMP */
++#else /* !CONFIG_FAIR_GROUP_SCHED */
++static inline int sched_group_set_shares(struct task_group *tg, unsigned long shares) { return 0; }
++static inline int sched_group_set_idle(struct task_group *tg, long idle) { return 0; }
+ #endif /* CONFIG_FAIR_GROUP_SCHED */
+ 
+ #else /* CONFIG_CGROUP_SCHED */
+@@ -588,6 +629,11 @@ do {									\
+ # define u64_u32_load(var)		u64_u32_load_copy(var, var##_copy)
+ # define u64_u32_store(var, val)	u64_u32_store_copy(var, var##_copy, val)
+ 
++struct balance_callback {
++	struct balance_callback *next;
++	void (*func)(struct rq *rq);
++};
++
+ /* CFS-related fields in a runqueue */
+ struct cfs_rq {
+ 	struct load_weight	load;
+@@ -696,6 +742,43 @@ struct cfs_rq {
+ #endif /* CONFIG_FAIR_GROUP_SCHED */
+ };
+ 
++#ifdef CONFIG_SCHED_CLASS_EXT
++/* scx_rq->flags, protected by the rq lock */
++enum scx_rq_flags {
++	/*
++	 * A hotplugged CPU starts scheduling before rq_online_scx(). Track
++	 * ops.cpu_on/offline() state so that ops.enqueue/dispatch() are called
++	 * only while the BPF scheduler considers the CPU to be online.
++	 */
++	SCX_RQ_ONLINE		= 1 << 0,
++	SCX_RQ_CAN_STOP_TICK	= 1 << 1,
++	SCX_RQ_BYPASSING	= 1 << 3,
++
++	SCX_RQ_IN_WAKEUP	= 1 << 16,
++	SCX_RQ_IN_BALANCE	= 1 << 17,
++};
++
++struct scx_rq {
++	struct scx_dispatch_q	local_dsq;
++	struct list_head	runnable_list;		/* runnable tasks on this rq */
++	struct list_head	ddsp_deferred_locals;	/* deferred ddsps from enq */
++	unsigned long		ops_qseq;
++	u64			extra_enq_flags;	/* see move_task_to_local_dsq() */
++	u32			nr_running;
++	u32			flags;
++	u32			cpuperf_target;		/* [0, SCHED_CAPACITY_SCALE] */
++	bool			cpu_released;
++	cpumask_var_t		cpus_to_kick;
++	cpumask_var_t		cpus_to_kick_if_idle;
++	cpumask_var_t		cpus_to_preempt;
++	cpumask_var_t		cpus_to_wait;
++	unsigned long		pnt_seq;
++	struct balance_callback	deferred_bal_cb;
++	struct irq_work		deferred_irq_work;
++	struct irq_work		kick_cpus_irq_work;
++};
++#endif /* CONFIG_SCHED_CLASS_EXT */
++
+ static inline int rt_bandwidth_enabled(void)
+ {
+ 	return sysctl_sched_rt_runtime >= 0;
+@@ -996,11 +1079,6 @@ struct uclamp_rq {
+ DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
+ #endif /* CONFIG_UCLAMP_TASK */
+ 
+-struct balance_callback {
+-	struct balance_callback *next;
+-	void (*func)(struct rq *rq);
+-};
+-
+ /*
+  * This is the main, per-CPU runqueue data structure.
+  *
+@@ -1043,6 +1121,9 @@ struct rq {
+ 	struct cfs_rq		cfs;
+ 	struct rt_rq		rt;
+ 	struct dl_rq		dl;
++#ifdef CONFIG_SCHED_CLASS_EXT
++	struct scx_rq		scx;
++#endif
+ 
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ 	/* list of leaf cfs_rq on this CPU: */
+@@ -2291,13 +2372,15 @@ struct sched_class {
+ 
+ 	void (*wakeup_preempt)(struct rq *rq, struct task_struct *p, int flags);
+ 
++	int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
+ 	struct task_struct *(*pick_next_task)(struct rq *rq);
+ 
+ 	void (*put_prev_task)(struct rq *rq, struct task_struct *p);
+ 	void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first);
+ 
++	void (*switch_class)(struct rq *rq, struct task_struct *next);
++
+ #ifdef CONFIG_SMP
+-	int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
+ 	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int flags);
+ 
+ 	struct task_struct * (*pick_task)(struct rq *rq);
+@@ -2323,8 +2406,11 @@ struct sched_class {
+ 	 * cannot assume the switched_from/switched_to pair is serialized by
+ 	 * rq->lock. They are however serialized by p->pi_lock.
+ 	 */
++	void (*switching_to) (struct rq *this_rq, struct task_struct *task);
+ 	void (*switched_from)(struct rq *this_rq, struct task_struct *task);
+ 	void (*switched_to)  (struct rq *this_rq, struct task_struct *task);
++	void (*reweight_task)(struct rq *this_rq, struct task_struct *task,
++			      const struct load_weight *lw);
+ 	void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
+ 			      int oldprio);
+ 
+@@ -2373,19 +2459,54 @@ const struct sched_class name##_sched_class \
+ extern struct sched_class __sched_class_highest[];
+ extern struct sched_class __sched_class_lowest[];
+ 
++extern const struct sched_class stop_sched_class;
++extern const struct sched_class dl_sched_class;
++extern const struct sched_class rt_sched_class;
++extern const struct sched_class fair_sched_class;
++extern const struct sched_class idle_sched_class;
++
++#ifdef CONFIG_SCHED_CLASS_EXT
++extern const struct sched_class ext_sched_class;
++
++DECLARE_STATIC_KEY_FALSE(__scx_ops_enabled);	/* SCX BPF scheduler loaded */
++DECLARE_STATIC_KEY_FALSE(__scx_switched_all);	/* all fair class tasks on SCX */
++
++#define scx_enabled()		static_branch_unlikely(&__scx_ops_enabled)
++#define scx_switched_all()	static_branch_unlikely(&__scx_switched_all)
++#else /* !CONFIG_SCHED_CLASS_EXT */
++#define scx_enabled()		false
++#define scx_switched_all()	false
++#endif /* !CONFIG_SCHED_CLASS_EXT */
++
++/*
++ * Iterate only active classes. SCX can take over all fair tasks or be
++ * completely disabled. If the former, skip fair. If the latter, skip SCX.
++ */
++static inline const struct sched_class *next_active_class(const struct sched_class *class)
++{
++	class++;
++#ifdef CONFIG_SCHED_CLASS_EXT
++	if (scx_switched_all() && class == &fair_sched_class)
++		class++;
++	if (!scx_enabled() && class == &ext_sched_class)
++		class++;
++#endif
++	return class;
++}
++
+ #define for_class_range(class, _from, _to) \
+ 	for (class = (_from); class < (_to); class++)
+ 
+ #define for_each_class(class) \
+ 	for_class_range(class, __sched_class_highest, __sched_class_lowest)
+ 
+-#define sched_class_above(_a, _b)	((_a) < (_b))
++#define for_active_class_range(class, _from, _to)				\
++	for (class = (_from); class != (_to); class = next_active_class(class))
+ 
+-extern const struct sched_class stop_sched_class;
+-extern const struct sched_class dl_sched_class;
+-extern const struct sched_class rt_sched_class;
+-extern const struct sched_class fair_sched_class;
+-extern const struct sched_class idle_sched_class;
++#define for_each_active_class(class)						\
++	for_active_class_range(class, __sched_class_highest, __sched_class_lowest)
++
++#define sched_class_above(_a, _b)	((_a) < (_b))
+ 
+ static inline bool sched_stop_runnable(struct rq *rq)
+ {
+@@ -2424,6 +2545,19 @@ extern void sched_balance_trigger(struct rq *rq);
+ extern int __set_cpus_allowed_ptr(struct task_struct *p, struct affinity_context *ctx);
+ extern void set_cpus_allowed_common(struct task_struct *p, struct affinity_context *ctx);
+ 
++static inline bool task_allowed_on_cpu(struct task_struct *p, int cpu)
++{
++	/* When not in the task's cpumask, no point in looking further. */
++	if (!cpumask_test_cpu(cpu, p->cpus_ptr))
++		return false;
++
++	/* Can @cpu run a user thread? */
++	if (!(p->flags & PF_KTHREAD) && !task_cpu_possible(cpu, p))
++		return false;
++
++	return true;
++}
++
+ static inline cpumask_t *alloc_user_cpus_ptr(int node)
+ {
+ 	/*
+@@ -2457,6 +2591,11 @@ extern int push_cpu_stop(void *arg);
+ 
+ #else /* !CONFIG_SMP: */
+ 
++static inline bool task_allowed_on_cpu(struct task_struct *p, int cpu)
++{
++	return true;
++}
++
+ static inline int __set_cpus_allowed_ptr(struct task_struct *p,
+ 					 struct affinity_context *ctx)
+ {
+@@ -2510,8 +2649,6 @@ extern void init_sched_dl_class(void);
+ extern void init_sched_rt_class(void);
+ extern void init_sched_fair_class(void);
+ 
+-extern void reweight_task(struct task_struct *p, const struct load_weight *lw);
+-
+ extern void resched_curr(struct rq *rq);
+ extern void resched_cpu(int cpu);
+ 
+@@ -3056,6 +3193,8 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) { }
+ 
+ #ifdef CONFIG_SMP
+ 
++bool update_other_load_avgs(struct rq *rq);
++
+ unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
+ 				 unsigned long *min,
+ 				 unsigned long *max);
+@@ -3099,6 +3238,8 @@ static inline unsigned long cpu_util_rt(struct rq *rq)
+ 	return READ_ONCE(rq->avg_rt.util_avg);
+ }
+ 
++#else /* !CONFIG_SMP */
++static inline bool update_other_load_avgs(struct rq *rq) { return false; }
+ #endif /* CONFIG_SMP */
+ 
+ #ifdef CONFIG_UCLAMP_TASK
+@@ -3609,6 +3750,8 @@ extern void set_load_weight(struct task_struct *p, bool update_load);
+ extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
+ extern void dequeue_task(struct rq *rq, struct task_struct *p, int flags);
+ 
++extern void check_class_changing(struct rq *rq, struct task_struct *p,
++				 const struct sched_class *prev_class);
+ extern void check_class_changed(struct rq *rq, struct task_struct *p,
+ 				const struct sched_class *prev_class,
+ 				int oldprio);
+@@ -3629,4 +3772,24 @@ static inline void balance_callbacks(struct rq *rq, struct balance_callback *hea
+ 
+ #endif
+ 
++#ifdef CONFIG_SCHED_CLASS_EXT
++/*
++ * Used by SCX in the enable/disable paths to move tasks between sched_classes
++ * and establish invariants.
++ */
++struct sched_enq_and_set_ctx {
++	struct task_struct	*p;
++	int			queue_flags;
++	bool			queued;
++	bool			running;
++};
++
++void sched_deq_and_put_task(struct task_struct *p, int queue_flags,
++			    struct sched_enq_and_set_ctx *ctx);
++void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx);
++
++#endif /* CONFIG_SCHED_CLASS_EXT */
++
++#include "ext.h"
++
+ #endif /* _KERNEL_SCHED_SCHED_H */
+diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
+index ae1b42775ef9..4fa59c9f69ac 100644
+--- a/kernel/sched/syscalls.c
++++ b/kernel/sched/syscalls.c
+@@ -259,6 +259,25 @@ int sched_core_idle_cpu(int cpu)
+ #endif
+ 
+ #ifdef CONFIG_SMP
++/*
++ * Load avg and utilization metrics need to be updated periodically and before
++ * consumption. This function updates the metrics for all subsystems except for
++ * the fair class. @rq must be locked and have its clock updated.
++ */
++bool update_other_load_avgs(struct rq *rq)
++{
++	u64 now = rq_clock_pelt(rq);
++	const struct sched_class *curr_class = rq->curr->sched_class;
++	unsigned long hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
++
++	lockdep_assert_rq_held(rq);
++
++	return update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) |
++		update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) |
++		update_hw_load_avg(now, rq, hw_pressure) |
++		update_irq_load_avg(rq, 0);
++}
++
+ /*
+  * This function computes an effective utilization for the given CPU, to be
+  * used for frequency selection given the linear relation: f = u * f_max.
+@@ -695,6 +714,10 @@ int __sched_setscheduler(struct task_struct *p,
+ 		goto unlock;
+ 	}
+ 
++	retval = scx_check_setscheduler(p, policy);
++	if (retval)
++		goto unlock;
++
+ 	/*
+ 	 * If not changing anything there's no need to proceed further,
+ 	 * but store a possible modification of reset_on_fork.
+@@ -797,6 +820,7 @@ int __sched_setscheduler(struct task_struct *p,
+ 		__setscheduler_prio(p, newprio);
+ 	}
+ 	__setscheduler_uclamp(p, attr);
++	check_class_changing(rq, p, prev_class);
+ 
+ 	if (queued) {
+ 		/*
+@@ -1602,6 +1626,7 @@ SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
+ 	case SCHED_NORMAL:
+ 	case SCHED_BATCH:
+ 	case SCHED_IDLE:
++	case SCHED_EXT:
+ 		ret = 0;
+ 		break;
+ 	}
+@@ -1629,6 +1654,7 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
+ 	case SCHED_NORMAL:
+ 	case SCHED_BATCH:
+ 	case SCHED_IDLE:
++	case SCHED_EXT:
+ 		ret = 0;
+ 	}
+ 	return ret;
+diff --git a/lib/dump_stack.c b/lib/dump_stack.c
+index 1a996fbbf50a..388da1aea14a 100644
+--- a/lib/dump_stack.c
++++ b/lib/dump_stack.c
+@@ -73,6 +73,7 @@ void dump_stack_print_info(const char *log_lvl)
+ 
+ 	print_worker_info(log_lvl, current);
+ 	print_stop_info(log_lvl, current);
++	print_scx_info(log_lvl, current);
+ }
+ 
+ /**
+diff --git a/tools/Makefile b/tools/Makefile
+index 276f5d0d53a4..278d24723b74 100644
+--- a/tools/Makefile
++++ b/tools/Makefile
+@@ -28,6 +28,7 @@ help:
+ 	@echo '  pci                    - PCI tools'
+ 	@echo '  perf                   - Linux performance measurement and analysis tool'
+ 	@echo '  selftests              - various kernel selftests'
++	@echo '  sched_ext              - sched_ext example schedulers'
+ 	@echo '  bootconfig             - boot config tool'
+ 	@echo '  spi                    - spi tools'
+ 	@echo '  tmon                   - thermal monitoring and tuning tool'
+@@ -91,6 +92,9 @@ perf: FORCE
+ 	$(Q)mkdir -p $(PERF_O) .
+ 	$(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir=
+ 
++sched_ext: FORCE
++	$(call descend,sched_ext)
++
+ selftests: FORCE
+ 	$(call descend,testing/$@)
+ 
+@@ -184,6 +188,9 @@ perf_clean:
+ 	$(Q)mkdir -p $(PERF_O) .
+ 	$(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir= clean
+ 
++sched_ext_clean:
++	$(call descend,sched_ext,clean)
++
+ selftests_clean:
+ 	$(call descend,testing/$(@:_clean=),clean)
+ 
+@@ -213,6 +220,7 @@ clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \
+ 		mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
+ 		freefall_clean build_clean libbpf_clean libsubcmd_clean \
+ 		gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \
+-		intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean
++		intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean \
++		sched_ext_clean
+ 
+ .PHONY: FORCE
+diff --git a/tools/sched_ext/.gitignore b/tools/sched_ext/.gitignore
+new file mode 100644
+index 000000000000..d6264fe1c8cd
+--- /dev/null
++++ b/tools/sched_ext/.gitignore
+@@ -0,0 +1,2 @@
++tools/
++build/
+diff --git a/tools/sched_ext/Makefile b/tools/sched_ext/Makefile
+new file mode 100644
+index 000000000000..ca3815e572d8
+--- /dev/null
++++ b/tools/sched_ext/Makefile
+@@ -0,0 +1,246 @@
++# SPDX-License-Identifier: GPL-2.0
++# Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
++include ../build/Build.include
++include ../scripts/Makefile.arch
++include ../scripts/Makefile.include
++
++all: all_targets
++
++ifneq ($(LLVM),)
++ifneq ($(filter %/,$(LLVM)),)
++LLVM_PREFIX := $(LLVM)
++else ifneq ($(filter -%,$(LLVM)),)
++LLVM_SUFFIX := $(LLVM)
++endif
++
++CLANG_TARGET_FLAGS_arm          := arm-linux-gnueabi
++CLANG_TARGET_FLAGS_arm64        := aarch64-linux-gnu
++CLANG_TARGET_FLAGS_hexagon      := hexagon-linux-musl
++CLANG_TARGET_FLAGS_m68k         := m68k-linux-gnu
++CLANG_TARGET_FLAGS_mips         := mipsel-linux-gnu
++CLANG_TARGET_FLAGS_powerpc      := powerpc64le-linux-gnu
++CLANG_TARGET_FLAGS_riscv        := riscv64-linux-gnu
++CLANG_TARGET_FLAGS_s390         := s390x-linux-gnu
++CLANG_TARGET_FLAGS_x86          := x86_64-linux-gnu
++CLANG_TARGET_FLAGS              := $(CLANG_TARGET_FLAGS_$(ARCH))
++
++ifeq ($(CROSS_COMPILE),)
++ifeq ($(CLANG_TARGET_FLAGS),)
++$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk)
++else
++CLANG_FLAGS     += --target=$(CLANG_TARGET_FLAGS)
++endif # CLANG_TARGET_FLAGS
++else
++CLANG_FLAGS     += --target=$(notdir $(CROSS_COMPILE:%-=%))
++endif # CROSS_COMPILE
++
++CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) $(CLANG_FLAGS) -fintegrated-as
++else
++CC := $(CROSS_COMPILE)gcc
++endif # LLVM
++
++CURDIR := $(abspath .)
++TOOLSDIR := $(abspath ..)
++LIBDIR := $(TOOLSDIR)/lib
++BPFDIR := $(LIBDIR)/bpf
++TOOLSINCDIR := $(TOOLSDIR)/include
++BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool
++APIDIR := $(TOOLSINCDIR)/uapi
++GENDIR := $(abspath ../../include/generated)
++GENHDR := $(GENDIR)/autoconf.h
++
++ifeq ($(O),)
++OUTPUT_DIR := $(CURDIR)/build
++else
++OUTPUT_DIR := $(O)/build
++endif # O
++OBJ_DIR := $(OUTPUT_DIR)/obj
++INCLUDE_DIR := $(OUTPUT_DIR)/include
++BPFOBJ_DIR := $(OBJ_DIR)/libbpf
++SCXOBJ_DIR := $(OBJ_DIR)/sched_ext
++BINDIR := $(OUTPUT_DIR)/bin
++BPFOBJ := $(BPFOBJ_DIR)/libbpf.a
++ifneq ($(CROSS_COMPILE),)
++HOST_BUILD_DIR		:= $(OBJ_DIR)/host
++HOST_OUTPUT_DIR	:= host-tools
++HOST_INCLUDE_DIR	:= $(HOST_OUTPUT_DIR)/include
++else
++HOST_BUILD_DIR		:= $(OBJ_DIR)
++HOST_OUTPUT_DIR	:= $(OUTPUT_DIR)
++HOST_INCLUDE_DIR	:= $(INCLUDE_DIR)
++endif
++HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a
++RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids
++DEFAULT_BPFTOOL := $(HOST_OUTPUT_DIR)/sbin/bpftool
++
++VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)					\
++		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)		\
++		     ../../vmlinux						\
++		     /sys/kernel/btf/vmlinux					\
++		     /boot/vmlinux-$(shell uname -r)
++VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
++ifeq ($(VMLINUX_BTF),)
++$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
++endif
++
++BPFTOOL ?= $(DEFAULT_BPFTOOL)
++
++ifneq ($(wildcard $(GENHDR)),)
++  GENFLAGS := -DHAVE_GENHDR
++endif
++
++CFLAGS += -g -O2 -rdynamic -pthread -Wall -Werror $(GENFLAGS)			\
++	  -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)				\
++	  -I$(TOOLSINCDIR) -I$(APIDIR) -I$(CURDIR)/include
++
++# Silence some warnings when compiled with clang
++ifneq ($(LLVM),)
++CFLAGS += -Wno-unused-command-line-argument
++endif
++
++LDFLAGS = -lelf -lz -lpthread
++
++IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null |				\
++			grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
++
++# Get Clang's default includes on this system, as opposed to those seen by
++# '-target bpf'. This fixes "missing" files on some architectures/distros,
++# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
++#
++# Use '-idirafter': Don't interfere with include mechanics except where the
++# build would have failed anyways.
++define get_sys_includes
++$(shell $(1) -v -E - </dev/null 2>&1 \
++	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
++$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
++endef
++
++BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH)					\
++	     $(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)		\
++	     -I$(CURDIR)/include -I$(CURDIR)/include/bpf-compat			\
++	     -I$(INCLUDE_DIR) -I$(APIDIR)					\
++	     -I../../include							\
++	     $(call get_sys_includes,$(CLANG))					\
++	     -Wall -Wno-compare-distinct-pointer-types				\
++	     -O2 -mcpu=v3
++
++# sort removes libbpf duplicates when not cross-building
++MAKE_DIRS := $(sort $(OBJ_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf			\
++	       $(HOST_BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/resolve_btfids	\
++	       $(INCLUDE_DIR) $(SCXOBJ_DIR) $(BINDIR))
++
++$(MAKE_DIRS):
++	$(call msg,MKDIR,,$@)
++	$(Q)mkdir -p $@
++
++$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)			\
++	   $(APIDIR)/linux/bpf.h						\
++	   | $(OBJ_DIR)/libbpf
++	$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(OBJ_DIR)/libbpf/	\
++		    EXTRA_CFLAGS='-g -O0 -fPIC'					\
++		    DESTDIR=$(OUTPUT_DIR) prefix= all install_headers
++
++$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)	\
++		    $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
++	$(Q)$(MAKE) $(submake_extras)  -C $(BPFTOOLDIR)				\
++		    ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD)		\
++		    EXTRA_CFLAGS='-g -O0'					\
++		    OUTPUT=$(HOST_BUILD_DIR)/bpftool/				\
++		    LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/			\
++		    LIBBPF_DESTDIR=$(HOST_OUTPUT_DIR)/				\
++		    prefix= DESTDIR=$(HOST_OUTPUT_DIR)/ install-bin
++
++$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
++ifeq ($(VMLINUX_H),)
++	$(call msg,GEN,,$@)
++	$(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
++else
++	$(call msg,CP,,$@)
++	$(Q)cp "$(VMLINUX_H)" $@
++endif
++
++$(SCXOBJ_DIR)/%.bpf.o: %.bpf.c $(INCLUDE_DIR)/vmlinux.h include/scx/*.h		\
++		       | $(BPFOBJ) $(SCXOBJ_DIR)
++	$(call msg,CLNG-BPF,,$(notdir $@))
++	$(Q)$(CLANG) $(BPF_CFLAGS) -target bpf -c $< -o $@
++
++$(INCLUDE_DIR)/%.bpf.skel.h: $(SCXOBJ_DIR)/%.bpf.o $(INCLUDE_DIR)/vmlinux.h $(BPFTOOL)
++	$(eval sched=$(notdir $@))
++	$(call msg,GEN-SKEL,,$(sched))
++	$(Q)$(BPFTOOL) gen object $(<:.o=.linked1.o) $<
++	$(Q)$(BPFTOOL) gen object $(<:.o=.linked2.o) $(<:.o=.linked1.o)
++	$(Q)$(BPFTOOL) gen object $(<:.o=.linked3.o) $(<:.o=.linked2.o)
++	$(Q)diff $(<:.o=.linked2.o) $(<:.o=.linked3.o)
++	$(Q)$(BPFTOOL) gen skeleton $(<:.o=.linked3.o) name $(subst .bpf.skel.h,,$(sched)) > $@
++	$(Q)$(BPFTOOL) gen subskeleton $(<:.o=.linked3.o) name $(subst .bpf.skel.h,,$(sched)) > $(@:.skel.h=.subskel.h)
++
++SCX_COMMON_DEPS := include/scx/common.h include/scx/user_exit_info.h | $(BINDIR)
++
++c-sched-targets = scx_simple scx_qmap scx_central scx_flatcg
++
++$(addprefix $(BINDIR)/,$(c-sched-targets)): \
++	$(BINDIR)/%: \
++		$(filter-out %.bpf.c,%.c) \
++		$(INCLUDE_DIR)/%.bpf.skel.h \
++		$(SCX_COMMON_DEPS)
++	$(eval sched=$(notdir $@))
++	$(CC) $(CFLAGS) -c $(sched).c -o $(SCXOBJ_DIR)/$(sched).o
++	$(CC) -o $@ $(SCXOBJ_DIR)/$(sched).o $(HOST_BPFOBJ) $(LDFLAGS)
++
++$(c-sched-targets): %: $(BINDIR)/%
++
++install: all
++	$(Q)mkdir -p $(DESTDIR)/usr/local/bin/
++	$(Q)cp $(BINDIR)/* $(DESTDIR)/usr/local/bin/
++
++clean:
++	rm -rf $(OUTPUT_DIR) $(HOST_OUTPUT_DIR)
++	rm -f *.o *.bpf.o *.bpf.skel.h *.bpf.subskel.h
++	rm -f $(c-sched-targets)
++
++help:
++	@echo   'Building targets'
++	@echo   '================'
++	@echo   ''
++	@echo   '  all		  - Compile all schedulers'
++	@echo   ''
++	@echo   'Alternatively, you may compile individual schedulers:'
++	@echo   ''
++	@printf '  %s\n' $(c-sched-targets)
++	@echo   ''
++	@echo   'For any scheduler build target, you may specify an alternative'
++	@echo   'build output path with the O= environment variable. For example:'
++	@echo   ''
++	@echo   '   O=/tmp/sched_ext make all'
++	@echo   ''
++	@echo   'will compile all schedulers, and emit the build artifacts to'
++	@echo   '/tmp/sched_ext/build.'
++	@echo   ''
++	@echo   ''
++	@echo   'Installing targets'
++	@echo   '=================='
++	@echo   ''
++	@echo   '  install	  - Compile and install all schedulers to /usr/local/bin.'
++	@echo   '		    You may specify the DESTDIR= environment variable'
++	@echo   '		    to indicate a prefix for /usr/local/bin. For example:'
++	@echo   ''
++	@echo   '                     DESTDIR=/tmp/sched_ext make install'
++	@echo   ''
++	@echo   '		    will build the schedulers in CWD/build, and'
++	@echo   '		    install the schedulers to /tmp/sched_ext/usr/local/bin.'
++	@echo   ''
++	@echo   ''
++	@echo   'Cleaning targets'
++	@echo   '================'
++	@echo   ''
++	@echo   '  clean		  - Remove all generated files'
++
++all_targets: $(c-sched-targets)
++
++.PHONY: all all_targets $(c-sched-targets) install clean help
++
++# delete failed targets
++.DELETE_ON_ERROR:
++
++# keep intermediate (.bpf.skel.h, .bpf.o, etc) targets
++.SECONDARY:
+diff --git a/tools/sched_ext/README.md b/tools/sched_ext/README.md
+new file mode 100644
+index 000000000000..16a42e4060f6
+--- /dev/null
++++ b/tools/sched_ext/README.md
+@@ -0,0 +1,270 @@
++SCHED_EXT EXAMPLE SCHEDULERS
++============================
++
++# Introduction
++
++This directory contains a number of example sched_ext schedulers. These
++schedulers are meant to provide examples of different types of schedulers
++that can be built using sched_ext, and illustrate how various features of
++sched_ext can be used.
++
++Some of the examples are performant, production-ready schedulers. That is, for
++the correct workload and with the correct tuning, they may be deployed in a
++production environment with acceptable or possibly even improved performance.
++Others are just examples that in practice, would not provide acceptable
++performance (though they could be improved to get there).
++
++This README will describe these example schedulers, including describing the
++types of workloads or scenarios they're designed to accommodate, and whether or
++not they're production ready. For more details on any of these schedulers,
++please see the header comment in their .bpf.c file.
++
++
++# Compiling the examples
++
++There are a few toolchain dependencies for compiling the example schedulers.
++
++## Toolchain dependencies
++
++1. clang >= 16.0.0
++
++The schedulers are BPF programs, and therefore must be compiled with clang. gcc
++is actively working on adding a BPF backend compiler as well, but is still
++missing some features such as BTF type tags which are necessary for using
++kptrs.
++
++2. pahole >= 1.25
++
++You may need pahole in order to generate BTF from DWARF.
++
++3. rust >= 1.70.0
++
++Rust schedulers use features present in the rust toolchain >= 1.70.0. You
++should be able to use the stable build from rustup, but if that doesn't
++work, try using the rustup nightly build.
++
++There are other requirements as well, such as make, but these are the main /
++non-trivial ones.
++
++## Compiling the kernel
++
++In order to run a sched_ext scheduler, you'll have to run a kernel compiled
++with the patches in this repository, and with a minimum set of necessary
++Kconfig options:
++
++```
++CONFIG_BPF=y
++CONFIG_SCHED_CLASS_EXT=y
++CONFIG_BPF_SYSCALL=y
++CONFIG_BPF_JIT=y
++CONFIG_DEBUG_INFO_BTF=y
++```
++
++It's also recommended that you include the following Kconfig options:
++
++```
++CONFIG_BPF_JIT_ALWAYS_ON=y
++CONFIG_BPF_JIT_DEFAULT_ON=y
++CONFIG_PAHOLE_HAS_SPLIT_BTF=y
++CONFIG_PAHOLE_HAS_BTF_TAG=y
++```
++
++There is a `Kconfig` file in this directory whose contents you can append to
++your local `.config` file, as long as there are no conflicts with any existing
++options in the file.
++
++## Getting a vmlinux.h file
++
++You may notice that most of the example schedulers include a "vmlinux.h" file.
++This is a large, auto-generated header file that contains all of the types
++defined in some vmlinux binary that was compiled with
++[BTF](https://docs.kernel.org/bpf/btf.html) (i.e. with the BTF-related Kconfig
++options specified above).
++
++The header file is created using `bpftool`, by passing it a vmlinux binary
++compiled with BTF as follows:
++
++```bash
++$ bpftool btf dump file /path/to/vmlinux format c > vmlinux.h
++```
++
++`bpftool` analyzes all of the BTF encodings in the binary, and produces a
++header file that can be included by BPF programs to access those types.  For
++example, using vmlinux.h allows a scheduler to access fields defined directly
++in vmlinux as follows:
++
++```c
++#include "vmlinux.h"
++// vmlinux.h is also implicitly included by scx_common.bpf.h.
++#include "scx_common.bpf.h"
++
++/*
++ * vmlinux.h provides definitions for struct task_struct and
++ * struct scx_enable_args.
++ */
++void BPF_STRUCT_OPS(example_enable, struct task_struct *p,
++		    struct scx_enable_args *args)
++{
++	bpf_printk("Task %s enabled in example scheduler", p->comm);
++}
++
++// vmlinux.h provides the definition for struct sched_ext_ops.
++SEC(".struct_ops.link")
++struct sched_ext_ops example_ops = {
++	.enable	= (void *)example_enable,
++	.name	= "example",
++};
++```
++
++The scheduler build system will generate this vmlinux.h file as part of the
++scheduler build pipeline. It looks for a vmlinux file in the following
++dependency order:
++
++1. If the O= environment variable is defined, at `$O/vmlinux`
++2. If the KBUILD_OUTPUT= environment variable is defined, at
++   `$KBUILD_OUTPUT/vmlinux`
++3. At `../../vmlinux` (i.e. at the root of the kernel tree where you're
++   compiling the schedulers)
++4. `/sys/kernel/btf/vmlinux`
++5. `/boot/vmlinux-$(uname -r)`
++
++In other words, if you have compiled a kernel in your local repo, its vmlinux
++file will be used to generate vmlinux.h. Otherwise, it will be the vmlinux of
++the kernel you're currently running on. This means that if you're running on a
++kernel with sched_ext support, you may not need to compile a local kernel at
++all.
++
++### Aside on CO-RE
++
++One of the cooler features of BPF is that it supports
++[CO-RE](https://nakryiko.com/posts/bpf-core-reference-guide/) (Compile Once Run
++Everywhere). This feature allows you to reference fields inside of structs with
++types defined internal to the kernel, and not have to recompile if you load the
++BPF program on a different kernel with the field at a different offset. In our
++example above, we print out a task name with `p->comm`. CO-RE would perform
++relocations for that access when the program is loaded to ensure that it's
++referencing the correct offset for the currently running kernel.
++
++## Compiling the schedulers
++
++Once you have your toolchain setup, and a vmlinux that can be used to generate
++a full vmlinux.h file, you can compile the schedulers using `make`:
++
++```bash
++$ make -j$(nproc)
++```
++
++# Example schedulers
++
++This directory contains the following example schedulers. These schedulers are
++for testing and demonstrating different aspects of sched_ext. While some may be
++useful in limited scenarios, they are not intended to be practical.
++
++For more scheduler implementations, tools and documentation, visit
++https://github.com/sched-ext/scx.
++
++## scx_simple
++
++A simple scheduler that provides an example of a minimal sched_ext scheduler.
++scx_simple can be run in either global weighted vtime mode, or FIFO mode.
++
++Though very simple, in limited scenarios, this scheduler can perform reasonably
++well on single-socket systems with a unified L3 cache.
++
++## scx_qmap
++
++Another simple, yet slightly more complex scheduler that provides an example of
++a basic weighted FIFO queuing policy. It also provides examples of some common
++useful BPF features, such as sleepable per-task storage allocation in the
++`ops.prep_enable()` callback, and using the `BPF_MAP_TYPE_QUEUE` map type to
++enqueue tasks. It also illustrates how core-sched support could be implemented.
++
++## scx_central
++
++A "central" scheduler where scheduling decisions are made from a single CPU.
++This scheduler illustrates how scheduling decisions can be dispatched from a
++single CPU, allowing other cores to run with infinite slices, without timer
++ticks, and without having to incur the overhead of making scheduling decisions.
++
++The approach demonstrated by this scheduler may be useful for any workload that
++benefits from minimizing scheduling overhead and timer ticks. An example of
++where this could be particularly useful is running VMs, where running with
++infinite slices and no timer ticks allows the VM to avoid unnecessary expensive
++vmexits.
++
++## scx_flatcg
++
++A flattened cgroup hierarchy scheduler. This scheduler implements hierarchical
++weight-based cgroup CPU control by flattening the cgroup hierarchy into a single
++layer, by compounding the active weight share at each level. The effect of this
++is a much more performant CPU controller, which does not need to descend down
++cgroup trees in order to properly compute a cgroup's share.
++
++Similar to scx_simple, in limited scenarios, this scheduler can perform
++reasonably well on single-socket systems with a unified L3 cache and show
++significantly lowered hierarchical scheduling overhead.
++
++
++# Troubleshooting
++
++There are a number of common issues that you may run into when building the
++schedulers. We'll go over some of the common ones here.
++
++## Build Failures
++
++### Old version of clang
++
++```
++error: static assertion failed due to requirement 'SCX_DSQ_FLAG_BUILTIN': bpftool generated vmlinux.h is missing high bits for 64bit enums, upgrade clang and pahole
++        _Static_assert(SCX_DSQ_FLAG_BUILTIN,
++                       ^~~~~~~~~~~~~~~~~~~~
++1 error generated.
++```
++
++This means you built the kernel or the schedulers with an older version of
++clang than what's supported (i.e. older than 16.0.0). To remediate this:
++
++1. `which clang` to make sure you're using a sufficiently new version of clang.
++
++2. `make fullclean` in the root path of the repository, and rebuild the kernel
++   and schedulers.
++
++3. Rebuild the kernel, and then your example schedulers.
++
++The schedulers are also cleaned if you invoke `make mrproper` in the root
++directory of the tree.
++
++### Stale kernel build / incomplete vmlinux.h file
++
++As described above, you'll need a `vmlinux.h` file that was generated from a
++vmlinux built with BTF, and with sched_ext support enabled. If you don't,
++you'll see errors such as the following which indicate that a type being
++referenced in a scheduler is unknown:
++
++```
++/path/to/sched_ext/tools/sched_ext/user_exit_info.h:25:23: note: forward declaration of 'struct scx_exit_info'
++
++const struct scx_exit_info *ei)
++
++^
++```
++
++In order to resolve this, please follow the steps above in
++[Getting a vmlinux.h file](#getting-a-vmlinuxh-file) in order to ensure your
++schedulers are using a vmlinux.h file that includes the requisite types.
++
++## Misc
++
++### llvm: [OFF]
++
++You may see the following output when building the schedulers:
++
++```
++Auto-detecting system features:
++...                         clang-bpf-co-re: [ on  ]
++...                                    llvm: [ OFF ]
++...                                  libcap: [ on  ]
++...                                  libbfd: [ on  ]
++```
++
++Seeing `llvm: [ OFF ]` here is not an issue. You can safely ignore it.
+diff --git a/tools/sched_ext/include/bpf-compat/gnu/stubs.h b/tools/sched_ext/include/bpf-compat/gnu/stubs.h
+new file mode 100644
+index 000000000000..ad7d139ce907
+--- /dev/null
++++ b/tools/sched_ext/include/bpf-compat/gnu/stubs.h
+@@ -0,0 +1,11 @@
++/*
++ * Dummy gnu/stubs.h. clang can end up including /usr/include/gnu/stubs.h when
++ * compiling BPF files although its content doesn't play any role. The file in
++ * turn includes stubs-64.h or stubs-32.h depending on whether __x86_64__ is
++ * defined. When compiling a BPF source, __x86_64__ isn't set and thus
++ * stubs-32.h is selected. However, the file is not there if the system doesn't
++ * have 32bit glibc devel package installed leading to a build failure.
++ *
++ * The problem is worked around by making this file available in the include
++ * search paths before the system one when building BPF.
++ */
+diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
+new file mode 100644
+index 000000000000..225f61f9bfca
+--- /dev/null
++++ b/tools/sched_ext/include/scx/common.bpf.h
+@@ -0,0 +1,427 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
++ */
++#ifndef __SCX_COMMON_BPF_H
++#define __SCX_COMMON_BPF_H
++
++#ifdef LSP
++#define __bpf__
++#include "../vmlinux/vmlinux.h"
++#else
++#include "vmlinux.h"
++#endif
++
++#include <bpf/bpf_helpers.h>
++#include <bpf/bpf_tracing.h>
++#include <asm-generic/errno.h>
++#include "user_exit_info.h"
++
++#define PF_WQ_WORKER			0x00000020	/* I'm a workqueue worker */
++#define PF_KTHREAD			0x00200000	/* I am a kernel thread */
++#define PF_EXITING			0x00000004
++#define CLOCK_MONOTONIC			1
++
++/*
++ * Earlier versions of clang/pahole lost upper 32bits in 64bit enums which can
++ * lead to really confusing misbehaviors. Let's trigger a build failure.
++ */
++static inline void ___vmlinux_h_sanity_check___(void)
++{
++	_Static_assert(SCX_DSQ_FLAG_BUILTIN,
++		       "bpftool generated vmlinux.h is missing high bits for 64bit enums, upgrade clang and pahole");
++}
++
++s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym;
++s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym;
++void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym;
++void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym;
++u32 scx_bpf_dispatch_nr_slots(void) __ksym;
++void scx_bpf_dispatch_cancel(void) __ksym;
++bool scx_bpf_consume(u64 dsq_id) __ksym;
++void scx_bpf_dispatch_from_dsq_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym;
++void scx_bpf_dispatch_from_dsq_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym;
++bool scx_bpf_dispatch_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
++bool scx_bpf_dispatch_vtime_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
++u32 scx_bpf_reenqueue_local(void) __ksym;
++void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym;
++s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym;
++void scx_bpf_destroy_dsq(u64 dsq_id) __ksym;
++int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, u64 flags) __ksym __weak;
++struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it) __ksym __weak;
++void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it) __ksym __weak;
++void scx_bpf_exit_bstr(s64 exit_code, char *fmt, unsigned long long *data, u32 data__sz) __ksym __weak;
++void scx_bpf_error_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym;
++void scx_bpf_dump_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym __weak;
++u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym __weak;
++u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym __weak;
++void scx_bpf_cpuperf_set(s32 cpu, u32 perf) __ksym __weak;
++u32 scx_bpf_nr_cpu_ids(void) __ksym __weak;
++const struct cpumask *scx_bpf_get_possible_cpumask(void) __ksym __weak;
++const struct cpumask *scx_bpf_get_online_cpumask(void) __ksym __weak;
++void scx_bpf_put_cpumask(const struct cpumask *cpumask) __ksym __weak;
++const struct cpumask *scx_bpf_get_idle_cpumask(void) __ksym;
++const struct cpumask *scx_bpf_get_idle_smtmask(void) __ksym;
++void scx_bpf_put_idle_cpumask(const struct cpumask *cpumask) __ksym;
++bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) __ksym;
++s32 scx_bpf_pick_idle_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
++s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
++bool scx_bpf_task_running(const struct task_struct *p) __ksym;
++s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
++struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
++struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym;
++
++/*
++ * Use the following as @it__iter when calling
++ * scx_bpf_dispatch[_vtime]_from_dsq() from within bpf_for_each() loops.
++ */
++#define BPF_FOR_EACH_ITER	(&___it)
++
++static inline __attribute__((format(printf, 1, 2)))
++void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
++
++/*
++ * Helper macro for initializing the fmt and variadic argument inputs to both
++ * bstr exit kfuncs. Callers to this function should use ___fmt and ___param to
++ * refer to the initialized list of inputs to the bstr kfunc.
++ */
++#define scx_bpf_bstr_preamble(fmt, args...)					\
++	static char ___fmt[] = fmt;						\
++	/*									\
++	 * Note that ___param[] must have at least one				\
++	 * element to keep the verifier happy.					\
++	 */									\
++	unsigned long long ___param[___bpf_narg(args) ?: 1] = {};		\
++										\
++	_Pragma("GCC diagnostic push")						\
++	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")			\
++	___bpf_fill(___param, args);						\
++	_Pragma("GCC diagnostic pop")						\
++
++/*
++ * scx_bpf_exit() wraps the scx_bpf_exit_bstr() kfunc with variadic arguments
++ * instead of an array of u64. Using this macro will cause the scheduler to
++ * exit cleanly with the specified exit code being passed to user space.
++ */
++#define scx_bpf_exit(code, fmt, args...)					\
++({										\
++	scx_bpf_bstr_preamble(fmt, args)					\
++	scx_bpf_exit_bstr(code, ___fmt, ___param, sizeof(___param));		\
++	___scx_bpf_bstr_format_checker(fmt, ##args);				\
++})
++
++/*
++ * scx_bpf_error() wraps the scx_bpf_error_bstr() kfunc with variadic arguments
++ * instead of an array of u64. Invoking this macro will cause the scheduler to
++ * exit in an erroneous state, with diagnostic information being passed to the
++ * user.
++ */
++#define scx_bpf_error(fmt, args...)						\
++({										\
++	scx_bpf_bstr_preamble(fmt, args)					\
++	scx_bpf_error_bstr(___fmt, ___param, sizeof(___param));			\
++	___scx_bpf_bstr_format_checker(fmt, ##args);				\
++})
++
++/*
++ * scx_bpf_dump() wraps the scx_bpf_dump_bstr() kfunc with variadic arguments
++ * instead of an array of u64. To be used from ops.dump() and friends.
++ */
++#define scx_bpf_dump(fmt, args...)						\
++({										\
++	scx_bpf_bstr_preamble(fmt, args)					\
++	scx_bpf_dump_bstr(___fmt, ___param, sizeof(___param));			\
++	___scx_bpf_bstr_format_checker(fmt, ##args);				\
++})
++
++#define BPF_STRUCT_OPS(name, args...)						\
++SEC("struct_ops/"#name)								\
++BPF_PROG(name, ##args)
++
++#define BPF_STRUCT_OPS_SLEEPABLE(name, args...)					\
++SEC("struct_ops.s/"#name)							\
++BPF_PROG(name, ##args)
++
++/**
++ * RESIZABLE_ARRAY - Generates annotations for an array that may be resized
++ * @elfsec: the data section of the BPF program in which to place the array
++ * @arr: the name of the array
++ *
++ * libbpf has an API for setting map value sizes. Since data sections (i.e.
++ * bss, data, rodata) themselves are maps, a data section can be resized. If
++ * a data section has an array as its last element, the BTF info for that
++ * array will be adjusted so that length of the array is extended to meet the
++ * new length of the data section. This macro annotates an array to have an
++ * element count of one with the assumption that this array can be resized
++ * within the userspace program. It also annotates the section specifier so
++ * this array exists in a custom sub data section which can be resized
++ * independently.
++ *
++ * See RESIZE_ARRAY() for the userspace convenience macro for resizing an
++ * array declared with RESIZABLE_ARRAY().
++ */
++#define RESIZABLE_ARRAY(elfsec, arr) arr[1] SEC("."#elfsec"."#arr)
++
++/**
++ * MEMBER_VPTR - Obtain the verified pointer to a struct or array member
++ * @base: struct or array to index
++ * @member: dereferenced member (e.g. .field, [idx0][idx1], .field[idx0] ...)
++ *
++ * The verifier often gets confused by the instruction sequence the compiler
++ * generates for indexing struct fields or arrays. This macro forces the
++ * compiler to generate a code sequence which first calculates the byte offset,
++ * checks it against the struct or array size and adds that byte offset to
++ * generate the pointer to the member to help the verifier.
++ *
++ * Ideally, we want to abort if the calculated offset is out-of-bounds. However,
++ * BPF currently doesn't support abort, so evaluate to %NULL instead. The caller
++ * must check for %NULL and take appropriate action to appease the verifier. To
++ * avoid confusing the verifier, it's best to check for %NULL and dereference
++ * immediately.
++ *
++ *	vptr = MEMBER_VPTR(my_array, [i][j]);
++ *	if (!vptr)
++ *		return error;
++ *	*vptr = new_value;
++ *
++ * sizeof(@base) should encompass the memory area to be accessed and thus can't
++ * be a pointer to the area. Use `MEMBER_VPTR(*ptr, .member)` instead of
++ * `MEMBER_VPTR(ptr, ->member)`.
++ */
++#define MEMBER_VPTR(base, member) (typeof((base) member) *)			\
++({										\
++	u64 __base = (u64)&(base);						\
++	u64 __addr = (u64)&((base) member) - __base;				\
++	_Static_assert(sizeof(base) >= sizeof((base) member),			\
++		       "@base is smaller than @member, is @base a pointer?");	\
++	asm volatile (								\
++		"if %0 <= %[max] goto +2\n"					\
++		"%0 = 0\n"							\
++		"goto +1\n"							\
++		"%0 += %1\n"							\
++		: "+r"(__addr)							\
++		: "r"(__base),							\
++		  [max]"i"(sizeof(base) - sizeof((base) member)));		\
++	__addr;									\
++})
++
++/**
++ * ARRAY_ELEM_PTR - Obtain the verified pointer to an array element
++ * @arr: array to index into
++ * @i: array index
++ * @n: number of elements in array
++ *
++ * Similar to MEMBER_VPTR() but is intended for use with arrays where the
++ * element count needs to be explicit.
++ * It can be used in cases where a global array is defined with an initial
++ * size but is intended to be resized before loading the BPF program.
++ * Without this version of the macro, MEMBER_VPTR() will use the compile time
++ * size of the array to compute the max, which will result in rejection by
++ * the verifier.
++ */
++#define ARRAY_ELEM_PTR(arr, i, n) (typeof(arr[i]) *)				\
++({										\
++	u64 __base = (u64)arr;							\
++	u64 __addr = (u64)&(arr[i]) - __base;					\
++	asm volatile (								\
++		"if %0 <= %[max] goto +2\n"					\
++		"%0 = 0\n"							\
++		"goto +1\n"							\
++		"%0 += %1\n"							\
++		: "+r"(__addr)							\
++		: "r"(__base),							\
++		  [max]"r"(sizeof(arr[0]) * ((n) - 1)));			\
++	__addr;									\
++})
++
++
++/*
++ * BPF declarations and helpers
++ */
++
++/* list and rbtree */
++#define __contains(name, node) __attribute__((btf_decl_tag("contains:" #name ":" #node)))
++#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
++
++void *bpf_obj_new_impl(__u64 local_type_id, void *meta) __ksym;
++void bpf_obj_drop_impl(void *kptr, void *meta) __ksym;
++
++#define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL))
++#define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL)
++
++void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) __ksym;
++void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) __ksym;
++struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym;
++struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym;
++struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
++				      struct bpf_rb_node *node) __ksym;
++int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
++			bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
++			void *meta, __u64 off) __ksym;
++#define bpf_rbtree_add(head, node, less) bpf_rbtree_add_impl(head, node, less, NULL, 0)
++
++struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym;
++
++void *bpf_refcount_acquire_impl(void *kptr, void *meta) __ksym;
++#define bpf_refcount_acquire(kptr) bpf_refcount_acquire_impl(kptr, NULL)
++
++/* task */
++struct task_struct *bpf_task_from_pid(s32 pid) __ksym;
++struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
++void bpf_task_release(struct task_struct *p) __ksym;
++
++/* cgroup */
++struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym;
++void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
++struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
++
++/* css iteration */
++struct bpf_iter_css;
++struct cgroup_subsys_state;
++extern int bpf_iter_css_new(struct bpf_iter_css *it,
++			    struct cgroup_subsys_state *start,
++			    unsigned int flags) __weak __ksym;
++extern struct cgroup_subsys_state *
++bpf_iter_css_next(struct bpf_iter_css *it) __weak __ksym;
++extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;
++
++/* cpumask */
++struct bpf_cpumask *bpf_cpumask_create(void) __ksym;
++struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym;
++void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;
++u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym;
++u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
++void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
++void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
++bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym;
++bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
++bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
++void bpf_cpumask_setall(struct bpf_cpumask *cpumask) __ksym;
++void bpf_cpumask_clear(struct bpf_cpumask *cpumask) __ksym;
++bool bpf_cpumask_and(struct bpf_cpumask *dst, const struct cpumask *src1,
++		     const struct cpumask *src2) __ksym;
++void bpf_cpumask_or(struct bpf_cpumask *dst, const struct cpumask *src1,
++		    const struct cpumask *src2) __ksym;
++void bpf_cpumask_xor(struct bpf_cpumask *dst, const struct cpumask *src1,
++		     const struct cpumask *src2) __ksym;
++bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) __ksym;
++bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) __ksym;
++bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) __ksym;
++bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym;
++bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym;
++void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym;
++u32 bpf_cpumask_any_distribute(const struct cpumask *cpumask) __ksym;
++u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
++				   const struct cpumask *src2) __ksym;
++u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym;
++
++/*
++ * Access a cpumask in read-only mode (typically to check bits).
++ */
++const struct cpumask *cast_mask(struct bpf_cpumask *mask)
++{
++	return (const struct cpumask *)mask;
++}
++
++/* rcu */
++void bpf_rcu_read_lock(void) __ksym;
++void bpf_rcu_read_unlock(void) __ksym;
++
++
++/*
++ * Other helpers
++ */
++
++/* useful compiler attributes */
++#define likely(x) __builtin_expect(!!(x), 1)
++#define unlikely(x) __builtin_expect(!!(x), 0)
++#define __maybe_unused __attribute__((__unused__))
++
++/*
++ * READ/WRITE_ONCE() are from kernel (include/asm-generic/rwonce.h). They
++ * prevent compiler from caching, redoing or reordering reads or writes.
++ */
++typedef __u8  __attribute__((__may_alias__))  __u8_alias_t;
++typedef __u16 __attribute__((__may_alias__)) __u16_alias_t;
++typedef __u32 __attribute__((__may_alias__)) __u32_alias_t;
++typedef __u64 __attribute__((__may_alias__)) __u64_alias_t;
++
++static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
++{
++	switch (size) {
++	case 1: *(__u8_alias_t  *) res = *(volatile __u8_alias_t  *) p; break;
++	case 2: *(__u16_alias_t *) res = *(volatile __u16_alias_t *) p; break;
++	case 4: *(__u32_alias_t *) res = *(volatile __u32_alias_t *) p; break;
++	case 8: *(__u64_alias_t *) res = *(volatile __u64_alias_t *) p; break;
++	default:
++		barrier();
++		__builtin_memcpy((void *)res, (const void *)p, size);
++		barrier();
++	}
++}
++
++static __always_inline void __write_once_size(volatile void *p, void *res, int size)
++{
++	switch (size) {
++	case 1: *(volatile  __u8_alias_t *) p = *(__u8_alias_t  *) res; break;
++	case 2: *(volatile __u16_alias_t *) p = *(__u16_alias_t *) res; break;
++	case 4: *(volatile __u32_alias_t *) p = *(__u32_alias_t *) res; break;
++	case 8: *(volatile __u64_alias_t *) p = *(__u64_alias_t *) res; break;
++	default:
++		barrier();
++		__builtin_memcpy((void *)p, (const void *)res, size);
++		barrier();
++	}
++}
++
++#define READ_ONCE(x)					\
++({							\
++	union { typeof(x) __val; char __c[1]; } __u =	\
++		{ .__c = { 0 } };			\
++	__read_once_size(&(x), __u.__c, sizeof(x));	\
++	__u.__val;					\
++})
++
++#define WRITE_ONCE(x, val)				\
++({							\
++	union { typeof(x) __val; char __c[1]; } __u =	\
++		{ .__val = (val) }; 			\
++	__write_once_size(&(x), __u.__c, sizeof(x));	\
++	__u.__val;					\
++})
++
++/*
++ * log2_u32 - Compute the base 2 logarithm of a 32-bit exponential value.
++ * @v: The value for which we're computing the base 2 logarithm.
++ */
++static inline u32 log2_u32(u32 v)
++{
++        u32 r;
++        u32 shift;
++
++        r = (v > 0xFFFF) << 4; v >>= r;
++        shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
++        shift = (v > 0xF) << 2; v >>= shift; r |= shift;
++        shift = (v > 0x3) << 1; v >>= shift; r |= shift;
++        r |= (v >> 1);
++        return r;
++}
++
++/*
++ * log2_u64 - Compute the base 2 logarithm of a 64-bit exponential value.
++ * @v: The value for which we're computing the base 2 logarithm.
++ */
++static inline u32 log2_u64(u64 v)
++{
++        u32 hi = v >> 32;
++        if (hi)
++                return log2_u32(hi) + 32 + 1;
++        else
++                return log2_u32(v) + 1;
++}
++
++#include "compat.bpf.h"
++
++#endif	/* __SCX_COMMON_BPF_H */
+diff --git a/tools/sched_ext/include/scx/common.h b/tools/sched_ext/include/scx/common.h
+new file mode 100644
+index 000000000000..5b0f90152152
+--- /dev/null
++++ b/tools/sched_ext/include/scx/common.h
+@@ -0,0 +1,75 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ */
++#ifndef __SCHED_EXT_COMMON_H
++#define __SCHED_EXT_COMMON_H
++
++#ifdef __KERNEL__
++#error "Should not be included by BPF programs"
++#endif
++
++#include <stdarg.h>
++#include <stdio.h>
++#include <stdlib.h>
++#include <stdint.h>
++#include <errno.h>
++
++typedef uint8_t u8;
++typedef uint16_t u16;
++typedef uint32_t u32;
++typedef uint64_t u64;
++typedef int8_t s8;
++typedef int16_t s16;
++typedef int32_t s32;
++typedef int64_t s64;
++
++#define SCX_BUG(__fmt, ...)							\
++	do {									\
++		fprintf(stderr, "[SCX_BUG] %s:%d", __FILE__, __LINE__);		\
++		if (errno)							\
++			fprintf(stderr, " (%s)\n", strerror(errno));		\
++		else								\
++			fprintf(stderr, "\n");					\
++		fprintf(stderr, __fmt __VA_OPT__(,) __VA_ARGS__);		\
++		fprintf(stderr, "\n");						\
++										\
++		exit(EXIT_FAILURE);						\
++	} while (0)
++
++#define SCX_BUG_ON(__cond, __fmt, ...)					\
++	do {								\
++		if (__cond)						\
++			SCX_BUG((__fmt) __VA_OPT__(,) __VA_ARGS__);	\
++	} while (0)
++
++/**
++ * RESIZE_ARRAY - Convenience macro for resizing a BPF array
++ * @__skel: the skeleton containing the array
++ * @elfsec: the data section of the BPF program in which the array exists
++ * @arr: the name of the array
++ * @n: the desired array element count
++ *
++ * For BPF arrays declared with RESIZABLE_ARRAY(), this macro performs two
++ * operations. It resizes the map which corresponds to the custom data
++ * section that contains the target array. As a side effect, the BTF info for
++ * the array is adjusted so that the array length is sized to cover the new
++ * data section size. The second operation is reassigning the skeleton pointer
++ * for that custom data section so that it points to the newly memory mapped
++ * region.
++ */
++#define RESIZE_ARRAY(__skel, elfsec, arr, n)						\
++	do {										\
++		size_t __sz;								\
++		bpf_map__set_value_size((__skel)->maps.elfsec##_##arr,			\
++				sizeof((__skel)->elfsec##_##arr->arr[0]) * (n));	\
++		(__skel)->elfsec##_##arr =						\
++			bpf_map__initial_value((__skel)->maps.elfsec##_##arr, &__sz);	\
++	} while (0)
++
++#include "user_exit_info.h"
++#include "compat.h"
++
++#endif	/* __SCHED_EXT_COMMON_H */
+diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h
+new file mode 100644
+index 000000000000..e5afe9efd3f3
+--- /dev/null
++++ b/tools/sched_ext/include/scx/compat.bpf.h
+@@ -0,0 +1,47 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++#ifndef __SCX_COMPAT_BPF_H
++#define __SCX_COMPAT_BPF_H
++
++#define __COMPAT_ENUM_OR_ZERO(__type, __ent)					\
++({										\
++	__type __ret = 0;							\
++	if (bpf_core_enum_value_exists(__type, __ent))				\
++		__ret = __ent;							\
++	__ret;									\
++})
++
++/* v6.12: 819513666966 ("sched_ext: Add cgroup support") */
++#define __COMPAT_scx_bpf_task_cgroup(p)						\
++	(bpf_ksym_exists(scx_bpf_task_cgroup) ?					\
++	 scx_bpf_task_cgroup((p)) : NULL)
++
++/* v6.12: 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()") */
++#define __COMPAT_scx_bpf_dispatch_from_dsq_set_slice(it, slice)			\
++	(bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_slice) ?			\
++	 scx_bpf_dispatch_from_dsq_set_slice((it), (slice)) : (void)0)
++#define __COMPAT_scx_bpf_dispatch_from_dsq_set_vtime(it, vtime)			\
++	(bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_vtime) ?			\
++	 scx_bpf_dispatch_from_dsq_set_vtime((it), (vtime)) : (void)0)
++#define __COMPAT_scx_bpf_dispatch_from_dsq(it, p, dsq_id, enq_flags)		\
++	(bpf_ksym_exists(scx_bpf_dispatch_from_dsq) ?				\
++	 scx_bpf_dispatch_from_dsq((it), (p), (dsq_id), (enq_flags)) : false)
++#define __COMPAT_scx_bpf_dispatch_vtime_from_dsq(it, p, dsq_id, enq_flags)	\
++	(bpf_ksym_exists(scx_bpf_dispatch_vtime_from_dsq) ?			\
++	 scx_bpf_dispatch_vtime_from_dsq((it), (p), (dsq_id), (enq_flags)) : false)
++
++/*
++ * Define sched_ext_ops. This may be expanded to define multiple variants for
++ * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
++ */
++#define SCX_OPS_DEFINE(__name, ...)						\
++	SEC(".struct_ops.link")							\
++	struct sched_ext_ops __name = {						\
++		__VA_ARGS__,							\
++	};
++
++#endif	/* __SCX_COMPAT_BPF_H */
+diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h
+new file mode 100644
+index 000000000000..cc56ff9aa252
+--- /dev/null
++++ b/tools/sched_ext/include/scx/compat.h
+@@ -0,0 +1,186 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++#ifndef __SCX_COMPAT_H
++#define __SCX_COMPAT_H
++
++#include <bpf/btf.h>
++#include <fcntl.h>
++#include <stdlib.h>
++#include <unistd.h>
++
++struct btf *__COMPAT_vmlinux_btf __attribute__((weak));
++
++static inline void __COMPAT_load_vmlinux_btf(void)
++{
++	if (!__COMPAT_vmlinux_btf) {
++		__COMPAT_vmlinux_btf = btf__load_vmlinux_btf();
++		SCX_BUG_ON(!__COMPAT_vmlinux_btf, "btf__load_vmlinux_btf()");
++	}
++}
++
++static inline bool __COMPAT_read_enum(const char *type, const char *name, u64 *v)
++{
++	const struct btf_type *t;
++	const char *n;
++	s32 tid;
++	int i;
++
++	__COMPAT_load_vmlinux_btf();
++
++	tid = btf__find_by_name(__COMPAT_vmlinux_btf, type);
++	if (tid < 0)
++		return false;
++
++	t = btf__type_by_id(__COMPAT_vmlinux_btf, tid);
++	SCX_BUG_ON(!t, "btf__type_by_id(%d)", tid);
++
++	if (btf_is_enum(t)) {
++		struct btf_enum *e = btf_enum(t);
++
++		for (i = 0; i < BTF_INFO_VLEN(t->info); i++) {
++			n = btf__name_by_offset(__COMPAT_vmlinux_btf, e[i].name_off);
++			SCX_BUG_ON(!n, "btf__name_by_offset()");
++			if (!strcmp(n, name)) {
++				*v = e[i].val;
++				return true;
++			}
++		}
++	} else if (btf_is_enum64(t)) {
++		struct btf_enum64 *e = btf_enum64(t);
++
++		for (i = 0; i < BTF_INFO_VLEN(t->info); i++) {
++			n = btf__name_by_offset(__COMPAT_vmlinux_btf, e[i].name_off);
++			SCX_BUG_ON(!n, "btf__name_by_offset()");
++			if (!strcmp(n, name)) {
++				*v = btf_enum64_value(&e[i]);
++				return true;
++			}
++		}
++	}
++
++	return false;
++}
++
++#define __COMPAT_ENUM_OR_ZERO(__type, __ent)					\
++({										\
++	u64 __val = 0;								\
++	__COMPAT_read_enum(__type, __ent, &__val);				\
++	__val;									\
++})
++
++static inline bool __COMPAT_has_ksym(const char *ksym)
++{
++	__COMPAT_load_vmlinux_btf();
++	return btf__find_by_name(__COMPAT_vmlinux_btf, ksym) >= 0;
++}
++
++static inline bool __COMPAT_struct_has_field(const char *type, const char *field)
++{
++	const struct btf_type *t;
++	const struct btf_member *m;
++	const char *n;
++	s32 tid;
++	int i;
++
++	__COMPAT_load_vmlinux_btf();
++	tid = btf__find_by_name_kind(__COMPAT_vmlinux_btf, type, BTF_KIND_STRUCT);
++	if (tid < 0)
++		return false;
++
++	t = btf__type_by_id(__COMPAT_vmlinux_btf, tid);
++	SCX_BUG_ON(!t, "btf__type_by_id(%d)", tid);
++
++	m = btf_members(t);
++
++	for (i = 0; i < BTF_INFO_VLEN(t->info); i++) {
++		n = btf__name_by_offset(__COMPAT_vmlinux_btf, m[i].name_off);
++		SCX_BUG_ON(!n, "btf__name_by_offset()");
++			if (!strcmp(n, field))
++				return true;
++	}
++
++	return false;
++}
++
++#define SCX_OPS_SWITCH_PARTIAL							\
++	__COMPAT_ENUM_OR_ZERO("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL")
++
++static inline long scx_hotplug_seq(void)
++{
++	int fd;
++	char buf[32];
++	ssize_t len;
++	long val;
++
++	fd = open("/sys/kernel/sched_ext/hotplug_seq", O_RDONLY);
++	if (fd < 0)
++		return -ENOENT;
++
++	len = read(fd, buf, sizeof(buf) - 1);
++	SCX_BUG_ON(len <= 0, "read failed (%ld)", len);
++	buf[len] = 0;
++	close(fd);
++
++	val = strtoul(buf, NULL, 10);
++	SCX_BUG_ON(val < 0, "invalid num hotplug events: %lu", val);
++
++	return val;
++}
++
++/*
++ * struct sched_ext_ops can change over time. If compat.bpf.h::SCX_OPS_DEFINE()
++ * is used to define ops and compat.h::SCX_OPS_LOAD/ATTACH() are used to load
++ * and attach it, backward compatibility is automatically maintained where
++ * reasonable.
++ *
++ * ec7e3b0463e1 ("implement-ops") in https://github.com/sched-ext/sched_ext is
++ * the current minimum required kernel version.
++ */
++#define SCX_OPS_OPEN(__ops_name, __scx_name) ({					\
++	struct __scx_name *__skel;						\
++										\
++	SCX_BUG_ON(!__COMPAT_struct_has_field("sched_ext_ops", "dump"),		\
++		   "sched_ext_ops.dump() missing, kernel too old?");		\
++										\
++	__skel = __scx_name##__open();						\
++	SCX_BUG_ON(!__skel, "Could not open " #__scx_name);			\
++	__skel->struct_ops.__ops_name->hotplug_seq = scx_hotplug_seq();		\
++	__skel; 								\
++})
++
++#define SCX_OPS_LOAD(__skel, __ops_name, __scx_name, __uei_name) ({		\
++	UEI_SET_SIZE(__skel, __ops_name, __uei_name);				\
++	SCX_BUG_ON(__scx_name##__load((__skel)), "Failed to load skel");	\
++})
++
++/*
++ * New versions of bpftool now emit additional link placeholders for BPF maps,
++ * and set up BPF skeleton in such a way that libbpf will auto-attach BPF maps
++ * automatically, assuming libbpf is recent enough (v1.5+). Old libbpf will do
++ * nothing with those links and won't attempt to auto-attach maps.
++ *
++ * To maintain compatibility with older libbpf while avoiding trying to attach
++ * twice, disable the autoattach feature on newer libbpf.
++ */
++#if LIBBPF_MAJOR_VERSION > 1 ||							\
++	(LIBBPF_MAJOR_VERSION == 1 && LIBBPF_MINOR_VERSION >= 5)
++#define __SCX_OPS_DISABLE_AUTOATTACH(__skel, __ops_name)			\
++	bpf_map__set_autoattach((__skel)->maps.__ops_name, false)
++#else
++#define __SCX_OPS_DISABLE_AUTOATTACH(__skel, __ops_name) do {} while (0)
++#endif
++
++#define SCX_OPS_ATTACH(__skel, __ops_name, __scx_name) ({			\
++	struct bpf_link *__link;						\
++	__SCX_OPS_DISABLE_AUTOATTACH(__skel, __ops_name);			\
++	SCX_BUG_ON(__scx_name##__attach((__skel)), "Failed to attach skel");	\
++	__link = bpf_map__attach_struct_ops((__skel)->maps.__ops_name);		\
++	SCX_BUG_ON(!__link, "Failed to attach struct_ops");			\
++	__link;									\
++})
++
++#endif	/* __SCX_COMPAT_H */
+diff --git a/tools/sched_ext/include/scx/user_exit_info.h b/tools/sched_ext/include/scx/user_exit_info.h
+new file mode 100644
+index 000000000000..8ce2734402e1
+--- /dev/null
++++ b/tools/sched_ext/include/scx/user_exit_info.h
+@@ -0,0 +1,115 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Define struct user_exit_info which is shared between BPF and userspace parts
++ * to communicate exit status and other information.
++ *
++ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
++ */
++#ifndef __USER_EXIT_INFO_H
++#define __USER_EXIT_INFO_H
++
++enum uei_sizes {
++	UEI_REASON_LEN		= 128,
++	UEI_MSG_LEN		= 1024,
++	UEI_DUMP_DFL_LEN	= 32768,
++};
++
++struct user_exit_info {
++	int		kind;
++	s64		exit_code;
++	char		reason[UEI_REASON_LEN];
++	char		msg[UEI_MSG_LEN];
++};
++
++#ifdef __bpf__
++
++#ifdef LSP
++#include "../vmlinux/vmlinux.h"
++#else
++#include "vmlinux.h"
++#endif
++#include <bpf/bpf_core_read.h>
++
++#define UEI_DEFINE(__name)							\
++	char RESIZABLE_ARRAY(data, __name##_dump);				\
++	const volatile u32 __name##_dump_len;					\
++	struct user_exit_info __name SEC(".data")
++
++#define UEI_RECORD(__uei_name, __ei) ({						\
++	bpf_probe_read_kernel_str(__uei_name.reason,				\
++				  sizeof(__uei_name.reason), (__ei)->reason);	\
++	bpf_probe_read_kernel_str(__uei_name.msg,				\
++				  sizeof(__uei_name.msg), (__ei)->msg);		\
++	bpf_probe_read_kernel_str(__uei_name##_dump,				\
++				  __uei_name##_dump_len, (__ei)->dump);		\
++	if (bpf_core_field_exists((__ei)->exit_code))				\
++		__uei_name.exit_code = (__ei)->exit_code;			\
++	/* use __sync to force memory barrier */				\
++	__sync_val_compare_and_swap(&__uei_name.kind, __uei_name.kind,		\
++				    (__ei)->kind);				\
++})
++
++#else	/* !__bpf__ */
++
++#include <stdio.h>
++#include <stdbool.h>
++
++/* no need to call the following explicitly if SCX_OPS_LOAD() is used */
++#define UEI_SET_SIZE(__skel, __ops_name, __uei_name) ({					\
++	u32 __len = (__skel)->struct_ops.__ops_name->exit_dump_len ?: UEI_DUMP_DFL_LEN;	\
++	(__skel)->rodata->__uei_name##_dump_len = __len;				\
++	RESIZE_ARRAY((__skel), data, __uei_name##_dump, __len);				\
++})
++
++#define UEI_EXITED(__skel, __uei_name) ({					\
++	/* use __sync to force memory barrier */				\
++	__sync_val_compare_and_swap(&(__skel)->data->__uei_name.kind, -1, -1);	\
++})
++
++#define UEI_REPORT(__skel, __uei_name) ({					\
++	struct user_exit_info *__uei = &(__skel)->data->__uei_name;		\
++	char *__uei_dump = (__skel)->data_##__uei_name##_dump->__uei_name##_dump; \
++	if (__uei_dump[0] != '\0') {						\
++		fputs("\nDEBUG DUMP\n", stderr);				\
++		fputs("================================================================================\n\n", stderr); \
++		fputs(__uei_dump, stderr);					\
++		fputs("\n================================================================================\n\n", stderr); \
++	}									\
++	fprintf(stderr, "EXIT: %s", __uei->reason);				\
++	if (__uei->msg[0] != '\0')						\
++		fprintf(stderr, " (%s)", __uei->msg);				\
++	fputs("\n", stderr);							\
++	__uei->exit_code;							\
++})
++
++/*
++ * We can't import vmlinux.h while compiling user C code. Let's duplicate
++ * scx_exit_code definition.
++ */
++enum scx_exit_code {
++	/* Reasons */
++	SCX_ECODE_RSN_HOTPLUG		= 1LLU << 32,
++
++	/* Actions */
++	SCX_ECODE_ACT_RESTART		= 1LLU << 48,
++};
++
++enum uei_ecode_mask {
++	UEI_ECODE_USER_MASK		= ((1LLU << 32) - 1),
++	UEI_ECODE_SYS_RSN_MASK		= ((1LLU << 16) - 1) << 32,
++	UEI_ECODE_SYS_ACT_MASK		= ((1LLU << 16) - 1) << 48,
++};
++
++/*
++ * These macros interpret the ecode returned from UEI_REPORT().
++ */
++#define UEI_ECODE_USER(__ecode)		((__ecode) & UEI_ECODE_USER_MASK)
++#define UEI_ECODE_SYS_RSN(__ecode)	((__ecode) & UEI_ECODE_SYS_RSN_MASK)
++#define UEI_ECODE_SYS_ACT(__ecode)	((__ecode) & UEI_ECODE_SYS_ACT_MASK)
++
++#define UEI_ECODE_RESTART(__ecode)	(UEI_ECODE_SYS_ACT((__ecode)) == SCX_ECODE_ACT_RESTART)
++
++#endif	/* __bpf__ */
++#endif	/* __USER_EXIT_INFO_H */
+diff --git a/tools/sched_ext/scx_central.bpf.c b/tools/sched_ext/scx_central.bpf.c
+new file mode 100644
+index 000000000000..8dd8eb73b6b8
+--- /dev/null
++++ b/tools/sched_ext/scx_central.bpf.c
+@@ -0,0 +1,361 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * A central FIFO sched_ext scheduler which demonstrates the following:
++ *
++ * a. Making all scheduling decisions from one CPU:
++ *
++ *    The central CPU is the only one making scheduling decisions. All other
++ *    CPUs kick the central CPU when they run out of tasks to run.
++ *
++ *    There is one global BPF queue and the central CPU schedules all CPUs by
++ *    dispatching from the global queue to each CPU's local dsq from dispatch().
++ *    This isn't the most straightforward. e.g. It'd be easier to bounce
++ *    through per-CPU BPF queues. The current design is chosen to maximally
++ *    utilize and verify various SCX mechanisms such as LOCAL_ON dispatching.
++ *
++ * b. Tickless operation
++ *
++ *    All tasks are dispatched with the infinite slice which allows stopping the
++ *    ticks on CONFIG_NO_HZ_FULL kernels running with the proper nohz_full
++ *    parameter. The tickless operation can be observed through
++ *    /proc/interrupts.
++ *
++ *    Periodic switching is enforced by a periodic timer checking all CPUs and
++ *    preempting them as necessary. The timer is pinned to the central CPU
++ *    with BPF_F_TIMER_CPU_PIN; on kernels lacking the flag it falls back to
++ *    an unpinned timer.
++ *
++ * c. Preemption
++ *
++ *    Kthreads are unconditionally queued to the head of a matching local dsq
++ *    and dispatched with SCX_ENQ_PREEMPT. This ensures that a kthread is always
++ *    prioritized over user threads, which is required for ensuring forward
++ *    progress as e.g. the periodic timer may run on a ksoftirqd and if the
++ *    ksoftirqd gets starved by a user thread, there may not be anything else to
++ *    vacate that user thread.
++ *
++ *    SCX_KICK_PREEMPT is used to trigger scheduling and CPUs to move to the
++ *    next tasks.
++ *
++ * This scheduler is designed to maximize usage of various SCX mechanisms. A
++ * more practical implementation would likely put the scheduling loop outside
++ * the central CPU's dispatch() path and add some form of priority mechanism.
++ *
++ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
++ */
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++enum {
++	FALLBACK_DSQ_ID		= 0,
++	MS_TO_NS		= 1000LLU * 1000,
++	TIMER_INTERVAL_NS	= 1 * MS_TO_NS,
++};
++
++const volatile s32 central_cpu;
++const volatile u32 nr_cpu_ids = 1;	/* !0 for veristat, set during init */
++const volatile u64 slice_ns = SCX_SLICE_DFL;
++
++bool timer_pinned = true;
++u64 nr_total, nr_locals, nr_queued, nr_lost_pids;
++u64 nr_timers, nr_dispatches, nr_mismatches, nr_retries;
++u64 nr_overflows;
++
++UEI_DEFINE(uei);
++
++struct {
++	__uint(type, BPF_MAP_TYPE_QUEUE);
++	__uint(max_entries, 4096);
++	__type(value, s32);
++} central_q SEC(".maps");
++
++/* can't use percpu map due to bad lookups */
++bool RESIZABLE_ARRAY(data, cpu_gimme_task);
++u64 RESIZABLE_ARRAY(data, cpu_started_at);
++
++struct central_timer {
++	struct bpf_timer timer;
++};
++
++struct {
++	__uint(type, BPF_MAP_TYPE_ARRAY);
++	__uint(max_entries, 1);
++	__type(key, u32);
++	__type(value, struct central_timer);
++} central_timer SEC(".maps");
++
++static bool vtime_before(u64 a, u64 b)
++{
++	return (s64)(a - b) < 0;
++}
++
++s32 BPF_STRUCT_OPS(central_select_cpu, struct task_struct *p,
++		   s32 prev_cpu, u64 wake_flags)
++{
++	/*
++	 * Steer wakeups to the central CPU as much as possible to avoid
++	 * disturbing other CPUs. It's safe to blindly return the central cpu as
++	 * select_cpu() is a hint and if @p can't be on it, the kernel will
++	 * automatically pick a fallback CPU.
++	 */
++	return central_cpu;
++}
++
++void BPF_STRUCT_OPS(central_enqueue, struct task_struct *p, u64 enq_flags)
++{
++	s32 pid = p->pid;
++
++	__sync_fetch_and_add(&nr_total, 1);
++
++	/*
++	 * Push per-cpu kthreads at the head of local dsq's and preempt the
++	 * corresponding CPU. This ensures that e.g. ksoftirqd isn't blocked
++	 * behind other threads which is necessary for forward progress
++	 * guarantee as we depend on the BPF timer which may run from ksoftirqd.
++	 */
++	if ((p->flags & PF_KTHREAD) && p->nr_cpus_allowed == 1) {
++		__sync_fetch_and_add(&nr_locals, 1);
++		scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_INF,
++				 enq_flags | SCX_ENQ_PREEMPT);
++		return;
++	}
++
++	if (bpf_map_push_elem(&central_q, &pid, 0)) {
++		__sync_fetch_and_add(&nr_overflows, 1);
++		scx_bpf_dispatch(p, FALLBACK_DSQ_ID, SCX_SLICE_INF, enq_flags);
++		return;
++	}
++
++	__sync_fetch_and_add(&nr_queued, 1);
++
++	if (!scx_bpf_task_running(p))
++		scx_bpf_kick_cpu(central_cpu, SCX_KICK_PREEMPT);
++}
++
++static bool dispatch_to_cpu(s32 cpu)
++{
++	struct task_struct *p;
++	s32 pid;
++
++	bpf_repeat(BPF_MAX_LOOPS) {
++		if (bpf_map_pop_elem(&central_q, &pid))
++			break;
++
++		__sync_fetch_and_sub(&nr_queued, 1);
++
++		p = bpf_task_from_pid(pid);
++		if (!p) {
++			__sync_fetch_and_add(&nr_lost_pids, 1);
++			continue;
++		}
++
++		/*
++		 * If we can't run the task at the top, do the dumb thing and
++		 * bounce it to the fallback dsq.
++		 */
++		if (!bpf_cpumask_test_cpu(cpu, p->cpus_ptr)) {
++			__sync_fetch_and_add(&nr_mismatches, 1);
++			scx_bpf_dispatch(p, FALLBACK_DSQ_ID, SCX_SLICE_INF, 0);
++			bpf_task_release(p);
++			/*
++			 * We might run out of dispatch buffer slots if we continue dispatching
++			 * to the fallback DSQ, without dispatching to the local DSQ of the
++			 * target CPU. In such a case, break the loop now as will fail the
++			 * next dispatch operation.
++			 */
++			if (!scx_bpf_dispatch_nr_slots())
++				break;
++			continue;
++		}
++
++		/* dispatch to local and mark that @cpu doesn't need more */
++		scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, SCX_SLICE_INF, 0);
++
++		if (cpu != central_cpu)
++			scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
++
++		bpf_task_release(p);
++		return true;
++	}
++
++	return false;
++}
++
++void BPF_STRUCT_OPS(central_dispatch, s32 cpu, struct task_struct *prev)
++{
++	if (cpu == central_cpu) {
++		/* dispatch for all other CPUs first */
++		__sync_fetch_and_add(&nr_dispatches, 1);
++
++		bpf_for(cpu, 0, nr_cpu_ids) {
++			bool *gimme;
++
++			if (!scx_bpf_dispatch_nr_slots())
++				break;
++
++			/* central's gimme is never set */
++			gimme = ARRAY_ELEM_PTR(cpu_gimme_task, cpu, nr_cpu_ids);
++			if (!gimme || !*gimme)
++				continue;
++
++			if (dispatch_to_cpu(cpu))
++				*gimme = false;
++		}
++
++		/*
++		 * Retry if we ran out of dispatch buffer slots as we might have
++		 * skipped some CPUs and also need to dispatch for self. The ext
++		 * core automatically retries if the local dsq is empty but we
++		 * can't rely on that as we're dispatching for other CPUs too.
++		 * Kick self explicitly to retry.
++		 */
++		if (!scx_bpf_dispatch_nr_slots()) {
++			__sync_fetch_and_add(&nr_retries, 1);
++			scx_bpf_kick_cpu(central_cpu, SCX_KICK_PREEMPT);
++			return;
++		}
++
++		/* look for a task to run on the central CPU */
++		if (scx_bpf_consume(FALLBACK_DSQ_ID))
++			return;
++		dispatch_to_cpu(central_cpu);
++	} else {
++		bool *gimme;
++
++		if (scx_bpf_consume(FALLBACK_DSQ_ID))
++			return;
++
++		gimme = ARRAY_ELEM_PTR(cpu_gimme_task, cpu, nr_cpu_ids);
++		if (gimme)
++			*gimme = true;
++
++		/*
++		 * Force dispatch on the scheduling CPU so that it finds a task
++		 * to run for us.
++		 */
++		scx_bpf_kick_cpu(central_cpu, SCX_KICK_PREEMPT);
++	}
++}
++
++void BPF_STRUCT_OPS(central_running, struct task_struct *p)
++{
++	s32 cpu = scx_bpf_task_cpu(p);
++	u64 *started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids);
++	if (started_at)
++		*started_at = bpf_ktime_get_ns() ?: 1;	/* 0 indicates idle */
++}
++
++void BPF_STRUCT_OPS(central_stopping, struct task_struct *p, bool runnable)
++{
++	s32 cpu = scx_bpf_task_cpu(p);
++	u64 *started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids);
++	if (started_at)
++		*started_at = 0;
++}
++
++static int central_timerfn(void *map, int *key, struct bpf_timer *timer)
++{
++	u64 now = bpf_ktime_get_ns();
++	u64 nr_to_kick = nr_queued;
++	s32 i, curr_cpu;
++
++	curr_cpu = bpf_get_smp_processor_id();
++	if (timer_pinned && (curr_cpu != central_cpu)) {
++		scx_bpf_error("Central timer ran on CPU %d, not central CPU %d",
++			      curr_cpu, central_cpu);
++		return 0;
++	}
++
++	bpf_for(i, 0, nr_cpu_ids) {
++		s32 cpu = (nr_timers + i) % nr_cpu_ids;
++		u64 *started_at;
++
++		if (cpu == central_cpu)
++			continue;
++
++		/* kick iff the current one exhausted its slice */
++		started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids);
++		if (started_at && *started_at &&
++		    vtime_before(now, *started_at + slice_ns))
++			continue;
++
++		/* and there's something pending */
++		if (scx_bpf_dsq_nr_queued(FALLBACK_DSQ_ID) ||
++		    scx_bpf_dsq_nr_queued(SCX_DSQ_LOCAL_ON | cpu))
++			;
++		else if (nr_to_kick)
++			nr_to_kick--;
++		else
++			continue;
++
++		scx_bpf_kick_cpu(cpu, SCX_KICK_PREEMPT);
++	}
++
++	bpf_timer_start(timer, TIMER_INTERVAL_NS, BPF_F_TIMER_CPU_PIN);
++	__sync_fetch_and_add(&nr_timers, 1);
++	return 0;
++}
++
++int BPF_STRUCT_OPS_SLEEPABLE(central_init)
++{
++	u32 key = 0;
++	struct bpf_timer *timer;
++	int ret;
++
++	ret = scx_bpf_create_dsq(FALLBACK_DSQ_ID, -1);
++	if (ret)
++		return ret;
++
++	timer = bpf_map_lookup_elem(&central_timer, &key);
++	if (!timer)
++		return -ESRCH;
++
++	if (bpf_get_smp_processor_id() != central_cpu) {
++		scx_bpf_error("init from non-central CPU");
++		return -EINVAL;
++	}
++
++	bpf_timer_init(timer, &central_timer, CLOCK_MONOTONIC);
++	bpf_timer_set_callback(timer, central_timerfn);
++
++	ret = bpf_timer_start(timer, TIMER_INTERVAL_NS, BPF_F_TIMER_CPU_PIN);
++	/*
++	 * BPF_F_TIMER_CPU_PIN is pretty new (>=6.7). If we're running in a
++	 * kernel which doesn't have it, bpf_timer_start() will return -EINVAL.
++	 * Retry without the PIN. This would be the perfect use case for
++	 * bpf_core_enum_value_exists() but the enum type doesn't have a name
++	 * and can't be used with bpf_core_enum_value_exists(). Oh well...
++	 */
++	if (ret == -EINVAL) {
++		timer_pinned = false;
++		ret = bpf_timer_start(timer, TIMER_INTERVAL_NS, 0);
++	}
++	if (ret)
++		scx_bpf_error("bpf_timer_start failed (%d)", ret);
++	return ret;
++}
++
++void BPF_STRUCT_OPS(central_exit, struct scx_exit_info *ei)
++{
++	UEI_RECORD(uei, ei);
++}
++
++SCX_OPS_DEFINE(central_ops,
++	       /*
++		* We are offloading all scheduling decisions to the central CPU
++		* and thus being the last task on a given CPU doesn't mean
++		* anything special. Enqueue the last tasks like any other tasks.
++		*/
++	       .flags			= SCX_OPS_ENQ_LAST,
++
++	       .select_cpu		= (void *)central_select_cpu,
++	       .enqueue			= (void *)central_enqueue,
++	       .dispatch		= (void *)central_dispatch,
++	       .running			= (void *)central_running,
++	       .stopping		= (void *)central_stopping,
++	       .init			= (void *)central_init,
++	       .exit			= (void *)central_exit,
++	       .name			= "central");
+diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c
+new file mode 100644
+index 000000000000..21deea320bd7
+--- /dev/null
++++ b/tools/sched_ext/scx_central.c
+@@ -0,0 +1,135 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
++ */
++#define _GNU_SOURCE
++#include <sched.h>
++#include <stdio.h>
++#include <unistd.h>
++#include <inttypes.h>
++#include <signal.h>
++#include <libgen.h>
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include "scx_central.bpf.skel.h"
++
++const char help_fmt[] =
++"A central FIFO sched_ext scheduler.\n"
++"\n"
++"See the top-level comment in .bpf.c for more details.\n"
++"\n"
++"Usage: %s [-s SLICE_US] [-c CPU]\n"
++"\n"
++"  -s SLICE_US   Override slice duration\n"
++"  -c CPU        Override the central CPU (default: 0)\n"
++"  -v            Print libbpf debug messages\n"
++"  -h            Display this help and exit\n";
++
++static bool verbose;
++static volatile int exit_req;
++
++static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
++{
++	if (level == LIBBPF_DEBUG && !verbose)
++		return 0;
++	return vfprintf(stderr, format, args);
++}
++
++static void sigint_handler(int dummy)
++{
++	exit_req = 1;
++}
++
++int main(int argc, char **argv)
++{
++	struct scx_central *skel;
++	struct bpf_link *link;
++	__u64 seq = 0, ecode;
++	__s32 opt;
++	cpu_set_t *cpuset;
++
++	libbpf_set_print(libbpf_print_fn);
++	signal(SIGINT, sigint_handler);
++	signal(SIGTERM, sigint_handler);
++restart:
++	skel = SCX_OPS_OPEN(central_ops, scx_central);
++
++	skel->rodata->central_cpu = 0;
++	skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
++
++	while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
++		switch (opt) {
++		case 's':
++			skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
++			break;
++		case 'c':
++			skel->rodata->central_cpu = strtoul(optarg, NULL, 0);
++			break;
++		case 'v':
++			verbose = true;
++			break;
++		default:
++			fprintf(stderr, help_fmt, basename(argv[0]));
++			return opt != 'h';
++		}
++	}
++
++	/* Resize arrays so their element count is equal to cpu count. */
++	RESIZE_ARRAY(skel, data, cpu_gimme_task, skel->rodata->nr_cpu_ids);
++	RESIZE_ARRAY(skel, data, cpu_started_at, skel->rodata->nr_cpu_ids);
++
++	SCX_OPS_LOAD(skel, central_ops, scx_central, uei);
++
++	/*
++	 * Affinitize the loading thread to the central CPU, as:
++	 * - That's where the BPF timer is first invoked in the BPF program.
++	 * - We probably don't want this user space component to take up a core
++	 *   from a task that would benefit from avoiding preemption on one of
++	 *   the tickless cores.
++	 *
++	 * On kernels without BPF_F_TIMER_CPU_PIN the timer can't be pinned, so
++	 * it's not guaranteed to always be invoked on the central CPU. In
++	 * practice, this suffices the majority of the time.
++	 */
++	cpuset = CPU_ALLOC(skel->rodata->nr_cpu_ids);
++	SCX_BUG_ON(!cpuset, "Failed to allocate cpuset");
++	/* CPU_ALLOC()ed sets may be smaller than cpu_set_t; use the sized _S API */
++	CPU_ZERO_S(CPU_ALLOC_SIZE(skel->rodata->nr_cpu_ids), cpuset);
++	CPU_SET_S(skel->rodata->central_cpu, CPU_ALLOC_SIZE(skel->rodata->nr_cpu_ids), cpuset);
++	SCX_BUG_ON(sched_setaffinity(0, CPU_ALLOC_SIZE(skel->rodata->nr_cpu_ids), cpuset),
++		   "Failed to affinitize to central CPU %d (max %d)",
++		   skel->rodata->central_cpu, skel->rodata->nr_cpu_ids - 1);
++	CPU_FREE(cpuset);
++
++	link = SCX_OPS_ATTACH(skel, central_ops, scx_central);
++
++	if (!skel->data->timer_pinned)
++		printf("WARNING : BPF_F_TIMER_CPU_PIN not available, timer not pinned to central\n");
++
++	while (!exit_req && !UEI_EXITED(skel, uei)) {
++		printf("[SEQ %llu]\n", seq++);
++		printf("total   :%10" PRIu64 "    local:%10" PRIu64 "   queued:%10" PRIu64 "  lost:%10" PRIu64 "\n",
++		       skel->bss->nr_total,
++		       skel->bss->nr_locals,
++		       skel->bss->nr_queued,
++		       skel->bss->nr_lost_pids);
++		printf("timer   :%10" PRIu64 " dispatch:%10" PRIu64 " mismatch:%10" PRIu64 " retry:%10" PRIu64 "\n",
++		       skel->bss->nr_timers,
++		       skel->bss->nr_dispatches,
++		       skel->bss->nr_mismatches,
++		       skel->bss->nr_retries);
++		printf("overflow:%10" PRIu64 "\n", skel->bss->nr_overflows);
++		fflush(stdout);
++		sleep(1);
++	}
++
++	bpf_link__destroy(link);
++	ecode = UEI_REPORT(skel, uei);
++	scx_central__destroy(skel);
++
++	if (UEI_ECODE_RESTART(ecode))
++		goto restart;
++	return 0;
++}
+diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c
+new file mode 100644
+index 000000000000..b722baf6da4b
+--- /dev/null
++++ b/tools/sched_ext/scx_flatcg.bpf.c
+@@ -0,0 +1,957 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * A demo sched_ext flattened cgroup hierarchy scheduler. It implements
++ * hierarchical weight-based cgroup CPU control by flattening the cgroup
++ * hierarchy into a single layer by compounding the active weight share at each
++ * level. Consider the following hierarchy with weights in parentheses:
++ *
++ * R + A (100) + B (100)
++ *   |         \ C (100)
++ *   \ D (200)
++ *
++ * Ignoring the root and threaded cgroups, only B, C and D can contain tasks.
++ * Let's say all three have runnable tasks. The total share that each of these
++ * three cgroups is entitled to can be calculated by compounding its share at
++ * each level.
++ *
++ * For example, B is competing against C and in that competition its share is
++ * 100/(100+100) == 1/2. At its parent level, A is competing against D and A's
++ * share in that competition is 100/(200+100) == 1/3. B's eventual share in the
++ * system can be calculated by multiplying the two shares, 1/2 * 1/3 == 1/6. C's
++ * eventual share is the same at 1/6. D is only competing at the top level and
++ * its share is 200/(100+200) == 2/3.
++ *
++ * So, instead of hierarchically scheduling level-by-level, we can consider it
++ * as B, C and D competing each other with respective share of 1/6, 1/6 and 2/3
++ * and keep updating the eventual shares as the cgroups' runnable states change.
++ *
++ * This flattening of hierarchy can bring a substantial performance gain when
++ * the cgroup hierarchy is nested multiple levels. In a simple benchmark using
++ * wrk[8] on apache serving a CGI script calculating sha1sum of a small file, it
++ * outperforms CFS by ~3% with CPU controller disabled and by ~10% with two
++ * apache instances competing with 2:1 weight ratio nested four level deep.
++ *
++ * However, the gain comes at the cost of not being able to properly handle
++ * thundering herd of cgroups. For example, if many cgroups which are nested
++ * behind a low priority parent cgroup wake up around the same time, they may be
++ * able to consume more CPU cycles than they are entitled to. In many use cases,
++ * this isn't a real concern especially given the performance gain. Also, there
++ * are ways to mitigate the problem further by e.g. introducing an extra
++ * scheduling layer on cgroup delegation boundaries.
++ *
++ * The scheduler first picks the cgroup to run and then schedules the tasks
++ * within by using nested weighted vtime scheduling by default. The
++ * cgroup-internal scheduling can be switched to FIFO with the -f option.
++ */
++#include <scx/common.bpf.h>
++#include "scx_flatcg.h"
++
++/*
++ * Maximum amount of retries to find a valid cgroup.
++ */
++enum {
++	FALLBACK_DSQ		= 0,
++	CGROUP_MAX_RETRIES	= 1024,
++};
++
++char _license[] SEC("license") = "GPL";
++
++const volatile u32 nr_cpus = 32;	/* !0 for veristat, set during init */
++const volatile u64 cgrp_slice_ns = SCX_SLICE_DFL;
++const volatile bool fifo_sched;
++
++u64 cvtime_now;
++UEI_DEFINE(uei);
++
++struct {
++	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
++	__type(key, u32);
++	__type(value, u64);
++	__uint(max_entries, FCG_NR_STATS);
++} stats SEC(".maps");
++
++static void stat_inc(enum fcg_stat_idx idx)
++{
++	u32 idx_v = idx;
++
++	u64 *cnt_p = bpf_map_lookup_elem(&stats, &idx_v);
++	if (cnt_p)
++		(*cnt_p)++;
++}
++
++struct fcg_cpu_ctx {
++	u64			cur_cgid;
++	u64			cur_at;
++};
++
++struct {
++	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
++	__type(key, u32);
++	__type(value, struct fcg_cpu_ctx);
++	__uint(max_entries, 1);
++} cpu_ctx SEC(".maps");
++
++struct {
++	__uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
++	__uint(map_flags, BPF_F_NO_PREALLOC);
++	__type(key, int);
++	__type(value, struct fcg_cgrp_ctx);
++} cgrp_ctx SEC(".maps");
++
++struct cgv_node {
++	struct bpf_rb_node	rb_node;
++	__u64			cvtime;
++	__u64			cgid;
++};
++
++private(CGV_TREE) struct bpf_spin_lock cgv_tree_lock;
++private(CGV_TREE) struct bpf_rb_root cgv_tree __contains(cgv_node, rb_node);
++
++struct cgv_node_stash {
++	struct cgv_node __kptr *node;
++};
++
++struct {
++	__uint(type, BPF_MAP_TYPE_HASH);
++	__uint(max_entries, 16384);
++	__type(key, __u64);
++	__type(value, struct cgv_node_stash);
++} cgv_node_stash SEC(".maps");
++
++struct fcg_task_ctx {
++	u64		bypassed_at;
++};
++
++struct {
++	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
++	__uint(map_flags, BPF_F_NO_PREALLOC);
++	__type(key, int);
++	__type(value, struct fcg_task_ctx);
++} task_ctx SEC(".maps");
++
++/* gets inc'd on weight tree changes to expire the cached hweights */
++u64 hweight_gen = 1;
++
++static u64 div_round_up(u64 dividend, u64 divisor)
++{
++	return (dividend + divisor - 1) / divisor;
++}
++
++static bool vtime_before(u64 a, u64 b)
++{
++	return (s64)(a - b) < 0;
++}
++
++static bool cgv_node_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
++{
++	struct cgv_node *cgc_a, *cgc_b;
++
++	cgc_a = container_of(a, struct cgv_node, rb_node);
++	cgc_b = container_of(b, struct cgv_node, rb_node);
++
++	return cgc_a->cvtime < cgc_b->cvtime;
++}
++
++static struct fcg_cpu_ctx *find_cpu_ctx(void)
++{
++	struct fcg_cpu_ctx *cpuc;
++	u32 idx = 0;
++
++	cpuc = bpf_map_lookup_elem(&cpu_ctx, &idx);
++	if (!cpuc) {
++		scx_bpf_error("cpu_ctx lookup failed");
++		return NULL;
++	}
++	return cpuc;
++}
++
++static struct fcg_cgrp_ctx *find_cgrp_ctx(struct cgroup *cgrp)
++{
++	struct fcg_cgrp_ctx *cgc;
++
++	cgc = bpf_cgrp_storage_get(&cgrp_ctx, cgrp, 0, 0);
++	if (!cgc) {
++		scx_bpf_error("cgrp_ctx lookup failed for cgid %llu", cgrp->kn->id);
++		return NULL;
++	}
++	return cgc;
++}
++
++static struct fcg_cgrp_ctx *find_ancestor_cgrp_ctx(struct cgroup *cgrp, int level)
++{
++	struct fcg_cgrp_ctx *cgc;
++
++	cgrp = bpf_cgroup_ancestor(cgrp, level);
++	if (!cgrp) {
++		scx_bpf_error("ancestor cgroup lookup failed");
++		return NULL;
++	}
++
++	cgc = find_cgrp_ctx(cgrp);
++	if (!cgc)
++		scx_bpf_error("ancestor cgrp_ctx lookup failed");
++	bpf_cgroup_release(cgrp);
++	return cgc;
++}
++
++static void cgrp_refresh_hweight(struct cgroup *cgrp, struct fcg_cgrp_ctx *cgc)
++{
++	int level;
++
++	if (!cgc->nr_active) {
++		stat_inc(FCG_STAT_HWT_SKIP);
++		return;
++	}
++
++	if (cgc->hweight_gen == hweight_gen) {
++		stat_inc(FCG_STAT_HWT_CACHE);
++		return;
++	}
++
++	stat_inc(FCG_STAT_HWT_UPDATES);
++	bpf_for(level, 0, cgrp->level + 1) {
++		struct fcg_cgrp_ctx *cgc;
++		bool is_active;
++
++		cgc = find_ancestor_cgrp_ctx(cgrp, level);
++		if (!cgc)
++			break;
++
++		if (!level) {
++			cgc->hweight = FCG_HWEIGHT_ONE;
++			cgc->hweight_gen = hweight_gen;
++		} else {
++			struct fcg_cgrp_ctx *pcgc;
++
++			pcgc = find_ancestor_cgrp_ctx(cgrp, level - 1);
++			if (!pcgc)
++				break;
++
++			/*
++			 * We can be opportunistic here and not grab the
++			 * cgv_tree_lock and deal with the occasional races.
++			 * However, hweight updates are already cached and
++			 * relatively low-frequency. Let's just do the
++			 * straightforward thing.
++			 */
++			bpf_spin_lock(&cgv_tree_lock);
++			is_active = cgc->nr_active;
++			if (is_active) {
++				cgc->hweight_gen = pcgc->hweight_gen;
++				cgc->hweight =
++					div_round_up(pcgc->hweight * cgc->weight,
++						     pcgc->child_weight_sum);
++			}
++			bpf_spin_unlock(&cgv_tree_lock);
++
++			if (!is_active) {
++				stat_inc(FCG_STAT_HWT_RACE);
++				break;
++			}
++		}
++	}
++}
++
++static void cgrp_cap_budget(struct cgv_node *cgv_node, struct fcg_cgrp_ctx *cgc)
++{
++	u64 delta, cvtime, max_budget;
++
++	/*
++	 * A node which is on the rbtree can't be pointed to from elsewhere yet
++	 * and thus can't be updated and repositioned. Instead, we collect the
++	 * vtime deltas separately and apply it asynchronously here.
++	 */
++	delta = __sync_fetch_and_sub(&cgc->cvtime_delta, cgc->cvtime_delta);
++	cvtime = cgv_node->cvtime + delta;
++
++	/*
++	 * Allow a cgroup to carry the maximum budget proportional to its
++	 * hweight such that a full-hweight cgroup can immediately take up half
++	 * of the CPUs at the most while staying at the front of the rbtree.
++	 */
++	max_budget = (cgrp_slice_ns * nr_cpus * cgc->hweight) /
++		(2 * FCG_HWEIGHT_ONE);
++	if (vtime_before(cvtime, cvtime_now - max_budget))
++		cvtime = cvtime_now - max_budget;
++
++	cgv_node->cvtime = cvtime;
++}
++
++static void cgrp_enqueued(struct cgroup *cgrp, struct fcg_cgrp_ctx *cgc)
++{
++	struct cgv_node_stash *stash;
++	struct cgv_node *cgv_node;
++	u64 cgid = cgrp->kn->id;
++
++	/* paired with cmpxchg in try_pick_next_cgroup() */
++	if (__sync_val_compare_and_swap(&cgc->queued, 0, 1)) {
++		stat_inc(FCG_STAT_ENQ_SKIP);
++		return;
++	}
++
++	stash = bpf_map_lookup_elem(&cgv_node_stash, &cgid);
++	if (!stash) {
++		scx_bpf_error("cgv_node lookup failed for cgid %llu", cgid);
++		return;
++	}
++
++	/* NULL if the node is already on the rbtree */
++	cgv_node = bpf_kptr_xchg(&stash->node, NULL);
++	if (!cgv_node) {
++		stat_inc(FCG_STAT_ENQ_RACE);
++		return;
++	}
++
++	bpf_spin_lock(&cgv_tree_lock);
++	cgrp_cap_budget(cgv_node, cgc);
++	bpf_rbtree_add(&cgv_tree, &cgv_node->rb_node, cgv_node_less);
++	bpf_spin_unlock(&cgv_tree_lock);
++}
++
++static void set_bypassed_at(struct task_struct *p, struct fcg_task_ctx *taskc)
++{
++	/*
++	 * Tell fcg_stopping() that this bypassed the regular scheduling path
++	 * and should be force charged to the cgroup. 0 is used to indicate that
++	 * the task isn't bypassing, so if the current runtime is 0, go back by
++	 * one nanosecond.
++	 */
++	taskc->bypassed_at = p->se.sum_exec_runtime ?: (u64)-1;
++}
++
++s32 BPF_STRUCT_OPS(fcg_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
++{
++	struct fcg_task_ctx *taskc;
++	bool is_idle = false;
++	s32 cpu;
++
++	cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);
++
++	taskc = bpf_task_storage_get(&task_ctx, p, 0, 0);
++	if (!taskc) {
++		scx_bpf_error("task_ctx lookup failed");
++		return cpu;
++	}
++
++	/*
++	 * If select_cpu_dfl() is recommending local enqueue, the target CPU is
++	 * idle. Follow it and charge the cgroup later in fcg_stopping() after
++	 * the fact.
++	 */
++	if (is_idle) {
++		set_bypassed_at(p, taskc);
++		stat_inc(FCG_STAT_LOCAL);
++		scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
++	}
++
++	return cpu;
++}
++
++void BPF_STRUCT_OPS(fcg_enqueue, struct task_struct *p, u64 enq_flags)
++{
++	struct fcg_task_ctx *taskc;
++	struct cgroup *cgrp;
++	struct fcg_cgrp_ctx *cgc;
++
++	taskc = bpf_task_storage_get(&task_ctx, p, 0, 0);
++	if (!taskc) {
++		scx_bpf_error("task_ctx lookup failed");
++		return;
++	}
++
++	/*
++	 * Use the direct dispatching and force charging to deal with tasks with
++	 * custom affinities so that we don't have to worry about per-cgroup
++	 * dq's containing tasks that can't be executed from some CPUs.
++	 */
++	if (p->nr_cpus_allowed != nr_cpus) {
++		set_bypassed_at(p, taskc);
++
++		/*
++		 * The global dq is deprioritized as we don't want to let tasks
++		 * to boost themselves by constraining its cpumask. The
++		 * deprioritization is rather severe, so let's not apply that to
++		 * per-cpu kernel threads. This is ham-fisted. We probably wanna
++		 * implement per-cgroup fallback dq's instead so that we have
++		 * more control over when tasks with custom cpumask get issued.
++		 */
++		if (p->nr_cpus_allowed == 1 && (p->flags & PF_KTHREAD)) {
++			stat_inc(FCG_STAT_LOCAL);
++			scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, enq_flags);
++		} else {
++			stat_inc(FCG_STAT_GLOBAL);
++			scx_bpf_dispatch(p, FALLBACK_DSQ, SCX_SLICE_DFL, enq_flags);
++		}
++		return;
++	}
++
++	cgrp = __COMPAT_scx_bpf_task_cgroup(p);
++	cgc = find_cgrp_ctx(cgrp);
++	if (!cgc)
++		goto out_release;
++
++	if (fifo_sched) {
++		scx_bpf_dispatch(p, cgrp->kn->id, SCX_SLICE_DFL, enq_flags);
++	} else {
++		u64 tvtime = p->scx.dsq_vtime;
++
++		/*
++		 * Limit the amount of budget that an idling task can accumulate
++		 * to one slice.
++		 */
++		if (vtime_before(tvtime, cgc->tvtime_now - SCX_SLICE_DFL))
++			tvtime = cgc->tvtime_now - SCX_SLICE_DFL;
++
++		scx_bpf_dispatch_vtime(p, cgrp->kn->id, SCX_SLICE_DFL,
++				       tvtime, enq_flags);
++	}
++
++	cgrp_enqueued(cgrp, cgc);
++out_release:
++	bpf_cgroup_release(cgrp);
++}
++
++/*
++ * Walk the cgroup tree to update the active weight sums as tasks wake up and
++ * sleep. The weight sums are used as the base when calculating the proportion a
++ * given cgroup or task is entitled to at each level.
++ */
++static void update_active_weight_sums(struct cgroup *cgrp, bool runnable)
++{
++	struct fcg_cgrp_ctx *cgc;
++	bool updated = false;
++	int idx;
++
++	cgc = find_cgrp_ctx(cgrp);
++	if (!cgc)
++		return;
++
++	/*
++	 * In most cases, a hot cgroup would have multiple threads going to
++	 * sleep and waking up while the whole cgroup stays active. In leaf
++	 * cgroups, ->nr_runnable which is updated with __sync operations gates
++	 * ->nr_active updates, so that we don't have to grab the cgv_tree_lock
++	 * repeatedly for a busy cgroup which is staying active.
++	 */
++	if (runnable) {
++		if (__sync_fetch_and_add(&cgc->nr_runnable, 1))
++			return;
++		stat_inc(FCG_STAT_ACT);
++	} else {
++		if (__sync_sub_and_fetch(&cgc->nr_runnable, 1))
++			return;
++		stat_inc(FCG_STAT_DEACT);
++	}
++
++	/*
++	 * If @cgrp is becoming runnable, its hweight should be refreshed after
++	 * it's added to the weight tree so that enqueue has the up-to-date
++	 * value. If @cgrp is becoming quiescent, the hweight should be
++	 * refreshed before it's removed from the weight tree so that the usage
++	 * charging which happens afterwards has access to the latest value.
++	 */
++	if (!runnable)
++		cgrp_refresh_hweight(cgrp, cgc);
++
++	/* propagate upwards */
++	bpf_for(idx, 0, cgrp->level) {
++		int level = cgrp->level - idx;
++		struct fcg_cgrp_ctx *cgc, *pcgc = NULL;
++		bool propagate = false;
++
++		cgc = find_ancestor_cgrp_ctx(cgrp, level);
++		if (!cgc)
++			break;
++		if (level) {
++			pcgc = find_ancestor_cgrp_ctx(cgrp, level - 1);
++			if (!pcgc)
++				break;
++		}
++
++		/*
++		 * We need the propagation protected by a lock to synchronize
++		 * against weight changes. There's no reason to drop the lock at
++		 * each level but bpf_spin_lock() doesn't want any function
++		 * calls while locked.
++		 */
++		bpf_spin_lock(&cgv_tree_lock);
++
++		if (runnable) {
++			if (!cgc->nr_active++) {
++				updated = true;
++				if (pcgc) {
++					propagate = true;
++					pcgc->child_weight_sum += cgc->weight;
++				}
++			}
++		} else {
++			if (!--cgc->nr_active) {
++				updated = true;
++				if (pcgc) {
++					propagate = true;
++					pcgc->child_weight_sum -= cgc->weight;
++				}
++			}
++		}
++
++		bpf_spin_unlock(&cgv_tree_lock);
++
++		if (!propagate)
++			break;
++	}
++
++	if (updated)
++		__sync_fetch_and_add(&hweight_gen, 1);
++
++	if (runnable)
++		cgrp_refresh_hweight(cgrp, cgc);
++}
++
++void BPF_STRUCT_OPS(fcg_runnable, struct task_struct *p, u64 enq_flags)
++{
++	struct cgroup *cgrp;
++
++	cgrp = __COMPAT_scx_bpf_task_cgroup(p);
++	update_active_weight_sums(cgrp, true);
++	bpf_cgroup_release(cgrp);
++}
++
++void BPF_STRUCT_OPS(fcg_running, struct task_struct *p)
++{
++	struct cgroup *cgrp;
++	struct fcg_cgrp_ctx *cgc;
++
++	if (fifo_sched)
++		return;
++
++	cgrp = __COMPAT_scx_bpf_task_cgroup(p);
++	cgc = find_cgrp_ctx(cgrp);
++	if (cgc) {
++		/*
++		 * @cgc->tvtime_now always progresses forward as tasks start
++		 * executing. The test and update can be performed concurrently
++		 * from multiple CPUs and thus racy. Any error should be
++		 * contained and temporary. Let's just live with it.
++		 */
++		if (vtime_before(cgc->tvtime_now, p->scx.dsq_vtime))
++			cgc->tvtime_now = p->scx.dsq_vtime;
++	}
++	bpf_cgroup_release(cgrp);
++}
++
++void BPF_STRUCT_OPS(fcg_stopping, struct task_struct *p, bool runnable)
++{
++	struct fcg_task_ctx *taskc;
++	struct cgroup *cgrp;
++	struct fcg_cgrp_ctx *cgc;
++
++	/*
++	 * Scale the execution time by the inverse of the weight and charge.
++	 *
++	 * Note that the default yield implementation yields by setting
++	 * @p->scx.slice to zero and the following would treat the yielding task
++	 * as if it has consumed all its slice. If this penalizes yielding tasks
++	 * too much, determine the execution time by taking explicit timestamps
++	 * instead of depending on @p->scx.slice.
++	 */
++	if (!fifo_sched)
++		p->scx.dsq_vtime +=
++			(SCX_SLICE_DFL - p->scx.slice) * 100 / p->scx.weight;
++
++	taskc = bpf_task_storage_get(&task_ctx, p, 0, 0);
++	if (!taskc) {
++		scx_bpf_error("task_ctx lookup failed");
++		return;
++	}
++
++	if (!taskc->bypassed_at)
++		return;
++
++	cgrp = __COMPAT_scx_bpf_task_cgroup(p);
++	cgc = find_cgrp_ctx(cgrp);
++	if (cgc) {
++		__sync_fetch_and_add(&cgc->cvtime_delta,
++				     p->se.sum_exec_runtime - taskc->bypassed_at);
++		taskc->bypassed_at = 0;
++	}
++	bpf_cgroup_release(cgrp);
++}
++
++void BPF_STRUCT_OPS(fcg_quiescent, struct task_struct *p, u64 deq_flags)
++{
++	struct cgroup *cgrp;
++
++	cgrp = __COMPAT_scx_bpf_task_cgroup(p);
++	update_active_weight_sums(cgrp, false);
++	bpf_cgroup_release(cgrp);
++}
++
++void BPF_STRUCT_OPS(fcg_cgroup_set_weight, struct cgroup *cgrp, u32 weight)
++{
++	struct fcg_cgrp_ctx *cgc, *pcgc = NULL;
++
++	cgc = find_cgrp_ctx(cgrp);
++	if (!cgc)
++		return;
++
++	if (cgrp->level) {
++		pcgc = find_ancestor_cgrp_ctx(cgrp, cgrp->level - 1);
++		if (!pcgc)
++			return;
++	}
++
++	bpf_spin_lock(&cgv_tree_lock);
++	if (pcgc && cgc->nr_active)
++		pcgc->child_weight_sum += (s64)weight - cgc->weight;
++	cgc->weight = weight;
++	bpf_spin_unlock(&cgv_tree_lock);
++}
++
++static bool try_pick_next_cgroup(u64 *cgidp)
++{
++	struct bpf_rb_node *rb_node;
++	struct cgv_node_stash *stash;
++	struct cgv_node *cgv_node;
++	struct fcg_cgrp_ctx *cgc;
++	struct cgroup *cgrp;
++	u64 cgid;
++
++	/* pop the front cgroup and wind cvtime_now accordingly */
++	bpf_spin_lock(&cgv_tree_lock);
++
++	rb_node = bpf_rbtree_first(&cgv_tree);
++	if (!rb_node) {
++		bpf_spin_unlock(&cgv_tree_lock);
++		stat_inc(FCG_STAT_PNC_NO_CGRP);
++		*cgidp = 0;
++		return true;	/* empty tree: done, no current cgroup */
++	}
++
++	rb_node = bpf_rbtree_remove(&cgv_tree, rb_node);
++	bpf_spin_unlock(&cgv_tree_lock);
++
++	if (!rb_node) {
++		/*
++		 * This should never happen. bpf_rbtree_first() was called
++		 * above while the tree lock was held, so the node should
++		 * always be present.
++		 */
++		scx_bpf_error("node could not be removed");
++		return true;	/* NOTE(review): *cgidp is not written here */
++	}
++
++	cgv_node = container_of(rb_node, struct cgv_node, rb_node);
++	cgid = cgv_node->cgid;
++
++	if (vtime_before(cvtime_now, cgv_node->cvtime))
++		cvtime_now = cgv_node->cvtime;
++
++	/*
++	 * If lookup fails, the cgroup's gone. Free and move on. See
++	 * fcg_cgroup_exit().
++	 */
++	cgrp = bpf_cgroup_from_id(cgid);
++	if (!cgrp) {
++		stat_inc(FCG_STAT_PNC_GONE);
++		goto out_free;
++	}
++
++	cgc = bpf_cgrp_storage_get(&cgrp_ctx, cgrp, 0, 0);
++	if (!cgc) {
++		bpf_cgroup_release(cgrp);
++		stat_inc(FCG_STAT_PNC_GONE);
++		goto out_free;
++	}
++
++	if (!scx_bpf_consume(cgid)) {
++		bpf_cgroup_release(cgrp);
++		stat_inc(FCG_STAT_PNC_EMPTY);
++		goto out_stash;
++	}
++
++	/*
++	 * Successfully consumed from the cgroup. This will be our current
++	 * cgroup for the new slice. Refresh its hweight.
++	 */
++	cgrp_refresh_hweight(cgrp, cgc);
++
++	bpf_cgroup_release(cgrp);
++
++	/*
++	 * As the cgroup may have more tasks, add it back to the rbtree. Note
++	 * that here we charge the full slice upfront and then exact later
++	 * according to the actual consumption. This prevents lowpri thundering
++	 * herd from saturating the machine.
++	 */
++	bpf_spin_lock(&cgv_tree_lock);
++	cgv_node->cvtime += cgrp_slice_ns * FCG_HWEIGHT_ONE / (cgc->hweight ?: 1);
++	cgrp_cap_budget(cgv_node, cgc);
++	bpf_rbtree_add(&cgv_tree, &cgv_node->rb_node, cgv_node_less);
++	bpf_spin_unlock(&cgv_tree_lock);
++
++	*cgidp = cgid;
++	stat_inc(FCG_STAT_PNC_NEXT);
++	return true;	/* *cgidp now holds the consumed cgroup's ID */
++
++out_stash:
++	stash = bpf_map_lookup_elem(&cgv_node_stash, &cgid);
++	if (!stash) {
++		stat_inc(FCG_STAT_PNC_GONE);
++		goto out_free;
++	}
++
++	/*
++	 * Paired with cmpxchg in cgrp_enqueued(). If they see the following
++	 * transition, they'll enqueue the cgroup. If they are earlier, we'll
++	 * see their task in the dq below and requeue the cgroup.
++	 */
++	__sync_val_compare_and_swap(&cgc->queued, 1, 0);
++
++	if (scx_bpf_dsq_nr_queued(cgid)) {
++		bpf_spin_lock(&cgv_tree_lock);
++		bpf_rbtree_add(&cgv_tree, &cgv_node->rb_node, cgv_node_less);
++		bpf_spin_unlock(&cgv_tree_lock);
++		stat_inc(FCG_STAT_PNC_RACE);
++	} else {
++		cgv_node = bpf_kptr_xchg(&stash->node, cgv_node);
++		if (cgv_node) {
++			scx_bpf_error("unexpected !NULL cgv_node stash");
++			goto out_free;
++		}
++	}
++
++	return false;	/* nothing consumed; fcg_dispatch() retries */
++
++out_free:
++	bpf_obj_drop(cgv_node);
++	return false;	/* node dropped; fcg_dispatch() retries */
++}
++
++void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev)
++{
++	struct fcg_cpu_ctx *cpuc;
++	struct fcg_cgrp_ctx *cgc;
++	struct cgroup *cgrp;
++	u64 now = bpf_ktime_get_ns();
++	bool picked_next = false;
++
++	cpuc = find_cpu_ctx();
++	if (!cpuc)
++		return;
++
++	if (!cpuc->cur_cgid)
++		goto pick_next_cgroup;
++
++	if (vtime_before(now, cpuc->cur_at + cgrp_slice_ns)) {
++		if (scx_bpf_consume(cpuc->cur_cgid)) {
++			stat_inc(FCG_STAT_CNS_KEEP);
++			return;
++		}
++		stat_inc(FCG_STAT_CNS_EMPTY);
++	} else {
++		stat_inc(FCG_STAT_CNS_EXPIRE);
++	}
++
++	/*
++	 * The current cgroup is expiring. It was already charged a full slice.
++	 * Calculate the actual usage and accumulate the delta.
++	 */
++	cgrp = bpf_cgroup_from_id(cpuc->cur_cgid);
++	if (!cgrp) {
++		stat_inc(FCG_STAT_CNS_GONE);
++		goto pick_next_cgroup;
++	}
++
++	cgc = bpf_cgrp_storage_get(&cgrp_ctx, cgrp, 0, 0);
++	if (cgc) {
++		/*
++		 * The pick-next loop below can't run with the lock held, so the
++		 * two steps aren't atomic. NOTE(review): the u64 math wraps
++		 * if the slice has already expired - confirm this is intended.
++		 */
++		bpf_spin_lock(&cgv_tree_lock);
++		__sync_fetch_and_add(&cgc->cvtime_delta,
++				     (cpuc->cur_at + cgrp_slice_ns - now) *
++				     FCG_HWEIGHT_ONE / (cgc->hweight ?: 1));
++		bpf_spin_unlock(&cgv_tree_lock);
++	} else {
++		stat_inc(FCG_STAT_CNS_GONE);
++	}
++
++	bpf_cgroup_release(cgrp);
++
++pick_next_cgroup:
++	cpuc->cur_at = now;
++
++	if (scx_bpf_consume(FALLBACK_DSQ)) {
++		cpuc->cur_cgid = 0;
++		return;
++	}
++
++	bpf_repeat(CGROUP_MAX_RETRIES) {
++		if (try_pick_next_cgroup(&cpuc->cur_cgid)) {
++			picked_next = true;
++			break;
++		}
++	}
++
++	/*
++	 * This only happens if try_pick_next_cgroup() races against enqueue
++	 * path for more than CGROUP_MAX_RETRIES times, which is extremely
++	 * unlikely and likely indicates an underlying bug. There shouldn't be
++	 * any stall risk as the race is against enqueue.
++	 */
++	if (!picked_next)
++		stat_inc(FCG_STAT_PNC_FAIL);
++}
++
++s32 BPF_STRUCT_OPS(fcg_init_task, struct task_struct *p,
++		   struct scx_init_task_args *args)
++{
++	struct fcg_task_ctx *taskc;
++	struct fcg_cgrp_ctx *cgc;
++
++	/*
++	 * @p is new. Let's ensure that its task_ctx is available. We can sleep
++	 * in this function and the following will automatically use GFP_KERNEL.
++	 */
++	taskc = bpf_task_storage_get(&task_ctx, p, 0,
++				     BPF_LOCAL_STORAGE_GET_F_CREATE);
++	if (!taskc)
++		return -ENOMEM;
++
++	taskc->bypassed_at = 0;
++
++	if (!(cgc = find_cgrp_ctx(args->cgroup)))
++		return -ENOENT;
++
++	p->scx.dsq_vtime = cgc->tvtime_now;
++
++	return 0;
++}
++
++int BPF_STRUCT_OPS_SLEEPABLE(fcg_cgroup_init, struct cgroup *cgrp,
++			     struct scx_cgroup_init_args *args)
++{
++	struct fcg_cgrp_ctx *cgc;
++	struct cgv_node *cgv_node;
++	struct cgv_node_stash empty_stash = {}, *stash;
++	u64 cgid = cgrp->kn->id;
++	int ret;
++
++	/*
++	 * Technically incorrect as cgroup ID is full 64bit while dsq ID is
++	 * 63bit. Should not be a problem in practice and easy to spot in the
++	 * unlikely case that it breaks.
++	 */
++	ret = scx_bpf_create_dsq(cgid, -1);
++	if (ret)
++		return ret;
++
++	cgc = bpf_cgrp_storage_get(&cgrp_ctx, cgrp, 0,
++				   BPF_LOCAL_STORAGE_GET_F_CREATE);
++	if (!cgc) {
++		ret = -ENOMEM;
++		goto err_destroy_dsq;
++	}
++
++	cgc->weight = args->weight;
++	cgc->hweight = FCG_HWEIGHT_ONE;
++
++	ret = bpf_map_update_elem(&cgv_node_stash, &cgid, &empty_stash,
++				  BPF_NOEXIST);
++	if (ret) {
++		if (ret != -ENOMEM)
++			scx_bpf_error("unexpected stash creation error (%d)",
++				      ret);
++		goto err_destroy_dsq;
++	}
++
++	stash = bpf_map_lookup_elem(&cgv_node_stash, &cgid);
++	if (!stash) {
++		scx_bpf_error("unexpected cgv_node stash lookup failure");
++		ret = -ENOENT;
++		goto err_destroy_dsq;
++	}
++
++	cgv_node = bpf_obj_new(struct cgv_node);
++	if (!cgv_node) {
++		ret = -ENOMEM;
++		goto err_del_cgv_node;
++	}
++
++	cgv_node->cgid = cgid;
++	cgv_node->cvtime = cvtime_now;
++
++	cgv_node = bpf_kptr_xchg(&stash->node, cgv_node);
++	if (cgv_node) {
++		scx_bpf_error("unexpected !NULL cgv_node stash");
++		ret = -EBUSY;
++		goto err_drop;
++	}
++
++	return 0;
++
++err_drop:
++	bpf_obj_drop(cgv_node);
++err_del_cgv_node:
++	bpf_map_delete_elem(&cgv_node_stash, &cgid);
++err_destroy_dsq:
++	scx_bpf_destroy_dsq(cgid);
++	return ret;
++}
++
++void BPF_STRUCT_OPS(fcg_cgroup_exit, struct cgroup *cgrp)
++{
++	u64 cgid = cgrp->kn->id;
++
++	/*
++	 * For now, there's no way to find and remove the cgv_node if it's on
++	 * the cgv_tree. Let's drain them in the dispatch path as they get
++	 * popped off the front of the tree.
++	 */
++	bpf_map_delete_elem(&cgv_node_stash, &cgid);
++	scx_bpf_destroy_dsq(cgid);
++}
++
++void BPF_STRUCT_OPS(fcg_cgroup_move, struct task_struct *p,
++		    struct cgroup *from, struct cgroup *to)
++{
++	struct fcg_cgrp_ctx *from_cgc, *to_cgc;
++	s64 vtime_delta;
++
++	/* find_cgrp_ctx() triggers scx_ops_error() on lookup failures */
++	if (!(from_cgc = find_cgrp_ctx(from)) || !(to_cgc = find_cgrp_ctx(to)))
++		return;
++
++	vtime_delta = p->scx.dsq_vtime - from_cgc->tvtime_now;
++	p->scx.dsq_vtime = to_cgc->tvtime_now + vtime_delta;
++}
++
++s32 BPF_STRUCT_OPS_SLEEPABLE(fcg_init)
++{
++	return scx_bpf_create_dsq(FALLBACK_DSQ, -1);
++}
++
++void BPF_STRUCT_OPS(fcg_exit, struct scx_exit_info *ei)
++{
++	UEI_RECORD(uei, ei);
++}
++
++SCX_OPS_DEFINE(flatcg_ops,
++	       .select_cpu		= (void *)fcg_select_cpu,
++	       .enqueue			= (void *)fcg_enqueue,
++	       .dispatch		= (void *)fcg_dispatch,
++	       .runnable		= (void *)fcg_runnable,
++	       .running			= (void *)fcg_running,
++	       .stopping		= (void *)fcg_stopping,
++	       .quiescent		= (void *)fcg_quiescent,
++	       .init_task		= (void *)fcg_init_task,
++	       .cgroup_set_weight	= (void *)fcg_cgroup_set_weight,
++	       .cgroup_init		= (void *)fcg_cgroup_init,
++	       .cgroup_exit		= (void *)fcg_cgroup_exit,
++	       .cgroup_move		= (void *)fcg_cgroup_move,
++	       .init			= (void *)fcg_init,
++	       .exit			= (void *)fcg_exit,
++	       .flags			= SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING,
++	       .name			= "flatcg");
+diff --git a/tools/sched_ext/scx_flatcg.c b/tools/sched_ext/scx_flatcg.c
+new file mode 100644
+index 000000000000..5d24ca9c29d9
+--- /dev/null
++++ b/tools/sched_ext/scx_flatcg.c
+@@ -0,0 +1,233 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ */
++#include <stdio.h>
++#include <signal.h>
++#include <unistd.h>
++#include <libgen.h>
++#include <limits.h>
++#include <inttypes.h>
++#include <fcntl.h>
++#include <time.h>
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include "scx_flatcg.h"
++#include "scx_flatcg.bpf.skel.h"
++
++#ifndef FILEID_KERNFS
++#define FILEID_KERNFS		0xfe
++#endif
++
++const char help_fmt[] =
++"A flattened cgroup hierarchy sched_ext scheduler.\n"
++"\n"
++"See the top-level comment in .bpf.c for more details.\n"
++"\n"
++"Usage: %s [-s SLICE_US] [-i INTERVAL] [-f] [-v]\n"
++"\n"
++"  -s SLICE_US   Override slice duration\n"
++"  -i INTERVAL   Report interval\n"
++"  -f            Use FIFO scheduling instead of weighted vtime scheduling\n"
++"  -v            Print libbpf debug messages\n"
++"  -h            Display this help and exit\n";
++
++static bool verbose;
++static volatile int exit_req;
++
++static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
++{
++	if (level == LIBBPF_DEBUG && !verbose)
++		return 0;
++	return vfprintf(stderr, format, args);
++}
++
++static void sigint_handler(int dummy)
++{
++	exit_req = 1;
++}
++
++static float read_cpu_util(__u64 *last_sum, __u64 *last_idle)
++{
++	FILE *fp;
++	char buf[4096];
++	char *line, *cur = NULL, *tok;
++	__u64 sum = 0, idle = 0;
++	__u64 delta_sum, delta_idle;
++	int idx;
++
++	fp = fopen("/proc/stat", "r");
++	if (!fp) {
++		perror("fopen(\"/proc/stat\")");
++		return 0.0;
++	}
++
++	if (!fgets(buf, sizeof(buf), fp)) {
++		perror("fgets(\"/proc/stat\")");
++		fclose(fp);
++		return 0.0;
++	}
++	fclose(fp);
++
++	line = buf;
++	for (idx = 0; (tok = strtok_r(line, " \n", &cur)); idx++) {
++		char *endp = NULL;
++		__u64 v;
++
++		if (idx == 0) {
++			line = NULL;
++			continue;
++		}
++		v = strtoull(tok, &endp, 0);
++		if (!endp || *endp != '\0') {
++			fprintf(stderr, "failed to parse %dth field of /proc/stat (\"%s\")\n",
++				idx, tok);
++			continue;
++		}
++		sum += v;
++		if (idx == 4)
++			idle = v;
++	}
++
++	delta_sum = sum - *last_sum;
++	delta_idle = idle - *last_idle;
++	*last_sum = sum;
++	*last_idle = idle;
++
++	return delta_sum ? (float)(delta_sum - delta_idle) / delta_sum : 0.0;
++}
++
++static void fcg_read_stats(struct scx_flatcg *skel, __u64 *stats)
++{
++	__u64 cnts[FCG_NR_STATS][skel->rodata->nr_cpus];
++	__u32 idx;
++
++	memset(stats, 0, sizeof(stats[0]) * FCG_NR_STATS);
++
++	for (idx = 0; idx < FCG_NR_STATS; idx++) {
++		int ret, cpu;
++
++		ret = bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats),
++					  &idx, cnts[idx]);
++		if (ret < 0)
++			continue;
++		for (cpu = 0; cpu < skel->rodata->nr_cpus; cpu++)
++			stats[idx] += cnts[idx][cpu];
++	}
++}
++
++int main(int argc, char **argv)
++{
++	struct scx_flatcg *skel;
++	struct bpf_link *link;
++	struct timespec intv_ts = { .tv_sec = 2, .tv_nsec = 0 };
++	bool dump_cgrps = false;
++	__u64 last_cpu_sum = 0, last_cpu_idle = 0;
++	__u64 last_stats[FCG_NR_STATS] = {};
++	unsigned long seq = 0;
++	__s32 opt;
++	__u64 ecode;
++
++	libbpf_set_print(libbpf_print_fn);
++	signal(SIGINT, sigint_handler);
++	signal(SIGTERM, sigint_handler);
++restart:
++	optind = 1;	/* getopt() must rescan argv for each fresh skeleton */
++	skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);
++	skel->rodata->nr_cpus = libbpf_num_possible_cpus();
++
++	while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) {
++		double v;
++
++		switch (opt) {
++		case 's':
++			v = strtod(optarg, NULL);
++			skel->rodata->cgrp_slice_ns = v * 1000;
++			break;
++		case 'i':
++			v = strtod(optarg, NULL);
++			intv_ts.tv_sec = v;
++			intv_ts.tv_nsec = (v - (float)intv_ts.tv_sec) * 1000000000;
++			break;
++		case 'd':
++			dump_cgrps = true;
++			break;
++		case 'f':
++			skel->rodata->fifo_sched = true;
++			break;
++		case 'v':
++			verbose = true;
++			break;
++		case 'h':
++		default:
++			fprintf(stderr, help_fmt, basename(argv[0]));
++			return opt != 'h';
++		}
++	}
++
++	printf("slice=%.1lfms intv=%.1lfs dump_cgrps=%d",
++	       (double)skel->rodata->cgrp_slice_ns / 1000000.0,
++	       (double)intv_ts.tv_sec + (double)intv_ts.tv_nsec / 1000000000.0,
++	       dump_cgrps);
++
++	SCX_OPS_LOAD(skel, flatcg_ops, scx_flatcg, uei);
++	link = SCX_OPS_ATTACH(skel, flatcg_ops, scx_flatcg);
++
++	while (!exit_req && !UEI_EXITED(skel, uei)) {
++		__u64 acc_stats[FCG_NR_STATS];
++		__u64 stats[FCG_NR_STATS];
++		float cpu_util;
++		int i;
++
++		cpu_util = read_cpu_util(&last_cpu_sum, &last_cpu_idle);
++
++		fcg_read_stats(skel, acc_stats);
++		for (i = 0; i < FCG_NR_STATS; i++)
++			stats[i] = acc_stats[i] - last_stats[i];
++
++		memcpy(last_stats, acc_stats, sizeof(acc_stats));
++
++		printf("\n[SEQ %6lu cpu=%5.1lf hweight_gen=%" PRIu64 "]\n",
++		       seq++, cpu_util * 100.0, skel->data->hweight_gen);
++		printf("       act:%6llu  deact:%6llu global:%6llu local:%6llu\n",
++		       stats[FCG_STAT_ACT],
++		       stats[FCG_STAT_DEACT],
++		       stats[FCG_STAT_GLOBAL],
++		       stats[FCG_STAT_LOCAL]);
++		printf("HWT  cache:%6llu update:%6llu   skip:%6llu  race:%6llu\n",
++		       stats[FCG_STAT_HWT_CACHE],
++		       stats[FCG_STAT_HWT_UPDATES],
++		       stats[FCG_STAT_HWT_SKIP],
++		       stats[FCG_STAT_HWT_RACE]);
++		printf("ENQ   skip:%6llu   race:%6llu\n",
++		       stats[FCG_STAT_ENQ_SKIP],
++		       stats[FCG_STAT_ENQ_RACE]);
++		printf("CNS   keep:%6llu expire:%6llu  empty:%6llu  gone:%6llu\n",
++		       stats[FCG_STAT_CNS_KEEP],
++		       stats[FCG_STAT_CNS_EXPIRE],
++		       stats[FCG_STAT_CNS_EMPTY],
++		       stats[FCG_STAT_CNS_GONE]);
++		printf("PNC   next:%6llu  empty:%6llu nocgrp:%6llu  gone:%6llu race:%6llu fail:%6llu\n",
++		       stats[FCG_STAT_PNC_NEXT],
++		       stats[FCG_STAT_PNC_EMPTY],
++		       stats[FCG_STAT_PNC_NO_CGRP],
++		       stats[FCG_STAT_PNC_GONE],
++		       stats[FCG_STAT_PNC_RACE],
++		       stats[FCG_STAT_PNC_FAIL]);
++		printf("BAD remove:%6llu\n",
++		       acc_stats[FCG_STAT_BAD_REMOVAL]);
++		fflush(stdout);
++
++		nanosleep(&intv_ts, NULL);
++	}
++
++	bpf_link__destroy(link);
++	ecode = UEI_REPORT(skel, uei);
++	scx_flatcg__destroy(skel);
++
++	if (UEI_ECODE_RESTART(ecode))
++		goto restart;	/* reopen the skeleton and re-apply the options */
++	return 0;
++}
+diff --git a/tools/sched_ext/scx_flatcg.h b/tools/sched_ext/scx_flatcg.h
+new file mode 100644
+index 000000000000..6f2ea50acb1c
+--- /dev/null
++++ b/tools/sched_ext/scx_flatcg.h
+@@ -0,0 +1,51 @@
++#ifndef __SCX_EXAMPLE_FLATCG_H
++#define __SCX_EXAMPLE_FLATCG_H
++
++enum {
++	FCG_HWEIGHT_ONE		= 1LLU << 16,
++};
++
++enum fcg_stat_idx {
++	FCG_STAT_ACT,
++	FCG_STAT_DEACT,
++	FCG_STAT_LOCAL,
++	FCG_STAT_GLOBAL,
++
++	FCG_STAT_HWT_UPDATES,
++	FCG_STAT_HWT_CACHE,
++	FCG_STAT_HWT_SKIP,
++	FCG_STAT_HWT_RACE,
++
++	FCG_STAT_ENQ_SKIP,
++	FCG_STAT_ENQ_RACE,
++
++	FCG_STAT_CNS_KEEP,
++	FCG_STAT_CNS_EXPIRE,
++	FCG_STAT_CNS_EMPTY,
++	FCG_STAT_CNS_GONE,
++
++	FCG_STAT_PNC_NO_CGRP,
++	FCG_STAT_PNC_NEXT,
++	FCG_STAT_PNC_EMPTY,
++	FCG_STAT_PNC_GONE,
++	FCG_STAT_PNC_RACE,
++	FCG_STAT_PNC_FAIL,
++
++	FCG_STAT_BAD_REMOVAL,
++
++	FCG_NR_STATS,
++};
++
++struct fcg_cgrp_ctx {
++	u32			nr_active;
++	u32			nr_runnable;
++	u32			queued;
++	u32			weight;
++	u32			hweight;
++	u64			child_weight_sum;
++	u64			hweight_gen;
++	s64			cvtime_delta;
++	u64			tvtime_now;
++};
++
++#endif /* __SCX_EXAMPLE_FLATCG_H */
+diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
+new file mode 100644
+index 000000000000..5b39bee9eb23
+--- /dev/null
++++ b/tools/sched_ext/scx_qmap.bpf.c
+@@ -0,0 +1,813 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * A simple five-level FIFO queue scheduler.
++ *
++ * There are five FIFOs implemented using BPF_MAP_TYPE_QUEUE. A task gets
++ * assigned to one depending on its compound weight. Each CPU round robins
++ * through the FIFOs and dispatches more from FIFOs with higher indices - 1 from
++ * queue0, 2 from queue1, 4 from queue2 and so on.
++ *
++ * This scheduler demonstrates:
++ *
++ * - BPF-side queueing using PIDs.
++ * - Sleepable per-task storage allocation using ops.init_task().
++ * - Using ops.cpu_release() to handle a higher priority scheduling class taking
++ *   the CPU away.
++ * - Core-sched support.
++ *
++ * This scheduler is primarily for demonstration and testing of sched_ext
++ * features and unlikely to be useful for actual workloads.
++ *
++ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
++ */
++#include <scx/common.bpf.h>
++
++enum consts {
++	ONE_SEC_IN_NS		= 1000000000,
++	SHARED_DSQ		= 0,
++	HIGHPRI_DSQ		= 1,
++	HIGHPRI_WEIGHT		= 8668,		/* this is what -20 maps to */
++};
++
++char _license[] SEC("license") = "GPL";
++
++const volatile u64 slice_ns = SCX_SLICE_DFL;
++const volatile u32 stall_user_nth;
++const volatile u32 stall_kernel_nth;
++const volatile u32 dsp_inf_loop_after;
++const volatile u32 dsp_batch;
++const volatile bool highpri_boosting;
++const volatile bool print_shared_dsq;
++const volatile s32 disallow_tgid;
++const volatile bool suppress_dump;
++
++u64 nr_highpri_queued;
++u32 test_error_cnt;
++
++UEI_DEFINE(uei);
++
++struct qmap {
++	__uint(type, BPF_MAP_TYPE_QUEUE);
++	__uint(max_entries, 4096);
++	__type(value, u32);
++} queue0 SEC(".maps"),
++  queue1 SEC(".maps"),
++  queue2 SEC(".maps"),
++  queue3 SEC(".maps"),
++  queue4 SEC(".maps");
++
++struct {
++	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
++	__uint(max_entries, 5);
++	__type(key, int);
++	__array(values, struct qmap);
++} queue_arr SEC(".maps") = {
++	.values = {
++		[0] = &queue0,
++		[1] = &queue1,
++		[2] = &queue2,
++		[3] = &queue3,
++		[4] = &queue4,
++	},
++};
++
++/*
++ * If enabled, CPU performance target is set according to the queue index
++ * according to the following table.
++ */
++static const u32 qidx_to_cpuperf_target[] = {
++	[0] = SCX_CPUPERF_ONE * 0 / 4,
++	[1] = SCX_CPUPERF_ONE * 1 / 4,
++	[2] = SCX_CPUPERF_ONE * 2 / 4,
++	[3] = SCX_CPUPERF_ONE * 3 / 4,
++	[4] = SCX_CPUPERF_ONE * 4 / 4,
++};
++
++/*
++ * Per-queue sequence numbers to implement core-sched ordering.
++ *
++ * Tail seq is assigned to each queued task and incremented. Head seq tracks the
++ * sequence number of the latest dispatched task. The distance between the a
++ * task's seq and the associated queue's head seq is called the queue distance
++ * and used when comparing two tasks for ordering. See qmap_core_sched_before().
++ */
++static u64 core_sched_head_seqs[5];
++static u64 core_sched_tail_seqs[5];
++
++/* Per-task scheduling context */
++struct task_ctx {
++	bool	force_local;	/* Dispatch directly to local_dsq */
++	bool	highpri;
++	u64	core_sched_seq;
++};
++
++struct {
++	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
++	__uint(map_flags, BPF_F_NO_PREALLOC);
++	__type(key, int);
++	__type(value, struct task_ctx);
++} task_ctx_stor SEC(".maps");
++
++struct cpu_ctx {
++	u64	dsp_idx;	/* dispatch index */
++	u64	dsp_cnt;	/* remaining count */
++	u32	avg_weight;
++	u32	cpuperf_target;
++};
++
++struct {
++	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
++	__uint(max_entries, 1);
++	__type(key, u32);
++	__type(value, struct cpu_ctx);
++} cpu_ctx_stor SEC(".maps");
++
++/* Statistics */
++u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued, nr_ddsp_from_enq;
++u64 nr_core_sched_execed;
++u64 nr_expedited_local, nr_expedited_remote, nr_expedited_lost, nr_expedited_from_timer;
++u32 cpuperf_min, cpuperf_avg, cpuperf_max;
++u32 cpuperf_target_min, cpuperf_target_avg, cpuperf_target_max;
++
++static s32 pick_direct_dispatch_cpu(struct task_struct *p, s32 prev_cpu)
++{
++	s32 cpu;
++
++	if (p->nr_cpus_allowed == 1 ||
++	    scx_bpf_test_and_clear_cpu_idle(prev_cpu))
++		return prev_cpu;
++
++	cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
++	if (cpu >= 0)
++		return cpu;
++
++	return -1;
++}
++
++static struct task_ctx *lookup_task_ctx(struct task_struct *p)
++{
++	struct task_ctx *tctx;
++
++	if (!(tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0))) {
++		scx_bpf_error("task_ctx lookup failed");
++		return NULL;
++	}
++	return tctx;
++}
++
++s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p,
++		   s32 prev_cpu, u64 wake_flags)
++{
++	struct task_ctx *tctx;
++	s32 cpu;
++
++	if (!(tctx = lookup_task_ctx(p)))
++		return -ESRCH;
++
++	cpu = pick_direct_dispatch_cpu(p, prev_cpu);
++
++	if (cpu >= 0) {
++		tctx->force_local = true;
++		return cpu;
++	} else {
++		return prev_cpu;
++	}
++}
++
++static int weight_to_idx(u32 weight)
++{
++	/* Coarsely map the compound weight to a FIFO. */
++	if (weight <= 25)
++		return 0;
++	else if (weight <= 50)
++		return 1;
++	else if (weight < 200)
++		return 2;
++	else if (weight < 400)
++		return 3;
++	else
++		return 4;
++}
++
++void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
++{
++	static u32 user_cnt, kernel_cnt;
++	struct task_ctx *tctx;
++	u32 pid = p->pid;
++	int idx = weight_to_idx(p->scx.weight);
++	void *ring;
++	s32 cpu;
++
++	if (p->flags & PF_KTHREAD) {
++		if (stall_kernel_nth && !(++kernel_cnt % stall_kernel_nth))
++			return;
++	} else {
++		if (stall_user_nth && !(++user_cnt % stall_user_nth))
++			return;
++	}
++
++	if (test_error_cnt && !--test_error_cnt)
++		scx_bpf_error("test triggering error");
++
++	if (!(tctx = lookup_task_ctx(p)))
++		return;
++
++	/*
++	 * All enqueued tasks must have their core_sched_seq updated for correct
++	 * core-sched ordering, which is why %SCX_OPS_ENQ_LAST is specified in
++	 * qmap_ops.flags.
++	 */
++	tctx->core_sched_seq = core_sched_tail_seqs[idx]++;
++
++	/*
++	 * If qmap_select_cpu() is telling us to or this is the last runnable
++	 * task on the CPU, enqueue locally.
++	 */
++	if (tctx->force_local || (enq_flags & SCX_ENQ_LAST)) {
++		tctx->force_local = false;
++		scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, enq_flags);
++		return;
++	}
++
++	/* if !WAKEUP, select_cpu() wasn't called, try direct dispatch */
++	if (!(enq_flags & SCX_ENQ_WAKEUP) &&
++	    (cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) {
++		__sync_fetch_and_add(&nr_ddsp_from_enq, 1);
++		scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags);
++		return;
++	}
++
++	/*
++	 * If the task was re-enqueued due to the CPU being preempted by a
++	 * higher priority scheduling class, just re-enqueue the task directly
++	 * on the global DSQ. As we want another CPU to pick it up, find and
++	 * kick an idle CPU.
++	 */
++	if (enq_flags & SCX_ENQ_REENQ) {
++		s32 cpu;
++
++		scx_bpf_dispatch(p, SHARED_DSQ, 0, enq_flags);
++		cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
++		if (cpu >= 0)
++			scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
++		return;
++	}
++
++	ring = bpf_map_lookup_elem(&queue_arr, &idx);
++	if (!ring) {
++		scx_bpf_error("failed to find ring %d", idx);
++		return;
++	}
++
++	/* Queue on the selected FIFO. If the FIFO overflows, punt to global. */
++	if (bpf_map_push_elem(ring, &pid, 0)) {
++		scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, enq_flags);
++		return;
++	}
++
++	if (highpri_boosting && p->scx.weight >= HIGHPRI_WEIGHT) {
++		tctx->highpri = true;
++		__sync_fetch_and_add(&nr_highpri_queued, 1);
++	}
++	__sync_fetch_and_add(&nr_enqueued, 1);
++}
++
++/*
++ * The BPF queue map doesn't support removal and sched_ext can handle spurious
++ * dispatches. qmap_dequeue() is only used to collect statistics.
++ */
++void BPF_STRUCT_OPS(qmap_dequeue, struct task_struct *p, u64 deq_flags)
++{
++	__sync_fetch_and_add(&nr_dequeued, 1);
++	if (deq_flags & SCX_DEQ_CORE_SCHED_EXEC)
++		__sync_fetch_and_add(&nr_core_sched_execed, 1);
++}
++
++static void update_core_sched_head_seq(struct task_struct *p)
++{
++	int idx = weight_to_idx(p->scx.weight);
++	struct task_ctx *tctx;
++
++	if ((tctx = lookup_task_ctx(p)))
++		core_sched_head_seqs[idx] = tctx->core_sched_seq;
++}
++
++/*
++ * To demonstrate the use of scx_bpf_dispatch_from_dsq(), implement silly
++ * selective priority boosting mechanism by scanning SHARED_DSQ looking for
++ * highpri tasks, moving them to HIGHPRI_DSQ and then consuming them first. This
++ * makes minor difference only when dsp_batch is larger than 1.
++ *
++ * scx_bpf_dispatch[_vtime]_from_dsq() are allowed both from ops.dispatch() and
++ * non-rq-lock holding BPF programs. As demonstration, this function is called
++ * from qmap_dispatch() and monitor_timerfn().
++ */
++static bool dispatch_highpri(bool from_timer)
++{
++	struct task_struct *p;
++	s32 this_cpu = bpf_get_smp_processor_id();
++
++	/* scan SHARED_DSQ and move highpri tasks to HIGHPRI_DSQ */
++	bpf_for_each(scx_dsq, p, SHARED_DSQ, 0) {
++		static u64 highpri_seq;
++		struct task_ctx *tctx;
++
++		if (!(tctx = lookup_task_ctx(p)))
++			return false;
++
++		if (tctx->highpri) {
++			/* exercise the set_*() and vtime interface too */
++			__COMPAT_scx_bpf_dispatch_from_dsq_set_slice(
++				BPF_FOR_EACH_ITER, slice_ns * 2);
++			__COMPAT_scx_bpf_dispatch_from_dsq_set_vtime(
++				BPF_FOR_EACH_ITER, highpri_seq++);
++			__COMPAT_scx_bpf_dispatch_vtime_from_dsq(
++				BPF_FOR_EACH_ITER, p, HIGHPRI_DSQ, 0);
++		}
++	}
++
++	/*
++	 * Scan HIGHPRI_DSQ and dispatch until a task that can run on this CPU
++	 * is found.
++	 */
++	bpf_for_each(scx_dsq, p, HIGHPRI_DSQ, 0) {
++		bool dispatched = false;
++		s32 cpu;
++
++		if (bpf_cpumask_test_cpu(this_cpu, p->cpus_ptr))
++			cpu = this_cpu;
++		else
++			cpu = scx_bpf_pick_any_cpu(p->cpus_ptr, 0);
++
++		if (__COMPAT_scx_bpf_dispatch_from_dsq(BPF_FOR_EACH_ITER, p,
++						       SCX_DSQ_LOCAL_ON | cpu,
++						       SCX_ENQ_PREEMPT)) {
++			if (cpu == this_cpu) {
++				dispatched = true;
++				__sync_fetch_and_add(&nr_expedited_local, 1);
++			} else {
++				__sync_fetch_and_add(&nr_expedited_remote, 1);
++			}
++			if (from_timer)
++				__sync_fetch_and_add(&nr_expedited_from_timer, 1);
++		} else {
++			__sync_fetch_and_add(&nr_expedited_lost, 1);
++		}
++
++		if (dispatched)
++			return true;
++	}
++
++	return false;
++}
++
++void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev)
++{
++	struct task_struct *p;
++	struct cpu_ctx *cpuc;
++	u32 zero = 0, batch = dsp_batch ?: 1;
++	void *fifo;
++	s32 i, pid;
++
++	if (dispatch_highpri(false))
++		return;
++
++	if (!nr_highpri_queued && scx_bpf_consume(SHARED_DSQ))
++		return;
++
++	if (dsp_inf_loop_after && nr_dispatched > dsp_inf_loop_after) {
++		/*
++		 * PID 2 should be kthreadd which should mostly be idle and off
++		 * the scheduler. Let's keep dispatching it to force the kernel
++		 * to call this function over and over again.
++		 */
++		p = bpf_task_from_pid(2);
++		if (p) {
++			scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, 0);
++			bpf_task_release(p);
++			return;
++		}
++	}
++
++	if (!(cpuc = bpf_map_lookup_elem(&cpu_ctx_stor, &zero))) {
++		scx_bpf_error("failed to look up cpu_ctx");
++		return;
++	}
++
++	for (i = 0; i < 5; i++) {
++		/* Advance the dispatch cursor and pick the fifo. */
++		if (!cpuc->dsp_cnt) {
++			cpuc->dsp_idx = (cpuc->dsp_idx + 1) % 5;
++			cpuc->dsp_cnt = 1 << cpuc->dsp_idx;
++		}
++
++		fifo = bpf_map_lookup_elem(&queue_arr, &cpuc->dsp_idx);
++		if (!fifo) {
++			scx_bpf_error("failed to find ring %llu", cpuc->dsp_idx);
++			return;
++		}
++
++		/* Dispatch or advance. */
++		bpf_repeat(BPF_MAX_LOOPS) {
++			struct task_ctx *tctx;
++
++			if (bpf_map_pop_elem(fifo, &pid))
++				break;
++
++			p = bpf_task_from_pid(pid);
++			if (!p)
++				continue;
++
++			if (!(tctx = lookup_task_ctx(p))) {
++				bpf_task_release(p);
++				return;
++			}
++
++			if (tctx->highpri)
++				__sync_fetch_and_sub(&nr_highpri_queued, 1);
++
++			update_core_sched_head_seq(p);
++			__sync_fetch_and_add(&nr_dispatched, 1);
++
++			scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, 0);
++			bpf_task_release(p);
++
++			batch--;
++			cpuc->dsp_cnt--;
++			if (!batch || !scx_bpf_dispatch_nr_slots()) {
++				if (dispatch_highpri(false))
++					return;
++				scx_bpf_consume(SHARED_DSQ);
++				return;
++			}
++			if (!cpuc->dsp_cnt)
++				break;
++		}
++
++		cpuc->dsp_cnt = 0;
++	}
++}
++
++void BPF_STRUCT_OPS(qmap_tick, struct task_struct *p)
++{
++	struct cpu_ctx *cpuc;
++	u32 zero = 0;
++	int idx;
++
++	if (!(cpuc = bpf_map_lookup_elem(&cpu_ctx_stor, &zero))) {
++		scx_bpf_error("failed to look up cpu_ctx");
++		return;
++	}
++
++	/*
++	 * Use the running avg of weights to select the target cpuperf level.
++	 * This is a demonstration of the cpuperf feature rather than a
++	 * practical strategy to regulate CPU frequency.
++	 */
++	cpuc->avg_weight = cpuc->avg_weight * 3 / 4 + p->scx.weight / 4;
++	idx = weight_to_idx(cpuc->avg_weight);
++	cpuc->cpuperf_target = qidx_to_cpuperf_target[idx];
++
++	scx_bpf_cpuperf_set(scx_bpf_task_cpu(p), cpuc->cpuperf_target);
++}
++
++/*
++ * The distance from the head of the queue scaled by the weight of the queue.
++ * The lower the number, the older the task and the higher the priority.
++ */
++static s64 task_qdist(struct task_struct *p)
++{
++	int idx = weight_to_idx(p->scx.weight);
++	struct task_ctx *tctx;
++	s64 qdist;
++
++	tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0);
++	if (!tctx) {
++		scx_bpf_error("task_ctx lookup failed");
++		return 0;
++	}
++
++	qdist = tctx->core_sched_seq - core_sched_head_seqs[idx];
++
++	/*
++	 * As queue index increments, the priority doubles. The queue w/ index 3
++	 * is dispatched twice more frequently than 2. Reflect the difference by
++	 * scaling qdists accordingly. Note that the shift amount needs to be
++	 * flipped depending on the sign to avoid flipping priority direction.
++	 */
++	if (qdist >= 0)
++		return qdist << (4 - idx);
++	else
++		return qdist << idx;
++}
++
++/*
++ * This is called to determine the task ordering when core-sched is picking
++ * tasks to execute on SMT siblings and should encode about the same ordering as
++ * the regular scheduling path. Use the priority-scaled distances from the head
++ * of the queues to compare the two tasks which should be consistent with the
++ * dispatch path behavior.
++ */
++bool BPF_STRUCT_OPS(qmap_core_sched_before,
++		    struct task_struct *a, struct task_struct *b)
++{
++	return task_qdist(a) > task_qdist(b);
++}
++
++void BPF_STRUCT_OPS(qmap_cpu_release, s32 cpu, struct scx_cpu_release_args *args)
++{
++	u32 cnt;
++
++	/*
++	 * Called when @cpu is taken by a higher priority scheduling class. This
++	 * makes @cpu no longer available for executing sched_ext tasks. As we
++	 * don't want the tasks in @cpu's local dsq to sit there until @cpu
++	 * becomes available again, re-enqueue them into the global dsq. See
++	 * %SCX_ENQ_REENQ handling in qmap_enqueue().
++	 */
++	cnt = scx_bpf_reenqueue_local();
++	if (cnt)
++		__sync_fetch_and_add(&nr_reenqueued, cnt);
++}
++
++s32 BPF_STRUCT_OPS(qmap_init_task, struct task_struct *p,
++		   struct scx_init_task_args *args)
++{
++	if (p->tgid == disallow_tgid)
++		p->scx.disallow = true;
++
++	/*
++	 * @p is new. Let's ensure that its task_ctx is available. We can sleep
++	 * in this function and the following will automatically use GFP_KERNEL.
++	 */
++	if (bpf_task_storage_get(&task_ctx_stor, p, 0,
++				 BPF_LOCAL_STORAGE_GET_F_CREATE))
++		return 0;
++	else
++		return -ENOMEM;
++}
++
++void BPF_STRUCT_OPS(qmap_dump, struct scx_dump_ctx *dctx)
++{
++	s32 i, pid;
++
++	if (suppress_dump)
++		return;
++
++	bpf_for(i, 0, 5) {
++		void *fifo;
++
++		if (!(fifo = bpf_map_lookup_elem(&queue_arr, &i)))
++			return;
++
++		scx_bpf_dump("QMAP FIFO[%d]:", i);
++		bpf_repeat(4096) {
++			if (bpf_map_pop_elem(fifo, &pid))
++				break;
++			scx_bpf_dump(" %d", pid);
++		}
++		scx_bpf_dump("\n");
++	}
++}
++
++void BPF_STRUCT_OPS(qmap_dump_cpu, struct scx_dump_ctx *dctx, s32 cpu, bool idle)
++{
++	u32 zero = 0;
++	struct cpu_ctx *cpuc;
++
++	if (suppress_dump || idle)
++		return;
++	if (!(cpuc = bpf_map_lookup_percpu_elem(&cpu_ctx_stor, &zero, cpu)))
++		return;
++
++	scx_bpf_dump("QMAP: dsp_idx=%llu dsp_cnt=%llu avg_weight=%u cpuperf_target=%u",
++		     cpuc->dsp_idx, cpuc->dsp_cnt, cpuc->avg_weight,
++		     cpuc->cpuperf_target);
++}
++
++void BPF_STRUCT_OPS(qmap_dump_task, struct scx_dump_ctx *dctx, struct task_struct *p)
++{
++	struct task_ctx *taskc;
++
++	if (suppress_dump)
++		return;
++	if (!(taskc = bpf_task_storage_get(&task_ctx_stor, p, 0, 0)))
++		return;
++
++	scx_bpf_dump("QMAP: force_local=%d core_sched_seq=%llu",
++		     taskc->force_local, taskc->core_sched_seq);
++}
++
++/*
++ * Print out the online and possible CPU map using bpf_printk() as a
++ * demonstration of using the cpumask kfuncs and ops.cpu_on/offline().
++ */
++static void print_cpus(void)
++{
++	const struct cpumask *possible, *online;
++	s32 cpu;
++	char buf[128] = "", *p;
++	int idx;
++
++	possible = scx_bpf_get_possible_cpumask();
++	online = scx_bpf_get_online_cpumask();
++
++	idx = 0;
++	bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) {
++		if (!(p = MEMBER_VPTR(buf, [idx++])))
++			break;
++		if (bpf_cpumask_test_cpu(cpu, online))
++			*p++ = 'O';
++		else if (bpf_cpumask_test_cpu(cpu, possible))
++			*p++ = 'X';
++		else
++			*p++ = ' ';
++
++		if ((cpu & 7) == 7) {
++			if (!(p = MEMBER_VPTR(buf, [idx++])))
++				break;
++			*p++ = '|';
++		}
++	}
++	buf[sizeof(buf) - 1] = '\0';
++
++	scx_bpf_put_cpumask(online);
++	scx_bpf_put_cpumask(possible);
++
++	bpf_printk("CPUS: |%s", buf);
++}
++
++void BPF_STRUCT_OPS(qmap_cpu_online, s32 cpu)
++{
++	bpf_printk("CPU %d coming online", cpu);
++	/* @cpu is already online at this point */
++	print_cpus();
++}
++
++void BPF_STRUCT_OPS(qmap_cpu_offline, s32 cpu)
++{
++	bpf_printk("CPU %d going offline", cpu);
++	/* @cpu is still online at this point */
++	print_cpus();
++}
++
++struct monitor_timer {
++	struct bpf_timer timer;
++};
++
++struct {
++	__uint(type, BPF_MAP_TYPE_ARRAY);
++	__uint(max_entries, 1);
++	__type(key, u32);
++	__type(value, struct monitor_timer);
++} monitor_timer SEC(".maps");
++
++/*
++ * Print out the min, avg and max performance levels of CPUs every second to
++ * demonstrate the cpuperf interface.
++ */
++static void monitor_cpuperf(void)
++{
++	u32 zero = 0, nr_cpu_ids;
++	u64 cap_sum = 0, cur_sum = 0, cur_min = SCX_CPUPERF_ONE, cur_max = 0;
++	u64 target_sum = 0, target_min = SCX_CPUPERF_ONE, target_max = 0;
++	const struct cpumask *online;
++	int i, nr_online_cpus = 0;
++
++	nr_cpu_ids = scx_bpf_nr_cpu_ids();
++	online = scx_bpf_get_online_cpumask();
++
++	bpf_for(i, 0, nr_cpu_ids) {
++		struct cpu_ctx *cpuc;
++		u32 cap, cur;
++
++		if (!bpf_cpumask_test_cpu(i, online))
++			continue;
++		nr_online_cpus++;
++
++		/* collect the capacity and current cpuperf */
++		cap = scx_bpf_cpuperf_cap(i);
++		cur = scx_bpf_cpuperf_cur(i);
++
++		cur_min = cur < cur_min ? cur : cur_min;
++		cur_max = cur > cur_max ? cur : cur_max;
++
++		/*
++		 * $cur is relative to $cap. Scale it down accordingly so that
++		 * it's in the same scale as other CPUs and $cur_sum/$cap_sum
++		 * makes sense.
++		 */
++		cur_sum += cur * cap / SCX_CPUPERF_ONE;
++		cap_sum += cap;
++
++		if (!(cpuc = bpf_map_lookup_percpu_elem(&cpu_ctx_stor, &zero, i))) {
++			scx_bpf_error("failed to look up cpu_ctx");
++			goto out;
++		}
++
++		/* collect target */
++		cur = cpuc->cpuperf_target;
++		target_sum += cur;
++		target_min = cur < target_min ? cur : target_min;
++		target_max = cur > target_max ? cur : target_max;
++	}
++
++	cpuperf_min = cur_min;
++	cpuperf_avg = cur_sum * SCX_CPUPERF_ONE / cap_sum;
++	cpuperf_max = cur_max;
++
++	cpuperf_target_min = target_min;
++	cpuperf_target_avg = target_sum / nr_online_cpus;
++	cpuperf_target_max = target_max;
++out:
++	scx_bpf_put_cpumask(online);
++}
++
++/*
++ * Dump the currently queued tasks in the shared DSQ to demonstrate the usage of
++ * scx_bpf_dsq_nr_queued() and DSQ iterator. Raise the dispatch batch count to
++ * see meaningful dumps in the trace pipe.
++ */
++static void dump_shared_dsq(void)
++{
++	struct task_struct *p;
++	s32 nr;
++
++	if (!(nr = scx_bpf_dsq_nr_queued(SHARED_DSQ)))
++		return;
++
++	bpf_printk("Dumping %d tasks in SHARED_DSQ in reverse order", nr);
++
++	bpf_rcu_read_lock();
++	bpf_for_each(scx_dsq, p, SHARED_DSQ, SCX_DSQ_ITER_REV)
++		bpf_printk("%s[%d]", p->comm, p->pid);
++	bpf_rcu_read_unlock();
++}
++
++static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
++{
++	bpf_rcu_read_lock();
++	dispatch_highpri(true);
++	bpf_rcu_read_unlock();
++
++	monitor_cpuperf();
++
++	if (print_shared_dsq)
++		dump_shared_dsq();
++
++	bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
++	return 0;
++}
++
++s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init)
++{
++	u32 key = 0;
++	struct bpf_timer *timer;
++	s32 ret;
++
++	print_cpus();
++
++	ret = scx_bpf_create_dsq(SHARED_DSQ, -1);
++	if (ret)
++		return ret;
++
++	ret = scx_bpf_create_dsq(HIGHPRI_DSQ, -1);
++	if (ret)
++		return ret;
++
++	timer = bpf_map_lookup_elem(&monitor_timer, &key);
++	if (!timer)
++		return -ESRCH;
++
++	bpf_timer_init(timer, &monitor_timer, CLOCK_MONOTONIC);
++	bpf_timer_set_callback(timer, monitor_timerfn);
++
++	return bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
++}
++
++void BPF_STRUCT_OPS(qmap_exit, struct scx_exit_info *ei)
++{
++	UEI_RECORD(uei, ei);
++}
++
++SCX_OPS_DEFINE(qmap_ops,
++	       .select_cpu		= (void *)qmap_select_cpu,
++	       .enqueue			= (void *)qmap_enqueue,
++	       .dequeue			= (void *)qmap_dequeue,
++	       .dispatch		= (void *)qmap_dispatch,
++	       .tick			= (void *)qmap_tick,
++	       .core_sched_before	= (void *)qmap_core_sched_before,
++	       .cpu_release		= (void *)qmap_cpu_release,
++	       .init_task		= (void *)qmap_init_task,
++	       .dump			= (void *)qmap_dump,
++	       .dump_cpu		= (void *)qmap_dump_cpu,
++	       .dump_task		= (void *)qmap_dump_task,
++	       .cpu_online		= (void *)qmap_cpu_online,
++	       .cpu_offline		= (void *)qmap_cpu_offline,
++	       .init			= (void *)qmap_init,
++	       .exit			= (void *)qmap_exit,
++	       .flags			= SCX_OPS_ENQ_LAST,
++	       .timeout_ms		= 5000U,
++	       .name			= "qmap");
+diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c
+new file mode 100644
+index 000000000000..ac45a02b4055
+--- /dev/null
++++ b/tools/sched_ext/scx_qmap.c
+@@ -0,0 +1,153 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
++ */
++#include <stdio.h>
++#include <stdlib.h>
++#include <unistd.h>
++#include <inttypes.h>
++#include <signal.h>
++#include <libgen.h>
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include "scx_qmap.bpf.skel.h"
++
++const char help_fmt[] =
++"A simple five-level FIFO queue sched_ext scheduler.\n"
++"\n"
++"See the top-level comment in .bpf.c for more details.\n"
++"\n"
++"Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-b COUNT]\n"
++"       [-P] [-H] [-d PID] [-D LEN] [-S] [-p] [-v]\n"
++"\n"
++"  -s SLICE_US   Override slice duration\n"
++"  -e COUNT      Trigger scx_bpf_error() after COUNT enqueues\n"
++"  -t COUNT      Stall every COUNT'th user thread\n"
++"  -T COUNT      Stall every COUNT'th kernel thread\n"
++"  -l COUNT      Trigger dispatch infinite looping after COUNT dispatches\n"
++"  -b COUNT      Dispatch upto COUNT tasks together\n"
++"  -P            Print out DSQ content to trace_pipe every second, use with -b\n"
++"  -H            Boost nice -20 tasks in SHARED_DSQ, use with -b\n"
++"  -d PID        Disallow a process from switching into SCHED_EXT (-1 for self)\n"
++"  -D LEN        Set scx_exit_info.dump buffer length\n"
++"  -S            Suppress qmap-specific debug dump\n"
++"  -p            Switch only tasks on SCHED_EXT policy instead of all\n"
++"  -v            Print libbpf debug messages\n"
++"  -h            Display this help and exit\n";
++
++static bool verbose;
++static volatile int exit_req;
++
++static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
++{
++	if (level == LIBBPF_DEBUG && !verbose)
++		return 0;
++	return vfprintf(stderr, format, args);
++}
++
++static void sigint_handler(int dummy)
++{
++	exit_req = 1;
++}
++
++int main(int argc, char **argv)
++{
++	struct scx_qmap *skel;
++	struct bpf_link *link;
++	int opt;
++
++	libbpf_set_print(libbpf_print_fn);
++	signal(SIGINT, sigint_handler);
++	signal(SIGTERM, sigint_handler);
++
++	skel = SCX_OPS_OPEN(qmap_ops, scx_qmap);
++
++	while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PHd:D:Spvh")) != -1) {
++		switch (opt) {
++		case 's':
++			skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
++			break;
++		case 'e':
++			skel->bss->test_error_cnt = strtoul(optarg, NULL, 0);
++			break;
++		case 't':
++			skel->rodata->stall_user_nth = strtoul(optarg, NULL, 0);
++			break;
++		case 'T':
++			skel->rodata->stall_kernel_nth = strtoul(optarg, NULL, 0);
++			break;
++		case 'l':
++			skel->rodata->dsp_inf_loop_after = strtoul(optarg, NULL, 0);
++			break;
++		case 'b':
++			skel->rodata->dsp_batch = strtoul(optarg, NULL, 0);
++			break;
++		case 'P':
++			skel->rodata->print_shared_dsq = true;
++			break;
++		case 'H':
++			skel->rodata->highpri_boosting = true;
++			break;
++		case 'd':
++			skel->rodata->disallow_tgid = strtol(optarg, NULL, 0);
++			if (skel->rodata->disallow_tgid < 0)
++				skel->rodata->disallow_tgid = getpid();
++			break;
++		case 'D':
++			skel->struct_ops.qmap_ops->exit_dump_len = strtoul(optarg, NULL, 0);
++			break;
++		case 'S':
++			skel->rodata->suppress_dump = true;
++			break;
++		case 'p':
++			skel->struct_ops.qmap_ops->flags |= SCX_OPS_SWITCH_PARTIAL;
++			break;
++		case 'v':
++			verbose = true;
++			break;
++		default:
++			fprintf(stderr, help_fmt, basename(argv[0]));
++			return opt != 'h';
++		}
++	}
++
++	SCX_OPS_LOAD(skel, qmap_ops, scx_qmap, uei);
++	link = SCX_OPS_ATTACH(skel, qmap_ops, scx_qmap);
++
++	while (!exit_req && !UEI_EXITED(skel, uei)) {
++		long nr_enqueued = skel->bss->nr_enqueued;
++		long nr_dispatched = skel->bss->nr_dispatched;
++
++		printf("stats  : enq=%lu dsp=%lu delta=%ld reenq=%"PRIu64" deq=%"PRIu64" core=%"PRIu64" enq_ddsp=%"PRIu64"\n",
++		       nr_enqueued, nr_dispatched, nr_enqueued - nr_dispatched,
++		       skel->bss->nr_reenqueued, skel->bss->nr_dequeued,
++		       skel->bss->nr_core_sched_execed,
++		       skel->bss->nr_ddsp_from_enq);
++		printf("         exp_local=%"PRIu64" exp_remote=%"PRIu64" exp_timer=%"PRIu64" exp_lost=%"PRIu64"\n",
++		       skel->bss->nr_expedited_local,
++		       skel->bss->nr_expedited_remote,
++		       skel->bss->nr_expedited_from_timer,
++		       skel->bss->nr_expedited_lost);
++		if (__COMPAT_has_ksym("scx_bpf_cpuperf_cur"))
++			printf("cpuperf: cur min/avg/max=%u/%u/%u target min/avg/max=%u/%u/%u\n",
++			       skel->bss->cpuperf_min,
++			       skel->bss->cpuperf_avg,
++			       skel->bss->cpuperf_max,
++			       skel->bss->cpuperf_target_min,
++			       skel->bss->cpuperf_target_avg,
++			       skel->bss->cpuperf_target_max);
++		fflush(stdout);
++		sleep(1);
++	}
++
++	bpf_link__destroy(link);
++	UEI_REPORT(skel, uei);
++	scx_qmap__destroy(skel);
++	/*
++	 * scx_qmap implements ops.cpu_on/offline() and doesn't need to restart
++	 * on CPU hotplug events.
++	 */
++	return 0;
++}
+diff --git a/tools/sched_ext/scx_show_state.py b/tools/sched_ext/scx_show_state.py
+new file mode 100644
+index 000000000000..8bc626ede1c4
+--- /dev/null
++++ b/tools/sched_ext/scx_show_state.py
+@@ -0,0 +1,40 @@
++#!/usr/bin/env drgn
++#
++# Copyright (C) 2024 Tejun Heo <tj@kernel.org>
++# Copyright (C) 2024 Meta Platforms, Inc. and affiliates.
++
++desc = """
++This is a drgn script to show the current sched_ext state.
++For more info on drgn, visit https://github.com/osandov/drgn.
++"""
++
++import drgn
++import sys
++
++def err(s):
++    print(s, file=sys.stderr, flush=True)
++    sys.exit(1)
++
++def read_int(name):
++    return int(prog[name].value_())
++
++def read_atomic(name):
++    return prog[name].counter.value_()
++
++def read_static_key(name):
++    return prog[name].key.enabled.counter.value_()
++
++def ops_state_str(state):
++    return prog['scx_ops_enable_state_str'][state].string_().decode()
++
++ops = prog['scx_ops']
++enable_state = read_atomic("scx_ops_enable_state_var")
++
++print(f'ops           : {ops.name.string_().decode()}')
++print(f'enabled       : {read_static_key("__scx_ops_enabled")}')
++print(f'switching_all : {read_int("scx_switching_all")}')
++print(f'switched_all  : {read_static_key("__scx_switched_all")}')
++print(f'enable_state  : {ops_state_str(enable_state)} ({enable_state})')
++print(f'bypass_depth  : {read_atomic("scx_ops_bypass_depth")}')
++print(f'nr_rejected   : {read_atomic("scx_nr_rejected")}')
++print(f'enable_seq    : {read_atomic("scx_enable_seq")}')
+diff --git a/tools/sched_ext/scx_simple.bpf.c b/tools/sched_ext/scx_simple.bpf.c
+new file mode 100644
+index 000000000000..ed7e8d535fc5
+--- /dev/null
++++ b/tools/sched_ext/scx_simple.bpf.c
+@@ -0,0 +1,156 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * A simple scheduler.
++ *
++ * By default, it operates as a simple global weighted vtime scheduler and can
++ * be switched to FIFO scheduling. It also demonstrates the following niceties.
++ *
++ * - Statistics tracking how many tasks are queued to local and global dsq's.
++ * - Termination notification for userspace.
++ *
++ * While very simple, this scheduler should work reasonably well on CPUs with a
++ * uniform L3 cache topology. While preemption is not implemented, the fact that
++ * the scheduling queue is shared across all CPUs means that whatever is at the
++ * front of the queue is likely to be executed fairly quickly given enough
++ * number of CPUs. The FIFO scheduling mode may be beneficial to some workloads
++ * but comes with the usual problems with FIFO scheduling where saturating
++ * threads can easily drown out interactive ones.
++ *
++ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
++ */
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++const volatile bool fifo_sched;
++
++static u64 vtime_now;
++UEI_DEFINE(uei);
++
++/*
++ * Built-in DSQs such as SCX_DSQ_GLOBAL cannot be used as priority queues
++ * (meaning, cannot be dispatched to with scx_bpf_dispatch_vtime()). We
++ * therefore create a separate DSQ with ID 0 that we dispatch to and consume
++ * from. If scx_simple only supported global FIFO scheduling, then we could
++ * just use SCX_DSQ_GLOBAL.
++ */
++#define SHARED_DSQ 0
++
++struct {
++	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
++	__uint(key_size, sizeof(u32));
++	__uint(value_size, sizeof(u64));
++	__uint(max_entries, 2);			/* [local, global] */
++} stats SEC(".maps");
++
++static void stat_inc(u32 idx)
++{
++	u64 *cnt_p = bpf_map_lookup_elem(&stats, &idx);
++	if (cnt_p)
++		(*cnt_p)++;
++}
++
++static inline bool vtime_before(u64 a, u64 b)
++{
++	return (s64)(a - b) < 0;
++}
++
++s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
++{
++	bool is_idle = false;
++	s32 cpu;
++
++	cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);
++	if (is_idle) {
++		stat_inc(0);	/* count local queueing */
++		scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
++	}
++
++	return cpu;
++}
++
++void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags)
++{
++	stat_inc(1);	/* count global queueing */
++
++	if (fifo_sched) {
++		scx_bpf_dispatch(p, SHARED_DSQ, SCX_SLICE_DFL, enq_flags);
++	} else {
++		u64 vtime = p->scx.dsq_vtime;
++
++		/*
++		 * Limit the amount of budget that an idling task can accumulate
++		 * to one slice.
++		 */
++		if (vtime_before(vtime, vtime_now - SCX_SLICE_DFL))
++			vtime = vtime_now - SCX_SLICE_DFL;
++
++		scx_bpf_dispatch_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime,
++				       enq_flags);
++	}
++}
++
++void BPF_STRUCT_OPS(simple_dispatch, s32 cpu, struct task_struct *prev)
++{
++	scx_bpf_consume(SHARED_DSQ);
++}
++
++void BPF_STRUCT_OPS(simple_running, struct task_struct *p)
++{
++	if (fifo_sched)
++		return;
++
++	/*
++	 * Global vtime always progresses forward as tasks start executing. The
++	 * test and update can be performed concurrently from multiple CPUs and
++	 * thus racy. Any error should be contained and temporary. Let's just
++	 * live with it.
++	 */
++	if (vtime_before(vtime_now, p->scx.dsq_vtime))
++		vtime_now = p->scx.dsq_vtime;
++}
++
++void BPF_STRUCT_OPS(simple_stopping, struct task_struct *p, bool runnable)
++{
++	if (fifo_sched)
++		return;
++
++	/*
++	 * Scale the execution time by the inverse of the weight and charge.
++	 *
++	 * Note that the default yield implementation yields by setting
++	 * @p->scx.slice to zero and the following would treat the yielding task
++	 * as if it has consumed all its slice. If this penalizes yielding tasks
++	 * too much, determine the execution time by taking explicit timestamps
++	 * instead of depending on @p->scx.slice.
++	 */
++	p->scx.dsq_vtime += (SCX_SLICE_DFL - p->scx.slice) * 100 / p->scx.weight;
++}
++
++void BPF_STRUCT_OPS(simple_enable, struct task_struct *p)
++{
++	p->scx.dsq_vtime = vtime_now;
++}
++
++s32 BPF_STRUCT_OPS_SLEEPABLE(simple_init)
++{
++	return scx_bpf_create_dsq(SHARED_DSQ, -1);
++}
++
++void BPF_STRUCT_OPS(simple_exit, struct scx_exit_info *ei)
++{
++	UEI_RECORD(uei, ei);
++}
++
++SCX_OPS_DEFINE(simple_ops,
++	       .select_cpu		= (void *)simple_select_cpu,
++	       .enqueue			= (void *)simple_enqueue,
++	       .dispatch		= (void *)simple_dispatch,
++	       .running			= (void *)simple_running,
++	       .stopping		= (void *)simple_stopping,
++	       .enable			= (void *)simple_enable,
++	       .init			= (void *)simple_init,
++	       .exit			= (void *)simple_exit,
++	       .name			= "simple");
+diff --git a/tools/sched_ext/scx_simple.c b/tools/sched_ext/scx_simple.c
+new file mode 100644
+index 000000000000..76d83199545c
+--- /dev/null
++++ b/tools/sched_ext/scx_simple.c
+@@ -0,0 +1,107 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
++ */
++#include <stdio.h>
++#include <unistd.h>
++#include <signal.h>
++#include <libgen.h>
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include "scx_simple.bpf.skel.h"
++
++const char help_fmt[] =
++"A simple sched_ext scheduler.\n"
++"\n"
++"See the top-level comment in .bpf.c for more details.\n"
++"\n"
++"Usage: %s [-f] [-v]\n"
++"\n"
++"  -f            Use FIFO scheduling instead of weighted vtime scheduling\n"
++"  -v            Print libbpf debug messages\n"
++"  -h            Display this help and exit\n";
++
++static bool verbose;
++static volatile int exit_req;
++
++static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
++{
++	if (level == LIBBPF_DEBUG && !verbose)
++		return 0;
++	return vfprintf(stderr, format, args);
++}
++
++static void sigint_handler(int simple)
++{
++	exit_req = 1;
++}
++
++static void read_stats(struct scx_simple *skel, __u64 *stats)
++{
++	int nr_cpus = libbpf_num_possible_cpus();
++	__u64 cnts[2][nr_cpus];
++	__u32 idx;
++
++	memset(stats, 0, sizeof(stats[0]) * 2);
++
++	for (idx = 0; idx < 2; idx++) {
++		int ret, cpu;
++
++		ret = bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats),
++					  &idx, cnts[idx]);
++		if (ret < 0)
++			continue;
++		for (cpu = 0; cpu < nr_cpus; cpu++)
++			stats[idx] += cnts[idx][cpu];
++	}
++}
++
++int main(int argc, char **argv)
++{
++	struct scx_simple *skel;
++	struct bpf_link *link;
++	int opt;	/* getopt() returns int; -1 marks the end of options */
++	__u64 ecode;
++
++	libbpf_set_print(libbpf_print_fn);
++	signal(SIGINT, sigint_handler);
++	signal(SIGTERM, sigint_handler);
++restart:
++	optind = 1;	/* rescan argv so a restart keeps the same options */
++	skel = SCX_OPS_OPEN(simple_ops, scx_simple);
++	while ((opt = getopt(argc, argv, "fvh")) != -1) {
++		switch (opt) {
++		case 'f':
++			skel->rodata->fifo_sched = true;
++			break;
++		case 'v':
++			verbose = true;
++			break;
++		default:
++			fprintf(stderr, help_fmt, basename(argv[0]));
++			return opt != 'h';
++		}
++	}
++
++	SCX_OPS_LOAD(skel, simple_ops, scx_simple, uei);
++	link = SCX_OPS_ATTACH(skel, simple_ops, scx_simple);
++
++	while (!exit_req && !UEI_EXITED(skel, uei)) {
++		__u64 stats[2];
++
++		read_stats(skel, stats);
++		printf("local=%llu global=%llu\n", stats[0], stats[1]);
++		fflush(stdout);
++		sleep(1);
++	}
++
++	bpf_link__destroy(link);
++	ecode = UEI_REPORT(skel, uei);
++	scx_simple__destroy(skel);
++
++	if (UEI_ECODE_RESTART(ecode))
++		goto restart;
++	return 0;
++}
+diff --git a/tools/testing/selftests/sched_ext/.gitignore b/tools/testing/selftests/sched_ext/.gitignore
+new file mode 100644
+index 000000000000..ae5491a114c0
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/.gitignore
+@@ -0,0 +1,6 @@
++*
++!*.c
++!*.h
++!Makefile
++!.gitignore
++!config
+diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile
+new file mode 100644
+index 000000000000..0754a2c110a1
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/Makefile
+@@ -0,0 +1,218 @@
++# SPDX-License-Identifier: GPL-2.0
++# Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
++include ../../../build/Build.include
++include ../../../scripts/Makefile.arch
++include ../../../scripts/Makefile.include
++include ../lib.mk
++
++ifneq ($(LLVM),)
++ifneq ($(filter %/,$(LLVM)),)
++LLVM_PREFIX := $(LLVM)
++else ifneq ($(filter -%,$(LLVM)),)
++LLVM_SUFFIX := $(LLVM)
++endif
++
++CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) $(CLANG_FLAGS) -fintegrated-as
++else
++CC := gcc
++endif # LLVM
++
++ifneq ($(CROSS_COMPILE),)
++$(error CROSS_COMPILE not supported for scx selftests)
++endif # CROSS_COMPILE
++
++CURDIR := $(abspath .)
++REPOROOT := $(abspath ../../../..)
++TOOLSDIR := $(REPOROOT)/tools
++LIBDIR := $(TOOLSDIR)/lib
++BPFDIR := $(LIBDIR)/bpf
++TOOLSINCDIR := $(TOOLSDIR)/include
++BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool
++APIDIR := $(TOOLSINCDIR)/uapi
++GENDIR := $(REPOROOT)/include/generated
++GENHDR := $(GENDIR)/autoconf.h
++SCXTOOLSDIR := $(TOOLSDIR)/sched_ext
++SCXTOOLSINCDIR := $(TOOLSDIR)/sched_ext/include
++
++OUTPUT_DIR := $(CURDIR)/build
++OBJ_DIR := $(OUTPUT_DIR)/obj
++INCLUDE_DIR := $(OUTPUT_DIR)/include
++BPFOBJ_DIR := $(OBJ_DIR)/libbpf
++SCXOBJ_DIR := $(OBJ_DIR)/sched_ext
++BPFOBJ := $(BPFOBJ_DIR)/libbpf.a
++LIBBPF_OUTPUT := $(OBJ_DIR)/libbpf/libbpf.a
++DEFAULT_BPFTOOL := $(OUTPUT_DIR)/sbin/bpftool
++HOST_BUILD_DIR := $(OBJ_DIR)
++HOST_OUTPUT_DIR := $(OUTPUT_DIR)
++
++VMLINUX_BTF_PATHS ?= ../../../../vmlinux					\
++		     /sys/kernel/btf/vmlinux					\
++		     /boot/vmlinux-$(shell uname -r)
++VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
++ifeq ($(VMLINUX_BTF),)
++$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
++endif
++
++BPFTOOL ?= $(DEFAULT_BPFTOOL)
++
++ifneq ($(wildcard $(GENHDR)),)
++  GENFLAGS := -DHAVE_GENHDR
++endif
++
++CFLAGS += -g -O2 -rdynamic -pthread -Wall -Werror $(GENFLAGS)			\
++	  -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)				\
++	  -I$(TOOLSINCDIR) -I$(APIDIR) -I$(CURDIR)/include -I$(SCXTOOLSINCDIR)
++
++# Silence some warnings when compiled with clang
++ifneq ($(LLVM),)
++CFLAGS += -Wno-unused-command-line-argument
++endif
++
++LDFLAGS = -lelf -lz -lpthread -lzstd
++
++IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null |				\
++			grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
++
++# Get Clang's default includes on this system, as opposed to those seen by
++# '-target bpf'. This fixes "missing" files on some architectures/distros,
++# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
++#
++# Use '-idirafter': Don't interfere with include mechanics except where the
++# build would have failed anyways.
++define get_sys_includes
++$(shell $(1) -v -E - </dev/null 2>&1 \
++	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
++$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
++endef
++
++BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH)					\
++	     $(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)		\
++	     -I$(CURDIR)/include -I$(CURDIR)/include/bpf-compat			\
++	     -I$(INCLUDE_DIR) -I$(APIDIR) -I$(SCXTOOLSINCDIR)			\
++	     -I$(REPOROOT)/include						\
++	     $(call get_sys_includes,$(CLANG))					\
++	     -Wall -Wno-compare-distinct-pointer-types				\
++	     -Wno-incompatible-function-pointer-types				\
++	     -O2 -mcpu=v3
++
++# sort removes libbpf duplicates when not cross-building
++MAKE_DIRS := $(sort $(OBJ_DIR)/libbpf $(OBJ_DIR)/libbpf				\
++	       $(OBJ_DIR)/bpftool $(OBJ_DIR)/resolve_btfids			\
++	       $(INCLUDE_DIR) $(SCXOBJ_DIR))
++
++$(MAKE_DIRS):
++	$(call msg,MKDIR,,$@)
++	$(Q)mkdir -p $@
++
++$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)			\
++	   $(APIDIR)/linux/bpf.h						\
++	   | $(OBJ_DIR)/libbpf
++	$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(OBJ_DIR)/libbpf/	\
++		    EXTRA_CFLAGS='-g -O0 -fPIC'					\
++		    DESTDIR=$(OUTPUT_DIR) prefix= all install_headers
++
++$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)	\
++		    $(LIBBPF_OUTPUT) | $(OBJ_DIR)/bpftool
++	$(Q)$(MAKE) $(submake_extras)  -C $(BPFTOOLDIR)				\
++		    ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD)		\
++		    EXTRA_CFLAGS='-g -O0'					\
++		    OUTPUT=$(OBJ_DIR)/bpftool/					\
++		    LIBBPF_OUTPUT=$(OBJ_DIR)/libbpf/				\
++		    LIBBPF_DESTDIR=$(OUTPUT_DIR)/				\
++		    prefix= DESTDIR=$(OUTPUT_DIR)/ install-bin
++
++$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
++ifeq ($(VMLINUX_H),)
++	$(call msg,GEN,,$@)
++	$(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
++else
++	$(call msg,CP,,$@)
++	$(Q)cp "$(VMLINUX_H)" $@
++endif
++
++$(SCXOBJ_DIR)/%.bpf.o: %.bpf.c $(INCLUDE_DIR)/vmlinux.h	| $(BPFOBJ) $(SCXOBJ_DIR)
++	$(call msg,CLNG-BPF,,$(notdir $@))
++	$(Q)$(CLANG) $(BPF_CFLAGS) -target bpf -c $< -o $@
++
++$(INCLUDE_DIR)/%.bpf.skel.h: $(SCXOBJ_DIR)/%.bpf.o $(INCLUDE_DIR)/vmlinux.h $(BPFTOOL) | $(INCLUDE_DIR)
++	$(eval sched=$(notdir $@))
++	$(call msg,GEN-SKEL,,$(sched))
++	$(Q)$(BPFTOOL) gen object $(<:.o=.linked1.o) $<
++	$(Q)$(BPFTOOL) gen object $(<:.o=.linked2.o) $(<:.o=.linked1.o)
++	$(Q)$(BPFTOOL) gen object $(<:.o=.linked3.o) $(<:.o=.linked2.o)
++	$(Q)diff $(<:.o=.linked2.o) $(<:.o=.linked3.o)
++	$(Q)$(BPFTOOL) gen skeleton $(<:.o=.linked3.o) name $(subst .bpf.skel.h,,$(sched)) > $@
++	$(Q)$(BPFTOOL) gen subskeleton $(<:.o=.linked3.o) name $(subst .bpf.skel.h,,$(sched)) > $(@:.skel.h=.subskel.h)
++
++################
++# C schedulers #
++################
++
++override define CLEAN
++	rm -rf $(OUTPUT_DIR)
++	rm -f *.o *.bpf.o *.bpf.skel.h *.bpf.subskel.h
++	rm -f $(TEST_GEN_PROGS)
++	rm -f runner
++endef
++
++# Every testcase takes all of the BPF progs as dependencies by default. This
++# allows testcases to load any BPF scheduler, which is useful for testcases
++# that don't need their own prog to run their test.
++all_test_bpfprogs := $(foreach prog,$(wildcard *.bpf.c),$(INCLUDE_DIR)/$(patsubst %.c,%.skel.h,$(prog)))
++
++auto-test-targets :=			\
++	create_dsq			\
++	enq_last_no_enq_fails		\
++	enq_select_cpu_fails		\
++	ddsp_bogus_dsq_fail		\
++	ddsp_vtimelocal_fail		\
++	dsp_local_on			\
++	exit				\
++	hotplug				\
++	init_enable_count		\
++	maximal				\
++	maybe_null			\
++	minimal				\
++	prog_run			\
++	reload_loop			\
++	select_cpu_dfl			\
++	select_cpu_dfl_nodispatch	\
++	select_cpu_dispatch		\
++	select_cpu_dispatch_bad_dsq	\
++	select_cpu_dispatch_dbl_dsp	\
++	select_cpu_vtime		\
++	test_example			\
++
++testcase-targets := $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-test-targets)))
++
++$(SCXOBJ_DIR)/runner.o: runner.c | $(SCXOBJ_DIR)
++	$(CC) $(CFLAGS) -c $< -o $@
++
++# Create all of the test targets object files, whose testcase objects will be
++# registered into the runner in ELF constructors.
++#
++# Note that we must do double expansion here in order to support conditionally
++# compiling BPF object files only if one is present, as the wildcard Make
++# function doesn't support using implicit rules otherwise.
++$(testcase-targets): $(SCXOBJ_DIR)/%.o: %.c $(SCXOBJ_DIR)/runner.o $(all_test_bpfprogs) | $(SCXOBJ_DIR)
++	$(eval test=$(patsubst %.o,%.c,$(notdir $@)))
++	$(CC) $(CFLAGS) -c $< -o $@ $(SCXOBJ_DIR)/runner.o
++
++$(SCXOBJ_DIR)/util.o: util.c | $(SCXOBJ_DIR)
++	$(CC) $(CFLAGS) -c $< -o $@
++
++runner: $(SCXOBJ_DIR)/runner.o $(SCXOBJ_DIR)/util.o $(BPFOBJ) $(testcase-targets)
++	@echo "$(testcase-targets)"
++	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
++
++TEST_GEN_PROGS := runner
++
++all: runner
++
++.PHONY: all clean help
++
++.DEFAULT_GOAL := all
++
++.DELETE_ON_ERROR:
++
++.SECONDARY:
+diff --git a/tools/testing/selftests/sched_ext/config b/tools/testing/selftests/sched_ext/config
+new file mode 100644
+index 000000000000..0de9b4ee249d
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/config
+@@ -0,0 +1,9 @@
++CONFIG_SCHED_DEBUG=y
++CONFIG_SCHED_CLASS_EXT=y
++CONFIG_CGROUPS=y
++CONFIG_CGROUP_SCHED=y
++CONFIG_EXT_GROUP_SCHED=y
++CONFIG_BPF=y
++CONFIG_BPF_SYSCALL=y
++CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_BTF=y
+diff --git a/tools/testing/selftests/sched_ext/create_dsq.bpf.c b/tools/testing/selftests/sched_ext/create_dsq.bpf.c
+new file mode 100644
+index 000000000000..23f79ed343f0
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/create_dsq.bpf.c
+@@ -0,0 +1,58 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Create and destroy DSQs in a loop.
++ *
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++void BPF_STRUCT_OPS(create_dsq_exit_task, struct task_struct *p,
++		    struct scx_exit_task_args *args)
++{
++	scx_bpf_destroy_dsq(p->pid);
++}
++
++s32 BPF_STRUCT_OPS_SLEEPABLE(create_dsq_init_task, struct task_struct *p,
++			     struct scx_init_task_args *args)
++{
++	s32 err;
++
++	err = scx_bpf_create_dsq(p->pid, -1);
++	if (err)
++		scx_bpf_error("Failed to create DSQ for %s[%d]",
++			      p->comm, p->pid);
++
++	return err;
++}
++
++s32 BPF_STRUCT_OPS_SLEEPABLE(create_dsq_init)
++{
++	u32 i;
++	s32 err;
++
++	bpf_for(i, 0, 1024) {
++		err = scx_bpf_create_dsq(i, -1);
++		if (err) {
++			scx_bpf_error("Failed to create DSQ %d", i);
++			return err;	/* don't report success */
++		}
++	}
++
++	bpf_for(i, 0, 1024) {
++		scx_bpf_destroy_dsq(i);
++	}
++
++	return 0;
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops create_dsq_ops = {
++	.init_task		= create_dsq_init_task,
++	.exit_task		= create_dsq_exit_task,
++	.init			= create_dsq_init,
++	.name			= "create_dsq",
++};
+diff --git a/tools/testing/selftests/sched_ext/create_dsq.c b/tools/testing/selftests/sched_ext/create_dsq.c
+new file mode 100644
+index 000000000000..fa946d9146d4
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/create_dsq.c
+@@ -0,0 +1,57 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include "create_dsq.bpf.skel.h"
++#include "scx_test.h"
++
++static enum scx_test_status setup(void **ctx)
++{
++	struct create_dsq *skel;
++
++	skel = create_dsq__open_and_load();
++	if (!skel) {
++		SCX_ERR("Failed to open and load skel");
++		return SCX_TEST_FAIL;
++	}
++	*ctx = skel;
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	struct create_dsq *skel = ctx;
++	struct bpf_link *link;
++
++	link = bpf_map__attach_struct_ops(skel->maps.create_dsq_ops);
++	if (!link) {
++		SCX_ERR("Failed to attach scheduler");
++		return SCX_TEST_FAIL;
++	}
++
++	bpf_link__destroy(link);
++
++	return SCX_TEST_PASS;
++}
++
++static void cleanup(void *ctx)
++{
++	struct create_dsq *skel = ctx;
++
++	create_dsq__destroy(skel);
++}
++
++struct scx_test create_dsq = {
++	.name = "create_dsq",
++	.description = "Create and destroy a dsq in a loop",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&create_dsq)
+diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c
+new file mode 100644
+index 000000000000..e97ad41d354a
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c
+@@ -0,0 +1,42 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2024 Tejun Heo <tj@kernel.org>
++ */
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++UEI_DEFINE(uei);
++
++s32 BPF_STRUCT_OPS(ddsp_bogus_dsq_fail_select_cpu, struct task_struct *p,
++		   s32 prev_cpu, u64 wake_flags)
++{
++	s32 cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
++
++	if (cpu >= 0) {
++		/*
++		 * If we dispatch to a bogus DSQ that will fall back to the
++		 * builtin global DSQ, we fail gracefully.
++		 */
++		scx_bpf_dispatch_vtime(p, 0xcafef00d, SCX_SLICE_DFL,
++				       p->scx.dsq_vtime, 0);
++		return cpu;
++	}
++
++	return prev_cpu;
++}
++
++void BPF_STRUCT_OPS(ddsp_bogus_dsq_fail_exit, struct scx_exit_info *ei)
++{
++	UEI_RECORD(uei, ei);
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops ddsp_bogus_dsq_fail_ops = {
++	.select_cpu		= ddsp_bogus_dsq_fail_select_cpu,
++	.exit			= ddsp_bogus_dsq_fail_exit,
++	.name			= "ddsp_bogus_dsq_fail",
++	.timeout_ms		= 1000U,
++};
+diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c
+new file mode 100644
+index 000000000000..e65d22f23f3b
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c
+@@ -0,0 +1,57 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2024 Tejun Heo <tj@kernel.org>
++ */
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include "ddsp_bogus_dsq_fail.bpf.skel.h"
++#include "scx_test.h"
++
++static enum scx_test_status setup(void **ctx)
++{
++	struct ddsp_bogus_dsq_fail *skel;
++
++	skel = ddsp_bogus_dsq_fail__open_and_load();
++	SCX_FAIL_IF(!skel, "Failed to open and load skel");
++	*ctx = skel;
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	struct ddsp_bogus_dsq_fail *skel = ctx;
++	struct bpf_link *link;
++
++	link = bpf_map__attach_struct_ops(skel->maps.ddsp_bogus_dsq_fail_ops);
++	SCX_FAIL_IF(!link, "Failed to attach struct_ops");
++
++	sleep(1);
++
++	SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_ERROR));
++	bpf_link__destroy(link);
++
++	return SCX_TEST_PASS;
++}
++
++static void cleanup(void *ctx)
++{
++	struct ddsp_bogus_dsq_fail *skel = ctx;
++
++	ddsp_bogus_dsq_fail__destroy(skel);
++}
++
++struct scx_test ddsp_bogus_dsq_fail = {
++	.name = "ddsp_bogus_dsq_fail",
++	.description = "Verify we gracefully fail, and fall back to using a "
++		       "built-in DSQ, if we do a direct dispatch to an invalid"
++		       " DSQ in ops.select_cpu()",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&ddsp_bogus_dsq_fail)
+diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c
+new file mode 100644
+index 000000000000..dde7e7dafbfb
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c
+@@ -0,0 +1,39 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2024 Tejun Heo <tj@kernel.org>
++ */
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++UEI_DEFINE(uei);
++
++s32 BPF_STRUCT_OPS(ddsp_vtimelocal_fail_select_cpu, struct task_struct *p,
++		   s32 prev_cpu, u64 wake_flags)
++{
++	s32 cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
++
++	if (cpu >= 0) {
++		/* Shouldn't be allowed to vtime dispatch to a builtin DSQ. */
++		scx_bpf_dispatch_vtime(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL,
++				       p->scx.dsq_vtime, 0);
++		return cpu;
++	}
++
++	return prev_cpu;
++}
++
++void BPF_STRUCT_OPS(ddsp_vtimelocal_fail_exit, struct scx_exit_info *ei)
++{
++	UEI_RECORD(uei, ei);
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops ddsp_vtimelocal_fail_ops = {
++	.select_cpu		= ddsp_vtimelocal_fail_select_cpu,
++	.exit			= ddsp_vtimelocal_fail_exit,
++	.name			= "ddsp_vtimelocal_fail",
++	.timeout_ms		= 1000U,
++};
+diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c
+new file mode 100644
+index 000000000000..abafee587cd6
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c
+@@ -0,0 +1,56 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2024 Tejun Heo <tj@kernel.org>
++ */
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include <unistd.h>
++#include "ddsp_vtimelocal_fail.bpf.skel.h"
++#include "scx_test.h"
++
++static enum scx_test_status setup(void **ctx)
++{
++	struct ddsp_vtimelocal_fail *skel;
++
++	skel = ddsp_vtimelocal_fail__open_and_load();
++	SCX_FAIL_IF(!skel, "Failed to open and load skel");
++	*ctx = skel;
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	struct ddsp_vtimelocal_fail *skel = ctx;
++	struct bpf_link *link;
++
++	link = bpf_map__attach_struct_ops(skel->maps.ddsp_vtimelocal_fail_ops);
++	SCX_FAIL_IF(!link, "Failed to attach struct_ops");
++
++	sleep(1);
++
++	SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_ERROR));
++	bpf_link__destroy(link);
++
++	return SCX_TEST_PASS;
++}
++
++static void cleanup(void *ctx)
++{
++	struct ddsp_vtimelocal_fail *skel = ctx;
++
++	ddsp_vtimelocal_fail__destroy(skel);
++}
++
++struct scx_test ddsp_vtimelocal_fail = {
++	.name = "ddsp_vtimelocal_fail",
++	.description = "Verify we gracefully fail, and fall back to using a "
++		       "built-in DSQ, if we do a direct vtime dispatch to a "
++		       "built-in DSQ from ops.select_cpu()",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&ddsp_vtimelocal_fail)
+diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
+new file mode 100644
+index 000000000000..efb4672decb4
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
+@@ -0,0 +1,65 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++const volatile s32 nr_cpus;
++
++UEI_DEFINE(uei);
++
++struct {
++	__uint(type, BPF_MAP_TYPE_QUEUE);
++	__uint(max_entries, 8192);
++	__type(value, s32);
++} queue SEC(".maps");
++
++s32 BPF_STRUCT_OPS(dsp_local_on_select_cpu, struct task_struct *p,
++		   s32 prev_cpu, u64 wake_flags)
++{
++	return prev_cpu;
++}
++
++void BPF_STRUCT_OPS(dsp_local_on_enqueue, struct task_struct *p,
++		    u64 enq_flags)
++{
++	s32 pid = p->pid;
++
++	if (bpf_map_push_elem(&queue, &pid, 0))
++		scx_bpf_error("Failed to enqueue %s[%d]", p->comm, p->pid);
++}
++
++void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev)
++{
++	s32 pid, target;
++	struct task_struct *p;
++
++	if (bpf_map_pop_elem(&queue, &pid))
++		return;
++
++	p = bpf_task_from_pid(pid);
++	if (!p)
++		return;
++
++	target = bpf_get_prandom_u32() % nr_cpus;
++
++	scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, 0);
++	bpf_task_release(p);
++}
++
++void BPF_STRUCT_OPS(dsp_local_on_exit, struct scx_exit_info *ei)
++{
++	UEI_RECORD(uei, ei);
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops dsp_local_on_ops = {
++	.select_cpu		= dsp_local_on_select_cpu,
++	.enqueue		= dsp_local_on_enqueue,
++	.dispatch		= dsp_local_on_dispatch,
++	.exit			= dsp_local_on_exit,
++	.name			= "dsp_local_on",
++	.timeout_ms		= 1000U,
++};
+diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.c b/tools/testing/selftests/sched_ext/dsp_local_on.c
+new file mode 100644
+index 000000000000..472851b56854
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/dsp_local_on.c
+@@ -0,0 +1,58 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include <unistd.h>
++#include "dsp_local_on.bpf.skel.h"
++#include "scx_test.h"
++
++static enum scx_test_status setup(void **ctx)
++{
++	struct dsp_local_on *skel;
++
++	skel = dsp_local_on__open();
++	SCX_FAIL_IF(!skel, "Failed to open");
++
++	skel->rodata->nr_cpus = libbpf_num_possible_cpus();
++	SCX_FAIL_IF(dsp_local_on__load(skel), "Failed to load skel");
++	*ctx = skel;
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	struct dsp_local_on *skel = ctx;
++	struct bpf_link *link;
++
++	link = bpf_map__attach_struct_ops(skel->maps.dsp_local_on_ops);
++	SCX_FAIL_IF(!link, "Failed to attach struct_ops");
++
++	/* Just sleeping is fine, plenty of scheduling events happening */
++	sleep(1);
++
++	SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_ERROR));
++	bpf_link__destroy(link);
++
++	return SCX_TEST_PASS;
++}
++
++static void cleanup(void *ctx)
++{
++	struct dsp_local_on *skel = ctx;
++
++	dsp_local_on__destroy(skel);
++}
++
++struct scx_test dsp_local_on = {
++	.name = "dsp_local_on",
++	.description = "Verify we can directly dispatch tasks to a local DSQ "
++		       "from ops.dispatch()",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&dsp_local_on)
+diff --git a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c
+new file mode 100644
+index 000000000000..b0b99531d5d5
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c
+@@ -0,0 +1,21 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * A scheduler that sets SCX_OPS_ENQ_LAST without defining ops.enqueue(), used
++ * to validate that such a scheduler is rejected.
++ *
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++SEC(".struct_ops.link")
++struct sched_ext_ops enq_last_no_enq_fails_ops = {
++	.name			= "enq_last_no_enq_fails",
++	/* Need to define ops.enqueue() with SCX_OPS_ENQ_LAST */
++	.flags			= SCX_OPS_ENQ_LAST,
++	.timeout_ms		= 1000U,
++};
+diff --git a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c
+new file mode 100644
+index 000000000000..2a3eda5e2c0b
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c
+@@ -0,0 +1,60 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include "enq_last_no_enq_fails.bpf.skel.h"
++#include "scx_test.h"
++
++static enum scx_test_status setup(void **ctx)
++{
++	struct enq_last_no_enq_fails *skel;
++
++	skel = enq_last_no_enq_fails__open_and_load();
++	if (!skel) {
++		SCX_ERR("Failed to open and load skel");
++		return SCX_TEST_FAIL;
++	}
++	*ctx = skel;
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	struct enq_last_no_enq_fails *skel = ctx;
++	struct bpf_link *link;
++
++	link = bpf_map__attach_struct_ops(skel->maps.enq_last_no_enq_fails_ops);
++	if (link) {
++		SCX_ERR("Incorrectly succeeded in attaching scheduler");
++		bpf_link__destroy(link);
++		return SCX_TEST_FAIL;
++	}
++
++	/* Attach failed as expected, so there is no link to destroy. */
++	return SCX_TEST_PASS;
++}
++
++static void cleanup(void *ctx)
++{
++	struct enq_last_no_enq_fails *skel = ctx;
++
++	enq_last_no_enq_fails__destroy(skel);
++}
++
++struct scx_test enq_last_no_enq_fails = {
++	.name = "enq_last_no_enq_fails",
++	.description = "Verify we fail to load a scheduler if we specify "
++		       "the SCX_OPS_ENQ_LAST flag without defining "
++		       "ops.enqueue()",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&enq_last_no_enq_fails)
+diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
+new file mode 100644
+index 000000000000..b3dfc1033cd6
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
+@@ -0,0 +1,43 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++/* Manually specify the signature until the kfunc is added to the scx repo. */
++s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
++			   bool *found) __ksym;
++
++s32 BPF_STRUCT_OPS(enq_select_cpu_fails_select_cpu, struct task_struct *p,
++		   s32 prev_cpu, u64 wake_flags)
++{
++	return prev_cpu;
++}
++
++void BPF_STRUCT_OPS(enq_select_cpu_fails_enqueue, struct task_struct *p,
++		    u64 enq_flags)
++{
++	/*
++	 * Need to initialize the variable or the verifier will fail to load.
++	 * Improving these semantics is actively being worked on.
++	 */
++	bool found = false;
++
++	/* Can only call from ops.select_cpu() */
++	scx_bpf_select_cpu_dfl(p, 0, 0, &found);
++
++	scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops enq_select_cpu_fails_ops = {
++	.select_cpu		= enq_select_cpu_fails_select_cpu,
++	.enqueue		= enq_select_cpu_fails_enqueue,
++	.name			= "enq_select_cpu_fails",
++	.timeout_ms		= 1000U,
++};
+diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c
+new file mode 100644
+index 000000000000..dd1350e5f002
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c
+@@ -0,0 +1,61 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include "enq_select_cpu_fails.bpf.skel.h"
++#include "scx_test.h"
++
++static enum scx_test_status setup(void **ctx)
++{
++	struct enq_select_cpu_fails *skel;
++
++	skel = enq_select_cpu_fails__open_and_load();
++	if (!skel) {
++		SCX_ERR("Failed to open and load skel");
++		return SCX_TEST_FAIL;
++	}
++	*ctx = skel;
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	struct enq_select_cpu_fails *skel = ctx;
++	struct bpf_link *link;
++
++	link = bpf_map__attach_struct_ops(skel->maps.enq_select_cpu_fails_ops);
++	if (!link) {
++		SCX_ERR("Failed to attach scheduler");
++		return SCX_TEST_FAIL;
++	}
++
++	sleep(1);
++
++	bpf_link__destroy(link);
++
++	return SCX_TEST_PASS;
++}
++
++static void cleanup(void *ctx)
++{
++	struct enq_select_cpu_fails *skel = ctx;
++
++	enq_select_cpu_fails__destroy(skel);
++}
++
++struct scx_test enq_select_cpu_fails = {
++	.name = "enq_select_cpu_fails",
++	.description = "Verify we fail to call scx_bpf_select_cpu_dfl() "
++		       "from ops.enqueue()",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&enq_select_cpu_fails)
+diff --git a/tools/testing/selftests/sched_ext/exit.bpf.c b/tools/testing/selftests/sched_ext/exit.bpf.c
+new file mode 100644
+index 000000000000..ae12ddaac921
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/exit.bpf.c
+@@ -0,0 +1,84 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++#include "exit_test.h"
++
++const volatile int exit_point;
++UEI_DEFINE(uei);
++
++#define EXIT_CLEANLY() scx_bpf_exit(exit_point, "%d", exit_point)
++
++s32 BPF_STRUCT_OPS(exit_select_cpu, struct task_struct *p,
++		   s32 prev_cpu, u64 wake_flags)
++{
++	bool found;
++
++	if (exit_point == EXIT_SELECT_CPU)
++		EXIT_CLEANLY();
++
++	return scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &found);
++}
++
++void BPF_STRUCT_OPS(exit_enqueue, struct task_struct *p, u64 enq_flags)
++{
++	if (exit_point == EXIT_ENQUEUE)
++		EXIT_CLEANLY();
++
++	scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
++}
++
++void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p)
++{
++	if (exit_point == EXIT_DISPATCH)
++		EXIT_CLEANLY();
++
++	scx_bpf_consume(SCX_DSQ_GLOBAL);
++}
++
++void BPF_STRUCT_OPS(exit_enable, struct task_struct *p)
++{
++	if (exit_point == EXIT_ENABLE)
++		EXIT_CLEANLY();
++}
++
++s32 BPF_STRUCT_OPS(exit_init_task, struct task_struct *p,
++		    struct scx_init_task_args *args)
++{
++	if (exit_point == EXIT_INIT_TASK)
++		EXIT_CLEANLY();
++
++	return 0;
++}
++
++void BPF_STRUCT_OPS(exit_exit, struct scx_exit_info *ei)
++{
++	UEI_RECORD(uei, ei);
++}
++
++s32 BPF_STRUCT_OPS_SLEEPABLE(exit_init)
++{
++	if (exit_point == EXIT_INIT)
++		EXIT_CLEANLY();
++
++	return 0;
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops exit_ops = {
++	.select_cpu		= exit_select_cpu,
++	.enqueue		= exit_enqueue,
++	.dispatch		= exit_dispatch,
++	.init_task		= exit_init_task,
++	.enable			= exit_enable,
++	.exit			= exit_exit,
++	.init			= exit_init,
++	.name			= "exit",
++	.timeout_ms		= 1000U,
++};
+diff --git a/tools/testing/selftests/sched_ext/exit.c b/tools/testing/selftests/sched_ext/exit.c
+new file mode 100644
+index 000000000000..31bcd06e21cd
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/exit.c
+@@ -0,0 +1,55 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++#include <bpf/bpf.h>
++#include <sched.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include "exit.bpf.skel.h"
++#include "scx_test.h"
++
++#include "exit_test.h"
++
++static enum scx_test_status run(void *ctx)
++{
++	enum exit_test_case tc;
++
++	for (tc = 0; tc < NUM_EXITS; tc++) {
++		struct exit *skel = exit__open();
++		struct bpf_link *link;
++		char buf[16];
++
++		SCX_FAIL_IF(!skel, "Failed to open skel");
++		skel->rodata->exit_point = tc;
++		SCX_FAIL_IF(exit__load(skel), "Failed to load skel");
++		link = bpf_map__attach_struct_ops(skel->maps.exit_ops);
++		if (!link) {
++			SCX_ERR("Failed to attach scheduler");
++			exit__destroy(skel);
++			return SCX_TEST_FAIL;
++		}
++
++		/* Assumes uei.kind is written last */
++		while (skel->data->uei.kind == EXIT_KIND(SCX_EXIT_NONE))
++			sched_yield();
++
++		SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_UNREG_BPF));
++		SCX_EQ(skel->data->uei.exit_code, tc);
++		sprintf(buf, "%d", tc);
++		SCX_ASSERT(!strcmp(skel->data->uei.msg, buf));
++		bpf_link__destroy(link);
++		exit__destroy(skel);
++	}
++
++	return SCX_TEST_PASS;
++}
++
++struct scx_test exit_test = {
++	.name = "exit",
++	.description = "Verify we can cleanly exit a scheduler in multiple places",
++	.run = run,
++};
++REGISTER_SCX_TEST(&exit_test)
+diff --git a/tools/testing/selftests/sched_ext/exit_test.h b/tools/testing/selftests/sched_ext/exit_test.h
+new file mode 100644
+index 000000000000..94f0268b9cb8
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/exit_test.h
+@@ -0,0 +1,20 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++
++#ifndef __EXIT_TEST_H__
++#define __EXIT_TEST_H__
++
++enum exit_test_case {
++	EXIT_SELECT_CPU,
++	EXIT_ENQUEUE,
++	EXIT_DISPATCH,
++	EXIT_ENABLE,
++	EXIT_INIT_TASK,
++	EXIT_INIT,
++	NUM_EXITS,
++};
++
++#endif  /* __EXIT_TEST_H__ */
+diff --git a/tools/testing/selftests/sched_ext/hotplug.bpf.c b/tools/testing/selftests/sched_ext/hotplug.bpf.c
+new file mode 100644
+index 000000000000..8f2601db39f3
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/hotplug.bpf.c
+@@ -0,0 +1,61 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++#include "hotplug_test.h"
++
++UEI_DEFINE(uei);
++
++void BPF_STRUCT_OPS(hotplug_exit, struct scx_exit_info *ei)
++{
++	UEI_RECORD(uei, ei);
++}
++
++static void exit_from_hotplug(s32 cpu, bool onlining)
++{
++	/*
++	 * Ignored, just used to verify that we can invoke blocking kfuncs
++	 * from the hotplug path.
++	 */
++	scx_bpf_create_dsq(0, -1);
++
++	s64 code = SCX_ECODE_ACT_RESTART | HOTPLUG_EXIT_RSN;
++
++	if (onlining)
++		code |= HOTPLUG_ONLINING;
++
++	scx_bpf_exit(code, "hotplug event detected (%d going %s)", cpu,
++		     onlining ? "online" : "offline");
++}
++
++void BPF_STRUCT_OPS_SLEEPABLE(hotplug_cpu_online, s32 cpu)
++{
++	exit_from_hotplug(cpu, true);
++}
++
++void BPF_STRUCT_OPS_SLEEPABLE(hotplug_cpu_offline, s32 cpu)
++{
++	exit_from_hotplug(cpu, false);
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops hotplug_cb_ops = {
++	.cpu_online		= hotplug_cpu_online,
++	.cpu_offline		= hotplug_cpu_offline,
++	.exit			= hotplug_exit,
++	.name			= "hotplug_cbs",
++	.timeout_ms		= 1000U,
++};
++
++SEC(".struct_ops.link")
++struct sched_ext_ops hotplug_nocb_ops = {
++	.exit			= hotplug_exit,
++	.name			= "hotplug_nocbs",
++	.timeout_ms		= 1000U,
++};
+diff --git a/tools/testing/selftests/sched_ext/hotplug.c b/tools/testing/selftests/sched_ext/hotplug.c
+new file mode 100644
+index 000000000000..87bf220b1bce
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/hotplug.c
+@@ -0,0 +1,168 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++#include <bpf/bpf.h>
++#include <sched.h>
++#include <scx/common.h>
++#include <string.h>
++#include <sys/wait.h>
++#include <unistd.h>
++
++#include "hotplug_test.h"
++#include "hotplug.bpf.skel.h"
++#include "scx_test.h"
++#include "util.h"
++
++const char *online_path = "/sys/devices/system/cpu/cpu1/online";
++
++static bool is_cpu_online(void)
++{
++	return file_read_long(online_path) > 0;
++}
++
++static void toggle_online_status(bool online)
++{
++	long val = online ? 1 : 0;
++	int ret;
++
++	ret = file_write_long(online_path, val);
++	if (ret != 0)	/* best effort: report the failure and carry on */
++		fprintf(stderr, "Failed to bring CPU %s (%s)\n",
++			online ? "online" : "offline", strerror(errno));
++}
++
++static enum scx_test_status setup(void **ctx)
++{
++	if (!is_cpu_online())
++		return SCX_TEST_SKIP;
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status test_hotplug(bool onlining, bool cbs_defined)
++{
++	struct hotplug *skel;
++	struct bpf_link *link;
++	long kind, code;
++
++	SCX_ASSERT(is_cpu_online());
++
++	skel = hotplug__open_and_load();
++	SCX_ASSERT(skel);
++
++	/* Testing the offline -> online path, so go offline before starting */
++	if (onlining)
++		toggle_online_status(0);
++
++	if (cbs_defined) {
++		kind = SCX_KIND_VAL(SCX_EXIT_UNREG_BPF);
++		code = SCX_ECODE_VAL(SCX_ECODE_ACT_RESTART) | HOTPLUG_EXIT_RSN;
++		if (onlining)
++			code |= HOTPLUG_ONLINING;
++	} else {
++		kind = SCX_KIND_VAL(SCX_EXIT_UNREG_KERN);
++		code = SCX_ECODE_VAL(SCX_ECODE_ACT_RESTART) |
++		       SCX_ECODE_VAL(SCX_ECODE_RSN_HOTPLUG);
++	}
++
++	if (cbs_defined)
++		link = bpf_map__attach_struct_ops(skel->maps.hotplug_cb_ops);
++	else
++		link = bpf_map__attach_struct_ops(skel->maps.hotplug_nocb_ops);
++
++	if (!link) {
++		SCX_ERR("Failed to attach scheduler");
++		hotplug__destroy(skel);
++		return SCX_TEST_FAIL;
++	}
++
++	toggle_online_status(onlining ? 1 : 0);
++
++	while (!UEI_EXITED(skel, uei))
++		sched_yield();
++
++	SCX_EQ(skel->data->uei.kind, kind);
++	SCX_EQ(UEI_REPORT(skel, uei), code);
++
++	if (!onlining)
++		toggle_online_status(1);
++
++	bpf_link__destroy(link);
++	hotplug__destroy(skel);
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status test_hotplug_attach(void)
++{
++	struct hotplug *skel;
++	struct bpf_link *link;
++	enum scx_test_status status = SCX_TEST_PASS;
++	long kind, code;
++
++	SCX_ASSERT(is_cpu_online());
++	SCX_ASSERT(scx_hotplug_seq() > 0);
++
++	skel = SCX_OPS_OPEN(hotplug_nocb_ops, hotplug);
++	SCX_ASSERT(skel);
++
++	SCX_OPS_LOAD(skel, hotplug_nocb_ops, hotplug, uei);
++
++	/*
++	 * Offline the CPU so the global hotplug seq moves past the one set above:
++	 * attach still returns a link, but the scheduler must then be ejected.
++	 */
++	toggle_online_status(0);
++	link = bpf_map__attach_struct_ops(skel->maps.hotplug_nocb_ops);
++
++	toggle_online_status(1);
++
++	SCX_ASSERT(link);
++	while (!UEI_EXITED(skel, uei))
++		sched_yield();
++
++	kind = SCX_KIND_VAL(SCX_EXIT_UNREG_KERN);
++	code = SCX_ECODE_VAL(SCX_ECODE_ACT_RESTART) |
++	       SCX_ECODE_VAL(SCX_ECODE_RSN_HOTPLUG);
++	SCX_EQ(skel->data->uei.kind, kind);
++	SCX_EQ(UEI_REPORT(skel, uei), code);
++
++	bpf_link__destroy(link);
++	hotplug__destroy(skel);
++
++	return status;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++
++#define HP_TEST(__onlining, __cbs_defined) ({				\
++	if (test_hotplug(__onlining, __cbs_defined) != SCX_TEST_PASS)	\
++		return SCX_TEST_FAIL;					\
++})
++
++	HP_TEST(true, true);
++	HP_TEST(false, true);
++	HP_TEST(true, false);
++	HP_TEST(false, false);
++
++#undef HP_TEST
++
++	return test_hotplug_attach();
++}
++
++static void cleanup(void *ctx)
++{
++	toggle_online_status(1);
++}
++
++struct scx_test hotplug_test = {
++	.name = "hotplug",
++	.description = "Verify hotplug behavior",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&hotplug_test)
+diff --git a/tools/testing/selftests/sched_ext/hotplug_test.h b/tools/testing/selftests/sched_ext/hotplug_test.h
+new file mode 100644
+index 000000000000..73d236f90787
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/hotplug_test.h
+@@ -0,0 +1,15 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++
++#ifndef __HOTPLUG_TEST_H__
++#define __HOTPLUG_TEST_H__
++
++enum hotplug_test_flags {
++	HOTPLUG_EXIT_RSN = 1LLU << 0,	/* in the expected exit code when cbs are defined */
++	HOTPLUG_ONLINING = 1LLU << 1,	/* OR'ed in when the onlining path is tested */
++};
++
++#endif /* __HOTPLUG_TEST_H__ */
+diff --git a/tools/testing/selftests/sched_ext/init_enable_count.bpf.c b/tools/testing/selftests/sched_ext/init_enable_count.bpf.c
+new file mode 100644
+index 000000000000..47ea89a626c3
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/init_enable_count.bpf.c
+@@ -0,0 +1,53 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * A scheduler that verifies that we do proper counting of init, enable, etc
++ * callbacks.
++ *
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++u64 init_task_cnt, exit_task_cnt, enable_cnt, disable_cnt;
++u64 init_fork_cnt, init_transition_cnt;
++
++s32 BPF_STRUCT_OPS_SLEEPABLE(cnt_init_task, struct task_struct *p,
++			     struct scx_init_task_args *args)
++{
++	__sync_fetch_and_add(&init_task_cnt, 1);
++
++	if (args->fork)
++		__sync_fetch_and_add(&init_fork_cnt, 1);
++	else
++		__sync_fetch_and_add(&init_transition_cnt, 1);
++
++	return 0;
++}
++
++void BPF_STRUCT_OPS(cnt_exit_task, struct task_struct *p)
++{
++	__sync_fetch_and_add(&exit_task_cnt, 1);
++}
++
++void BPF_STRUCT_OPS(cnt_enable, struct task_struct *p)
++{
++	__sync_fetch_and_add(&enable_cnt, 1);
++}
++
++void BPF_STRUCT_OPS(cnt_disable, struct task_struct *p)
++{
++	__sync_fetch_and_add(&disable_cnt, 1);
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops init_enable_count_ops = {
++	.init_task	= cnt_init_task,
++	.exit_task	= cnt_exit_task,
++	.enable		= cnt_enable,
++	.disable	= cnt_disable,
++	.name		= "init_enable_count",
++};
+diff --git a/tools/testing/selftests/sched_ext/init_enable_count.c b/tools/testing/selftests/sched_ext/init_enable_count.c
+new file mode 100644
+index 000000000000..97d45f1e5597
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/init_enable_count.c
+@@ -0,0 +1,166 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++#include <stdio.h>
++#include <unistd.h>
++#include <sched.h>
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include "scx_test.h"
++#include "init_enable_count.bpf.skel.h"
++
++#define SCHED_EXT 7
++
++static struct init_enable_count *
++open_load_prog(bool global)
++{
++	struct init_enable_count *skel;
++
++	skel = init_enable_count__open();
++	SCX_BUG_ON(!skel, "Failed to open skel");
++
++	if (!global)
++		skel->struct_ops.init_enable_count_ops->flags |= SCX_OPS_SWITCH_PARTIAL;
++
++	SCX_BUG_ON(init_enable_count__load(skel), "Failed to load skel");
++
++	return skel;
++}
++
++/*
++ * Fork children around two attach/detach cycles and check the callback counts.
++ */
++static enum scx_test_status run_test(bool global)
++{
++	struct init_enable_count *skel;
++	struct bpf_link *link;
++	const u32 num_children = 5, num_pre_forks = 1024;
++	int ret, i, status;
++	struct sched_param param = {};
++	pid_t pids[num_pre_forks];
++
++	skel = open_load_prog(global);
++
++	/*
++	 * Fork a bunch of children before we attach the scheduler so that we
++	 * ensure (at least in practical terms) that there are more tasks that
++	 * transition from SCHED_OTHER -> SCHED_EXT than there are tasks that
++	 * take the fork() path either below or in other processes.
++	 */
++	for (i = 0; i < num_pre_forks; i++) {
++		pids[i] = fork();
++		SCX_FAIL_IF(pids[i] < 0, "Failed to fork child");
++		if (pids[i] == 0) {
++			sleep(1);
++			exit(0);
++		}
++	}
++
++	link = bpf_map__attach_struct_ops(skel->maps.init_enable_count_ops);
++	SCX_FAIL_IF(!link, "Failed to attach struct_ops");
++
++	for (i = 0; i < num_pre_forks; i++) {
++		SCX_FAIL_IF(waitpid(pids[i], &status, 0) != pids[i],
++			    "Failed to wait for pre-forked child\n");
++		SCX_FAIL_IF(status != 0, "Pre-forked child %d exited with status %d\n", i,
++			    status);
++	}
++
++	bpf_link__destroy(link);
++	SCX_GE(skel->bss->init_task_cnt, num_pre_forks);
++	SCX_GE(skel->bss->exit_task_cnt, num_pre_forks);
++
++	link = bpf_map__attach_struct_ops(skel->maps.init_enable_count_ops);
++	SCX_FAIL_IF(!link, "Failed to attach struct_ops");
++
++	/* SCHED_EXT children */
++	for (i = 0; i < num_children; i++) {
++		pids[i] = fork();
++		SCX_FAIL_IF(pids[i] < 0, "Failed to fork child");
++
++		if (pids[i] == 0) {
++			ret = sched_setscheduler(0, SCHED_EXT, &param);
++			SCX_BUG_ON(ret, "Failed to set sched to sched_ext");
++
++			/*
++			 * Reset to SCHED_OTHER for half of them. Counts for
++			 * everything should still be the same regardless, as
++			 * ops.disable() is invoked even if a task is still on
++			 * SCHED_EXT before it exits.
++			 */
++			if (i % 2 == 0) {
++				ret = sched_setscheduler(0, SCHED_OTHER, &param);
++				SCX_BUG_ON(ret, "Failed to reset sched to normal");
++			}
++			exit(0);
++		}
++	}
++	for (i = 0; i < num_children; i++) {
++		SCX_FAIL_IF(waitpid(pids[i], &status, 0) != pids[i],
++			    "Failed to wait for SCX child\n");
++		SCX_FAIL_IF(status != 0, "SCX child %d exited with status %d\n", i,
++			    status);
++	}
++
++	/* SCHED_OTHER children */
++	for (i = 0; i < num_children; i++) {
++		pids[i] = fork();
++		SCX_FAIL_IF(pids[i] < 0, "Failed to fork child");
++		if (pids[i] == 0)
++			exit(0);
++	}
++	for (i = 0; i < num_children; i++) {
++		SCX_FAIL_IF(waitpid(pids[i], &status, 0) != pids[i],
++			    "Failed to wait for normal child\n");
++		SCX_FAIL_IF(status != 0, "Normal child %d exited with status %d\n", i,
++			    status);
++	}
++
++	bpf_link__destroy(link);
++
++	SCX_GE(skel->bss->init_task_cnt, 2 * num_children);
++	SCX_GE(skel->bss->exit_task_cnt, 2 * num_children);
++
++	if (global) {
++		SCX_GE(skel->bss->enable_cnt, 2 * num_children);
++		SCX_GE(skel->bss->disable_cnt, 2 * num_children);
++	} else {
++		SCX_EQ(skel->bss->enable_cnt, num_children);
++		SCX_EQ(skel->bss->disable_cnt, num_children);
++	}
++	/*
++	 * We forked a ton of tasks before we attached the scheduler above, so
++	 * this should be fine. Technically it could be flaky if a ton of forks
++	 * are happening at the same time in other processes, but that should
++	 * be exceedingly unlikely.
++	 */
++	SCX_GT(skel->bss->init_transition_cnt, skel->bss->init_fork_cnt);
++	SCX_GE(skel->bss->init_fork_cnt, 2 * num_children);
++
++	init_enable_count__destroy(skel);
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	enum scx_test_status status;
++
++	status = run_test(true);
++	if (status != SCX_TEST_PASS)
++		return status;
++
++	return run_test(false);
++}
++
++struct scx_test init_enable_count = {
++	.name = "init_enable_count",
++	.description = "Verify we do the correct amount of counting of init, "
++		       "enable, etc callbacks.",
++	.run = run,
++};
++REGISTER_SCX_TEST(&init_enable_count)
+diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c
+new file mode 100644
+index 000000000000..00bfa9cb95d3
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/maximal.bpf.c
+@@ -0,0 +1,164 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * A scheduler with every callback defined.
++ *
++ * This scheduler defines every callback.
++ *
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu,
++		   u64 wake_flags)
++{
++	return prev_cpu;
++}
++
++void BPF_STRUCT_OPS(maximal_enqueue, struct task_struct *p, u64 enq_flags)
++{
++	scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
++}
++
++void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags)
++{}
++
++void BPF_STRUCT_OPS(maximal_dispatch, s32 cpu, struct task_struct *prev)
++{
++	scx_bpf_consume(SCX_DSQ_GLOBAL);
++}
++
++void BPF_STRUCT_OPS(maximal_runnable, struct task_struct *p, u64 enq_flags)
++{}
++
++void BPF_STRUCT_OPS(maximal_running, struct task_struct *p)
++{}
++
++void BPF_STRUCT_OPS(maximal_stopping, struct task_struct *p, bool runnable)
++{}
++
++void BPF_STRUCT_OPS(maximal_quiescent, struct task_struct *p, u64 deq_flags)
++{}
++
++bool BPF_STRUCT_OPS(maximal_yield, struct task_struct *from,
++		    struct task_struct *to)
++{
++	return false;
++}
++
++bool BPF_STRUCT_OPS(maximal_core_sched_before, struct task_struct *a,
++		    struct task_struct *b)
++{
++	return false;
++}
++
++void BPF_STRUCT_OPS(maximal_set_weight, struct task_struct *p, u32 weight)
++{}
++
++void BPF_STRUCT_OPS(maximal_set_cpumask, struct task_struct *p,
++		    const struct cpumask *cpumask)
++{}
++
++void BPF_STRUCT_OPS(maximal_update_idle, s32 cpu, bool idle)
++{}
++
++void BPF_STRUCT_OPS(maximal_cpu_acquire, s32 cpu,
++		    struct scx_cpu_acquire_args *args)
++{}
++
++void BPF_STRUCT_OPS(maximal_cpu_release, s32 cpu,
++		    struct scx_cpu_release_args *args)
++{}
++
++void BPF_STRUCT_OPS(maximal_cpu_online, s32 cpu)
++{}
++
++void BPF_STRUCT_OPS(maximal_cpu_offline, s32 cpu)
++{}
++
++s32 BPF_STRUCT_OPS(maximal_init_task, struct task_struct *p,
++		   struct scx_init_task_args *args)
++{
++	return 0;
++}
++
++void BPF_STRUCT_OPS(maximal_enable, struct task_struct *p)
++{}
++
++void BPF_STRUCT_OPS(maximal_exit_task, struct task_struct *p,
++		    struct scx_exit_task_args *args)
++{}
++
++void BPF_STRUCT_OPS(maximal_disable, struct task_struct *p)
++{}
++
++s32 BPF_STRUCT_OPS(maximal_cgroup_init, struct cgroup *cgrp,
++		   struct scx_cgroup_init_args *args)
++{
++	return 0;
++}
++
++void BPF_STRUCT_OPS(maximal_cgroup_exit, struct cgroup *cgrp)
++{}
++
++s32 BPF_STRUCT_OPS(maximal_cgroup_prep_move, struct task_struct *p,
++		   struct cgroup *from, struct cgroup *to)
++{
++	return 0;
++}
++
++void BPF_STRUCT_OPS(maximal_cgroup_move, struct task_struct *p,
++		    struct cgroup *from, struct cgroup *to)
++{}
++
++void BPF_STRUCT_OPS(maximal_cgroup_cancel_move, struct task_struct *p,
++	       struct cgroup *from, struct cgroup *to)
++{}
++
++void BPF_STRUCT_OPS(maximal_cgroup_set_weight, struct cgroup *cgrp, u32 weight)
++{}
++
++s32 BPF_STRUCT_OPS_SLEEPABLE(maximal_init)
++{
++	return 0;
++}
++
++void BPF_STRUCT_OPS(maximal_exit, struct scx_exit_info *info)
++{}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops maximal_ops = {
++	.select_cpu		= maximal_select_cpu,
++	.enqueue		= maximal_enqueue,
++	.dequeue		= maximal_dequeue,
++	.dispatch		= maximal_dispatch,
++	.runnable		= maximal_runnable,
++	.running		= maximal_running,
++	.stopping		= maximal_stopping,
++	.quiescent		= maximal_quiescent,
++	.yield			= maximal_yield,
++	.core_sched_before	= maximal_core_sched_before,
++	.set_weight		= maximal_set_weight,
++	.set_cpumask		= maximal_set_cpumask,
++	.update_idle		= maximal_update_idle,
++	.cpu_acquire		= maximal_cpu_acquire,
++	.cpu_release		= maximal_cpu_release,
++	.cpu_online		= maximal_cpu_online,
++	.cpu_offline		= maximal_cpu_offline,
++	.init_task		= maximal_init_task,
++	.enable			= maximal_enable,
++	.exit_task		= maximal_exit_task,
++	.disable		= maximal_disable,
++	.cgroup_init		= maximal_cgroup_init,
++	.cgroup_exit		= maximal_cgroup_exit,
++	.cgroup_prep_move	= maximal_cgroup_prep_move,
++	.cgroup_move		= maximal_cgroup_move,
++	.cgroup_cancel_move	= maximal_cgroup_cancel_move,
++	.cgroup_set_weight	= maximal_cgroup_set_weight,
++	.init			= maximal_init,
++	.exit			= maximal_exit,
++	.name			= "maximal",
++};
+diff --git a/tools/testing/selftests/sched_ext/maximal.c b/tools/testing/selftests/sched_ext/maximal.c
+new file mode 100644
+index 000000000000..f38fc973c380
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/maximal.c
+@@ -0,0 +1,51 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include "maximal.bpf.skel.h"
++#include "scx_test.h"
++
++static enum scx_test_status setup(void **ctx)
++{
++	struct maximal *skel;
++
++	skel = maximal__open_and_load();
++	SCX_FAIL_IF(!skel, "Failed to open and load skel");
++	*ctx = skel;
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	struct maximal *skel = ctx;
++	struct bpf_link *link;
++
++	link = bpf_map__attach_struct_ops(skel->maps.maximal_ops);
++	SCX_FAIL_IF(!link, "Failed to attach scheduler");
++
++	bpf_link__destroy(link);
++
++	return SCX_TEST_PASS;
++}
++
++static void cleanup(void *ctx)
++{
++	struct maximal *skel = ctx;
++
++	maximal__destroy(skel);
++}
++
++struct scx_test maximal = {
++	.name = "maximal",
++	.description = "Verify we can load a scheduler with every callback defined",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&maximal)
+diff --git a/tools/testing/selftests/sched_ext/maybe_null.bpf.c b/tools/testing/selftests/sched_ext/maybe_null.bpf.c
+new file mode 100644
+index 000000000000..27d0f386acfb
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/maybe_null.bpf.c
+@@ -0,0 +1,36 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++u64 vtime_test;
++
++void BPF_STRUCT_OPS(maybe_null_running, struct task_struct *p)
++{}
++
++void BPF_STRUCT_OPS(maybe_null_success_dispatch, s32 cpu, struct task_struct *p)
++{
++	if (p != NULL)
++		vtime_test = p->scx.dsq_vtime;
++}
++
++bool BPF_STRUCT_OPS(maybe_null_success_yield, struct task_struct *from,
++		    struct task_struct *to)
++{
++	if (to)
++		bpf_printk("Yielding to %s[%d]", to->comm, to->pid);
++
++	return false;
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops maybe_null_success = {
++	.dispatch               = maybe_null_success_dispatch,
++	.yield			= maybe_null_success_yield,
++	.enable			= maybe_null_running,
++	.name			= "maybe_null",	/* distinct from minimal.bpf.c's "minimal" */
++};
+diff --git a/tools/testing/selftests/sched_ext/maybe_null.c b/tools/testing/selftests/sched_ext/maybe_null.c
+new file mode 100644
+index 000000000000..31cfafb0cf65
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/maybe_null.c
+@@ -0,0 +1,49 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ */
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include "maybe_null.bpf.skel.h"
++#include "maybe_null_fail_dsp.bpf.skel.h"
++#include "maybe_null_fail_yld.bpf.skel.h"
++#include "scx_test.h"
++
++static enum scx_test_status run(void *ctx)
++{
++	struct maybe_null *skel;
++	struct maybe_null_fail_dsp *fail_dsp;
++	struct maybe_null_fail_yld *fail_yld;
++
++	skel = maybe_null__open_and_load();
++	if (!skel) {
++		SCX_ERR("Failed to open and load maybe_null skel");
++		return SCX_TEST_FAIL;
++	}
++	maybe_null__destroy(skel);
++
++	fail_dsp = maybe_null_fail_dsp__open_and_load();
++	if (fail_dsp) {
++		maybe_null_fail_dsp__destroy(fail_dsp);
++		SCX_ERR("Should failed to open and load maybe_null_fail_dsp skel");
++		return SCX_TEST_FAIL;
++	}
++
++	fail_yld = maybe_null_fail_yld__open_and_load();
++	if (fail_yld) {
++		maybe_null_fail_yld__destroy(fail_yld);
++		SCX_ERR("Should failed to open and load maybe_null_fail_yld skel");
++		return SCX_TEST_FAIL;
++	}
++
++	return SCX_TEST_PASS;
++}
++
++struct scx_test maybe_null = {
++	.name = "maybe_null",
++	.description = "Verify if PTR_MAYBE_NULL work for .dispatch",
++	.run = run,
++};
++REGISTER_SCX_TEST(&maybe_null)
+diff --git a/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c b/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c
+new file mode 100644
+index 000000000000..c0641050271d
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c
+@@ -0,0 +1,25 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++u64 vtime_test;
++
++void BPF_STRUCT_OPS(maybe_null_running, struct task_struct *p)
++{}
++
++void BPF_STRUCT_OPS(maybe_null_fail_dispatch, s32 cpu, struct task_struct *p)
++{
++	vtime_test = p->scx.dsq_vtime;
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops maybe_null_fail = {
++	.dispatch               = maybe_null_fail_dispatch,
++	.enable			= maybe_null_running,
++	.name			= "maybe_null_fail_dispatch",
++};
+diff --git a/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c b/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c
+new file mode 100644
+index 000000000000..3c1740028e3b
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c
+@@ -0,0 +1,28 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++u64 vtime_test;
++
++void BPF_STRUCT_OPS(maybe_null_running, struct task_struct *p)
++{}
++
++bool BPF_STRUCT_OPS(maybe_null_fail_yield, struct task_struct *from,
++		    struct task_struct *to)
++{
++	bpf_printk("Yielding to %s[%d]", to->comm, to->pid);
++
++	return false;
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops maybe_null_fail = {
++	.yield			= maybe_null_fail_yield,
++	.enable			= maybe_null_running,
++	.name			= "maybe_null_fail_yield",
++};
+diff --git a/tools/testing/selftests/sched_ext/minimal.bpf.c b/tools/testing/selftests/sched_ext/minimal.bpf.c
+new file mode 100644
+index 000000000000..6a7eccef0104
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/minimal.bpf.c
+@@ -0,0 +1,21 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * A completely minimal scheduler.
++ *
++ * This scheduler defines the absolute minimal set of struct sched_ext_ops
++ * fields: its name. It should _not_ fail to be loaded, and can be used to
++ * exercise the default scheduling paths in ext.c.
++ *
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++SEC(".struct_ops.link")
++struct sched_ext_ops minimal_ops = {
++	.name			= "minimal",
++};
+diff --git a/tools/testing/selftests/sched_ext/minimal.c b/tools/testing/selftests/sched_ext/minimal.c
+new file mode 100644
+index 000000000000..6c5db8ebbf8a
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/minimal.c
+@@ -0,0 +1,58 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include "minimal.bpf.skel.h"
++#include "scx_test.h"
++
++static enum scx_test_status setup(void **ctx)
++{
++	struct minimal *skel;
++
++	skel = minimal__open_and_load();
++	if (!skel) {
++		SCX_ERR("Failed to open and load skel");
++		return SCX_TEST_FAIL;
++	}
++	*ctx = skel;
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	struct minimal *skel = ctx;
++	struct bpf_link *link;
++
++	link = bpf_map__attach_struct_ops(skel->maps.minimal_ops);
++	if (!link) {
++		SCX_ERR("Failed to attach scheduler");
++		return SCX_TEST_FAIL;
++	}
++
++	bpf_link__destroy(link);
++
++	return SCX_TEST_PASS;
++}
++
++static void cleanup(void *ctx)
++{
++	struct minimal *skel = ctx;
++
++	minimal__destroy(skel);
++}
++
++struct scx_test minimal = {
++	.name = "minimal",
++	.description = "Verify we can load a fully minimal scheduler",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&minimal)
+diff --git a/tools/testing/selftests/sched_ext/prog_run.bpf.c b/tools/testing/selftests/sched_ext/prog_run.bpf.c
+new file mode 100644
+index 000000000000..6a4d7c48e3f2
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/prog_run.bpf.c
+@@ -0,0 +1,33 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * A scheduler that validates that we can invoke sched_ext kfuncs in
++ * BPF_PROG_TYPE_SYSCALL programs.
++ *
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++
++#include <scx/common.bpf.h>
++
++UEI_DEFINE(uei);
++
++char _license[] SEC("license") = "GPL";
++
++SEC("syscall")
++int BPF_PROG(prog_run_syscall)
++{
++	scx_bpf_create_dsq(0, -1);
++	scx_bpf_exit(0xdeadbeef, "Exited from PROG_RUN");
++	return 0;
++}
++
++void BPF_STRUCT_OPS(prog_run_exit, struct scx_exit_info *ei)
++{
++	UEI_RECORD(uei, ei);
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops prog_run_ops = {
++	.exit			= prog_run_exit,
++	.name			= "prog_run",
++};
+diff --git a/tools/testing/selftests/sched_ext/prog_run.c b/tools/testing/selftests/sched_ext/prog_run.c
+new file mode 100644
+index 000000000000..3cd57ef8daaa
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/prog_run.c
+@@ -0,0 +1,78 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++#include <bpf/bpf.h>
++#include <sched.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include "prog_run.bpf.skel.h"
++#include "scx_test.h"
++
++static enum scx_test_status setup(void **ctx)
++{
++	struct prog_run *skel;
++
++	skel = prog_run__open_and_load();
++	if (!skel) {
++		SCX_ERR("Failed to open and load skel");
++		return SCX_TEST_FAIL;
++	}
++	*ctx = skel;
++
++	return SCX_TEST_PASS;
++}
++
++/* Attach prog_run_ops, invoke the syscall prog and check the recorded exit. */
++static enum scx_test_status run(void *ctx)
++{
++	struct prog_run *skel = ctx;
++	struct bpf_link *link;
++	int prog_fd, err = 0;
++
++	/* prog_fd is owned by the skeleton: prog_run__destroy() closes it. */
++	prog_fd = bpf_program__fd(skel->progs.prog_run_syscall);
++	if (prog_fd < 0) {
++		SCX_ERR("Failed to get BPF_PROG_RUN prog");
++		return SCX_TEST_FAIL;
++	}
++
++	LIBBPF_OPTS(bpf_test_run_opts, topts);
++
++	link = bpf_map__attach_struct_ops(skel->maps.prog_run_ops);
++	if (!link) {
++		SCX_ERR("Failed to attach scheduler");
++		return SCX_TEST_FAIL;
++	}
++
++	err = bpf_prog_test_run_opts(prog_fd, &topts);
++	SCX_EQ(err, 0);
++
++	/* Assumes uei.kind is written last */
++	while (skel->data->uei.kind == EXIT_KIND(SCX_EXIT_NONE))
++		sched_yield();
++
++	SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_UNREG_BPF));
++	SCX_EQ(skel->data->uei.exit_code, 0xdeadbeef);
++	bpf_link__destroy(link);
++
++	return SCX_TEST_PASS;
++}
++
++static void cleanup(void *ctx)
++{
++	struct prog_run *skel = ctx;
++
++	prog_run__destroy(skel);
++}
++
++struct scx_test prog_run = {
++	.name = "prog_run",
++	.description = "Verify we can call into a scheduler with BPF_PROG_RUN, and invoke kfuncs",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&prog_run)
+diff --git a/tools/testing/selftests/sched_ext/reload_loop.c b/tools/testing/selftests/sched_ext/reload_loop.c
+new file mode 100644
+index 000000000000..5cfba2d6e056
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/reload_loop.c
+@@ -0,0 +1,75 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++#include <bpf/bpf.h>
++#include <pthread.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include "maximal.bpf.skel.h"
++#include "scx_test.h"
++
++static struct maximal *skel;
++static pthread_t threads[2];
++
++bool force_exit = false;
++
++static enum scx_test_status setup(void **ctx)
++{
++	skel = maximal__open_and_load();
++	if (!skel) {
++		SCX_ERR("Failed to open and load skel");
++		return SCX_TEST_FAIL;
++	}
++
++	return SCX_TEST_PASS;
++}
++
++static void *do_reload_loop(void *arg)
++{
++	u32 i;
++
++	for (i = 0; i < 1024 && !force_exit; i++) {
++		struct bpf_link *link;
++
++		link = bpf_map__attach_struct_ops(skel->maps.maximal_ops);
++		if (link)
++			bpf_link__destroy(link);
++	}
++
++	return NULL;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	int err;
++	void *ret;
++
++	err = pthread_create(&threads[0], NULL, do_reload_loop, NULL);
++	SCX_FAIL_IF(err, "Failed to create thread 0");
++
++	err = pthread_create(&threads[1], NULL, do_reload_loop, NULL);
++	SCX_FAIL_IF(err, "Failed to create thread 1");
++
++	SCX_FAIL_IF(pthread_join(threads[0], &ret), "thread 0 failed");
++	SCX_FAIL_IF(pthread_join(threads[1], &ret), "thread 1 failed");
++
++	return SCX_TEST_PASS;
++}
++
++static void cleanup(void *ctx)
++{
++	force_exit = true;
++	maximal__destroy(skel);
++}
++
++struct scx_test reload_loop = {
++	.name = "reload_loop",
++	.description = "Stress test loading and unloading schedulers repeatedly in a tight loop",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&reload_loop)
+diff --git a/tools/testing/selftests/sched_ext/runner.c b/tools/testing/selftests/sched_ext/runner.c
+new file mode 100644
+index 000000000000..eab48c7ff309
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/runner.c
+@@ -0,0 +1,201 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2024 Tejun Heo <tj@kernel.org>
++ */
++#include <stdio.h>
++#include <unistd.h>
++#include <signal.h>
++#include <libgen.h>
++#include <bpf/bpf.h>
++#include "scx_test.h"
++
++const char help_fmt[] =	/* usage text; %s receives the program name */
++"The runner for sched_ext tests.\n"
++"\n"
++"The runner is statically linked against all testcases, and runs them all serially.\n"
++"It's required for the testcases to be serial, as only a single host-wide sched_ext\n"
++"scheduler may be loaded at any given time.\n"
++"\n"
++"Usage: %s [-t TEST] [-s] [-q] [-h]\n"
++"\n"
++"  -t TEST       Only run tests whose name includes this string\n"
++"  -s            Include print output for skipped tests\n"
++"  -q            Don't print the test descriptions during run\n"
++"  -h            Display this help and exit\n";
++
++static volatile int exit_req;
++static bool quiet, print_skipped;
++
++#define MAX_SCX_TESTS 2048
++
++static struct scx_test __scx_tests[MAX_SCX_TESTS];
++static unsigned __scx_num_tests = 0;
++
++static void sigint_handler(int simple)
++{
++	exit_req = 1;
++}
++
++static void print_test_preamble(const struct scx_test *test, bool quiet)
++{
++	printf("===== START =====\n");
++	printf("TEST: %s\n", test->name);
++	if (!quiet)
++		printf("DESCRIPTION: %s\n", test->description);
++	printf("OUTPUT:\n");
++}
++
++static const char *status_to_result(enum scx_test_status status)
++{
++	switch (status) {
++	case SCX_TEST_PASS:
++	case SCX_TEST_SKIP:
++		return "ok";
++	case SCX_TEST_FAIL:
++		return "not ok";
++	default:
++		return "<UNKNOWN>";
++	}
++}
++
++static void print_test_result(const struct scx_test *test,
++			      enum scx_test_status status,
++			      unsigned int testnum)
++{
++	const char *result = status_to_result(status);
++	const char *directive = status == SCX_TEST_SKIP ? "SKIP " : "";
++
++	printf("%s %u %s # %s\n", result, testnum, test->name, directive);
++	printf("=====  END  =====\n");
++}
++
++static bool should_skip_test(const struct scx_test *test, const char * filter)
++{
++	return !strstr(test->name, filter);
++}
++
++static enum scx_test_status run_test(const struct scx_test *test)
++{
++	enum scx_test_status status;
++	void *context = NULL;
++
++	if (test->setup) {
++		status = test->setup(&context);
++		if (status != SCX_TEST_PASS)
++			return status;
++	}
++
++	status = test->run(context);
++
++	if (test->cleanup)
++		test->cleanup(context);
++
++	return status;
++}
++
++static bool test_valid(const struct scx_test *test)
++{
++	if (!test) {
++		fprintf(stderr, "NULL test detected\n");
++		return false;
++	}
++
++	if (!test->name) {
++		fprintf(stderr,
++			"Test with no name found. Must specify test name.\n");
++		return false;
++	}
++
++	if (!test->description) {
++		fprintf(stderr, "Test %s requires description.\n", test->name);
++		return false;
++	}
++
++	if (!test->run) {
++		fprintf(stderr, "Test %s has no run() callback\n", test->name);
++		return false;
++	}
++
++	return true;
++}
++
++/* Run the registered tests serially; exit status is non-zero if any failed. */
++int main(int argc, char **argv)
++{
++	const char *filter = NULL;
++	unsigned testnum = 0, i;
++	unsigned passed = 0, skipped = 0, failed = 0;
++	int opt;
++
++	signal(SIGINT, sigint_handler);
++	signal(SIGTERM, sigint_handler);
++	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
++
++	while ((opt = getopt(argc, argv, "qst:h")) != -1) {
++		switch (opt) {
++		case 'q':
++			quiet = true;
++			break;
++		case 's':
++			print_skipped = true;
++			break;
++		case 't':
++			filter = optarg;
++			break;
++		default:
++			fprintf(stderr, help_fmt, basename(argv[0]));
++			return opt != 'h';
++		}
++	}
++
++	/* Stop launching further tests once SIGINT/SIGTERM has set exit_req. */
++	for (i = 0; i < __scx_num_tests && !exit_req; i++) {
++		enum scx_test_status status;
++		struct scx_test *test = &__scx_tests[i];
++
++		if (filter && should_skip_test(test, filter)) {
++			/*
++			 * Printing the skipped tests and their preambles can
++			 * add a lot of noise to the runner output. Printing
++			 * this is only really useful for CI, so let's skip it
++			 * by default.
++			 */
++			if (print_skipped) {
++				print_test_preamble(test, quiet);
++				print_test_result(test, SCX_TEST_SKIP, ++testnum);
++			}
++			continue;
++		}
++
++		print_test_preamble(test, quiet);
++		status = run_test(test);
++		print_test_result(test, status, ++testnum);
++		switch (status) {
++		case SCX_TEST_PASS:
++			passed++;
++			break;
++		case SCX_TEST_SKIP:
++			skipped++;
++			break;
++		case SCX_TEST_FAIL:
++			failed++;
++			break;
++		}
++	}
++	printf("\n\n=============================\n\nRESULTS:\n\n");
++	printf("PASSED:  %u\n", passed);
++	printf("SKIPPED: %u\n", skipped);
++	printf("FAILED:  %u\n", failed);
++
++	return failed ? 1 : 0;
++}
++
++void scx_test_register(struct scx_test *test)
++{
++	SCX_BUG_ON(!test_valid(test), "Invalid test found");
++	SCX_BUG_ON(__scx_num_tests >= MAX_SCX_TESTS, "Maximum tests exceeded");
++
++	__scx_tests[__scx_num_tests++] = *test;	/* stored by value */
++}
+diff --git a/tools/testing/selftests/sched_ext/scx_test.h b/tools/testing/selftests/sched_ext/scx_test.h
+new file mode 100644
+index 000000000000..90b8d6915bb7
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/scx_test.h
+@@ -0,0 +1,131 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ */
++
++#ifndef __SCX_TEST_H__
++#define __SCX_TEST_H__
++
++#include <errno.h>
++#include <scx/common.h>
++#include <scx/compat.h>
++
++enum scx_test_status {
++	SCX_TEST_PASS = 0,
++	SCX_TEST_SKIP,
++	SCX_TEST_FAIL,
++};
++
++#define EXIT_KIND(__ent) __COMPAT_ENUM_OR_ZERO("scx_exit_kind", #__ent)
++
++struct scx_test {
++	/**
++	 * name - The name of the testcase.
++	 */
++	const char *name;
++
++	/**
++	 * description - A description of your testcase: what it tests and is
++	 * meant to validate.
++	 */
++	const char *description;
++
++	/*
++	 * setup - Setup the test.
++	 * @ctx: A pointer to a context object that will be passed to run and
++	 *	 cleanup.
++	 *
++	 * An optional callback that allows a testcase to perform setup for its
++	 * run. A test may return SCX_TEST_SKIP to skip the run.
++	 */
++	enum scx_test_status (*setup)(void **ctx);
++
++	/*
++	 * run - Run the test.
++	 * @ctx: Context set in the setup() callback. If @ctx was not set in
++	 *	 setup(), it is NULL.
++	 *
++	 * The main test. Callers should return one of:
++	 *
++	 * - SCX_TEST_PASS: Test passed
++	 * - SCX_TEST_SKIP: Test should be skipped
++	 * - SCX_TEST_FAIL: Test failed
++	 *
++	 * This callback must be defined.
++	 */
++	enum scx_test_status (*run)(void *ctx);
++
++	/*
++	 * cleanup - Perform cleanup following the test
++	 * @ctx: Context set in the setup() callback. If @ctx was not set in
++	 *	 setup(), it is NULL.
++	 *
++	 * An optional callback that allows a test to perform cleanup after
++	 * being run. This callback is run even if the run() callback returns
++	 * SCX_TEST_SKIP or SCX_TEST_FAIL. It is not run if setup() returns
++	 * SCX_TEST_SKIP or SCX_TEST_FAIL.
++	 */
++	void (*cleanup)(void *ctx);
++};
++
++void scx_test_register(struct scx_test *test);
++
++/* Two-level paste so __LINE__ expands; registers @__test before main(). */
++#define __SCX_CAT_(a, b) a##b
++#define __SCX_CAT(a, b) __SCX_CAT_(a, b)
++#define REGISTER_SCX_TEST(__test)					\
++	__attribute__((constructor)) static void			\
++	__SCX_CAT(___scxregister, __LINE__)(void) { scx_test_register(__test); }
++
++#define SCX_ERR(__fmt, ...)						\
++	do {								\
++		fprintf(stderr, "ERR: %s:%d\n", __FILE__, __LINE__);	\
++		fprintf(stderr, __fmt"\n", ##__VA_ARGS__);			\
++	} while (0)
++
++#define SCX_FAIL(__fmt, ...)						\
++	do {								\
++		SCX_ERR(__fmt, ##__VA_ARGS__);				\
++		return SCX_TEST_FAIL;					\
++	} while (0)
++
++#define SCX_FAIL_IF(__cond, __fmt, ...)					\
++	do {								\
++		if (__cond)						\
++			SCX_FAIL(__fmt, ##__VA_ARGS__);			\
++	} while (0)
++
++#define SCX_GT(_x, _y) SCX_FAIL_IF((_x) <= (_y), "Expected %s > %s (%lu > %lu)",	\
++				   #_x, #_y, (u64)(_x), (u64)(_y))
++#define SCX_GE(_x, _y) SCX_FAIL_IF((_x) < (_y), "Expected %s >= %s (%lu >= %lu)",	\
++				   #_x, #_y, (u64)(_x), (u64)(_y))
++#define SCX_LT(_x, _y) SCX_FAIL_IF((_x) >= (_y), "Expected %s < %s (%lu < %lu)",	\
++				   #_x, #_y, (u64)(_x), (u64)(_y))
++#define SCX_LE(_x, _y) SCX_FAIL_IF((_x) > (_y), "Expected %s <= %s (%lu <= %lu)",	\
++				   #_x, #_y, (u64)(_x), (u64)(_y))
++#define SCX_EQ(_x, _y) SCX_FAIL_IF((_x) != (_y), "Expected %s == %s (%lu == %lu)",	\
++				   #_x, #_y, (u64)(_x), (u64)(_y))
++#define SCX_ASSERT(_x) SCX_FAIL_IF(!(_x), "Expected %s to be true (%lu)",		\
++				   #_x, (u64)(_x))
++
++#define SCX_ECODE_VAL(__ecode) ({						\
++        u64 __val = 0;								\
++	bool __found = false;							\
++										\
++	__found = __COMPAT_read_enum("scx_exit_code", #__ecode, &__val);	\
++	SCX_ASSERT(__found);							\
++	(s64)__val;								\
++})
++
++#define SCX_KIND_VAL(__kind) ({							\
++        u64 __val = 0;								\
++	bool __found = false;							\
++										\
++	__found = __COMPAT_read_enum("scx_exit_kind", #__kind, &__val);		\
++	SCX_ASSERT(__found);							\
++	__val;									\
++})
++
++#endif // __SCX_TEST_H__
+diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c
+new file mode 100644
+index 000000000000..2ed2991afafe
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c
+@@ -0,0 +1,40 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * A scheduler that validates the behavior of direct dispatching with a default
++ * select_cpu implementation.
++ *
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++bool saw_local = false;
++
++static bool task_is_test(const struct task_struct *p)
++{
++	return !bpf_strncmp(p->comm, 9, "select_cpu");
++}
++
++void BPF_STRUCT_OPS(select_cpu_dfl_enqueue, struct task_struct *p,
++		    u64 enq_flags)
++{
++	const struct cpumask *idle_mask = scx_bpf_get_idle_cpumask();
++
++	if (task_is_test(p) &&
++	    bpf_cpumask_test_cpu(scx_bpf_task_cpu(p), idle_mask)) {
++		saw_local = true;
++	}
++	scx_bpf_put_idle_cpumask(idle_mask);
++
++	scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops select_cpu_dfl_ops = {
++	.enqueue		= select_cpu_dfl_enqueue,
++	.name			= "select_cpu_dfl",
++};
+diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.c
+new file mode 100644
+index 000000000000..a53a40c2d2f0
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.c
+@@ -0,0 +1,72 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include "select_cpu_dfl.bpf.skel.h"
++#include "scx_test.h"
++
++#define NUM_CHILDREN 1028
++
++static enum scx_test_status setup(void **ctx)
++{
++	struct select_cpu_dfl *skel;
++
++	skel = select_cpu_dfl__open_and_load();
++	SCX_FAIL_IF(!skel, "Failed to open and load skel");
++	*ctx = skel;
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	struct select_cpu_dfl *skel = ctx;
++	struct bpf_link *link;
++	pid_t pids[NUM_CHILDREN];
++	int i, status;
++
++	link = bpf_map__attach_struct_ops(skel->maps.select_cpu_dfl_ops);
++	SCX_FAIL_IF(!link, "Failed to attach scheduler");
++
++	for (i = 0; i < NUM_CHILDREN; i++) {
++		pids[i] = fork();
++		if (pids[i] == 0) {
++			sleep(1);
++			exit(0);
++		}
++	}
++
++	for (i = 0; i < NUM_CHILDREN; i++) {
++		SCX_EQ(waitpid(pids[i], &status, 0), pids[i]);
++		SCX_EQ(status, 0);
++	}
++
++	SCX_ASSERT(!skel->bss->saw_local);
++
++	bpf_link__destroy(link);
++
++	return SCX_TEST_PASS;
++}
++
++static void cleanup(void *ctx)
++{
++	struct select_cpu_dfl *skel = ctx;
++
++	select_cpu_dfl__destroy(skel);
++}
++
++struct scx_test select_cpu_dfl = {
++	.name = "select_cpu_dfl",
++	.description = "Verify the default ops.select_cpu() dispatches tasks "
++		       "when idles cores are found, and skips ops.enqueue()",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&select_cpu_dfl)
+diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c
+new file mode 100644
+index 000000000000..4bb5abb2d369
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c
+@@ -0,0 +1,89 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * A scheduler that validates the behavior of direct dispatching with a default
++ * select_cpu implementation, and with the SCX_OPS_ENQ_DFL_NO_DISPATCH ops flag
++ * specified.
++ *
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++bool saw_local = false;
++
++/* Per-task scheduling context */
++struct task_ctx {
++	bool	force_local;	/* CPU changed by ops.select_cpu() */
++};
++
++struct {
++	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
++	__uint(map_flags, BPF_F_NO_PREALLOC);
++	__type(key, int);
++	__type(value, struct task_ctx);
++} task_ctx_stor SEC(".maps");
++
++/* Manually specify the signature until the kfunc is added to the scx repo. */
++s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
++			   bool *found) __ksym;
++
++s32 BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_select_cpu, struct task_struct *p,
++		   s32 prev_cpu, u64 wake_flags)
++{
++	struct task_ctx *tctx;
++	s32 cpu;
++
++	tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0);
++	if (!tctx) {
++		scx_bpf_error("task_ctx lookup failed");
++		return -ESRCH;
++	}
++
++	cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags,
++				     &tctx->force_local);
++
++	return cpu;
++}
++
++void BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_enqueue, struct task_struct *p,
++		    u64 enq_flags)
++{
++	u64 dsq_id = SCX_DSQ_GLOBAL;
++	struct task_ctx *tctx;
++
++	tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0);
++	if (!tctx) {
++		scx_bpf_error("task_ctx lookup failed");
++		return;
++	}
++
++	if (tctx->force_local) {
++		dsq_id = SCX_DSQ_LOCAL;
++		tctx->force_local = false;
++		saw_local = true;
++	}
++
++	scx_bpf_dispatch(p, dsq_id, SCX_SLICE_DFL, enq_flags);
++}
++
++s32 BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_init_task,
++		   struct task_struct *p, struct scx_init_task_args *args)
++{
++	/* Create the task's task_ctx entry; report -ENOMEM if that fails. */
++	struct task_ctx *tctx = bpf_task_storage_get(&task_ctx_stor, p, 0,
++					BPF_LOCAL_STORAGE_GET_F_CREATE);
++
++	return tctx ? 0 : -ENOMEM;
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops select_cpu_dfl_nodispatch_ops = {
++	.select_cpu		= select_cpu_dfl_nodispatch_select_cpu,
++	.enqueue		= select_cpu_dfl_nodispatch_enqueue,
++	.init_task		= select_cpu_dfl_nodispatch_init_task,
++	.name			= "select_cpu_dfl_nodispatch",
++};
+diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c
+new file mode 100644
+index 000000000000..1d85bf4bf3a3
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c
+@@ -0,0 +1,72 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include "select_cpu_dfl_nodispatch.bpf.skel.h"
++#include "scx_test.h"
++
++#define NUM_CHILDREN 1028
++
++static enum scx_test_status setup(void **ctx)
++{
++	struct select_cpu_dfl_nodispatch *skel;
++
++	skel = select_cpu_dfl_nodispatch__open_and_load();
++	SCX_FAIL_IF(!skel, "Failed to open and load skel");
++	*ctx = skel;
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	struct select_cpu_dfl_nodispatch *skel = ctx;
++	struct bpf_link *link;
++	pid_t pids[NUM_CHILDREN];
++	int i, status;
++
++	link = bpf_map__attach_struct_ops(skel->maps.select_cpu_dfl_nodispatch_ops);
++	SCX_FAIL_IF(!link, "Failed to attach scheduler");
++
++	for (i = 0; i < NUM_CHILDREN; i++) {
++		pids[i] = fork();
++		if (pids[i] == 0) {
++			sleep(1);
++			exit(0);
++		}
++	}
++
++	for (i = 0; i < NUM_CHILDREN; i++) {
++		SCX_EQ(waitpid(pids[i], &status, 0), pids[i]);
++		SCX_EQ(status, 0);
++	}
++
++	SCX_ASSERT(skel->bss->saw_local);
++
++	bpf_link__destroy(link);
++
++	return SCX_TEST_PASS;
++}
++
++static void cleanup(void *ctx)
++{
++	struct select_cpu_dfl_nodispatch *skel = ctx;
++
++	select_cpu_dfl_nodispatch__destroy(skel);
++}
++
++struct scx_test select_cpu_dfl_nodispatch = {
++	.name = "select_cpu_dfl_nodispatch",
++	.description = "Verify behavior of scx_bpf_select_cpu_dfl() in "
++		       "ops.select_cpu()",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&select_cpu_dfl_nodispatch)
+diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c
+new file mode 100644
+index 000000000000..f0b96a4a04b2
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c
+@@ -0,0 +1,41 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * A scheduler that validates direct dispatching to the built-in local and
++ * global DSQs from a custom ops.select_cpu() implementation.
++ *
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++s32 BPF_STRUCT_OPS(select_cpu_dispatch_select_cpu, struct task_struct *p,
++		   s32 prev_cpu, u64 wake_flags)
++{
++	u64 dsq_id = SCX_DSQ_LOCAL;
++	s32 cpu = prev_cpu;
++
++	if (scx_bpf_test_and_clear_cpu_idle(cpu))
++		goto dispatch;
++
++	cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
++	if (cpu >= 0)
++		goto dispatch;
++
++	dsq_id = SCX_DSQ_GLOBAL;
++	cpu = prev_cpu;
++
++dispatch:
++	scx_bpf_dispatch(p, dsq_id, SCX_SLICE_DFL, 0);
++	return cpu;
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops select_cpu_dispatch_ops = {
++	.select_cpu		= select_cpu_dispatch_select_cpu,
++	.name			= "select_cpu_dispatch",
++	.timeout_ms		= 1000U,
++};
+diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.c
+new file mode 100644
+index 000000000000..0309ca8785b3
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.c
+@@ -0,0 +1,70 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include "select_cpu_dispatch.bpf.skel.h"
++#include "scx_test.h"
++
++#define NUM_CHILDREN 1028
++
++static enum scx_test_status setup(void **ctx)
++{
++	struct select_cpu_dispatch *skel;
++
++	skel = select_cpu_dispatch__open_and_load();
++	SCX_FAIL_IF(!skel, "Failed to open and load skel");
++	*ctx = skel;
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	struct select_cpu_dispatch *skel = ctx;
++	struct bpf_link *link;
++	pid_t pids[NUM_CHILDREN];
++	int i, status;
++
++	link = bpf_map__attach_struct_ops(skel->maps.select_cpu_dispatch_ops);
++	SCX_FAIL_IF(!link, "Failed to attach scheduler");
++
++	for (i = 0; i < NUM_CHILDREN; i++) {
++		pids[i] = fork();
++		if (pids[i] == 0) {
++			sleep(1);
++			exit(0);
++		}
++	}
++
++	for (i = 0; i < NUM_CHILDREN; i++) {
++		SCX_EQ(waitpid(pids[i], &status, 0), pids[i]);
++		SCX_EQ(status, 0);
++	}
++
++	bpf_link__destroy(link);
++
++	return SCX_TEST_PASS;
++}
++
++static void cleanup(void *ctx)
++{
++	struct select_cpu_dispatch *skel = ctx;
++
++	select_cpu_dispatch__destroy(skel);
++}
++
++struct scx_test select_cpu_dispatch = {
++	.name = "select_cpu_dispatch",
++	.description = "Test direct dispatching to built-in DSQs from "
++		       "ops.select_cpu()",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&select_cpu_dispatch)
+diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c
+new file mode 100644
+index 000000000000..7b42ddce0f56
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c
+@@ -0,0 +1,37 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * A scheduler that direct-dispatches to a nonexistent DSQ from
++ * ops.select_cpu(), which is expected to fail.
++ *
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++UEI_DEFINE(uei);
++
++s32 BPF_STRUCT_OPS(select_cpu_dispatch_bad_dsq_select_cpu, struct task_struct *p,
++		   s32 prev_cpu, u64 wake_flags)
++{
++	/* Dispatching to a random DSQ should fail. */
++	scx_bpf_dispatch(p, 0xcafef00d, SCX_SLICE_DFL, 0);
++
++	return prev_cpu;
++}
++
++void BPF_STRUCT_OPS(select_cpu_dispatch_bad_dsq_exit, struct scx_exit_info *ei)
++{
++	UEI_RECORD(uei, ei);
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops select_cpu_dispatch_bad_dsq_ops = {
++	.select_cpu		= select_cpu_dispatch_bad_dsq_select_cpu,
++	.exit			= select_cpu_dispatch_bad_dsq_exit,
++	.name			= "select_cpu_dispatch_bad_dsq",
++	.timeout_ms		= 1000U,
++};
+diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c
+new file mode 100644
+index 000000000000..47eb6ed7627d
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c
+@@ -0,0 +1,56 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include "select_cpu_dispatch_bad_dsq.bpf.skel.h"
++#include "scx_test.h"
++
++static enum scx_test_status setup(void **ctx)
++{
++	struct select_cpu_dispatch_bad_dsq *skel;
++
++	skel = select_cpu_dispatch_bad_dsq__open_and_load();
++	SCX_FAIL_IF(!skel, "Failed to open and load skel");
++	*ctx = skel;
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	struct select_cpu_dispatch_bad_dsq *skel = ctx;
++	struct bpf_link *link;
++
++	link = bpf_map__attach_struct_ops(skel->maps.select_cpu_dispatch_bad_dsq_ops);
++	SCX_FAIL_IF(!link, "Failed to attach scheduler");
++
++	sleep(1);
++
++	/* Detach first: SCX_EQ() returns early and would leak the link. */
++	bpf_link__destroy(link);
++	SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_ERROR));
++	return SCX_TEST_PASS;
++}
++
++static void cleanup(void *ctx)
++{
++	struct select_cpu_dispatch_bad_dsq *skel = ctx;
++
++	select_cpu_dispatch_bad_dsq__destroy(skel);
++}
++
++struct scx_test select_cpu_dispatch_bad_dsq = {
++	.name = "select_cpu_dispatch_bad_dsq",
++	.description = "Verify graceful failure if we direct-dispatch to a "
++		       "bogus DSQ in ops.select_cpu()",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&select_cpu_dispatch_bad_dsq)
+diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c
+new file mode 100644
+index 000000000000..653e3dc0b4dc
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c
+@@ -0,0 +1,38 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * A scheduler that dispatches the same task twice in a row from
++ * ops.select_cpu(), which is expected to fail.
++ *
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++UEI_DEFINE(uei);
++
++s32 BPF_STRUCT_OPS(select_cpu_dispatch_dbl_dsp_select_cpu, struct task_struct *p,
++		   s32 prev_cpu, u64 wake_flags)
++{
++	/* Dispatching twice in a row is disallowed. */
++	scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
++	scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
++
++	return prev_cpu;
++}
++
++void BPF_STRUCT_OPS(select_cpu_dispatch_dbl_dsp_exit, struct scx_exit_info *ei)
++{
++	UEI_RECORD(uei, ei);
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops select_cpu_dispatch_dbl_dsp_ops = {
++	.select_cpu		= select_cpu_dispatch_dbl_dsp_select_cpu,
++	.exit			= select_cpu_dispatch_dbl_dsp_exit,
++	.name			= "select_cpu_dispatch_dbl_dsp",
++	.timeout_ms		= 1000U,
++};
+diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c
+new file mode 100644
+index 000000000000..48ff028a3c46
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c
+@@ -0,0 +1,56 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
++ */
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include "select_cpu_dispatch_dbl_dsp.bpf.skel.h"
++#include "scx_test.h"
++
++static enum scx_test_status setup(void **ctx)
++{
++	struct select_cpu_dispatch_dbl_dsp *skel;
++
++	skel = select_cpu_dispatch_dbl_dsp__open_and_load();
++	SCX_FAIL_IF(!skel, "Failed to open and load skel");
++	*ctx = skel;
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	struct select_cpu_dispatch_dbl_dsp *skel = ctx;
++	struct bpf_link *link;
++
++	link = bpf_map__attach_struct_ops(skel->maps.select_cpu_dispatch_dbl_dsp_ops);
++	SCX_FAIL_IF(!link, "Failed to attach scheduler");
++
++	sleep(1);
++
++	/* Detach first: SCX_EQ() returns early and would leak the link. */
++	bpf_link__destroy(link);
++	SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_ERROR));
++	return SCX_TEST_PASS;
++}
++
++static void cleanup(void *ctx)
++{
++	struct select_cpu_dispatch_dbl_dsp *skel = ctx;
++
++	select_cpu_dispatch_dbl_dsp__destroy(skel);
++}
++
++struct scx_test select_cpu_dispatch_dbl_dsp = {
++	.name = "select_cpu_dispatch_dbl_dsp",
++	.description = "Verify graceful failure if we dispatch twice to a "
++		       "DSQ in ops.select_cpu()",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&select_cpu_dispatch_dbl_dsp)
+diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c
+new file mode 100644
+index 000000000000..7f3ebf4fc2ea
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c
+@@ -0,0 +1,92 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * A scheduler that validates that enqueue flags are properly stored and
++ * applied at dispatch time when a task is directly dispatched from
++ * ops.select_cpu(). We validate this by using scx_bpf_dispatch_vtime(), and
++ * making the test a very basic vtime scheduler.
++ *
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2024 Tejun Heo <tj@kernel.org>
++ */
++
++#include <scx/common.bpf.h>
++
++char _license[] SEC("license") = "GPL";
++
++volatile bool consumed;
++
++static u64 vtime_now;
++
++#define VTIME_DSQ 0
++
++static inline bool vtime_before(u64 a, u64 b)
++{
++	return (s64)(a - b) < 0;
++}
++
++static inline u64 task_vtime(const struct task_struct *p)
++{
++	u64 vtime = p->scx.dsq_vtime;
++
++	if (vtime_before(vtime, vtime_now - SCX_SLICE_DFL))
++		return vtime_now - SCX_SLICE_DFL;
++	else
++		return vtime;
++}
++
++s32 BPF_STRUCT_OPS(select_cpu_vtime_select_cpu, struct task_struct *p,
++		   s32 prev_cpu, u64 wake_flags)
++{
++	s32 cpu;
++
++	cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
++	if (cpu >= 0)
++		goto ddsp;
++
++	cpu = prev_cpu;
++	scx_bpf_test_and_clear_cpu_idle(cpu);
++ddsp:
++	scx_bpf_dispatch_vtime(p, VTIME_DSQ, SCX_SLICE_DFL, task_vtime(p), 0);
++	return cpu;
++}
++
++void BPF_STRUCT_OPS(select_cpu_vtime_dispatch, s32 cpu, struct task_struct *p)
++{
++	if (scx_bpf_consume(VTIME_DSQ))
++		consumed = true;
++}
++
++void BPF_STRUCT_OPS(select_cpu_vtime_running, struct task_struct *p)
++{
++	if (vtime_before(vtime_now, p->scx.dsq_vtime))
++		vtime_now = p->scx.dsq_vtime;
++}
++
++void BPF_STRUCT_OPS(select_cpu_vtime_stopping, struct task_struct *p,
++		    bool runnable)
++{
++	p->scx.dsq_vtime += (SCX_SLICE_DFL - p->scx.slice) * 100 / p->scx.weight;
++}
++
++void BPF_STRUCT_OPS(select_cpu_vtime_enable, struct task_struct *p)
++{
++	p->scx.dsq_vtime = vtime_now;
++}
++
++s32 BPF_STRUCT_OPS_SLEEPABLE(select_cpu_vtime_init)
++{
++	return scx_bpf_create_dsq(VTIME_DSQ, -1);
++}
++
++SEC(".struct_ops.link")
++struct sched_ext_ops select_cpu_vtime_ops = {
++	.select_cpu		= select_cpu_vtime_select_cpu,
++	.dispatch		= select_cpu_vtime_dispatch,
++	.running		= select_cpu_vtime_running,
++	.stopping		= select_cpu_vtime_stopping,
++	.enable			= select_cpu_vtime_enable,
++	.init			= select_cpu_vtime_init,
++	.name			= "select_cpu_vtime",
++	.timeout_ms		= 1000U,
++};
+diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.c
+new file mode 100644
+index 000000000000..b4629c2364f5
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.c
+@@ -0,0 +1,59 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ * Copyright (c) 2024 Tejun Heo <tj@kernel.org>
++ */
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include "select_cpu_vtime.bpf.skel.h"
++#include "scx_test.h"
++
++static enum scx_test_status setup(void **ctx)
++{
++	struct select_cpu_vtime *skel;
++
++	skel = select_cpu_vtime__open_and_load();
++	SCX_FAIL_IF(!skel, "Failed to open and load skel");
++	*ctx = skel;
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	struct select_cpu_vtime *skel = ctx;
++	struct bpf_link *link;
++
++	SCX_ASSERT(!skel->bss->consumed);
++
++	link = bpf_map__attach_struct_ops(skel->maps.select_cpu_vtime_ops);
++	SCX_FAIL_IF(!link, "Failed to attach scheduler");
++
++	sleep(1);
++
++	/* Detach first: SCX_ASSERT() returns early and would leak the link. */
++	bpf_link__destroy(link);
++
++	SCX_ASSERT(skel->bss->consumed);
++	return SCX_TEST_PASS;
++}
++
++static void cleanup(void *ctx)
++{
++	struct select_cpu_vtime *skel = ctx;
++
++	select_cpu_vtime__destroy(skel);
++}
++
++struct scx_test select_cpu_vtime = {
++	.name = "select_cpu_vtime",
++	.description = "Test doing direct vtime-dispatching from "
++		       "ops.select_cpu(), to a non-built-in DSQ",
++	.setup = setup,
++	.run = run,
++	.cleanup = cleanup,
++};
++REGISTER_SCX_TEST(&select_cpu_vtime)
+diff --git a/tools/testing/selftests/sched_ext/test_example.c b/tools/testing/selftests/sched_ext/test_example.c
+new file mode 100644
+index 000000000000..ce36cdf03cdc
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/test_example.c
+@@ -0,0 +1,49 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 Tejun Heo <tj@kernel.org>
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++#include <bpf/bpf.h>
++#include <scx/common.h>
++#include "scx_test.h"
++
++static bool setup_called = false;
++static bool run_called = false;
++static bool cleanup_called = false;
++
++static int context = 10;
++
++static enum scx_test_status setup(void **ctx)
++{
++	setup_called = true;
++	*ctx = &context;
++
++	return SCX_TEST_PASS;
++}
++
++static enum scx_test_status run(void *ctx)
++{
++	int *arg = ctx;
++
++	SCX_ASSERT(setup_called);
++	SCX_ASSERT(!run_called && !cleanup_called);
++	SCX_EQ(*arg, context);
++
++	run_called = true;
++	return SCX_TEST_PASS;
++}
++
++static void cleanup (void *ctx)
++{
++	SCX_BUG_ON(!run_called || cleanup_called, "Wrong callbacks invoked");
++}
++
++struct scx_test example = {
++	.name		= "example",
++	.description	= "Validate the basic function of the test suite itself",
++	.setup		= setup,
++	.run		= run,
++	.cleanup	= cleanup,
++};
++REGISTER_SCX_TEST(&example)
+diff --git a/tools/testing/selftests/sched_ext/util.c b/tools/testing/selftests/sched_ext/util.c
+new file mode 100644
+index 000000000000..e47769c91918
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/util.c
+@@ -0,0 +1,71 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
++ */
++#include <errno.h>
++#include <fcntl.h>
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <unistd.h>
++
++/* Read at most max_len - 1 bytes, NUL-terminate; return len or -errno. */
++static ssize_t read_text(const char *path, char *buf, size_t max_len)
++{
++	ssize_t len;
++	int fd;
++
++	fd = open(path, O_RDONLY);
++	if (fd < 0)
++		return -errno;
++
++	len = read(fd, buf, max_len - 1);
++	if (len < 0)
++		len = -errno;	/* before close() clobbers it */
++	else
++		buf[len] = 0;
++	close(fd);
++	return len;
++}
++
++/* Append len bytes of buf to path; return bytes written or -errno. */
++static ssize_t write_text(const char *path, char *buf, ssize_t len)
++{
++	int fd = open(path, O_WRONLY | O_APPEND);
++	ssize_t written;
++
++	if (fd < 0)
++		return -errno;
++	written = write(fd, buf, len);
++	if (written < 0)
++		written = -errno;	/* before close() clobbers it */
++	close(fd);
++	return written;
++}
++
++long file_read_long(const char *path)
++{
++	char text[128];
++	ssize_t nread = read_text(path, text, sizeof(text));
++
++	/* An empty file and a read error are both reported as -1. */
++	if (nread <= 0)
++		return -1;
++	return atol(text);
++}
++
++int file_write_long(const char *path, long val)
++{
++	char buf[64];
++	int ret;
++
++	/* Format as signed decimal; write only the formatted bytes. */
++	ret = sprintf(buf, "%ld", val);
++	if (ret < 0)
++		return ret;
++	if (write_text(path, buf, ret) <= 0)
++		return -1;
++
++	return 0;
++}
+diff --git a/tools/testing/selftests/sched_ext/util.h b/tools/testing/selftests/sched_ext/util.h
+new file mode 100644
+index 000000000000..bc13dfec1267
+--- /dev/null
++++ b/tools/testing/selftests/sched_ext/util.h
+@@ -0,0 +1,13 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2024 David Vernet <void@manifault.com>
++ */
++
++#ifndef __SCX_TEST_UTIL_H__
++#define __SCX_TEST_UTIL_H__
++
++long file_read_long(const char *path);
++int file_write_long(const char *path, long val);
++
++#endif // __SCX_TEST_UTIL_H__
+-- 
+2.47.0.rc0
diff --git a/patches/0003-bore-cachy-ext.patch b/patches/0003-bore-cachy-ext.patch
new file mode 100644
index 0000000..c1f3c3d
--- /dev/null
+++ b/patches/0003-bore-cachy-ext.patch
@@ -0,0 +1,1020 @@
+From e91f8d993bc2b1a1424cb2f5a931fe8f31eb97b9 Mon Sep 17 00:00:00 2001
+From: Eric Naim <dnaim@cachyos.org>
+Date: Tue, 8 Oct 2024 23:02:55 +0800
+Subject: [PATCH] bore-cachy-ext
+
+Signed-off-by: Eric Naim <dnaim@cachyos.org>
+---
+ include/linux/sched.h      |  20 +-
+ include/linux/sched/bore.h |  37 ++++
+ init/Kconfig               |  17 ++
+ kernel/Kconfig.hz          |  17 ++
+ kernel/fork.c              |   5 +
+ kernel/sched/Makefile      |   1 +
+ kernel/sched/bore.c        | 381 +++++++++++++++++++++++++++++++++++++
+ kernel/sched/core.c        |   7 +
+ kernel/sched/debug.c       |  60 +++++-
+ kernel/sched/fair.c        | 102 ++++++++--
+ kernel/sched/features.h    |   4 +
+ kernel/sched/sched.h       |   7 +
+ 12 files changed, 640 insertions(+), 18 deletions(-)
+ create mode 100644 include/linux/sched/bore.h
+ create mode 100644 kernel/sched/bore.c
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index c5a7901b2580..bab2d659b667 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -537,6 +537,14 @@ struct sched_statistics {
+ #endif /* CONFIG_SCHEDSTATS */
+ } ____cacheline_aligned;
+ 
++#ifdef CONFIG_SCHED_BORE
++struct sched_burst_cache {
++	u8				score;		/* cached penalty handed to new tasks */
++	u32				count;		/* number of tasks averaged into score */
++	u64				timestamp;	/* last refresh time (ktime ns) */
++};
++#endif // CONFIG_SCHED_BORE
++
+ struct sched_entity {
+ 	/* For load-balancing: */
+ 	struct load_weight		load;
+@@ -545,12 +553,22 @@ struct sched_entity {
+ 	u64				min_vruntime;
+ 
+ 	struct list_head		group_node;
+-	unsigned int			on_rq;
++	unsigned char			on_rq;
++	unsigned char			rel_deadline;
+ 
+ 	u64				exec_start;
+ 	u64				sum_exec_runtime;
+ 	u64				prev_sum_exec_runtime;
+ 	u64				vruntime;
++#ifdef CONFIG_SCHED_BORE
++	u64				burst_time;
++	u8				prev_burst_penalty;
++	u8				curr_burst_penalty;
++	u8				burst_penalty;
++	u8				burst_score;
++	struct sched_burst_cache child_burst;
++	struct sched_burst_cache group_burst;
++#endif // CONFIG_SCHED_BORE
+ 	s64				vlag;
+ 	u64				slice;
+ 
+diff --git a/include/linux/sched/bore.h b/include/linux/sched/bore.h
+new file mode 100644
+index 000000000000..12a613a94ff0
+--- /dev/null
++++ b/include/linux/sched/bore.h
+@@ -0,0 +1,37 @@
++
++#include <linux/sched.h>
++#include <linux/sched/cputime.h>
++
++#ifndef _LINUX_SCHED_BORE_H
++#define _LINUX_SCHED_BORE_H
++
++#ifdef CONFIG_SCHED_BORE
++extern u8   __read_mostly sched_bore;
++extern u8   __read_mostly sched_burst_exclude_kthreads;
++extern u8   __read_mostly sched_burst_smoothness_long;
++extern u8   __read_mostly sched_burst_smoothness_short;
++extern u8   __read_mostly sched_burst_fork_atavistic;
++extern u8   __read_mostly sched_burst_parity_threshold;
++extern u8   __read_mostly sched_burst_penalty_offset;
++extern uint __read_mostly sched_burst_penalty_scale;
++extern uint __read_mostly sched_burst_cache_lifetime;
++extern uint __read_mostly sched_deadline_boost_mask;
++
++extern void update_burst_score(struct sched_entity *se);
++extern void update_burst_penalty(struct sched_entity *se);
++
++extern void restart_burst(struct sched_entity *se);
++extern void restart_burst_rescale_deadline(struct sched_entity *se);
++
++extern int sched_bore_update_handler(const struct ctl_table *table, int write,
++		void __user *buffer, size_t *lenp, loff_t *ppos);
++
++extern void sched_clone_bore(
++	struct task_struct *p, struct task_struct *parent, u64 clone_flags);
++
++extern void init_task_bore(struct task_struct *p);
++
++extern void reweight_entity(
++	struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight);
++#endif // CONFIG_SCHED_BORE
++#endif // _LINUX_SCHED_BORE_H
+diff --git a/init/Kconfig b/init/Kconfig
+index e1a88d48d652..3aea8e43c360 100644
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -1327,6 +1327,23 @@ config CHECKPOINT_RESTORE
+ 
+ 	  If unsure, say N here.
+ 
++config SCHED_BORE
++	bool "Burst-Oriented Response Enhancer"
++	default y
++	help
++	  In Desktop and Mobile computing, one might prefer interactive
++	  tasks to stay responsive no matter what runs in the background.
++
++	  Enabling this kernel feature modifies the scheduler to discriminate
++	  tasks by their burst time (runtime since it last went sleeping or
++	  yielding state) and prioritize those that run less bursty.
++	  Such tasks usually include window compositor, widgets backend,
++	  terminal emulator, video playback, games and so on.
++	  With a little impact to scheduling fairness, it may improve
++	  responsiveness especially under heavy background workload.
++
++	  If unsure, say Y here.
++
+ config SCHED_AUTOGROUP
+ 	bool "Automatic process group scheduling"
+ 	select CGROUPS
+diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
+index 0f78364efd4f..83a6b919ab29 100644
+--- a/kernel/Kconfig.hz
++++ b/kernel/Kconfig.hz
+@@ -79,5 +79,22 @@ config HZ
+ 	default 750 if HZ_750
+ 	default 1000 if HZ_1000
+ 
++config MIN_BASE_SLICE_NS
++	int "Default value for min_base_slice_ns"
++	default 2000000
++	help
++	 The BORE Scheduler automatically calculates the optimal base
++	 slice for the configured HZ using the following equation:
++	 
++	 base_slice_ns =
++	 	1000000000/HZ * DIV_ROUND_UP(min_base_slice_ns, 1000000000/HZ)
++	 
++	 This option sets the default lower bound limit of the base slice
++	 to prevent the loss of task throughput due to overscheduling.
++	 
++	 Setting this value too high can cause the system to boot with
++	 an unnecessarily large base slice, resulting in high scheduling
++	 latency and poor system responsiveness.
++
+ config SCHED_HRTICK
+ 	def_bool HIGH_RES_TIMERS
+diff --git a/kernel/fork.c b/kernel/fork.c
+index eb290420d926..8f060c73877b 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -116,6 +116,8 @@
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+ 
++#include <linux/sched/bore.h>
++
+ #include <trace/events/sched.h>
+ 
+ #define CREATE_TRACE_POINTS
+@@ -2351,6 +2353,9 @@ __latent_entropy struct task_struct *copy_process(
+ 	retval = sched_fork(clone_flags, p);
+ 	if (retval)
+ 		goto bad_fork_cleanup_policy;
++#ifdef CONFIG_SCHED_BORE
++	sched_clone_bore(p, current, clone_flags);
++#endif // CONFIG_SCHED_BORE
+ 
+ 	retval = perf_event_init_task(p, clone_flags);
+ 	if (retval)
+diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
+index 976092b7bd45..293aad675444 100644
+--- a/kernel/sched/Makefile
++++ b/kernel/sched/Makefile
+@@ -32,3 +32,4 @@ obj-y += core.o
+ obj-y += fair.o
+ obj-y += build_policy.o
+ obj-y += build_utility.o
++obj-y += bore.o
+diff --git a/kernel/sched/bore.c b/kernel/sched/bore.c
+new file mode 100644
+index 000000000000..cd7e8a8d6075
+--- /dev/null
++++ b/kernel/sched/bore.c
+@@ -0,0 +1,381 @@
++/*
++ *  Burst-Oriented Response Enhancer (BORE) CPU Scheduler
++ *  Copyright (C) 2021-2024 Masahito Suzuki <firelzrd@gmail.com>
++ */
++#include <linux/cpuset.h>
++#include <linux/sched/bore.h>
++#include "sched.h"
++
++#ifdef CONFIG_SCHED_BORE
++u8   __read_mostly sched_bore                   = 1;
++u8   __read_mostly sched_burst_exclude_kthreads = 1;
++u8   __read_mostly sched_burst_smoothness_long  = 1;
++u8   __read_mostly sched_burst_smoothness_short = 0;
++u8   __read_mostly sched_burst_fork_atavistic   = 2;
++u8   __read_mostly sched_burst_parity_threshold = 2;
++u8   __read_mostly sched_burst_penalty_offset   = 24;
++uint __read_mostly sched_burst_penalty_scale    = 1280;
++uint __read_mostly sched_burst_cache_lifetime   = 60000000;
++uint __read_mostly sched_deadline_boost_mask    = ENQUEUE_INITIAL
++                                                | ENQUEUE_WAKEUP;
++static int __maybe_unused sixty_four     = 64;
++static int __maybe_unused maxval_u8      = 255;
++static int __maybe_unused maxval_12_bits = 4095;
++
++#define MAX_BURST_PENALTY (39U <<2)
++
++static inline u32 log2plus1_u64_u32f8(u64 v) {
++	u32 integral = fls64(v);
++	u8  fractional = v << (64 - integral) >> 55;
++	return integral << 8 | fractional;
++}
++
++static inline u32 calc_burst_penalty(u64 burst_time) {
++	u32 greed, tolerance, penalty, scaled_penalty;
++	
++	greed = log2plus1_u64_u32f8(burst_time);
++	tolerance = sched_burst_penalty_offset << 8;
++	penalty = max(0, (s32)(greed - tolerance));
++	scaled_penalty = penalty * sched_burst_penalty_scale >> 16;
++
++	return min(MAX_BURST_PENALTY, scaled_penalty);
++}
++
++static inline u64 __scale_slice(u64 delta, u8 score)
++{return mul_u64_u32_shr(delta, sched_prio_to_wmult[score], 22);}
++
++static inline u64 __unscale_slice(u64 delta, u8 score)
++{return mul_u64_u32_shr(delta, sched_prio_to_weight[score], 10);}
++
++static void reweight_task_by_prio(struct task_struct *p, int prio) {
++	struct sched_entity *se = &p->se;
++	unsigned long weight = scale_load(sched_prio_to_weight[prio]);
++
++	reweight_entity(cfs_rq_of(se), se, weight);
++	se->load.inv_weight = sched_prio_to_wmult[prio];
++}
++
++static inline u8 effective_prio(struct task_struct *p) {
++	u8 prio = p->static_prio - MAX_RT_PRIO;
++	if (likely(sched_bore))
++		prio += p->se.burst_score;
++	return min(39, prio);
++}
++
++void update_burst_score(struct sched_entity *se) {
++	if (!entity_is_task(se)) return;
++	struct task_struct *p = task_of(se);
++	u8 prev_prio = effective_prio(p);
++
++	u8 burst_score = 0;
++	if (!((p->flags & PF_KTHREAD) && likely(sched_burst_exclude_kthreads)))
++		burst_score = se->burst_penalty >> 2;
++	se->burst_score = burst_score;
++
++	u8 new_prio = effective_prio(p);
++	if (new_prio != prev_prio)
++		reweight_task_by_prio(p, new_prio);
++}
++
++void update_burst_penalty(struct sched_entity *se) {
++	se->curr_burst_penalty = calc_burst_penalty(se->burst_time);
++	se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty);
++	update_burst_score(se);
++}
++
++static inline u32 binary_smooth(u32 new, u32 old) {
++	int increment = new - old;
++	return (0 <= increment)?
++		old + ( increment >> (int)sched_burst_smoothness_long):
++		old - (-increment >> (int)sched_burst_smoothness_short);
++}
++
++static void revolve_burst_penalty(struct sched_entity *se) {
++	se->prev_burst_penalty =
++		binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty);
++	se->burst_time = 0;
++	se->curr_burst_penalty = 0;
++}
++
++inline void restart_burst(struct sched_entity *se) {
++	revolve_burst_penalty(se);
++	se->burst_penalty = se->prev_burst_penalty;
++	update_burst_score(se);
++}
++
++void restart_burst_rescale_deadline(struct sched_entity *se) {
++	s64 vscaled, wremain, vremain = se->deadline - se->vruntime;
++	struct task_struct *p = task_of(se);
++	u8 prev_prio = effective_prio(p);
++	restart_burst(se);
++	u8 new_prio = effective_prio(p);
++	if (prev_prio > new_prio) {
++		wremain = __unscale_slice(abs(vremain), prev_prio);
++		vscaled = __scale_slice(wremain, new_prio);
++		if (unlikely(vremain < 0))
++			vscaled = -vscaled;
++		se->deadline = se->vruntime + vscaled;
++	}
++}
++
++static inline bool task_is_bore_eligible(struct task_struct *p)
++{return p->sched_class == &fair_sched_class;}
++
++static void reset_task_weights_bore(void) {
++	struct task_struct *task;
++	struct rq *rq;
++	struct rq_flags rf;
++
++	write_lock_irq(&tasklist_lock);
++	for_each_process(task) {
++		if (!task_is_bore_eligible(task)) continue;
++		rq = task_rq(task);
++		rq_lock_irqsave(rq, &rf);
++		reweight_task_by_prio(task, effective_prio(task));
++		rq_unlock_irqrestore(rq, &rf);
++	}
++	write_unlock_irq(&tasklist_lock);
++}
++
++int sched_bore_update_handler(const struct ctl_table *table, int write,
++		void __user *buffer, size_t *lenp, loff_t *ppos) {
++	int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
++	if (ret || !write)
++		return ret;
++
++	reset_task_weights_bore();
++
++	return 0;
++}
++
++static u32 count_child_tasks(struct task_struct *p) {
++	struct task_struct *child;
++	u32 cnt = 0;
++	list_for_each_entry(child, &p->children, sibling) {cnt++;}
++	return cnt;
++}
++
++static inline bool burst_cache_expired(struct sched_burst_cache *bc, u64 now)
++{return (s64)(bc->timestamp + sched_burst_cache_lifetime - now) < 0;}
++
++static void update_burst_cache(struct sched_burst_cache *bc,
++		struct task_struct *p, u32 cnt, u32 sum, u64 now) {
++	u8 avg = cnt ? sum / cnt : 0;
++	bc->score = max(avg, p->se.burst_penalty);
++	bc->count = cnt;
++	bc->timestamp = now;
++}
++
++static inline void update_child_burst_direct(struct task_struct *p, u64 now) {
++	u32 cnt = 0, sum = 0;
++	struct task_struct *child;
++
++	list_for_each_entry(child, &p->children, sibling) {
++		if (!task_is_bore_eligible(child)) continue;
++		cnt++;
++		sum += child->se.burst_penalty;
++	}
++
++	update_burst_cache(&p->se.child_burst, p, cnt, sum, now);
++}
++
++static inline u8 inherit_burst_direct(struct task_struct *p, u64 now) {
++	struct task_struct *parent = p;
++	if (burst_cache_expired(&parent->se.child_burst, now))
++		update_child_burst_direct(parent, now);
++
++	return parent->se.child_burst.score;
++}
++
++static void update_child_burst_topological(
++	struct task_struct *p, u64 now, u32 depth, u32 *acnt, u32 *asum) {
++	u32 cnt = 0, dcnt = 0, sum = 0;
++	struct task_struct *child, *dec;
++
++	list_for_each_entry(child, &p->children, sibling) {
++		dec = child;
++		while ((dcnt = count_child_tasks(dec)) == 1)
++			dec = list_first_entry(&dec->children, struct task_struct, sibling);
++		
++		if (!dcnt || !depth) {
++			if (!task_is_bore_eligible(dec)) continue;
++			cnt++;
++			sum += dec->se.burst_penalty;
++			continue;
++		}
++		if (!burst_cache_expired(&dec->se.child_burst, now)) {
++			cnt += dec->se.child_burst.count;
++			sum += (u32)dec->se.child_burst.score * dec->se.child_burst.count;
++			continue;
++		}
++		update_child_burst_topological(dec, now, depth - 1, &cnt, &sum);
++	}
++
++	update_burst_cache(&p->se.child_burst, p, cnt, sum, now);
++	*acnt += cnt;
++	*asum += sum;
++}
++
++static inline u8 inherit_burst_topological(struct task_struct *p, u64 now) {
++	struct task_struct *anc = p;
++	u32 cnt = 0, sum = 0;
++
++	while (anc->real_parent != anc && count_child_tasks(anc) == 1)
++		anc = anc->real_parent;
++
++	if (burst_cache_expired(&anc->se.child_burst, now))
++		update_child_burst_topological(
++			anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum);
++
++	return anc->se.child_burst.score;
++}
++
++static inline void update_tg_burst(struct task_struct *p, u64 now) {
++	struct task_struct *task;
++	u32 cnt = 0, sum = 0;
++
++	for_each_thread(p, task) {
++		if (!task_is_bore_eligible(task)) continue;
++		cnt++;
++		sum += task->se.burst_penalty;
++	}
++
++	update_burst_cache(&p->se.group_burst, p, cnt, sum, now);
++}
++
++static inline u8 inherit_burst_tg(struct task_struct *p, u64 now) {
++	struct task_struct *parent = p->group_leader;
++	if (burst_cache_expired(&parent->se.group_burst, now))
++		update_tg_burst(parent, now);
++
++	return parent->se.group_burst.score;
++}
++
++void sched_clone_bore(
++	struct task_struct *p, struct task_struct *parent, u64 clone_flags) {
++	if (!task_is_bore_eligible(p)) return;
++
++	u64 now = ktime_get_ns();
++	read_lock(&tasklist_lock);
++	u8 penalty = (clone_flags & CLONE_THREAD) ?
++		inherit_burst_tg(parent, now) :
++		likely(sched_burst_fork_atavistic) ?
++			inherit_burst_topological(parent, now):
++			inherit_burst_direct(parent, now);
++	read_unlock(&tasklist_lock);
++
++	struct sched_entity *se = &p->se;
++	revolve_burst_penalty(se);
++	se->burst_penalty = se->prev_burst_penalty =
++		max(se->prev_burst_penalty, penalty);
++	se->child_burst.timestamp = 0;
++	se->group_burst.timestamp = 0;
++}
++
++void init_task_bore(struct task_struct *p) {
++	p->se.burst_time = 0;
++	p->se.prev_burst_penalty = 0;
++	p->se.curr_burst_penalty = 0;
++	p->se.burst_penalty = 0;
++	p->se.burst_score = 0;
++	memset(&p->se.child_burst, 0, sizeof(struct sched_burst_cache));
++	memset(&p->se.group_burst, 0, sizeof(struct sched_burst_cache));
++}
++
++#ifdef CONFIG_SYSCTL
++static struct ctl_table sched_bore_sysctls[] = {
++	{
++		.procname	= "sched_bore",
++		.data		= &sched_bore,
++		.maxlen		= sizeof(u8),
++		.mode		= 0644,
++		.proc_handler = sched_bore_update_handler,
++		.extra1		= SYSCTL_ZERO,
++		.extra2		= SYSCTL_ONE,
++	},
++	{
++		.procname	= "sched_burst_exclude_kthreads",
++		.data		= &sched_burst_exclude_kthreads,
++		.maxlen		= sizeof(u8),
++		.mode		= 0644,
++		.proc_handler = proc_dou8vec_minmax,
++		.extra1		= SYSCTL_ZERO,
++		.extra2		= SYSCTL_ONE,
++	},
++	{
++		.procname	= "sched_burst_smoothness_long",
++		.data		= &sched_burst_smoothness_long,
++		.maxlen		= sizeof(u8),
++		.mode		= 0644,
++		.proc_handler = proc_dou8vec_minmax,
++		.extra1		= SYSCTL_ZERO,
++		.extra2		= SYSCTL_ONE,
++	},
++	{
++		.procname	= "sched_burst_smoothness_short",
++		.data		= &sched_burst_smoothness_short,
++		.maxlen		= sizeof(u8),
++		.mode		= 0644,
++		.proc_handler = proc_dou8vec_minmax,
++		.extra1		= SYSCTL_ZERO,
++		.extra2		= SYSCTL_ONE,
++	},
++	{
++		.procname	= "sched_burst_fork_atavistic",
++		.data		= &sched_burst_fork_atavistic,
++		.maxlen		= sizeof(u8),
++		.mode		= 0644,
++		.proc_handler = proc_dou8vec_minmax,
++		.extra1		= SYSCTL_ZERO,
++		.extra2		= SYSCTL_THREE,
++	},
++	{
++		.procname	= "sched_burst_parity_threshold",
++		.data		= &sched_burst_parity_threshold,
++		.maxlen		= sizeof(u8),
++		.mode		= 0644,
++		.proc_handler = proc_dou8vec_minmax,
++		.extra1		= SYSCTL_ZERO,
++		.extra2		= &maxval_u8,
++	},
++	{
++		.procname	= "sched_burst_penalty_offset",
++		.data		= &sched_burst_penalty_offset,
++		.maxlen		= sizeof(u8),
++		.mode		= 0644,
++		.proc_handler = proc_dou8vec_minmax,
++		.extra1		= SYSCTL_ZERO,
++		.extra2		= &sixty_four,
++	},
++	{
++		.procname	= "sched_burst_penalty_scale",
++		.data		= &sched_burst_penalty_scale,
++		.maxlen		= sizeof(uint),
++		.mode		= 0644,
++		.proc_handler = proc_douintvec_minmax,
++		.extra1		= SYSCTL_ZERO,
++		.extra2		= &maxval_12_bits,
++	},
++	{
++		.procname	= "sched_burst_cache_lifetime",
++		.data		= &sched_burst_cache_lifetime,
++		.maxlen		= sizeof(uint),
++		.mode		= 0644,
++		.proc_handler = proc_douintvec,
++	},
++	{
++		.procname	= "sched_deadline_boost_mask",
++		.data		= &sched_deadline_boost_mask,
++		.maxlen		= sizeof(uint),
++		.mode		= 0644,
++		.proc_handler = proc_douintvec,
++	},
++};
++
++static int __init sched_bore_sysctl_init(void) {
++	register_sysctl_init("kernel", sched_bore_sysctls);
++	return 0;
++}
++late_initcall(sched_bore_sysctl_init);
++#endif // CONFIG_SYSCTL
++#endif // CONFIG_SCHED_BORE
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 8ae04bd4a5a4..4aa992f99c36 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -97,6 +97,8 @@
+ #include "../../io_uring/io-wq.h"
+ #include "../smpboot.h"
+ 
++#include <linux/sched/bore.h>
++
+ EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask);
+ 
+@@ -8290,6 +8292,11 @@ void __init sched_init(void)
+ 	BUG_ON(!sched_class_above(&ext_sched_class, &idle_sched_class));
+ #endif
+ 
++#ifdef CONFIG_SCHED_BORE
++	printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 5.6.1 by Masahito Suzuki\n");
++	init_task_bore(&init_task);	/* zero the boot task's burst state */
++#endif // CONFIG_SCHED_BORE
++
+ 	wait_bit_init();
+ 
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
+index c057ef46c5f8..3cab39e34824 100644
+--- a/kernel/sched/debug.c
++++ b/kernel/sched/debug.c
+@@ -167,7 +167,52 @@ static const struct file_operations sched_feat_fops = {
+ };
+ 
+ #ifdef CONFIG_SMP
++#ifdef CONFIG_SCHED_BORE
++static ssize_t sched_min_base_slice_write(struct file *filp, const char __user *ubuf,
++				   size_t cnt, loff_t *ppos)
++{
++	char buf[16];
++	unsigned int value;
++
++	if (cnt > 15)
++		cnt = 15;
++
++	if (copy_from_user(&buf, ubuf, cnt))
++		return -EFAULT;
++	buf[cnt] = '\0';
++
++	if (kstrtouint(buf, 10, &value))
++		return -EINVAL;
+ 
++	if (!value)
++		return -EINVAL;
++
++	sysctl_sched_min_base_slice = value;
++	sched_update_min_base_slice();
++
++	*ppos += cnt;
++	return cnt;
++}
++
++static int sched_min_base_slice_show(struct seq_file *m, void *v)
++{
++	seq_printf(m, "%u\n", sysctl_sched_min_base_slice);	/* unsigned int */
++	return 0;
++}
++
++static int sched_min_base_slice_open(struct inode *inode, struct file *filp)
++{
++	return single_open(filp, sched_min_base_slice_show, NULL);
++}
++
++static const struct file_operations sched_min_base_slice_fops = {
++	.open		= sched_min_base_slice_open,
++	.write		= sched_min_base_slice_write,
++	.read		= seq_read,
++	.llseek		= seq_lseek,
++	.release	= single_release,
++};
++#else // !CONFIG_SCHED_BORE
+ static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
+ 				   size_t cnt, loff_t *ppos)
+ {
+@@ -213,7 +258,7 @@ static const struct file_operations sched_scaling_fops = {
+ 	.llseek		= seq_lseek,
+ 	.release	= single_release,
+ };
+-
++#endif // CONFIG_SCHED_BORE
+ #endif /* SMP */
+ 
+ #ifdef CONFIG_PREEMPT_DYNAMIC
+@@ -347,13 +392,20 @@ static __init int sched_init_debug(void)
+ 	debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
+ #endif
+ 
++#ifdef CONFIG_SCHED_BORE
++	debugfs_create_file("min_base_slice_ns", 0644, debugfs_sched, NULL, &sched_min_base_slice_fops);
++	debugfs_create_u32("base_slice_ns", 0400, debugfs_sched, &sysctl_sched_base_slice);
++#else // !CONFIG_SCHED_BORE
+ 	debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);
++#endif // CONFIG_SCHED_BORE
+ 
+ 	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
+ 	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
+ 
+ #ifdef CONFIG_SMP
++#if !defined(CONFIG_SCHED_BORE)
+ 	debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
++#endif // CONFIG_SCHED_BORE
+ 	debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
+ 	debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
+ 
+@@ -596,6 +648,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
+ 		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
+ 		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));
+ 
++#ifdef CONFIG_SCHED_BORE
++	SEQ_printf(m, " %2d", p->se.burst_score);
++#endif // CONFIG_SCHED_BORE
+ #ifdef CONFIG_NUMA_BALANCING
+ 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
+ #endif
+@@ -1069,6 +1124,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
+ 
+ 	P(se.load.weight);
+ #ifdef CONFIG_SMP
++#ifdef CONFIG_SCHED_BORE
++	P(se.burst_score);
++#endif // CONFIG_SCHED_BORE
+ 	P(se.avg.load_sum);
+ 	P(se.avg.runnable_sum);
+ 	P(se.avg.util_sum);
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index a36e37a674e8..603d72b9e6e8 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -55,6 +55,8 @@
+ #include "stats.h"
+ #include "autogroup.h"
+ 
++#include <linux/sched/bore.h>
++
+ /*
+  * The initial- and re-scaling of tunables is configurable
+  *
+@@ -64,28 +66,31 @@
+  *   SCHED_TUNABLESCALING_LOG - scaled logarithmically, *1+ilog(ncpus)
+  *   SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
+  *
+- * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
++ * (BORE  default SCHED_TUNABLESCALING_NONE = *1 constant)
++ * (EEVDF default SCHED_TUNABLESCALING_LOG  = *(1+ilog(ncpus))
+  */
++#ifdef CONFIG_SCHED_BORE
++unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
++#else // !CONFIG_SCHED_BORE
+ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
++#endif // CONFIG_SCHED_BORE
+ 
+ /*
+  * Minimal preemption granularity for CPU-bound tasks:
+  *
+- * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
++ * (BORE  default: 1 sec / HZ, scaled up to the first multiple >= min_base_slice, units: nanoseconds)
++ * (EEVDF default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
+  */
+-#ifdef CONFIG_CACHY
+-unsigned int sysctl_sched_base_slice			= 350000ULL;
+-static unsigned int normalized_sysctl_sched_base_slice	= 350000ULL;
+-#else
++#ifdef CONFIG_SCHED_BORE
++unsigned int            sysctl_sched_base_slice = 1000000000ULL / HZ;
++static unsigned int configured_sched_base_slice = 1000000000ULL / HZ;
++unsigned int        sysctl_sched_min_base_slice = CONFIG_MIN_BASE_SLICE_NS;
++#else // !CONFIG_SCHED_BORE
+ unsigned int sysctl_sched_base_slice			= 750000ULL;
+ static unsigned int normalized_sysctl_sched_base_slice	= 750000ULL;
+-#endif
++#endif // CONFIG_SCHED_BORE
+ 
+-#ifdef CONFIG_CACHY
+-const_debug unsigned int sysctl_sched_migration_cost	= 300000UL;
+-#else
+ const_debug unsigned int sysctl_sched_migration_cost	= 500000UL;
+-#endif
+ 
+ static int __init setup_sched_thermal_decay_shift(char *str)
+ {
+@@ -130,12 +135,8 @@ int __weak arch_asym_cpu_priority(int cpu)
+  *
+  * (default: 5 msec, units: microseconds)
+  */
+-#ifdef CONFIG_CACHY
+-static unsigned int sysctl_sched_cfs_bandwidth_slice		= 3000UL;
+-#else
+ static unsigned int sysctl_sched_cfs_bandwidth_slice		= 5000UL;
+ #endif
+-#endif
+ 
+ #ifdef CONFIG_NUMA_BALANCING
+ /* Restrict the NUMA promotion throughput (MB/s) for each target node. */
+@@ -201,6 +202,18 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w)
+  *
+  * This idea comes from the SD scheduler of Con Kolivas:
+  */
++#ifdef CONFIG_SCHED_BORE
++static void update_sysctl(void) {
++	unsigned int base_slice = configured_sched_base_slice;
++	unsigned int min_base_slice = sysctl_sched_min_base_slice;
++
++	if (min_base_slice)
++		base_slice *= DIV_ROUND_UP(min_base_slice, base_slice);
++
++	sysctl_sched_base_slice = base_slice;
++}
++void sched_update_min_base_slice(void) { update_sysctl(); }
++#else // !CONFIG_SCHED_BORE
+ static unsigned int get_update_sysctl_factor(void)
+ {
+ 	unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8);
+@@ -231,6 +244,7 @@ static void update_sysctl(void)
+ 	SET_SYSCTL(sched_base_slice);
+ #undef SET_SYSCTL
+ }
++#endif // CONFIG_SCHED_BORE
+ 
+ void __init sched_init_granularity(void)
+ {
+@@ -708,6 +722,9 @@ static s64 entity_lag(u64 avruntime, struct sched_entity *se)
+ 
+ 	vlag = avruntime - se->vruntime;
+ 	limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
++#ifdef CONFIG_SCHED_BORE
++	limit >>= !!sched_bore;
++#endif // CONFIG_SCHED_BORE
+ 
+ 	return clamp(vlag, -limit, limit);
+ }
+@@ -909,6 +926,10 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
+ 	 * until it gets a new slice. See the HACK in set_next_entity().
+ 	 */
+ 	if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline)
++#ifdef CONFIG_SCHED_BORE
++		if (!(likely(sched_bore) && likely(sched_burst_parity_threshold) &&
++			sched_burst_parity_threshold < cfs_rq->nr_running))
++#endif // CONFIG_SCHED_BORE
+ 		return curr;
+ 
+ 	/* Pick the leftmost entity if it's eligible */
+@@ -967,6 +988,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
+  * Scheduling class statistics methods:
+  */
+ #ifdef CONFIG_SMP
++#if !defined(CONFIG_SCHED_BORE)
+ int sched_update_scaling(void)
+ {
+ 	unsigned int factor = get_update_sysctl_factor();
+@@ -978,6 +1000,7 @@ int sched_update_scaling(void)
+ 
+ 	return 0;
+ }
++#endif // CONFIG_SCHED_BORE
+ #endif
+ #endif
+ 
+@@ -1178,6 +1201,10 @@ static void update_curr(struct cfs_rq *cfs_rq)
+ 	if (unlikely(delta_exec <= 0))
+ 		return;
+ 
++#ifdef CONFIG_SCHED_BORE
++	curr->burst_time += delta_exec;
++	update_burst_penalty(curr);
++#endif // CONFIG_SCHED_BORE
+ 	curr->vruntime += calc_delta_fair(delta_exec, curr);
+ 	update_deadline(cfs_rq, curr);
+ 	update_min_vruntime(cfs_rq);
+@@ -3804,7 +3831,7 @@ static void reweight_eevdf(struct sched_entity *se, u64 avruntime,
+ 	se->deadline = avruntime + vslice;
+ }
+ 
+-static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
++void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
+ 			    unsigned long weight)
+ {
+ 	bool curr = cfs_rq->curr == se;
+@@ -5212,6 +5239,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ 	 *
+ 	 * EEVDF: placement strategy #1 / #2
+ 	 */
++#ifdef CONFIG_SCHED_BORE
++	if (se->vlag)
++#endif // CONFIG_SCHED_BORE
+ 	if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) {
+ 		struct sched_entity *curr = cfs_rq->curr;
+ 		unsigned long load;
+@@ -5282,6 +5312,16 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ 
+ 	se->vruntime = vruntime - lag;
+ 
++	if (sched_feat(PLACE_REL_DEADLINE) && se->rel_deadline) {
++		se->deadline += se->vruntime;
++		se->rel_deadline = 0;
++		return;
++	}
++#ifdef CONFIG_SCHED_BORE
++	else if (likely(sched_bore))
++		vslice >>= !!(flags & sched_deadline_boost_mask);
++	else
++#endif // CONFIG_SCHED_BORE
+ 	/*
+ 	 * When joining the competition; the existing tasks will be,
+ 	 * on average, halfway through their slice, as such start tasks
+@@ -5391,6 +5431,7 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+ static void
+ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ {
++	bool sleep = flags & DEQUEUE_SLEEP;
+ 	int action = UPDATE_TG;
+ 
+ 	if (entity_is_task(se) && task_on_rq_migrating(task_of(se)))
+@@ -5418,6 +5459,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ 	clear_buddies(cfs_rq, se);
+ 
+ 	update_entity_lag(cfs_rq, se);
++	if (sched_feat(PLACE_REL_DEADLINE) && !sleep) {
++		se->deadline -= se->vruntime;
++		se->rel_deadline = 1;
++	}
++
+ 	if (se != cfs_rq->curr)
+ 		__dequeue_entity(cfs_rq, se);
+ 	se->on_rq = 0;
+@@ -6869,6 +6915,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+ 	bool was_sched_idle = sched_idle_rq(rq);
+ 
+ 	util_est_dequeue(&rq->cfs, p);
++#ifdef CONFIG_SCHED_BORE
++	if (task_sleep) {
++		cfs_rq = cfs_rq_of(se);
++		if (cfs_rq->curr == se)
++			update_curr(cfs_rq);
++		restart_burst(se);
++	}
++#endif // CONFIG_SCHED_BORE
+ 
+ 	for_each_sched_entity(se) {
+ 		cfs_rq = cfs_rq_of(se);
+@@ -8651,16 +8705,25 @@ static void yield_task_fair(struct rq *rq)
+ 	/*
+ 	 * Are we the only task in the tree?
+ 	 */
++#if !defined(CONFIG_SCHED_BORE)
+ 	if (unlikely(rq->nr_running == 1))
+ 		return;
+ 
+ 	clear_buddies(cfs_rq, se);
++#endif // CONFIG_SCHED_BORE
+ 
+ 	update_rq_clock(rq);
+ 	/*
+ 	 * Update run-time statistics of the 'current'.
+ 	 */
+ 	update_curr(cfs_rq);
++#ifdef CONFIG_SCHED_BORE
++	restart_burst_rescale_deadline(se);
++	if (unlikely(rq->nr_running == 1))
++		return;
++
++	clear_buddies(cfs_rq, se);
++#endif // CONFIG_SCHED_BORE
+ 	/*
+ 	 * Tell update_rq_clock() that we've just updated,
+ 	 * so we don't do microscopic update in schedule()
+@@ -12725,6 +12788,9 @@ static void task_fork_fair(struct task_struct *p)
+ 	curr = cfs_rq->curr;
+ 	if (curr)
+ 		update_curr(cfs_rq);
++#ifdef CONFIG_SCHED_BORE
++	update_burst_score(se);
++#endif // CONFIG_SCHED_BORE
+ 	place_entity(cfs_rq, se, ENQUEUE_INITIAL);
+ 	rq_unlock(rq, &rf);
+ }
+@@ -12837,6 +12903,10 @@ static void attach_task_cfs_rq(struct task_struct *p)
+ 
+ static void switched_from_fair(struct rq *rq, struct task_struct *p)
+ {
++	p->se.rel_deadline = 0;
++#ifdef CONFIG_SCHED_BORE
++	init_task_bore(p);
++#endif // CONFIG_SCHED_BORE
+ 	detach_task_cfs_rq(p);
+ }
+ 
+diff --git a/kernel/sched/features.h b/kernel/sched/features.h
+index 143f55df890b..e97b7b68bdd3 100644
+--- a/kernel/sched/features.h
++++ b/kernel/sched/features.h
+@@ -6,6 +6,10 @@
+  */
+ SCHED_FEAT(PLACE_LAG, true)
+ SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
++/*
++ * Preserve relative virtual deadline on 'migration'.
++ */
++SCHED_FEAT(PLACE_REL_DEADLINE, true)
+ SCHED_FEAT(RUN_TO_PARITY, true)
+ 
+ /*
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 48d893de632b..62e7e9e5fd9c 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -2065,7 +2065,11 @@ static inline void update_sched_domain_debugfs(void) { }
+ static inline void dirty_sched_domain_sysctl(int cpu) { }
+ #endif
+ 
++#ifdef CONFIG_SCHED_BORE
++extern void sched_update_min_base_slice(void);
++#else // !CONFIG_SCHED_BORE
+ extern int sched_update_scaling(void);
++#endif // CONFIG_SCHED_BORE
+ 
+ static inline const struct cpumask *task_user_cpus(struct task_struct *p)
+ {
+@@ -2738,6 +2742,9 @@ extern const_debug unsigned int sysctl_sched_nr_migrate;
+ extern const_debug unsigned int sysctl_sched_migration_cost;
+ 
+ extern unsigned int sysctl_sched_base_slice;
++#ifdef CONFIG_SCHED_BORE
++extern unsigned int sysctl_sched_min_base_slice;
++#endif // CONFIG_SCHED_BORE
+ 
+ #ifdef CONFIG_SCHED_DEBUG
+ extern int sysctl_resched_latency_warn_ms;
+-- 
+2.47.0
diff --git a/patches/series b/patches/series
new file mode 100644
index 0000000..6fc82b3
--- /dev/null
+++ b/patches/series
@@ -0,0 +1,3 @@
+0001-cachyos-base-all.patch
+0002-sched-ext.patch
+0003-bore-cachy-ext.patch
\ No newline at end of file
diff --git a/release.sh b/release.sh
new file mode 100755
index 0000000..28de1f2
--- /dev/null
+++ b/release.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+# Send the built .deb packages to the repository server.
+# The include/exclude filters restrict the transfer to the *.deb files
+# directly inside ./output/; everything else is excluded.
+rsync -azP --include './' --include '*.deb' --exclude '*' ./output/ ferreo@direct.pika-os.com:/srv/www/cockatiel-incoming/
diff --git a/scripts/build.sh b/scripts/build.sh
new file mode 100755
index 0000000..e0a4474
--- /dev/null
+++ b/scripts/build.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Build the kernel as Debian packages via the `bindeb-pkg` make target.
+# One make job is started per processing unit reported by nproc; the package
+# version is the output of `make kernelversion` with "-101pika4" appended,
+# and LOCALVERSION is set to "-pikaos".
+
+echo "Pika Kernel - Building"
+
+make -j"$(nproc)" bindeb-pkg \
+    LOCALVERSION=-pikaos \
+    KDEB_PKGVERSION="$(make kernelversion)-101pika4"
diff --git a/scripts/config.sh b/scripts/config.sh
new file mode 100755
index 0000000..0817a2e
--- /dev/null
+++ b/scripts/config.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+echo "Pika Kernel - Applying configuration"
+
+cp ../config .config
+
+scripts/config -k -d CONFIG_GENERIC_CPU
+scripts/config -k -e CONFIG_GENERIC_CPU3
+scripts/config -e CACHY
+scripts/config -e SCHED_BORE
+
+scripts/config -e HZ_1000 --set-val HZ 1000
+scripts/config -d HZ_PERIODIC -d NO_HZ_IDLE -d CONTEXT_TRACKING_FORCE -e NO_HZ_FULL_NODEF -e NO_HZ_FULL -e NO_HZ -e NO_HZ_COMMON -e CONTEXT_TRACKING
+scripts/config -e PREEMPT_BUILD -d PREEMPT_NONE -d PREEMPT_VOLUNTARY -e PREEMPT -e PREEMPT_COUNT -e PREEMPTION -e PREEMPT_DYNAMIC
+
+scripts/config -d CC_OPTIMIZE_FOR_PERFORMANCE \
+            -e CC_OPTIMIZE_FOR_PERFORMANCE_O3
+
+scripts/config -m TCP_CONG_CUBIC \
+            -d DEFAULT_CUBIC \
+            -e TCP_CONG_BBR \
+            -e DEFAULT_BBR \
+            --set-str DEFAULT_TCP_CONG bbr
+
+scripts/config -e SCHED_CLASS_EXT
+
+scripts/config -e LRU_GEN -e LRU_GEN_ENABLED -d LRU_GEN_STATS
+
+scripts/config -d TRANSPARENT_HUGEPAGE_MADVISE -e TRANSPARENT_HUGEPAGE_ALWAYS
+
+scripts/config -e PER_VMA_LOCK -d PER_VMA_LOCK_STATS
+
+scripts/config -e DAMON \
+            -e DAMON_VADDR \
+            -e DAMON_DBGFS \
+            -e DAMON_SYSFS \
+            -e DAMON_PADDR \
+            -e DAMON_RECLAIM \
+            -e DAMON_LRU_SORT
+
+scripts/config --set-val MODULE_COMPRESS_ZSTD_LEVEL 19 -e MODULE_COMPRESS_ZSTD_ULTRA --set-val MODULE_COMPRESS_ZSTD_LEVEL_ULTRA 22 --set-val ZSTD_COMP_VAL 22
+
+scripts/config -e EFI_HANDOVER_PROTOCOL
+
+scripts/config -e USER_NS
+
+make prepare
diff --git a/scripts/output.sh b/scripts/output.sh
new file mode 100755
index 0000000..7dbe8a2
--- /dev/null
+++ b/scripts/output.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Collect the built .deb packages from the parent directory into ./output,
+# after deleting the ones matching linux-libc*.deb so they are not copied.
+
+echo "Pika Kernel - Copying Output"
+
+cd ..
+
+# -f: having no linux-libc*.deb to delete is not an error.
+rm -f ./linux-libc*.deb
+
+# cp does not create the destination directory, so make sure it exists.
+mkdir -p ./output
+
+for f in *.deb
+do
+    # Quoted so that file names with spaces or glob characters are copied intact.
+    cp "$f" "./output/$f"
+done
diff --git a/scripts/patch.sh b/scripts/patch.sh
new file mode 100755
index 0000000..f30424b
--- /dev/null
+++ b/scripts/patch.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Apply every patch listed in ../patches/series, in order, to the source tree
+# in the current directory. Lines of the series file starting with '#' are
+# skipped. The first patch that fails to apply aborts with exit status 2.
+
+echo "Pika Kernel - Applying patches"
+
+if [ -f ../patches/series ]
+then
+    # Word splitting on the grep output yields one patch name per iteration and
+    # also copes with a series file that has no trailing newline.
+    for i in $(grep -v '^#' ../patches/series)
+    do
+        echo "Applying Patch: $i"
+        if ! patch -Np1 -i "../patches/$i"
+        then
+            # Fail in this shell, not in a child `bash -c`: a child's exit
+            # cannot stop the loop, and a failed patch must never let the
+            # build continue. `exit` ends the run whether this file is
+            # executed or sourced.
+            echo "Applying Patch $i Failed!" >&2
+            exit 2
+        fi
+    done
+fi
diff --git a/scripts/source.sh b/scripts/source.sh
new file mode 100755
index 0000000..481f9af
--- /dev/null
+++ b/scripts/source.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# Download and unpack the kernel release named in ./VERSION from
+# cdn.kernel.org, then change into the extracted source directory.
+# The final `cd` only affects the caller when this file is sourced.
+
+echo "Pika Kernel - Getting source"
+
+pika_kernel_version="$(cat ./VERSION)"
+# The download directory is named after the first dot-separated field: v<major>.x
+pika_kernel_major="$(echo $pika_kernel_version | cut -f1 -d".")"
+pika_kernel_tarball="linux-${pika_kernel_version}.tar.gz"
+
+wget -nv "https://cdn.kernel.org/pub/linux/kernel/v${pika_kernel_major}.x/${pika_kernel_tarball}"
+tar -xf "./${pika_kernel_tarball}"
+
+cd "linux-${pika_kernel_version}"