From 2fd49cc4758d3ce835e4e4f574113d9b7958bc48 Mon Sep 17 00:00:00 2001 From: ferrreo Date: Sat, 16 Mar 2024 10:46:26 +0000 Subject: [PATCH] 6.8.1 stable --- .github/workflows/release.yml | 2 - VERSION | 2 +- config | 116 +- patches/cachyos/0001-bore-cachy.patch | 180 +- patches/cachyos/0001-cachyos-base-all.patch | 336 ++- patches/cachyos/0003-nvidia.patch | 230 ++ patches/cachyos/0004-intel.patch | 2203 +++++++++++++++++++ patches/series | 2 + scripts/source.sh | 5 +- 9 files changed, 2873 insertions(+), 203 deletions(-) create mode 100644 patches/cachyos/0003-nvidia.patch create mode 100644 patches/cachyos/0004-intel.patch diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4c32f4c..52c984d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -14,8 +14,6 @@ jobs: steps: - uses: actions/checkout@v3 - with: - ref: 6.8RC - name: Import GPG key id: import_gpg diff --git a/VERSION b/VERSION index 1269a95..5f6c086 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -6.8-rc6 +6.8.1 diff --git a/config b/config index 8bd93d6..53b954e 100644 --- a/config +++ b/config @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/x86 6.8.0-rc6 Kernel Configuration +# Linux/x86 6.8.1 Kernel Configuration # CONFIG_CC_VERSION_TEXT="gcc (GCC) 13.2.1 20230801" CONFIG_CC_IS_GCC=y @@ -16,10 +16,11 @@ CONFIG_CC_CAN_LINK=y CONFIG_CC_CAN_LINK_STATIC=y CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y +CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND=y CONFIG_TOOLS_SUPPORT_RELR=y CONFIG_CC_HAS_ASM_INLINE=y CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y -CONFIG_PAHOLE_VERSION=125 +CONFIG_PAHOLE_VERSION=126 CONFIG_IRQ_WORK=y CONFIG_BUILDTIME_TABLE_SORT=y CONFIG_THREAD_INFO_IN_TASK=y @@ -137,6 +138,7 @@ CONFIG_PREEMPT_COUNT=y CONFIG_PREEMPTION=y CONFIG_PREEMPT_DYNAMIC=y CONFIG_SCHED_CORE=y +# CONFIG_SCHED_CLASS_EXT is not set # # CPU/Task time and stats accounting @@ -199,17 +201,16 @@ CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y # CONFIG_UCLAMP_TASK=y CONFIG_UCLAMP_BUCKETS_COUNT=5 -# CONFIG_SCHED_ALT is not set # end of Scheduler features CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y CONFIG_CC_HAS_INT128=y CONFIG_CC_IMPLICIT_FALLTHROUGH="-Wimplicit-fallthrough=5" -CONFIG_GCC11_NO_ARRAY_BOUNDS=y +CONFIG_GCC10_NO_ARRAY_BOUNDS=y +CONFIG_CC_NO_ARRAY_BOUNDS=y CONFIG_GCC_NO_STRINGOP_OVERFLOW=y CONFIG_CC_NO_STRINGOP_OVERFLOW=y -CONFIG_CC_NO_ARRAY_BOUNDS=y CONFIG_ARCH_SUPPORTS_INT128=y CONFIG_NUMA_BALANCING=y CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y @@ -224,6 +225,7 @@ CONFIG_CGROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y # CONFIG_RT_GROUP_SCHED is not set +# CONFIG_EXT_GROUP_SCHED is not set CONFIG_SCHED_MM_CID=y CONFIG_UCLAMP_TASK_GROUP=y CONFIG_CGROUP_PIDS=y @@ -614,6 +616,7 @@ CONFIG_CPU_IBRS_ENTRY=y CONFIG_CPU_SRSO=y CONFIG_SLS=y # CONFIG_GDS_FORCE_MITIGATION is not set +CONFIG_MITIGATION_RFDS=y CONFIG_ARCH_HAS_ADD_PAGES=y # @@ -1141,7 +1144,7 @@ CONFIG_SWAP=y CONFIG_ZSWAP=y CONFIG_ZSWAP_DEFAULT_ON=y # CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON is not set -# CONFIG_ZSWAP_SHRINKER_DEFAULT_ON is not set +CONFIG_ZSWAP_SHRINKER_DEFAULT_ON=y # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set @@ -1296,8 +1299,8 @@ CONFIG_TLS_DEVICE=y # CONFIG_TLS_TOE is not set CONFIG_XFRM=y CONFIG_XFRM_OFFLOAD=y -CONFIG_XFRM_ALGO=y -CONFIG_XFRM_USER=y +CONFIG_XFRM_ALGO=m +CONFIG_XFRM_USER=m # CONFIG_XFRM_USER_COMPAT is not set 
CONFIG_XFRM_INTERFACE=m CONFIG_XFRM_SUB_POLICY=y @@ -2218,7 +2221,7 @@ CONFIG_LWTUNNEL_BPF=y CONFIG_DST_CACHE=y CONFIG_GRO_CELLS=y CONFIG_SOCK_VALIDATE_XMIT=y -CONFIG_NET_SELFTESTS=y +CONFIG_NET_SELFTESTS=m CONFIG_NET_SOCK_MSG=y CONFIG_NET_DEVLINK=y CONFIG_PAGE_POOL=y @@ -2291,7 +2294,7 @@ CONFIG_PCI_HYPERV_INTERFACE=m # CONFIG_PCIE_DW=y CONFIG_PCIE_DW_HOST=y -CONFIG_PCI_MESON=y +CONFIG_PCI_MESON=m CONFIG_PCIE_DW_PLAT=y CONFIG_PCIE_DW_PLAT_HOST=y # end of DesignWare-based PCIe controllers @@ -3624,10 +3627,10 @@ CONFIG_SKFP=m # CONFIG_HIPPI is not set CONFIG_NET_SB1000=m CONFIG_PHYLINK=m -CONFIG_PHYLIB=y +CONFIG_PHYLIB=m CONFIG_SWPHY=y CONFIG_LED_TRIGGER_PHY=y -CONFIG_FIXED_PHY=y +CONFIG_FIXED_PHY=m CONFIG_SFP=m # @@ -3761,11 +3764,11 @@ CONFIG_MCTP_SERIAL=m CONFIG_MCTP_TRANSPORT_I2C=m # end of MCTP Device Drivers -CONFIG_MDIO_DEVICE=y -CONFIG_MDIO_BUS=y -CONFIG_FWNODE_MDIO=y -CONFIG_ACPI_MDIO=y -CONFIG_MDIO_DEVRES=y +CONFIG_MDIO_DEVICE=m +CONFIG_MDIO_BUS=m +CONFIG_FWNODE_MDIO=m +CONFIG_ACPI_MDIO=m +CONFIG_MDIO_DEVRES=m CONFIG_MDIO_BITBANG=m CONFIG_MDIO_BCM_UNIMAC=m CONFIG_MDIO_CAVIUM=m @@ -4197,7 +4200,7 @@ CONFIG_IEEE802154_HWSIM=m # # Wireless WAN # -CONFIG_WWAN=y +CONFIG_WWAN=m CONFIG_WWAN_DEBUGFS=y CONFIG_WWAN_HWSIM=m CONFIG_MHI_WWAN_CTRL=m @@ -4626,9 +4629,9 @@ CONFIG_SERIAL_8250_DWLIB=y CONFIG_SERIAL_8250_DFL=m CONFIG_SERIAL_8250_DW=m CONFIG_SERIAL_8250_RT288X=y -CONFIG_SERIAL_8250_LPSS=y -CONFIG_SERIAL_8250_MID=y -CONFIG_SERIAL_8250_PERICOM=y +CONFIG_SERIAL_8250_LPSS=m +CONFIG_SERIAL_8250_MID=m +CONFIG_SERIAL_8250_PERICOM=m # # Non-8250 serial port support @@ -4905,7 +4908,7 @@ CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m CONFIG_SPI_DYNAMIC=y # CONFIG_SPMI is not set # CONFIG_HSI is not set -CONFIG_PPS=y +CONFIG_PPS=m # CONFIG_PPS_DEBUG is not set # @@ -4923,8 +4926,8 @@ CONFIG_PPS_CLIENT_GPIO=m # # PTP clock support # -CONFIG_PTP_1588_CLOCK=y -CONFIG_PTP_1588_CLOCK_OPTIONAL=y +CONFIG_PTP_1588_CLOCK=m +CONFIG_PTP_1588_CLOCK_OPTIONAL=m CONFIG_DP83640_PHY=m CONFIG_PTP_1588_CLOCK_INES=m CONFIG_PTP_1588_CLOCK_KVM=m @@ -4961,25 +4964,25 @@ CONFIG_PINCTRL_CS47L92=y # CONFIG_PINCTRL_BAYTRAIL=y CONFIG_PINCTRL_CHERRYVIEW=y -CONFIG_PINCTRL_LYNXPOINT=y +CONFIG_PINCTRL_LYNXPOINT=m CONFIG_PINCTRL_INTEL=y -CONFIG_PINCTRL_INTEL_PLATFORM=y -CONFIG_PINCTRL_ALDERLAKE=y -CONFIG_PINCTRL_BROXTON=y -CONFIG_PINCTRL_CANNONLAKE=y -CONFIG_PINCTRL_CEDARFORK=y -CONFIG_PINCTRL_DENVERTON=y -CONFIG_PINCTRL_ELKHARTLAKE=y -CONFIG_PINCTRL_EMMITSBURG=y -CONFIG_PINCTRL_GEMINILAKE=y -CONFIG_PINCTRL_ICELAKE=y -CONFIG_PINCTRL_JASPERLAKE=y -CONFIG_PINCTRL_LAKEFIELD=y -CONFIG_PINCTRL_LEWISBURG=y -CONFIG_PINCTRL_METEORLAKE=y -CONFIG_PINCTRL_METEORPOINT=y -CONFIG_PINCTRL_SUNRISEPOINT=y -CONFIG_PINCTRL_TIGERLAKE=y +CONFIG_PINCTRL_INTEL_PLATFORM=m +CONFIG_PINCTRL_ALDERLAKE=m +CONFIG_PINCTRL_BROXTON=m +CONFIG_PINCTRL_CANNONLAKE=m +CONFIG_PINCTRL_CEDARFORK=m +CONFIG_PINCTRL_DENVERTON=m +CONFIG_PINCTRL_ELKHARTLAKE=m +CONFIG_PINCTRL_EMMITSBURG=m +CONFIG_PINCTRL_GEMINILAKE=m +CONFIG_PINCTRL_ICELAKE=m +CONFIG_PINCTRL_JASPERLAKE=m +CONFIG_PINCTRL_LAKEFIELD=m +CONFIG_PINCTRL_LEWISBURG=m +CONFIG_PINCTRL_METEORLAKE=m +CONFIG_PINCTRL_METEORPOINT=m +CONFIG_PINCTRL_SUNRISEPOINT=m +CONFIG_PINCTRL_TIGERLAKE=m # end of Intel pinctrl drivers # @@ -5199,7 +5202,7 @@ CONFIG_CHARGER_TWL4030=m CONFIG_CHARGER_LP8727=m CONFIG_CHARGER_LP8788=m CONFIG_CHARGER_GPIO=m -CONFIG_CHARGER_MANAGER=y +CONFIG_CHARGER_MANAGER=m CONFIG_CHARGER_LT3651=m CONFIG_CHARGER_LTC4162L=m CONFIG_CHARGER_MAX14577=m @@ -5754,7 +5757,7 @@ CONFIG_MFD_SYSCON=y CONFIG_MFD_LP3943=m 
CONFIG_MFD_LP8788=y CONFIG_MFD_TI_LMU=m -CONFIG_MFD_PALMAS=y +CONFIG_MFD_PALMAS=m CONFIG_TPS6105X=m CONFIG_TPS65010=m CONFIG_TPS6507X=m @@ -7873,7 +7876,7 @@ CONFIG_HID=y CONFIG_HID_BATTERY_STRENGTH=y CONFIG_HIDRAW=y CONFIG_UHID=m -CONFIG_HID_GENERIC=y +CONFIG_HID_GENERIC=m # # Special HID drivers @@ -8638,6 +8641,7 @@ CONFIG_LEDS_TRIGGER_NETDEV=m CONFIG_LEDS_TRIGGER_PATTERN=m CONFIG_LEDS_TRIGGER_AUDIO=m CONFIG_LEDS_TRIGGER_TTY=m +CONFIG_LEDS_TRIGGER_BLKDEV=m # # Simple LED drivers @@ -8713,7 +8717,7 @@ CONFIG_EDAC_SUPPORT=y CONFIG_EDAC=y CONFIG_EDAC_LEGACY_SYSFS=y # CONFIG_EDAC_DEBUG is not set -CONFIG_EDAC_DECODE_MCE=m +CONFIG_EDAC_DECODE_MCE=y CONFIG_EDAC_GHES=y CONFIG_EDAC_AMD64=m CONFIG_EDAC_E752X=m @@ -8898,7 +8902,7 @@ CONFIG_DW_DMAC=m CONFIG_DW_DMAC_PCI=y CONFIG_DW_EDMA=m CONFIG_DW_EDMA_PCIE=m -CONFIG_HSU_DMA=y +CONFIG_HSU_DMA=m CONFIG_SF_PDMA=m CONFIG_INTEL_LDMA=y @@ -8944,7 +8948,7 @@ CONFIG_VFIO_CONTAINER=y CONFIG_VFIO_IOMMU_TYPE1=m # CONFIG_VFIO_NOIOMMU is not set CONFIG_VFIO_VIRQFD=y -# CONFIG_VFIO_DEBUGFS is not set +CONFIG_VFIO_DEBUGFS=y # # VFIO support for PCI devices @@ -9042,7 +9046,7 @@ CONFIG_SWIOTLB_XEN=y CONFIG_XEN_PCI_STUB=y CONFIG_XEN_PCIDEV_BACKEND=m CONFIG_XEN_PVCALLS_FRONTEND=m -CONFIG_XEN_PVCALLS_BACKEND=y +CONFIG_XEN_PVCALLS_BACKEND=m CONFIG_XEN_SCSI_BACKEND=m CONFIG_XEN_PRIVCMD=m CONFIG_XEN_PRIVCMD_EVENTFD=y @@ -9931,7 +9935,7 @@ CONFIG_AM2315=m CONFIG_DHT11=m CONFIG_HDC100X=m CONFIG_HDC2010=m -CONFIG_HDC3010=m +CONFIG_HDC3020=m CONFIG_HID_SENSOR_HUMIDITY=m CONFIG_HTS221=m CONFIG_HTS221_I2C=m @@ -10229,7 +10233,7 @@ CONFIG_PWM=y CONFIG_PWM_SYSFS=y # CONFIG_PWM_DEBUG is not set CONFIG_PWM_CLK=m -CONFIG_PWM_CRC=y +CONFIG_PWM_CRC=m CONFIG_PWM_CROS_EC=m CONFIG_PWM_DWC_CORE=m CONFIG_PWM_DWC=m @@ -10310,7 +10314,7 @@ CONFIG_ANDROID_BINDER_DEVICES="binder,hwbinder,vndbinder" # CONFIG_ANDROID_BINDER_IPC_SELFTEST is not set # end of Android -CONFIG_LIBNVDIMM=y +CONFIG_LIBNVDIMM=m CONFIG_BLK_DEV_PMEM=m CONFIG_ND_CLAIM=y CONFIG_ND_BTT=m @@ -10318,6 +10322,8 @@ CONFIG_BTT=y CONFIG_ND_PFN=m CONFIG_NVDIMM_PFN=y CONFIG_NVDIMM_DAX=y +CONFIG_NVDIMM_KEYS=y +# CONFIG_NVDIMM_SECURITY_TEST is not set CONFIG_DAX=y CONFIG_DEV_DAX=m CONFIG_DEV_DAX_PMEM=m @@ -10944,7 +10950,7 @@ CONFIG_CRYPTO=y # CONFIG_CRYPTO_ALGAPI=y CONFIG_CRYPTO_ALGAPI2=y -CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_AEAD=m CONFIG_CRYPTO_AEAD2=y CONFIG_CRYPTO_SIG2=y CONFIG_CRYPTO_SKCIPHER=y @@ -11600,7 +11606,7 @@ CONFIG_STACKTRACE=y # # Debug kernel data structures # -CONFIG_DEBUG_LIST=y +# CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_PLIST is not set # CONFIG_DEBUG_SG is not set # CONFIG_DEBUG_NOTIFIERS is not set @@ -11624,7 +11630,7 @@ CONFIG_RCU_EXP_CPU_STALL_TIMEOUT=0 # CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set # CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set -CONFIG_LATENCYTOP=y +# CONFIG_LATENCYTOP is not set # CONFIG_DEBUG_CGROUP_REF is not set CONFIG_USER_STACKTRACE_SUPPORT=y CONFIG_NOP_TRACER=y diff --git a/patches/cachyos/0001-bore-cachy.patch b/patches/cachyos/0001-bore-cachy.patch index 5de21f9..8aeeab5 100644 --- a/patches/cachyos/0001-bore-cachy.patch +++ b/patches/cachyos/0001-bore-cachy.patch @@ -1,24 +1,24 @@ -From 97dcd5da7813021da6111c09488a1ebe75f1d935 Mon Sep 17 00:00:00 2001 +From 1ab81cfa061f454316364a32761ce45a7479e616 Mon Sep 17 00:00:00 2001 From: Piotr Gorski -Date: Mon, 26 Feb 2024 09:09:36 +0100 +Date: Thu, 7 Mar 2024 22:28:47 +0100 Subject: [PATCH] bore-cachy Signed-off-by: Piotr Gorski --- - include/linux/sched.h | 11 ++ + include/linux/sched.h | 12 ++ init/Kconfig | 19 +++ - kernel/sched/core.c 
| 146 +++++++++++++++++++++ - kernel/sched/debug.c | 57 +++++++- - kernel/sched/fair.c | 281 ++++++++++++++++++++++++++++++++++++---- + kernel/sched/core.c | 148 +++++++++++++++++++ + kernel/sched/debug.c | 61 +++++++- + kernel/sched/fair.c | 319 ++++++++++++++++++++++++++++++++++++---- kernel/sched/features.h | 4 + kernel/sched/sched.h | 7 + - 7 files changed, 501 insertions(+), 24 deletions(-) + 7 files changed, 542 insertions(+), 28 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h -index ffe8f618a..314c2c981 100644 +index ffe8f618a..7ac6163f9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -547,6 +547,17 @@ struct sched_entity { +@@ -547,6 +547,18 @@ struct sched_entity { u64 sum_exec_runtime; u64 prev_sum_exec_runtime; u64 vruntime; @@ -29,6 +29,7 @@ index ffe8f618a..314c2c981 100644 + u8 burst_penalty; + u8 burst_score; + u32 burst_load; ++ bool on_cfs_rq; + u8 child_burst; + u32 child_burst_cnt; + u64 child_burst_last_cached; @@ -67,10 +68,10 @@ index 47671886d..c99132cf6 100644 bool "Automatic process group scheduling" select CGROUPS diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 9116bcc90..64b663a7b 100644 +index 9116bcc90..43e4311db 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -4507,6 +4507,141 @@ int wake_up_state(struct task_struct *p, unsigned int state) +@@ -4507,6 +4507,143 @@ int wake_up_state(struct task_struct *p, unsigned int state) return try_to_wake_up(p, state, 0); } @@ -85,6 +86,7 @@ index 9116bcc90..64b663a7b 100644 + init_task.se.curr_burst_penalty = 0; + init_task.se.burst_penalty = 0; + init_task.se.burst_score = 0; ++ init_task.se.on_cfs_rq = false; + init_task.se.child_burst_last_cached = 0; + init_task.se.burst_load = 0; +} @@ -93,6 +95,7 @@ index 9116bcc90..64b663a7b 100644 + p->se.burst_time = 0; + p->se.curr_burst_penalty = 0; + p->se.burst_score = 0; ++ p->se.on_cfs_rq = false; + p->se.child_burst_last_cached = 0; + p->se.burst_load = 0; +} @@ -212,7 +215,7 @@ index 9116bcc90..64b663a7b 100644 /* * Perform scheduler related setup for a newly forked process p. * p is forked by current. 
-@@ -4523,6 +4658,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) +@@ -4523,6 +4660,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->se.prev_sum_exec_runtime = 0; p->se.nr_migrations = 0; p->se.vruntime = 0; @@ -222,7 +225,7 @@ index 9116bcc90..64b663a7b 100644 p->se.vlag = 0; p->se.slice = sysctl_sched_base_slice; INIT_LIST_HEAD(&p->se.group_node); -@@ -4839,6 +4977,9 @@ void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) +@@ -4839,6 +4979,9 @@ void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) void sched_post_fork(struct task_struct *p) { @@ -232,20 +235,20 @@ index 9116bcc90..64b663a7b 100644 uclamp_post_fork(p); } -@@ -9910,6 +10051,11 @@ void __init sched_init(void) +@@ -9910,6 +10053,11 @@ void __init sched_init(void) BUG_ON(&dl_sched_class != &stop_sched_class + 1); #endif +#ifdef CONFIG_SCHED_BORE + sched_init_bore(); -+ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 4.2.4 by Masahito Suzuki"); ++ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 4.5.2 by Masahito Suzuki"); +#endif // CONFIG_SCHED_BORE + wait_bit_init(); #ifdef CONFIG_FAIR_GROUP_SCHED diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index 8d5d98a58..3f37534f8 100644 +index 8d5d98a58..a565363fd 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -167,7 +167,52 @@ static const struct file_operations sched_feat_fops = { @@ -341,8 +344,19 @@ index 8d5d98a58..3f37534f8 100644 #ifdef CONFIG_NUMA_BALANCING SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); #endif +@@ -1068,6 +1123,10 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, + + P(se.load.weight); + #ifdef CONFIG_SMP ++#ifdef CONFIG_SCHED_BORE ++ P(se.burst_load); ++ P(se.burst_score); ++#endif // CONFIG_SCHED_BORE + P(se.avg.load_sum); + P(se.avg.runnable_sum); + P(se.avg.util_sum); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index fc0a9de42..f85eab965 100644 +index fc0a9de42..3ee4e7e70 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -19,6 +19,9 @@ @@ -355,7 +369,7 @@ index fc0a9de42..f85eab965 100644 */ #include #include -@@ -64,28 +67,126 @@ +@@ -64,28 +67,128 @@ * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus) * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus * @@ -405,6 +419,8 @@ index fc0a9de42..f85eab965 100644 +u8 __read_mostly sched_burst_penalty_offset = 22; +uint __read_mostly sched_burst_penalty_scale = 1280; +uint __read_mostly sched_burst_cache_lifetime = 60000000; ++u8 __read_mostly sched_vlag_deviation_limit = 11; ++static int __maybe_unused thirty_two = 32; +static int __maybe_unused sixty_four = 64; +static int __maybe_unused maxval_12_bits = 4095; + @@ -450,7 +466,7 @@ index fc0a9de42..f85eab965 100644 + if (sched_burst_score_rounding) penalty += 0x2U; + se->burst_score = penalty >> 2; + -+ if ((se->burst_score != prev_score) && se->burst_load) { ++ if ((se->burst_score != prev_score) && se->on_cfs_rq) { + avg_vruntime_sub(cfs_rq, se); + avg_vruntime_add(cfs_rq, se); + } @@ -493,7 +509,7 @@ index fc0a9de42..f85eab965 100644 int sched_thermal_decay_shift; static int __init setup_sched_thermal_decay_shift(char *str) -@@ -136,12 +237,8 @@ int __weak arch_asym_cpu_priority(int cpu) +@@ -136,12 +239,8 @@ int __weak arch_asym_cpu_priority(int cpu) * * (default: 5 msec, units: microseconds) */ @@ -506,7 +522,7 @@ index fc0a9de42..f85eab965 100644 #ifdef 
CONFIG_NUMA_BALANCING /* Restrict the NUMA promotion throughput (MB/s) for each target node. */ -@@ -150,6 +247,78 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536; +@@ -150,6 +249,87 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536; #ifdef CONFIG_SYSCTL static struct ctl_table sched_fair_sysctls[] = { @@ -581,11 +597,20 @@ index fc0a9de42..f85eab965 100644 + .mode = 0644, + .proc_handler = proc_douintvec, + }, ++ { ++ .procname = "sched_vlag_deviation_limit", ++ .data = &sched_vlag_deviation_limit, ++ .maxlen = sizeof(u8), ++ .mode = 0644, ++ .proc_handler = proc_dou8vec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = &thirty_two, ++ }, +#endif // CONFIG_SCHED_BORE #ifdef CONFIG_CFS_BANDWIDTH { .procname = "sched_cfs_bandwidth_slice_us", -@@ -208,6 +377,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w) +@@ -208,6 +388,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w) * * This idea comes from the SD scheduler of Con Kolivas: */ @@ -599,7 +624,7 @@ index fc0a9de42..f85eab965 100644 static unsigned int get_update_sysctl_factor(void) { unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8); -@@ -238,6 +414,7 @@ static void update_sysctl(void) +@@ -238,6 +425,7 @@ static void update_sysctl(void) SET_SYSCTL(sched_base_slice); #undef SET_SYSCTL } @@ -607,7 +632,7 @@ index fc0a9de42..f85eab965 100644 void __init sched_init_granularity(void) { -@@ -311,6 +488,9 @@ static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se) +@@ -311,6 +499,9 @@ static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se) if (unlikely(se->load.weight != NICE_0_LOAD)) delta = __calc_delta(delta, NICE_0_LOAD, &se->load); @@ -617,7 +642,7 @@ index fc0a9de42..f85eab965 100644 return delta; } -@@ -637,10 +817,26 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se) +@@ -637,10 +828,26 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se) * * As measured, the max (key * weight) value was ~44 bits for a kernel build. */ @@ -626,11 +651,11 @@ index fc0a9de42..f85eab965 100644 +#else // CONFIG_SCHED_BORE +static unsigned long entity_weight(struct sched_entity *se) { + unsigned long weight = se->load.weight; -+ if (likely(weight && sched_bore)) weight = unscale_slice(weight, se); ++ if (likely(sched_bore)) weight = unscale_slice(weight, se); +#ifdef CONFIG_64BIT -+ weight >>= SCHED_FIXEDPOINT_SHIFT - 5; ++ weight >>= SCHED_FIXEDPOINT_SHIFT - 3; +#endif // CONFIG_64BIT -+ return max(1UL, weight); ++ return weight; +} +#endif // CONFIG_SCHED_BORE + @@ -645,7 +670,7 @@ index fc0a9de42..f85eab965 100644 s64 key = entity_key(cfs_rq, se); cfs_rq->avg_vruntime += key * weight; -@@ -650,7 +846,12 @@ avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se) +@@ -650,7 +857,12 @@ avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se) static void avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se) { @@ -659,7 +684,15 @@ index fc0a9de42..f85eab965 100644 s64 key = entity_key(cfs_rq, se); cfs_rq->avg_vruntime -= key * weight; -@@ -677,7 +878,7 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq) +@@ -670,14 +882,14 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta) + * Specifically: avg_runtime() + 0 must result in entity_eligible() := true + * For this to be so, the result of this function must have a left bias. 
+ */ +-u64 avg_vruntime(struct cfs_rq *cfs_rq) ++static u64 avg_key(struct cfs_rq *cfs_rq) + { + struct sched_entity *curr = cfs_rq->curr; + s64 avg = cfs_rq->avg_vruntime; long load = cfs_rq->avg_load; if (curr && curr->on_rq) { @@ -668,7 +701,7 @@ index fc0a9de42..f85eab965 100644 avg += entity_key(cfs_rq, curr) * weight; load += weight; -@@ -687,7 +888,7 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq) +@@ -687,12 +899,15 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq) /* sign flips effective floor / ceil */ if (avg < 0) avg -= (load - 1); @@ -676,8 +709,17 @@ index fc0a9de42..f85eab965 100644 + avg = div64_s64(avg, load); } - return cfs_rq->min_vruntime + avg; -@@ -717,6 +918,9 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) +- return cfs_rq->min_vruntime + avg; ++ return avg; + } + ++u64 avg_vruntime(struct cfs_rq *cfs_rq) { ++ return cfs_rq->min_vruntime + avg_key(cfs_rq); ++} + /* + * lag_i = S - s_i = w_i * (V - v_i) + * +@@ -717,6 +932,9 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) lag = avg_vruntime(cfs_rq) - se->vruntime; limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); @@ -687,7 +729,7 @@ index fc0a9de42..f85eab965 100644 se->vlag = clamp(lag, -limit, limit); } -@@ -744,7 +948,7 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime) +@@ -744,7 +962,7 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime) long load = cfs_rq->avg_load; if (curr && curr->on_rq) { @@ -696,7 +738,24 @@ index fc0a9de42..f85eab965 100644 avg += entity_key(cfs_rq, curr) * weight; load += weight; -@@ -968,6 +1172,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) +@@ -840,10 +1058,16 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) + se->min_vruntime = se->vruntime; + rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline, + __entity_less, &min_vruntime_cb); ++#ifdef CONFIG_SCHED_BORE ++ se->on_cfs_rq = true; ++#endif // CONFIG_SCHED_BORE + } + + static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) + { ++#ifdef CONFIG_SCHED_BORE ++ se->on_cfs_rq = false; ++#endif // CONFIG_SCHED_BORE + rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline, + &min_vruntime_cb); + avg_vruntime_sub(cfs_rq, se); +@@ -968,6 +1192,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) * Scheduling class statistics methods: */ #ifdef CONFIG_SMP @@ -704,7 +763,7 @@ index fc0a9de42..f85eab965 100644 int sched_update_scaling(void) { unsigned int factor = get_update_sysctl_factor(); -@@ -979,6 +1184,7 @@ int sched_update_scaling(void) +@@ -979,6 +1204,7 @@ int sched_update_scaling(void) return 0; } @@ -712,7 +771,7 @@ index fc0a9de42..f85eab965 100644 #endif #endif -@@ -1178,7 +1384,13 @@ static void update_curr(struct cfs_rq *cfs_rq) +@@ -1178,7 +1404,13 @@ static void update_curr(struct cfs_rq *cfs_rq) if (unlikely(delta_exec <= 0)) return; @@ -726,17 +785,28 @@ index fc0a9de42..f85eab965 100644 update_deadline(cfs_rq, curr); update_min_vruntime(cfs_rq); -@@ -3787,6 +3999,9 @@ static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se, - */ - vslice = (s64)(se->deadline - avruntime); - vslice = div_s64(vslice * old_weight, weight); -+#ifdef CONFIG_SCHED_BORE -+ if (unlikely(!sched_bore) || (s64)(avruntime + vslice - se->deadline) < 0) -+#endif // CONFIG_SCHED_BORE - se->deadline = avruntime + vslice; - } +@@ -5170,8 +5402,8 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {} + 
static void + place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) + { +- u64 vslice, vruntime = avg_vruntime(cfs_rq); +- s64 lag = 0; ++ s64 lag = 0, key = avg_key(cfs_rq); ++ u64 vslice, vruntime = cfs_rq->min_vruntime + key; -@@ -5244,12 +5459,12 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) + se->slice = sysctl_sched_base_slice; + vslice = calc_delta_fair(se->slice, se); +@@ -5184,6 +5416,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) + * + * EEVDF: placement strategy #1 / #2 + */ ++#ifdef CONFIG_SCHED_BORE ++ if (unlikely(!sched_bore) || se->vlag) ++#endif // CONFIG_SCHED_BORE + if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) { + struct sched_entity *curr = cfs_rq->curr; + unsigned long load; +@@ -5244,12 +5479,22 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) */ load = cfs_rq->avg_load; if (curr && curr->on_rq) @@ -745,14 +815,24 @@ index fc0a9de42..f85eab965 100644 - lag *= load + scale_load_down(se->load.weight); + lag *= load + entity_weight(se); ++#if !defined(CONFIG_SCHED_BORE) if (WARN_ON_ONCE(!load)) ++#else // CONFIG_SCHED_BORE ++ if (unlikely(!load)) ++#endif // CONFIG_SCHED_BORE load = 1; - lag = div_s64(lag, load); + lag = div64_s64(lag, load); ++#ifdef CONFIG_SCHED_BORE ++ if (likely(sched_bore)) { ++ s64 limit = vslice << sched_vlag_deviation_limit; ++ lag = clamp(lag, -limit, limit); ++ } ++#endif // CONFIG_SCHED_BORE } se->vruntime = vruntime - lag; -@@ -6816,6 +7031,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) +@@ -6816,6 +7061,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) bool was_sched_idle = sched_idle_rq(rq); util_est_dequeue(&rq->cfs, p); @@ -767,7 +847,7 @@ index fc0a9de42..f85eab965 100644 for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); -@@ -8565,16 +8788,25 @@ static void yield_task_fair(struct rq *rq) +@@ -8565,16 +8818,25 @@ static void yield_task_fair(struct rq *rq) /* * Are we the only task in the tree? 
*/ @@ -793,7 +873,7 @@ index fc0a9de42..f85eab965 100644 /* * Tell update_rq_clock() that we've just updated, * so we don't do microscopic update in schedule() -@@ -12664,6 +12896,9 @@ static void task_fork_fair(struct task_struct *p) +@@ -12664,6 +12926,9 @@ static void task_fork_fair(struct task_struct *p) curr = cfs_rq->curr; if (curr) update_curr(cfs_rq); diff --git a/patches/cachyos/0001-cachyos-base-all.patch b/patches/cachyos/0001-cachyos-base-all.patch index e18294a..148a8a7 100644 --- a/patches/cachyos/0001-cachyos-base-all.patch +++ b/patches/cachyos/0001-cachyos-base-all.patch @@ -1,6 +1,6 @@ -From 83b6cdeff5fe00d3225b6593453ed3782289b0fb Mon Sep 17 00:00:00 2001 +From 8f03bb4df21c5746b9f1c3e399faa3c932737e4f Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 26 Feb 2024 15:46:47 +0100 +Date: Fri, 15 Mar 2024 20:08:47 +0100 Subject: [PATCH 1/7] amd-pstate Signed-off-by: Peter Jung @@ -8,18 +8,20 @@ Signed-off-by: Peter Jung .../admin-guide/kernel-parameters.txt | 5 + Documentation/admin-guide/pm/amd-pstate.rst | 70 ++- arch/x86/Kconfig | 5 +- + arch/x86/include/asm/msr-index.h | 2 + arch/x86/kernel/acpi/cppc.c | 2 +- drivers/acpi/cppc_acpi.c | 17 +- drivers/acpi/processor_driver.c | 6 + + drivers/cpufreq/acpi-cpufreq.c | 2 - drivers/cpufreq/amd-pstate-ut.c | 2 +- - drivers/cpufreq/amd-pstate.c | 440 +++++++++++++++--- + drivers/cpufreq/amd-pstate.c | 501 +++++++++++++++--- include/acpi/cppc_acpi.h | 5 + - include/linux/amd-pstate.h | 31 +- + include/linux/amd-pstate.h | 32 +- include/linux/cpufreq.h | 1 + - 11 files changed, 523 insertions(+), 61 deletions(-) + 13 files changed, 562 insertions(+), 88 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 31b3a25680d0..522530432548 100644 +index 73062d47a462..a493d93e0d2c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -374,6 +374,11 @@ @@ -130,7 +132,7 @@ index 9eb26014d34b..82fbd01da658 100644 =============================================== diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 5edec175b9bf..29d110285438 100644 +index 637e337c332e..de39c296ea3f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1054,8 +1054,9 @@ config SCHED_MC @@ -145,6 +147,19 @@ index 5edec175b9bf..29d110285438 100644 select CPU_FREQ default y help +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index d1b5edaf6c34..bfe139eb75b6 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -744,6 +744,8 @@ + #define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) + #define MSR_K7_FID_VID_CTL 0xc0010041 + #define MSR_K7_FID_VID_STATUS 0xc0010042 ++#define MSR_K7_HWCR_CPB_DIS_BIT 25 ++#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT) + + /* K6 MSRs */ + #define MSR_K6_WHCR 0xc0000082 diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 8d8752b44f11..ff8f25faca3d 100644 --- a/arch/x86/kernel/acpi/cppc.c @@ -218,8 +233,21 @@ index 4bd16b3f0781..67db60eda370 100644 default: acpi_handle_debug(handle, "Unsupported event [0x%x]\n", event); break; +diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c +index 37f1cdf46d29..2fc82831bddd 100644 +--- a/drivers/cpufreq/acpi-cpufreq.c ++++ b/drivers/cpufreq/acpi-cpufreq.c +@@ -50,8 +50,6 @@ enum { + #define AMD_MSR_RANGE (0x7) + #define HYGON_MSR_RANGE (0x7) + +-#define MSR_K7_HWCR_CPB_DIS (1ULL << 25) +- + struct 
acpi_cpufreq_data { + unsigned int resume; + unsigned int cpu_feature; diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c -index f04ae67dda37..c5e2ca02f5ea 100644 +index f04ae67dda37..b3601b0e6dd3 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -226,7 +226,7 @@ static void amd_pstate_ut_check_freq(u32 index) @@ -227,12 +255,12 @@ index f04ae67dda37..c5e2ca02f5ea 100644 } - if (cpudata->boost_supported) { -+ if (amd_pstate_global_params.cpb_supported) { ++ if (amd_pstate_global_params.cpb_boost) { if ((policy->max == cpudata->max_freq) || (policy->max == cpudata->nominal_freq)) amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c -index 1791d37fbc53..91572dbe0cd1 100644 +index 1791d37fbc53..651055df1710 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -37,6 +37,7 @@ @@ -296,63 +324,94 @@ index 1791d37fbc53..91572dbe0cd1 100644 static inline int get_mode_idx_from_str(const char *str, size_t size) { int i; -@@ -296,14 +336,12 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) +@@ -291,16 +331,20 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) + { + u64 cap1; + u32 highest_perf; ++ struct cppc_perf_caps cppc_perf; ++ int ret; + +- int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, ++ ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &cap1); if (ret) return ret; -- + - /* - * TODO: Introduce AMD specific power feature. - * - * CPPC entry doesn't indicate the highest performance in some ASICs. -+ -+ /* Some CPUs have different highest_perf from others, it is safer ++ ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); ++ if (ret) ++ return ret; ++ ++ /* Some CPUs have different highest_perf from others, it is safer + * to read it than to assume some erroneous value, leading to performance issues. */ highest_perf = amd_get_highest_perf(); -- if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1)) -+ if(highest_perf > AMD_CPPC_HIGHEST_PERF(cap1)) - highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); - - WRITE_ONCE(cpudata->highest_perf, highest_perf); -@@ -311,6 +349,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) + if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1)) +@@ -311,7 +355,11 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); + WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1)); WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1)); ++ WRITE_ONCE(cpudata->lowest_freq, cppc_perf.lowest_freq); ++ WRITE_ONCE(cpudata->nominal_freq, cppc_perf.nominal_freq); ++ return 0; } -@@ -324,8 +363,11 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) + +@@ -319,11 +367,15 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) + { + struct cppc_perf_caps cppc_perf; + u32 highest_perf; ++ int ret; + +- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); ++ ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); if (ret) return ret; -+ /* Some CPUs have different highest_perf from others, it is safer ++ /* Some CPUs have different highest_perf from others, it is safer + * to read it than to assume some erroneous value, leading to performance issues. 
+ */ highest_perf = amd_get_highest_perf(); -- if (highest_perf > cppc_perf.highest_perf) -+ if(highest_perf > cppc_perf.highest_perf) + if (highest_perf > cppc_perf.highest_perf) highest_perf = cppc_perf.highest_perf; - - WRITE_ONCE(cpudata->highest_perf, highest_perf); -@@ -334,6 +376,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) +@@ -334,7 +386,10 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) WRITE_ONCE(cpudata->lowest_nonlinear_perf, cppc_perf.lowest_nonlinear_perf); WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); + WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf); WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf); ++ WRITE_ONCE(cpudata->lowest_freq, cppc_perf.lowest_freq); ++ WRITE_ONCE(cpudata->nominal_freq, cppc_perf.nominal_freq); if (cppc_state == AMD_PSTATE_ACTIVE) -@@ -431,6 +474,7 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, + return 0; +@@ -430,7 +485,10 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) + static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags) { ++ unsigned long max_freq; ++ struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); u64 prev = READ_ONCE(cpudata->cppc_req_cached); + u32 nominal_perf = READ_ONCE(cpudata->nominal_perf); u64 value = prev; min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, -@@ -450,6 +494,10 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, +@@ -439,6 +497,9 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, + cpudata->max_limit_perf); + des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); + ++ max_freq = READ_ONCE(cpudata->max_limit_freq); ++ policy->cur = div_u64(des_perf * max_freq, max_perf); ++ + if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { + min_perf = des_perf; + des_perf = 0; +@@ -450,6 +511,10 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, value &= ~AMD_CPPC_DES_PERF(~0L); value |= AMD_CPPC_DES_PERF(des_perf); @@ -363,7 +422,46 @@ index 1791d37fbc53..91572dbe0cd1 100644 value &= ~AMD_CPPC_MAX_PERF(~0L); value |= AMD_CPPC_MAX_PERF(max_perf); -@@ -570,7 +618,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, +@@ -477,12 +542,19 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy) + + static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) + { +- u32 max_limit_perf, min_limit_perf; ++ u32 max_limit_perf, min_limit_perf, lowest_perf; + struct amd_cpudata *cpudata = policy->driver_data; + + max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); + min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + ++ lowest_perf = READ_ONCE(cpudata->lowest_perf); ++ if (min_limit_perf < lowest_perf) ++ min_limit_perf = lowest_perf; ++ ++ if (max_limit_perf < min_limit_perf) ++ max_limit_perf = min_limit_perf; ++ + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); + WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); + WRITE_ONCE(cpudata->max_limit_freq, policy->max); +@@ -553,10 +625,9 @@ static void amd_pstate_adjust_perf(unsigned int cpu, + unsigned long capacity) + { + unsigned long max_perf, min_perf, des_perf, +- cap_perf, lowest_nonlinear_perf, max_freq; ++ cap_perf, lowest_nonlinear_perf; + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct amd_cpudata *cpudata = policy->driver_data; +- 
unsigned int target_freq; + + if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) + amd_pstate_update_min_max_limit(policy); +@@ -564,13 +635,12 @@ static void amd_pstate_adjust_perf(unsigned int cpu, + + cap_perf = READ_ONCE(cpudata->highest_perf); + lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); +- max_freq = READ_ONCE(cpudata->max_freq); + + des_perf = cap_perf; if (target_perf < capacity) des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity); @@ -372,31 +470,45 @@ index 1791d37fbc53..91572dbe0cd1 100644 if (_min_perf < capacity) min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity); -@@ -593,13 +641,19 @@ static void amd_pstate_adjust_perf(unsigned int cpu, +@@ -582,8 +652,6 @@ static void amd_pstate_adjust_perf(unsigned int cpu, + max_perf = min_perf; + + des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); +- target_freq = div_u64(des_perf * max_freq, max_perf); +- policy->cur = target_freq; + + amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true, + policy->governor->flags); +@@ -592,30 +660,30 @@ static void amd_pstate_adjust_perf(unsigned int cpu, + static int amd_get_min_freq(struct amd_cpudata *cpudata) { - struct cppc_perf_caps cppc_perf; +- struct cppc_perf_caps cppc_perf; + u32 lowest_freq; - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; - +- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); +- if (ret) +- return ret; + if (quirks && quirks->lowest_freq) + lowest_freq = quirks->lowest_freq; + else -+ lowest_freq = cppc_perf.lowest_freq; -+ ++ lowest_freq = READ_ONCE(cpudata->lowest_freq); + /* Switch to khz */ - return cppc_perf.lowest_freq * 1000; + return lowest_freq * 1000; } static int amd_get_max_freq(struct amd_cpudata *cpudata) -@@ -612,10 +666,14 @@ static int amd_get_max_freq(struct amd_cpudata *cpudata) - if (ret) - return ret; + { +- struct cppc_perf_caps cppc_perf; + u32 max_perf, max_freq, nominal_freq, nominal_perf; + u64 boost_ratio; +- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); +- if (ret) +- return ret; +- - nominal_freq = cppc_perf.nominal_freq; + nominal_freq = READ_ONCE(cpudata->nominal_freq); nominal_perf = READ_ONCE(cpudata->nominal_perf); @@ -409,37 +521,47 @@ index 1791d37fbc53..91572dbe0cd1 100644 boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT, nominal_perf); -@@ -628,13 +686,18 @@ static int amd_get_max_freq(struct amd_cpudata *cpudata) +@@ -627,31 +695,25 @@ static int amd_get_max_freq(struct amd_cpudata *cpudata) + static int amd_get_nominal_freq(struct amd_cpudata *cpudata) { - struct cppc_perf_caps cppc_perf; +- struct cppc_perf_caps cppc_perf; + u32 nominal_freq; - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; - -- /* Switch to khz */ -- return cppc_perf.nominal_freq * 1000; +- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); +- if (ret) +- return ret; + if (quirks && quirks->nominal_freq) + nominal_freq = quirks->nominal_freq; + else -+ nominal_freq = cppc_perf.nominal_freq; -+ ++ nominal_freq = READ_ONCE(cpudata->nominal_freq); + +- /* Switch to khz */ +- return cppc_perf.nominal_freq * 1000; + return nominal_freq; } static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) -@@ -648,7 +711,7 @@ static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) - if (ret) - return ret; + { +- struct cppc_perf_caps cppc_perf; + u32 lowest_nonlinear_freq, lowest_nonlinear_perf, + nominal_freq, nominal_perf; + u64 lowest_nonlinear_ratio; +- int ret = 
cppc_get_perf_caps(cpudata->cpu, &cppc_perf); +- if (ret) +- return ret; +- - nominal_freq = cppc_perf.nominal_freq; + nominal_freq = READ_ONCE(cpudata->nominal_freq); nominal_perf = READ_ONCE(cpudata->nominal_perf); +- +- lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; ++ lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); - lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; -@@ -662,48 +725,164 @@ static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) + lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, + nominal_perf); +@@ -662,48 +724,164 @@ static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) return lowest_nonlinear_freq * 1000; } @@ -463,7 +585,7 @@ index 1791d37fbc53..91572dbe0cd1 100644 - policy->cpuinfo.max_freq = cpudata->max_freq; - else - policy->cpuinfo.max_freq = cpudata->nominal_freq; -+ amd_pstate_global_params.cpb_supported = !((boost_val >> 25) & 0x1); ++ amd_pstate_global_params.cpb_supported = !(boost_val & MSR_K7_HWCR_CPB_DIS); + amd_pstate_global_params.cpb_boost = amd_pstate_global_params.cpb_supported; - policy->max = policy->cpuinfo.max_freq; @@ -507,8 +629,7 @@ index 1791d37fbc53..91572dbe0cd1 100644 - nominal_perf = READ_ONCE(cpudata->nominal_perf); + if (boot_cpu_has(X86_FEATURE_CPPC)) { + u64 cap1; - -- if (highest_perf <= nominal_perf) ++ + ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); + if (ret) + return ret; @@ -531,7 +652,8 @@ index 1791d37fbc53..91572dbe0cd1 100644 +{ + int ret, prio; + u32 highest_perf; -+ + +- if (highest_perf <= nominal_perf) + ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf); + if (ret) + return; @@ -628,7 +750,7 @@ index 1791d37fbc53..91572dbe0cd1 100644 } static int amd_pstate_cpu_init(struct cpufreq_policy *policy) -@@ -727,24 +906,30 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) +@@ -727,24 +905,30 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; @@ -665,7 +787,7 @@ index 1791d37fbc53..91572dbe0cd1 100644 policy->min = min_freq; policy->max = max_freq; -@@ -777,12 +962,10 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) +@@ -777,12 +961,10 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) cpudata->min_freq = min_freq; cpudata->max_limit_freq = max_freq; cpudata->min_limit_freq = min_freq; @@ -678,7 +800,7 @@ index 1791d37fbc53..91572dbe0cd1 100644 if (!current_pstate_driver->adjust_perf) current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; -@@ -877,6 +1060,28 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, +@@ -877,6 +1059,28 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, return sysfs_emit(buf, "%u\n", perf); } @@ -707,7 +829,7 @@ index 1791d37fbc53..91572dbe0cd1 100644 static ssize_t show_energy_performance_available_preferences( struct cpufreq_policy *policy, char *buf) { -@@ -1074,18 +1279,125 @@ static ssize_t status_store(struct device *a, struct device_attribute *b, +@@ -1074,18 +1278,125 @@ static ssize_t status_store(struct device *a, struct device_attribute *b, return ret < 0 ? 
ret : count; } @@ -751,7 +873,7 @@ index 1791d37fbc53..91572dbe0cd1 100644 + if (on) + policy->cpuinfo.max_freq = cpudata->max_freq; + else -+ policy->cpuinfo.max_freq = cpudata->nominal_freq; ++ policy->cpuinfo.max_freq = cpudata->nominal_freq * 1000; + + policy->max = policy->cpuinfo.max_freq; + @@ -790,7 +912,7 @@ index 1791d37fbc53..91572dbe0cd1 100644 + + amd_pstate_global_params.cpb_boost = !!new_state; + -+ for_each_possible_cpu(cpu) { ++ for_each_online_cpu(cpu) { + + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct amd_cpudata *cpudata = policy->driver_data; @@ -833,7 +955,7 @@ index 1791d37fbc53..91572dbe0cd1 100644 NULL, }; -@@ -1093,6 +1405,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = { +@@ -1093,6 +1404,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = { &amd_pstate_max_freq, &amd_pstate_lowest_nonlinear_freq, &amd_pstate_highest_perf, @@ -842,7 +964,7 @@ index 1791d37fbc53..91572dbe0cd1 100644 &energy_performance_preference, &energy_performance_available_preferences, NULL, -@@ -1100,6 +1414,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = { +@@ -1100,6 +1413,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = { static struct attribute *pstate_global_attributes[] = { &dev_attr_status.attr, @@ -851,7 +973,7 @@ index 1791d37fbc53..91572dbe0cd1 100644 NULL }; -@@ -1151,17 +1467,23 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) +@@ -1151,17 +1466,23 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; cpudata->epp_policy = 0; @@ -879,7 +1001,7 @@ index 1791d37fbc53..91572dbe0cd1 100644 ret = -EINVAL; goto free_cpudata1; } -@@ -1174,7 +1496,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) +@@ -1174,7 +1495,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) /* Initial processor data capability frequencies */ cpudata->max_freq = max_freq; cpudata->min_freq = min_freq; @@ -887,7 +1009,7 @@ index 1791d37fbc53..91572dbe0cd1 100644 cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; policy->driver_data = cpudata; -@@ -1205,7 +1526,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) +@@ -1205,7 +1525,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) return ret; WRITE_ONCE(cpudata->cppc_cap1_cached, value); } @@ -895,7 +1017,33 @@ index 1791d37fbc53..91572dbe0cd1 100644 return 0; -@@ -1431,7 +1751,7 @@ static struct cpufreq_driver amd_pstate_driver = { +@@ -1232,6 +1551,12 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) + max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); + min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + ++ if (min_limit_perf < min_perf) ++ min_limit_perf = min_perf; ++ ++ if (max_limit_perf < min_limit_perf) ++ max_limit_perf = min_limit_perf; ++ + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); + WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); + +@@ -1294,6 +1619,12 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) + + amd_pstate_epp_update_limit(policy); + ++ /* ++ * policy->cur is never updated with the amd_pstate_epp driver, but it ++ * is used as a stale frequency value. So, keep it within limits. 
++ */ ++ policy->cur = policy->min; ++ + return 0; + } + +@@ -1431,7 +1762,7 @@ static struct cpufreq_driver amd_pstate_driver = { .exit = amd_pstate_cpu_exit, .suspend = amd_pstate_cpu_suspend, .resume = amd_pstate_cpu_resume, @@ -904,7 +1052,7 @@ index 1791d37fbc53..91572dbe0cd1 100644 .name = "amd-pstate", .attr = amd_pstate_attr, }; -@@ -1446,6 +1766,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = { +@@ -1446,6 +1777,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = { .online = amd_pstate_epp_cpu_online, .suspend = amd_pstate_epp_suspend, .resume = amd_pstate_epp_resume, @@ -912,7 +1060,7 @@ index 1791d37fbc53..91572dbe0cd1 100644 .name = "amd-pstate-epp", .attr = amd_pstate_epp_attr, }; -@@ -1486,6 +1807,11 @@ static int __init amd_pstate_init(void) +@@ -1486,6 +1818,11 @@ static int __init amd_pstate_init(void) if (cpufreq_get_current_driver()) return -EEXIST; @@ -924,7 +1072,7 @@ index 1791d37fbc53..91572dbe0cd1 100644 switch (cppc_state) { case AMD_PSTATE_UNDEFINED: /* Disable on the following configs by default: -@@ -1567,7 +1893,17 @@ static int __init amd_pstate_param(char *str) +@@ -1567,7 +1904,17 @@ static int __init amd_pstate_param(char *str) return amd_pstate_set_driver(mode_idx); } @@ -966,7 +1114,7 @@ index 3a0995f8bce8..930b6afba6f4 100644 { return -ENOTSUPP; diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h -index 6ad02ad9c7b4..f6e2c9825700 100644 +index 6ad02ad9c7b4..e89cf1249715 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -39,11 +39,16 @@ struct amd_aperf_mperf { @@ -1005,7 +1153,13 @@ index 6ad02ad9c7b4..f6e2c9825700 100644 u32 min_limit_perf; u32 max_limit_perf; u32 min_limit_freq; -@@ -84,7 +92,7 @@ struct amd_cpudata { +@@ -79,12 +87,13 @@ struct amd_cpudata { + u32 min_freq; + u32 nominal_freq; + u32 lowest_nonlinear_freq; ++ u32 lowest_freq; + + struct amd_aperf_mperf cur; struct amd_aperf_mperf prev; u64 freq; @@ -1014,7 +1168,7 @@ index 6ad02ad9c7b4..f6e2c9825700 100644 /* EPP feature related attributes*/ s16 epp_policy; -@@ -114,4 +122,23 @@ static const char * const amd_pstate_mode_string[] = { +@@ -114,4 +123,23 @@ static const char * const amd_pstate_mode_string[] = { [AMD_PSTATE_GUIDED] = "guided", NULL, }; @@ -1053,7 +1207,7 @@ index afda5f24d3dd..9bebeec24abb 100644 -- 2.44.0 -From 73dd80071220cff0908e4e8561b5c0a815e8520d Mon Sep 17 00:00:00 2001 +From 93aefd5f98b793e9447e64dcbaa69221102e304a Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 26 Feb 2024 15:46:58 +0100 Subject: [PATCH 2/7] bbr3 @@ -4439,7 +4593,7 @@ index d1ad20ce1c8c..ef74f33c7905 100644 -- 2.44.0 -From 173737dc7aacb08dc475afa58212800f7a34b240 Mon Sep 17 00:00:00 2001 +From fb681aa9768aa30b3b17152a221868238394dd64 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 26 Feb 2024 15:47:11 +0100 Subject: [PATCH 3/7] block @@ -4928,7 +5082,7 @@ index f958e79277b8..1b0de4fc3958 100644 -- 2.44.0 -From 4b43d78e522b63355e09a4fb91365a1e11891a01 Mon Sep 17 00:00:00 2001 +From 4f371ea8a1f8a47e624592a91f9e961080aec2eb Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 26 Feb 2024 15:47:21 +0100 Subject: [PATCH 4/7] cachy @@ -4984,10 +5138,10 @@ Signed-off-by: Peter Jung create mode 100644 drivers/platform/x86/steamdeck.c diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 522530432548..65b1952a783b 100644 +index a493d93e0d2c..8d6a2ce37f8f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt -@@ 
-4375,6 +4375,15 @@ +@@ -4396,6 +4396,15 @@ nomsi [MSI] If the PCI_MSI kernel config parameter is enabled, this kernel boot option can be used to disable the use of MSI interrupts system-wide. @@ -5004,7 +5158,7 @@ index 522530432548..65b1952a783b 100644 Safety option to keep boot IRQs enabled. This should never be necessary. diff --git a/Makefile b/Makefile -index 6cdb5717bfe0..dc9adf866df1 100644 +index 95b320ada47c..0b7d42037c3e 100644 --- a/Makefile +++ b/Makefile @@ -808,9 +808,164 @@ endif # need-config @@ -8151,7 +8305,7 @@ index 6030a8235617..60b7fe5fa74a 100644 { return &init_user_ns; diff --git a/init/Kconfig b/init/Kconfig -index 8426d59cc634..47671886d579 100644 +index bee58f7468c3..9ea39297f149 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -132,6 +132,10 @@ config THREAD_INFO_IN_TASK @@ -8394,7 +8548,7 @@ index ffc3a2ba3a8c..0e440573033c 100644 # diff --git a/mm/compaction.c b/mm/compaction.c -index 4add68d40e8d..b692129f63f4 100644 +index b961db601df4..91d627e8a93d 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1830,7 +1830,11 @@ static int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNE @@ -8454,7 +8608,7 @@ index 3f255534986a..01b3e5cb8da1 100644 EXPORT_SYMBOL_GPL(dirty_writeback_interval); diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index 150d4f23b010..d5ec35e0b3a2 100644 +index a663202045dc..7c48b114331b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -287,7 +287,11 @@ const char * const migratetype_names[MIGRATE_TYPES] = { @@ -8557,7 +8711,7 @@ index bd5183dfd879..3a410f53a07c 100644 /* diff --git a/mm/vmscan.c b/mm/vmscan.c -index 4f9c854ce6cc..fd1d9b4194e3 100644 +index 4255619a1a31..5a3fbaf34158 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -185,7 +185,11 @@ struct scan_control { @@ -8587,7 +8741,7 @@ index 4f9c854ce6cc..fd1d9b4194e3 100644 -- 2.44.0 -From 9b3faef691a9c9a202c27e5285fcd67d8a95564c Mon Sep 17 00:00:00 2001 +From 516559b0e31629dafbe60212d041e63af1b12c1c Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 26 Feb 2024 15:47:43 +0100 Subject: [PATCH 5/7] fixes @@ -8622,7 +8776,7 @@ index a5af0edd3eb8..0731bc203aa9 100644 -- 2.44.0 -From 6adc19960a6a214361b1099a732af82e9edb6b62 Mon Sep 17 00:00:00 2001 +From e01d8909a6a6d90eb2ff29871d79f4e9359638ca Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 26 Feb 2024 15:48:00 +0100 Subject: [PATCH 6/7] ksm @@ -9062,7 +9216,7 @@ index faad00cce269..c7c9eb656468 100644 -- 2.44.0 -From b66054cf4e9ef095844e6d3a673214a8088f500c Mon Sep 17 00:00:00 2001 +From 0634ad09765970da5be85d61cb4b8b4b38adb3c0 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 1 Feb 2024 16:54:48 +0100 Subject: [PATCH 7/7] zstd diff --git a/patches/cachyos/0003-nvidia.patch b/patches/cachyos/0003-nvidia.patch new file mode 100644 index 0000000..ce7fb7f --- /dev/null +++ b/patches/cachyos/0003-nvidia.patch @@ -0,0 +1,230 @@ +From d2db737a5be989688a7a5d805b7f299d0203d228 Mon Sep 17 00:00:00 2001 +From: Peter Jung +Date: Mon, 29 Jan 2024 15:09:44 +0100 +Subject: [PATCH] NVIDIA: Fixup GPL issue + +Signed-off-by: Peter Jung +--- + kernel/rcu/tree_plugin.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h +index 41021080ad25..72474d8ec180 100644 +--- a/kernel/rcu/tree_plugin.h ++++ b/kernel/rcu/tree_plugin.h +@@ -406,7 +406,7 @@ void __rcu_read_lock(void) + WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true); + barrier(); /* critical section after entry code. 
*/ + } +-EXPORT_SYMBOL_GPL(__rcu_read_lock); ++EXPORT_SYMBOL(__rcu_read_lock); + + /* + * Preemptible RCU implementation for rcu_read_unlock(). +@@ -431,7 +431,7 @@ void __rcu_read_unlock(void) + WARN_ON_ONCE(rrln < 0 || rrln > RCU_NEST_PMAX); + } + } +-EXPORT_SYMBOL_GPL(__rcu_read_unlock); ++EXPORT_SYMBOL(__rcu_read_unlock); + + /* + * Advance a ->blkd_tasks-list pointer to the next entry, instead +-- +2.43.0 + +--- a/kernel/nvidia-drm/nvidia-drm-drv.c ++++ b/kernel/nvidia-drm/nvidia-drm-drv.c +@@ -480,6 +480,22 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags) + return -ENODEV; + } + ++#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) ++ /* ++ * If fbdev is enabled, take modeset ownership now before other DRM clients ++ * can take master (and thus NVKMS ownership). ++ */ ++ if (nv_drm_fbdev_module_param) { ++ if (!nvKms->grabOwnership(pDevice)) { ++ nvKms->freeDevice(pDevice); ++ NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to grab NVKMS modeset ownership"); ++ return -EBUSY; ++ } ++ ++ nv_dev->hasFramebufferConsole = NV_TRUE; ++ } ++#endif ++ + mutex_lock(&nv_dev->lock); + + /* Set NvKmsKapiDevice */ +@@ -590,6 +606,15 @@ static void __nv_drm_unload(struct drm_device *dev) + return; + } + ++ /* Release modeset ownership if fbdev is enabled */ ++ ++#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) ++ if (nv_dev->hasFramebufferConsole) { ++ drm_atomic_helper_shutdown(dev); ++ nvKms->releaseOwnership(nv_dev->pDevice); ++ } ++#endif ++ + cancel_delayed_work_sync(&nv_dev->hotplug_event_work); + mutex_lock(&nv_dev->lock); + +@@ -1768,14 +1793,7 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info) + } + + #if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) +- if (nv_drm_fbdev_module_param && +- drm_core_check_feature(dev, DRIVER_MODESET)) { +- +- if (!nvKms->grabOwnership(nv_dev->pDevice)) { +- NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to grab NVKMS modeset ownership"); +- goto failed_grab_ownership; +- } +- ++ if (nv_dev->hasFramebufferConsole) { + if (bus_is_pci) { + struct pci_dev *pdev = to_pci_dev(device); + +@@ -1786,8 +1804,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info) + #endif + } + drm_fbdev_generic_setup(dev, 32); +- +- nv_dev->hasFramebufferConsole = NV_TRUE; + } + #endif /* defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) */ + +@@ -1798,12 +1814,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info) + + return; /* Success */ + +-#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) +-failed_grab_ownership: +- +- drm_dev_unregister(dev); +-#endif +- + failed_drm_register: + + nv_drm_dev_free(dev); +@@ -1870,12 +1880,6 @@ void nv_drm_remove_devices(void) + struct nv_drm_device *next = dev_list->next; + struct drm_device *dev = dev_list->dev; + +-#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) +- if (dev_list->hasFramebufferConsole) { +- drm_atomic_helper_shutdown(dev); +- nvKms->releaseOwnership(dev_list->pDevice); +- } +-#endif + drm_dev_unregister(dev); + nv_drm_dev_free(dev); + +From d82eb6c87ee2e05b6bbd35f703a41e68b3adc3a7 Mon Sep 17 00:00:00 2001 +From: Aaron Plattner +Date: Tue, 26 Dec 2023 11:58:46 -0800 +Subject: [PATCH] nvidia-drm: Use a workqueue to defer calling + drm_kms_helper_hotplug_event + +--- + kernel/nvidia-drm/nvidia-drm-drv.c | 24 ++++++++++++++++++++++++ + kernel/nvidia-drm/nvidia-drm-encoder.c | 4 ++-- + kernel/nvidia-drm/nvidia-drm-priv.h | 1 + + 3 files changed, 27 insertions(+), 2 deletions(-) + +diff --git kernel/nvidia-drm/nvidia-drm-drv.c kernel/nvidia-drm/nvidia-drm-drv.c +index e0ddb6c..9f7424d 100644 +--- kernel/nvidia-drm/nvidia-drm-drv.c 
++++ kernel/nvidia-drm/nvidia-drm-drv.c +@@ -74,6 +74,7 @@ + #endif + + #include ++#include + + /* + * Commit fcd70cd36b9b ("drm: Split out drm_probe_helper.h") +@@ -405,6 +406,27 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev) + return 0; + } + ++#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE) ++/* ++ * We can't just call drm_kms_helper_hotplug_event directly because ++ * fbdev_generic may attempt to set a mode from inside the hotplug event ++ * handler. Because kapi event handling runs on nvkms_kthread_q, this blocks ++ * other event processing including the flip completion notifier expected by ++ * nv_drm_atomic_commit. ++ * ++ * Defer hotplug event handling to a work item so that nvkms_kthread_q can ++ * continue processing events while a DRM modeset is in progress. ++ */ ++static void nv_drm_handle_hotplug_event(struct work_struct *work) ++{ ++ struct delayed_work *dwork = to_delayed_work(work); ++ struct nv_drm_device *nv_dev = ++ container_of(dwork, struct nv_drm_device, hotplug_event_work); ++ ++ drm_kms_helper_hotplug_event(nv_dev->dev); ++} ++#endif ++ + static int nv_drm_load(struct drm_device *dev, unsigned long flags) + { + #if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE) +@@ -540,6 +562,7 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags) + + /* Enable event handling */ + ++ INIT_DELAYED_WORK(&nv_dev->hotplug_event_work, nv_drm_handle_hotplug_event); + atomic_set(&nv_dev->enable_event_handling, true); + + init_waitqueue_head(&nv_dev->flip_event_wq); +@@ -567,6 +590,7 @@ static void __nv_drm_unload(struct drm_device *dev) + return; + } + ++ cancel_delayed_work_sync(&nv_dev->hotplug_event_work); + mutex_lock(&nv_dev->lock); + + WARN_ON(nv_dev->subOwnershipGranted); +diff --git kernel/nvidia-drm/nvidia-drm-encoder.c kernel/nvidia-drm/nvidia-drm-encoder.c +index b5ef5a2..7c0c119 100644 +--- kernel/nvidia-drm/nvidia-drm-encoder.c ++++ kernel/nvidia-drm/nvidia-drm-encoder.c +@@ -300,7 +300,7 @@ void nv_drm_handle_display_change(struct nv_drm_device *nv_dev, + + nv_drm_connector_mark_connection_status_dirty(nv_encoder->nv_connector); + +- drm_kms_helper_hotplug_event(dev); ++ schedule_delayed_work(&nv_dev->hotplug_event_work, 0); + } + + void nv_drm_handle_dynamic_display_connected(struct nv_drm_device *nv_dev, +@@ -347,6 +347,6 @@ void nv_drm_handle_dynamic_display_connected(struct nv_drm_device *nv_dev, + drm_reinit_primary_mode_group(dev); + #endif + +- drm_kms_helper_hotplug_event(dev); ++ schedule_delayed_work(&nv_dev->hotplug_event_work, 0); + } + #endif +diff --git kernel/nvidia-drm/nvidia-drm-priv.h kernel/nvidia-drm/nvidia-drm-priv.h +index 253155f..c9ce727 100644 +--- kernel/nvidia-drm/nvidia-drm-priv.h ++++ kernel/nvidia-drm/nvidia-drm-priv.h +@@ -126,6 +126,7 @@ struct nv_drm_device { + NvU64 modifiers[6 /* block linear */ + 1 /* linear */ + 1 /* terminator */]; + #endif + ++ struct delayed_work hotplug_event_work; + atomic_t enable_event_handling; + + /** +-- +2.43.0 \ No newline at end of file diff --git a/patches/cachyos/0004-intel.patch b/patches/cachyos/0004-intel.patch new file mode 100644 index 0000000..87da0d3 --- /dev/null +++ b/patches/cachyos/0004-intel.patch @@ -0,0 +1,2203 @@ +From a06ef5a36a19553f48d73428311b241839d53b9c Mon Sep 17 00:00:00 2001 +From: Laio Oriel Seman +Date: Fri, 8 Mar 2024 11:30:24 -0300 +Subject: [PATCH 1/2] ITD + +--- + MAINTAINERS | 1 + + arch/x86/include/asm/cpufeatures.h | 2 + + arch/x86/include/asm/disabled-features.h | 8 +- + arch/x86/include/asm/hfi.h | 85 +++++ + arch/x86/include/asm/hreset.h | 
30 ++ + arch/x86/include/asm/msr-index.h | 12 + + arch/x86/include/asm/topology.h | 15 + + arch/x86/kernel/Makefile | 2 + + arch/x86/kernel/cpu/common.c | 33 +- + arch/x86/kernel/cpu/cpuid-deps.c | 1 + + arch/x86/kernel/process_32.c | 3 + + arch/x86/kernel/process_64.c | 3 + + arch/x86/kernel/sched_ipcc.c | 93 +++++ + drivers/thermal/intel/Kconfig | 1 + + drivers/thermal/intel/intel_hfi.c | 411 ++++++++++++++++++----- + drivers/thermal/thermal_netlink.c | 62 +++- + drivers/thermal/thermal_netlink.h | 26 ++ + include/linux/sched.h | 24 +- + include/linux/sched/topology.h | 6 + + init/Kconfig | 12 + + kernel/sched/core.c | 10 +- + kernel/sched/fair.c | 318 +++++++++++++++++- + kernel/sched/sched.h | 66 ++++ + kernel/sched/topology.c | 9 + + kernel/time/timer.c | 2 +- + 25 files changed, 1127 insertions(+), 108 deletions(-) + create mode 100644 arch/x86/include/asm/hfi.h + create mode 100644 arch/x86/include/asm/hreset.h + create mode 100644 arch/x86/kernel/sched_ipcc.c + +diff --git a/MAINTAINERS b/MAINTAINERS +index 88b28f85587..9bb09b30526 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -21791,6 +21791,7 @@ L: linux-pm@vger.kernel.org + S: Supported + Q: https://patchwork.kernel.org/project/linux-pm/list/ + T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git thermal ++F: arch/x86/include/asm/hfi.h + F: Documentation/ABI/testing/sysfs-class-thermal + F: Documentation/admin-guide/thermal/ + F: Documentation/devicetree/bindings/thermal/ +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 2b62cdd8dd1..31b1cea6847 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -326,6 +326,7 @@ + #define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */ + #define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */ + #define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ ++#define X86_FEATURE_HRESET (12*32+22) /* Hardware history reset instruction */ + #define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ + #define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */ + +@@ -360,6 +361,7 @@ + #define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. 
Preference */ + #define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ + #define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */ ++#define X86_FEATURE_ITD (14*32+23) /* Intel Thread Director */ + + /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ + #define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ +diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h +index 702d93fdd10..f4aa34cfd20 100644 +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -117,6 +117,12 @@ + #define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31)) + #endif + ++#ifdef CONFIG_IPC_CLASSES ++# define DISABLE_ITD 0 ++#else ++# define DISABLE_ITD (1 << (X86_FEATURE_ITD & 31)) ++#endif ++ + /* + * Make sure to add features to the correct mask + */ +@@ -135,7 +141,7 @@ + DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK) + #define DISABLED_MASK12 (DISABLE_LAM) + #define DISABLED_MASK13 0 +-#define DISABLED_MASK14 0 ++#define DISABLED_MASK14 (DISABLE_ITD) + #define DISABLED_MASK15 0 + #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \ + DISABLE_ENQCMD) +diff --git a/arch/x86/include/asm/hfi.h b/arch/x86/include/asm/hfi.h +new file mode 100644 +index 00000000000..b7fda3e0e8c +--- /dev/null ++++ b/arch/x86/include/asm/hfi.h +@@ -0,0 +1,85 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _ASM_X86_HFI_H ++#define _ASM_X86_HFI_H ++ ++/* CPUID detection and enumeration definitions for HFI */ ++ ++union hfi_capabilities { ++ struct { ++ u8 performance:1; ++ u8 energy_efficiency:1; ++ u8 __reserved:6; ++ } split; ++ u8 bits; ++}; ++ ++union cpuid6_edx { ++ struct { ++ union hfi_capabilities capabilities; ++ u32 table_pages:4; ++ u32 __reserved:4; ++ s32 index:16; ++ } split; ++ u32 full; ++}; ++ ++union cpuid6_ecx { ++ struct { ++ u32 dont_care0:8; ++ u32 nr_classes:8; ++ u32 dont_care1:16; ++ } split; ++ u32 full; ++}; ++ ++/** ++ * struct hfi_hdr - Header of the HFI table ++ * @perf_updated: Hardware updated performance capabilities ++ * @ee_updated: Hardware updated energy efficiency capabilities ++ * ++ * Properties of the data in an HFI table. There exists one header per each ++ * HFI class. ++ */ ++struct hfi_hdr { ++ u8 perf_updated; ++ u8 ee_updated; ++} __packed; ++ ++/** ++ * struct hfi_table - Representation of an HFI table ++ * @base_addr: Base address of the local copy of the HFI table ++ * @timestamp: Timestamp of the last update of the local table. ++ * Located at the base of the local table. 
++ * @hdr: Base address of the header of the local table ++ * @data: Base address of the data of the local table ++ */ ++struct hfi_table { ++ union { ++ void *base_addr; ++ u64 *timestamp; ++ }; ++ void *hdr; ++ void *data; ++}; ++ ++/** ++ * struct hfi_features - Supported HFI features ++ * @nr_classes: Number of classes supported ++ * @nr_table_pages: Size of the HFI table in 4KB pages ++ * @cpu_stride: Stride size to locate the capability data of a logical ++ * processor within the table (i.e., row stride) ++ * @class_stride: Stride size to locate a class within the capability ++ * data of a logical processor or the HFI table header ++ * @hdr_size: Size of the table header ++ * ++ * Parameters and supported features that are common to all HFI instances ++ */ ++struct hfi_features { ++ unsigned int nr_classes; ++ size_t nr_table_pages; ++ unsigned int cpu_stride; ++ unsigned int class_stride; ++ unsigned int hdr_size; ++}; ++ ++#endif /* _ASM_X86_HFI_H */ +diff --git a/arch/x86/include/asm/hreset.h b/arch/x86/include/asm/hreset.h +new file mode 100644 +index 00000000000..d68ca2fb864 +--- /dev/null ++++ b/arch/x86/include/asm/hreset.h +@@ -0,0 +1,30 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _ASM_X86_HRESET_H ++ ++/** ++ * HRESET - History reset. Available since binutils v2.36. ++ * ++ * Request the processor to reset the history of task classification on the ++ * current logical processor. The history components to be ++ * reset are specified in %eax. Only bits specified in CPUID(0x20).EBX ++ * and enabled in the IA32_HRESET_ENABLE MSR can be selected. ++ * ++ * The assembly code looks like: ++ * ++ * hreset %eax ++ * ++ * The corresponding machine code looks like: ++ * ++ * F3 0F 3A F0 ModRM Imm ++ * ++ * The value of ModRM is 0xc0 to specify %eax register addressing. ++ * The ignored immediate operand is set to 0. ++ * ++ * The instruction is documented in the Intel SDM. 
++ */ ++ ++#define __ASM_HRESET ".byte 0xf3, 0xf, 0x3a, 0xf0, 0xc0, 0x0" ++ ++void reset_hardware_history(void); ++ ++#endif /* _ASM_X86_HRESET_H */ +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index f1bd7b91b3c..f334c19b028 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -1143,7 +1143,19 @@ + + /* Hardware Feedback Interface */ + #define MSR_IA32_HW_FEEDBACK_PTR 0x17d0 ++#define HW_FEEDBACK_PTR_VALID BIT_ULL(0) ++#define HW_FEEDBACK_PTR_RESERVED_MASK GENMASK_ULL(11, 1) ++ + #define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1 ++#define MSR_IA32_HW_FEEDBACK_THREAD_CONFIG 0x17d4 ++#define MSR_IA32_HW_FEEDBACK_CHAR 0x17d2 ++ ++/* Hardware History Reset */ ++#define MSR_IA32_HW_HRESET_ENABLE 0x17da ++ ++#define HW_FEEDBACK_CONFIG_HFI_ENABLE BIT_ULL(0) ++#define HW_FEEDBACK_CONFIG_ITD_ENABLE BIT_ULL(1) ++#define HW_FEEDBACK_THREAD_CONFIG_ENABLE BIT_ULL(0) + + /* x2APIC locked status */ + #define MSR_IA32_XAPIC_DISABLE_STATUS 0xBD +diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h +index 5f87f6b9b09..29fc06efcb6 100644 +--- a/arch/x86/include/asm/topology.h ++++ b/arch/x86/include/asm/topology.h +@@ -235,4 +235,19 @@ void init_freq_invariance_cppc(void); + #define arch_init_invariance_cppc init_freq_invariance_cppc + #endif + ++#ifdef CONFIG_INTEL_HFI_THERMAL ++int intel_hfi_read_classid(u8 *classid); ++unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu); ++#else ++static inline int intel_hfi_read_classid(u8 *classid) { return -ENODEV; } ++static inline unsigned long ++intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu) { return -ENODEV; } ++#endif ++ ++#ifdef CONFIG_IPC_CLASSES ++void intel_update_ipcc(struct task_struct *curr); ++#define arch_update_ipcc intel_update_ipcc ++#define arch_get_ipcc_score intel_hfi_get_ipcc_score ++#endif ++ + #endif /* _ASM_X86_TOPOLOGY_H */ +diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile +index 0000325ab98..9bc7319175d 100644 +--- a/arch/x86/kernel/Makefile ++++ b/arch/x86/kernel/Makefile +@@ -150,6 +150,8 @@ obj-$(CONFIG_X86_CET) += cet.o + + obj-$(CONFIG_X86_USER_SHADOW_STACK) += shstk.o + ++obj-$(CONFIG_IPC_CLASSES) += sched_ipcc.o ++ + ### + # 64 bit specific files + ifeq ($(CONFIG_X86_64),y) +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index fbc4e60d027..99ebd403fe4 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -57,6 +57,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -381,6 +382,35 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c) + cr4_clear_bits(X86_CR4_UMIP); + } + ++static u32 hardware_history_features __ro_after_init; ++ ++ ++void reset_hardware_history(void) ++{ ++ asm_inline volatile (ALTERNATIVE("", __ASM_HRESET, X86_FEATURE_HRESET) ++ : : "a" (hardware_history_features) : "memory"); ++} ++ ++EXPORT_SYMBOL(reset_hardware_history); ++ ++static __always_inline void setup_hreset(struct cpuinfo_x86 *c) ++{ ++ if (!cpu_feature_enabled(X86_FEATURE_HRESET)) ++ return; ++ ++ /* ++ * Use on all CPUs the hardware history features that the boot ++ * CPU supports. ++ */ ++ if (c == &boot_cpu_data) ++ hardware_history_features = cpuid_ebx(0x20); ++ ++ if (!hardware_history_features) ++ return; ++ ++ wrmsrl(MSR_IA32_HW_HRESET_ENABLE, hardware_history_features); ++} ++ + /* These bits should not change their value after CPU init is finished. 
*/ + static const unsigned long cr4_pinned_mask = + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | +@@ -1872,10 +1902,11 @@ static void identify_cpu(struct cpuinfo_x86 *c) + /* Disable the PN if appropriate */ + squash_the_stupid_serial_number(c); + +- /* Set up SMEP/SMAP/UMIP */ ++ /* Set up SMEP/SMAP/UMIP/HRESET */ + setup_smep(c); + setup_smap(c); + setup_umip(c); ++ setup_hreset(c); + + /* Enable FSGSBASE instructions if available. */ + if (cpu_has(c, X86_FEATURE_FSGSBASE)) { +diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c +index e462c1d3800..db62700cdac 100644 +--- a/arch/x86/kernel/cpu/cpuid-deps.c ++++ b/arch/x86/kernel/cpu/cpuid-deps.c +@@ -81,6 +81,7 @@ static const struct cpuid_dep cpuid_deps[] = { + { X86_FEATURE_XFD, X86_FEATURE_XSAVES }, + { X86_FEATURE_XFD, X86_FEATURE_XGETBV1 }, + { X86_FEATURE_AMX_TILE, X86_FEATURE_XFD }, ++ { X86_FEATURE_ITD, X86_FEATURE_HFI }, + { X86_FEATURE_SHSTK, X86_FEATURE_XSAVES }, + {} + }; +diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c +index 708c87b88cc..7353bb119e7 100644 +--- a/arch/x86/kernel/process_32.c ++++ b/arch/x86/kernel/process_32.c +@@ -52,6 +52,7 @@ + #include + #include + #include ++#include + #include + + #include "process.h" +@@ -214,6 +215,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + /* Load the Intel cache allocation PQR MSR. */ + resctrl_sched_in(next_p); + ++ reset_hardware_history(); ++ + return prev_p; + } + +diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c +index 33b268747bb..202a6735c09 100644 +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -54,6 +54,7 @@ + #include + #include + #include ++#include + #include + #include + #ifdef CONFIG_IA32_EMULATION +@@ -661,6 +662,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + /* Load the Intel cache allocation PQR MSR. */ + resctrl_sched_in(next_p); + ++ reset_hardware_history(); ++ + return prev_p; + } + +diff --git a/arch/x86/kernel/sched_ipcc.c b/arch/x86/kernel/sched_ipcc.c +new file mode 100644 +index 00000000000..dd73fc8be49 +--- /dev/null ++++ b/arch/x86/kernel/sched_ipcc.c +@@ -0,0 +1,93 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Intel support for scheduler IPC classes ++ * ++ * Copyright (c) 2023, Intel Corporation. ++ * ++ * Author: Ricardo Neri ++ * ++ * On hybrid processors, the architecture differences between types of CPUs ++ * lead to different number of retired instructions per cycle (IPC). IPCs may ++ * differ further by classes of instructions. ++ * ++ * The scheduler assigns an IPC class to every task with arch_update_ipcc() ++ * from data that hardware provides. Implement this interface for x86. ++ * ++ * See kernel/sched/sched.h for details. ++ */ ++ ++#include ++ ++#include ++#include ++ ++#define CLASS_DEBOUNCER_SKIPS 4 ++ ++/** ++ * debounce_and_update_class() - Process and update a task's classification ++ * ++ * @p: The task of which the classification will be updated ++ * @new_ipcc: The new IPC classification ++ * ++ * Update the classification of @p with the new value that hardware provides. ++ * Only update the classification of @p if it has been the same during ++ * CLASS_DEBOUNCER_SKIPS consecutive ticks. ++ */ ++static void debounce_and_update_class(struct task_struct *p, u8 new_ipcc) ++{ ++ u16 debounce_skip; ++ ++ /* The class of @p changed. Only restart the debounce counter. 
*/ ++ if (p->ipcc_tmp != new_ipcc) { ++ p->ipcc_cntr = 1; ++ goto out; ++ } ++ ++ /* ++ * The class of @p did not change. Update it if it has been the same ++ * for CLASS_DEBOUNCER_SKIPS user ticks. ++ */ ++ debounce_skip = p->ipcc_cntr + 1; ++ if (debounce_skip < CLASS_DEBOUNCER_SKIPS) ++ p->ipcc_cntr++; ++ else ++ p->ipcc = new_ipcc; ++ ++out: ++ p->ipcc_tmp = new_ipcc; ++} ++ ++static bool classification_is_accurate(u8 hfi_class, bool smt_siblings_idle) ++{ ++ switch (boot_cpu_data.x86_model) { ++ case INTEL_FAM6_ALDERLAKE: ++ case INTEL_FAM6_ALDERLAKE_L: ++ case INTEL_FAM6_RAPTORLAKE: ++ case INTEL_FAM6_RAPTORLAKE_P: ++ case INTEL_FAM6_RAPTORLAKE_S: ++ if (hfi_class == 3 || hfi_class == 2 || smt_siblings_idle) ++ return true; ++ ++ return false; ++ ++ default: ++ return false; ++ } ++} ++ ++void intel_update_ipcc(struct task_struct *curr) ++{ ++ u8 hfi_class; ++ bool idle; ++ ++ if (intel_hfi_read_classid(&hfi_class)) ++ return; ++ ++ /* ++ * 0 is a valid classification for Intel Thread Director. A scheduler ++ * IPCC class of 0 means that the task is unclassified. Adjust. ++ */ ++ idle = sched_smt_siblings_idle(task_cpu(curr)); ++ if (classification_is_accurate(hfi_class, idle)) ++ debounce_and_update_class(curr, hfi_class + 1); ++} +diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig +index b43953b5539..03da183ff99 100644 +--- a/drivers/thermal/intel/Kconfig ++++ b/drivers/thermal/intel/Kconfig +@@ -109,6 +109,7 @@ config INTEL_HFI_THERMAL + depends on CPU_SUP_INTEL + depends on X86_THERMAL_VECTOR + select THERMAL_NETLINK ++ select IPC_CLASSES + help + Select this option to enable the Hardware Feedback Interface. If + selected, hardware provides guidance to the operating system on +diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c +index 3b04c6ec4fc..b791906914b 100644 +--- a/drivers/thermal/intel/intel_hfi.c ++++ b/drivers/thermal/intel/intel_hfi.c +@@ -30,9 +30,12 @@ + #include + #include + #include ++#include + #include + #include + #include ++#include ++#include + #include + #include + #include +@@ -41,6 +44,7 @@ + #include + #include + ++#include + #include + + #include "intel_hfi.h" +@@ -48,32 +52,20 @@ + + #include "../thermal_netlink.h" + +-/* Hardware Feedback Interface MSR configuration bits */ +-#define HW_FEEDBACK_PTR_VALID_BIT BIT(0) +-#define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT BIT(0) + + /* CPUID detection and enumeration definitions for HFI */ + + #define CPUID_HFI_LEAF 6 + +-union hfi_capabilities { ++union hfi_thread_feedback_char_msr { + struct { +- u8 performance:1; +- u8 energy_efficiency:1; +- u8 __reserved:6; ++ u64 classid : 8; ++ u64 __reserved : 55; ++ u64 valid : 1; + } split; +- u8 bits; ++ u64 full; + }; + +-union cpuid6_edx { +- struct { +- union hfi_capabilities capabilities; +- u32 table_pages:4; +- u32 __reserved:4; +- s32 index:16; +- } split; +- u32 full; +-}; + + /** + * struct hfi_cpu_data - HFI capabilities per CPU +@@ -81,32 +73,17 @@ union cpuid6_edx { + * @ee_cap: Energy efficiency capability + * + * Capabilities of a logical processor in the HFI table. These capabilities are +- * unitless. ++ * unitless and specific to each HFI class. + */ + struct hfi_cpu_data { + u8 perf_cap; + u8 ee_cap; + } __packed; + +-/** +- * struct hfi_hdr - Header of the HFI table +- * @perf_updated: Hardware updated performance capabilities +- * @ee_updated: Hardware updated energy efficiency capabilities +- * +- * Properties of the data in an HFI table. 
+- */ +-struct hfi_hdr { +- u8 perf_updated; +- u8 ee_updated; +-} __packed; + + /** + * struct hfi_instance - Representation of an HFI instance (i.e., a table) +- * @local_table: Base of the local copy of the HFI table +- * @timestamp: Timestamp of the last update of the local table. +- * Located at the base of the local table. +- * @hdr: Base address of the header of the local table +- * @data: Base address of the data of the local table ++ * @local_table: Local copy of HFI table for this instance + * @cpus: CPUs represented in this HFI table instance + * @hw_table: Pointer to the HFI table of this instance + * @update_work: Delayed work to process HFI updates +@@ -116,12 +93,7 @@ struct hfi_hdr { + * A set of parameters to parse and navigate a specific HFI table. + */ + struct hfi_instance { +- union { +- void *local_table; +- u64 *timestamp; +- }; +- void *hdr; +- void *data; ++ struct hfi_table local_table; + cpumask_var_t cpus; + void *hw_table; + struct delayed_work update_work; +@@ -129,20 +101,6 @@ struct hfi_instance { + raw_spinlock_t event_lock; + }; + +-/** +- * struct hfi_features - Supported HFI features +- * @nr_table_pages: Size of the HFI table in 4KB pages +- * @cpu_stride: Stride size to locate the capability data of a logical +- * processor within the table (i.e., row stride) +- * @hdr_size: Size of the table header +- * +- * Parameters and supported features that are common to all HFI instances +- */ +-struct hfi_features { +- size_t nr_table_pages; +- unsigned int cpu_stride; +- unsigned int hdr_size; +-}; + + /** + * struct hfi_cpu_info - Per-CPU attributes to consume HFI data +@@ -159,6 +117,7 @@ struct hfi_cpu_info { + static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 }; + + static int max_hfi_instances; ++static int hfi_clients_nr; + static struct hfi_instance *hfi_instances; + + static struct hfi_features hfi_features; +@@ -168,6 +127,139 @@ static struct workqueue_struct *hfi_updates_wq; + #define HFI_UPDATE_INTERVAL HZ + #define HFI_MAX_THERM_NOTIFY_COUNT 16 + ++/* ++ * A task may be unclassified if it has been recently created, spend most of ++ * its lifetime sleeping, or hardware has not provided a classification. ++ * ++ * Most tasks will be classified as scheduler's IPC class 1 (HFI class 0) ++ * eventually. Meanwhile, the scheduler will place classes of tasks with higher ++ * IPC scores on higher-performance CPUs. ++ * ++ * IPC class 1 is a reasonable choice. It matches the performance capability ++ * of the legacy, classless, HFI table. ++ */ ++#define HFI_UNCLASSIFIED_DEFAULT 1 ++ ++/* A cache of the HFI perf capabilities for lockless access. */ ++static int __percpu *hfi_ipcc_scores; ++/* Sequence counter for hfi_ipcc_scores */ ++static seqcount_t hfi_ipcc_seqcount = SEQCNT_ZERO(hfi_ipcc_seqcount); ++ ++static int alloc_hfi_ipcc_scores(void) ++{ ++ if (!cpu_feature_enabled(X86_FEATURE_ITD)) ++ return 0; ++ ++ hfi_ipcc_scores = __alloc_percpu(sizeof(*hfi_ipcc_scores) * ++ hfi_features.nr_classes, ++ sizeof(*hfi_ipcc_scores)); ++ ++ return hfi_ipcc_scores ? 0 : -ENOMEM; ++} ++ ++unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu) ++{ ++ int *scores, score; ++ unsigned long seq; ++ ++ scores = per_cpu_ptr(hfi_ipcc_scores, cpu); ++ if (!scores) ++ return -ENODEV; ++ ++ if (cpu < 0 || cpu >= nr_cpu_ids) ++ return -EINVAL; ++ ++ if (ipcc == IPC_CLASS_UNCLASSIFIED) ++ ipcc = HFI_UNCLASSIFIED_DEFAULT; ++ ++ /* ++ * Scheduler IPC classes start at 1. HFI classes start at 0. ++ * See note intel_hfi_update_ipcc(). 
++ */ ++ if (ipcc >= hfi_features.nr_classes + 1) ++ return -EINVAL; ++ ++ /* ++ * The seqcount implies load-acquire semantics to order loads with ++ * lockless stores of the write side in set_hfi_ipcc_score(). It ++ * also implies a compiler barrier. ++ */ ++ do { ++ seq = read_seqcount_begin(&hfi_ipcc_seqcount); ++ /* @ipcc is never 0. */ ++ score = scores[ipcc - 1]; ++ } while (read_seqcount_retry(&hfi_ipcc_seqcount, seq)); ++ ++ return score; ++} ++ ++static void set_hfi_ipcc_scores(struct hfi_instance *hfi_instance) ++{ ++ int cpu; ++ ++ if (!cpu_feature_enabled(X86_FEATURE_ITD)) ++ return; ++ ++ /* ++ * Serialize with writes to the HFI table. It also protects the write ++ * loop against seqcount readers running in interrupt context. ++ */ ++ raw_spin_lock_irq(&hfi_instance->table_lock); ++ /* ++ * The seqcount implies store-release semantics to order stores with ++ * lockless loads from the seqcount read side in ++ * intel_hfi_get_ipcc_score(). It also implies a compiler barrier. ++ */ ++ write_seqcount_begin(&hfi_ipcc_seqcount); ++ for_each_cpu(cpu, hfi_instance->cpus) { ++ int c, *scores; ++ s16 index; ++ ++ index = per_cpu(hfi_cpu_info, cpu).index; ++ scores = per_cpu_ptr(hfi_ipcc_scores, cpu); ++ ++ for (c = 0; c < hfi_features.nr_classes; c++) { ++ struct hfi_cpu_data *caps; ++ ++ caps = hfi_instance->local_table.data + ++ index * hfi_features.cpu_stride + ++ c * hfi_features.class_stride; ++ scores[c] = caps->perf_cap; ++ } ++ } ++ ++ write_seqcount_end(&hfi_ipcc_seqcount); ++ raw_spin_unlock_irq(&hfi_instance->table_lock); ++} ++ ++/** ++ * intel_hfi_read_classid() - Read the currrent classid ++ * @classid: Variable to which the classid will be written. ++ * ++ * Read the classification that Intel Thread Director has produced when this ++ * function is called. Thread classification must be enabled before calling ++ * this function. ++ * ++ * Return: 0 if the produced classification is valid. Error otherwise. ++ */ ++int intel_hfi_read_classid(u8 *classid) ++{ ++ union hfi_thread_feedback_char_msr msr; ++ ++ /* We should not be here if ITD is not supported. */ ++ if (!cpu_feature_enabled(X86_FEATURE_ITD)) { ++ pr_warn_once("task classification requested but not supported!"); ++ return -ENODEV; ++ } ++ ++ rdmsrl(MSR_IA32_HW_FEEDBACK_CHAR, msr.full); ++ if (!msr.split.valid) ++ return -EINVAL; ++ ++ *classid = msr.split.classid; ++ return 0; ++} ++ + static void get_hfi_caps(struct hfi_instance *hfi_instance, + struct thermal_genl_cpu_caps *cpu_caps) + { +@@ -179,7 +271,7 @@ static void get_hfi_caps(struct hfi_instance *hfi_instance, + s16 index; + + index = per_cpu(hfi_cpu_info, cpu).index; +- caps = hfi_instance->data + index * hfi_features.cpu_stride; ++ caps = hfi_instance->local_table.data + index * hfi_features.cpu_stride; + cpu_caps[i].cpu = cpu; + + /* +@@ -235,6 +327,8 @@ static void update_capabilities(struct hfi_instance *hfi_instance) + thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]); + + kfree(cpu_caps); ++ ++ set_hfi_ipcc_scores(hfi_instance); + out: + mutex_unlock(&hfi_instance_lock); + } +@@ -296,7 +390,7 @@ void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) + * where a lagging CPU entered the locked region. 
+ */ + new_timestamp = *(u64 *)hfi_instance->hw_table; +- if (*hfi_instance->timestamp == new_timestamp) { ++ if (*hfi_instance->local_table.timestamp == new_timestamp) { + thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED); + raw_spin_unlock(&hfi_instance->event_lock); + return; +@@ -308,7 +402,7 @@ void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) + * Copy the updated table into our local copy. This includes the new + * timestamp. + */ +- memcpy(hfi_instance->local_table, hfi_instance->hw_table, ++ memcpy(hfi_instance->local_table.base_addr, hfi_instance->hw_table, + hfi_features.nr_table_pages << PAGE_SHIFT); + + /* +@@ -337,17 +431,18 @@ static void init_hfi_cpu_index(struct hfi_cpu_info *info) + } + + /* +- * The format of the HFI table depends on the number of capabilities that the +- * hardware supports. Keep a data structure to navigate the table. ++ * The format of the HFI table depends on the number of capabilities and classes ++ * that the hardware supports. Keep a data structure to navigate the table. + */ + static void init_hfi_instance(struct hfi_instance *hfi_instance) + { + /* The HFI header is below the time-stamp. */ +- hfi_instance->hdr = hfi_instance->local_table + +- sizeof(*hfi_instance->timestamp); ++ hfi_instance->local_table.hdr = hfi_instance->local_table.base_addr + ++ sizeof(*hfi_instance->local_table.timestamp); + + /* The HFI data starts below the header. */ +- hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size; ++ hfi_instance->local_table.data = hfi_instance->local_table.hdr + ++ hfi_features.hdr_size; + } + + /* Caller must hold hfi_instance_lock. */ +@@ -356,8 +451,13 @@ static void hfi_enable(void) + u64 msr_val; + + rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); +- msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; ++ msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE; ++ ++ if (cpu_feature_enabled(X86_FEATURE_ITD)) ++ msr_val |= HW_FEEDBACK_CONFIG_ITD_ENABLE; ++ + wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); ++ + } + + static void hfi_set_hw_table(struct hfi_instance *hfi_instance) +@@ -366,7 +466,7 @@ static void hfi_set_hw_table(struct hfi_instance *hfi_instance) + u64 msr_val; + + hw_table_pa = virt_to_phys(hfi_instance->hw_table); +- msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT; ++ msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID; + wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); + } + +@@ -377,7 +477,11 @@ static void hfi_disable(void) + int i; + + rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); +- msr_val &= ~HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; ++ msr_val &= ~HW_FEEDBACK_CONFIG_HFI_ENABLE; ++ ++ if (cpu_feature_enabled(X86_FEATURE_ITD)) ++ msr_val &= ~HW_FEEDBACK_CONFIG_ITD_ENABLE; ++ + wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + + /* +@@ -396,6 +500,30 @@ static void hfi_disable(void) + } + } + ++static void hfi_enable_itd_classification(void) ++{ ++ u64 msr_val; ++ ++ if (!cpu_feature_enabled(X86_FEATURE_ITD)) ++ return; ++ ++ rdmsrl(MSR_IA32_HW_FEEDBACK_THREAD_CONFIG, msr_val); ++ msr_val |= HW_FEEDBACK_THREAD_CONFIG_ENABLE; ++ wrmsrl(MSR_IA32_HW_FEEDBACK_THREAD_CONFIG, msr_val); ++} ++ ++static void hfi_disable_itd_classification(void) ++{ ++ u64 msr_val; ++ ++ if (!cpu_feature_enabled(X86_FEATURE_ITD)) ++ return; ++ ++ rdmsrl(MSR_IA32_HW_FEEDBACK_THREAD_CONFIG, msr_val); ++ msr_val &= ~HW_FEEDBACK_THREAD_CONFIG_ENABLE; ++ wrmsrl(MSR_IA32_HW_FEEDBACK_THREAD_CONFIG, msr_val); ++} ++ + /** + * intel_hfi_online() - Enable HFI on @cpu + * @cpu: CPU in which the HFI will be enabled +@@ -436,6 +564,8 @@ void 
intel_hfi_online(unsigned int cpu) + + init_hfi_cpu_index(info); + ++ hfi_enable_itd_classification(); ++ + /* + * Now check if the HFI instance of the package/die of @cpu has been + * initialized (by checking its header). In such case, all we have to +@@ -443,7 +573,7 @@ void intel_hfi_online(unsigned int cpu) + * if needed. + */ + mutex_lock(&hfi_instance_lock); +- if (hfi_instance->hdr) ++ if (hfi_instance->local_table.hdr) + goto enable; + + /* +@@ -463,9 +593,9 @@ void intel_hfi_online(unsigned int cpu) + * Allocate memory to keep a local copy of the table that + * hardware generates. + */ +- hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT, +- GFP_KERNEL); +- if (!hfi_instance->local_table) ++ hfi_instance->local_table.base_addr = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT, ++ GFP_KERNEL); ++ if (!hfi_instance->local_table.base_addr) + goto free_hw_table; + + init_hfi_instance(hfi_instance); +@@ -477,11 +607,23 @@ void intel_hfi_online(unsigned int cpu) + enable: + cpumask_set_cpu(cpu, hfi_instance->cpus); + +- /* Enable this HFI instance if this is its first online CPU. */ +- if (cpumask_weight(hfi_instance->cpus) == 1) { ++ /* ++ * Enable this HFI instance if this is its first online CPU and ++ * there are user-space clients of thermal events. ++ */ ++ if (cpumask_weight(hfi_instance->cpus) == 1 && hfi_clients_nr > 0) { + hfi_set_hw_table(hfi_instance); + hfi_enable(); + } ++ /* ++ * We have all we need to support IPC classes. Task classification is ++ * now working. ++ * ++ * All class scores are zero until after the first HFI update. That is ++ * OK. The scheduler queries these scores at every load balance. ++ */ ++ if (cpu_feature_enabled(X86_FEATURE_ITD)) ++ sched_enable_ipc_classes(); + + unlock: + mutex_unlock(&hfi_instance_lock); +@@ -516,9 +658,11 @@ void intel_hfi_offline(unsigned int cpu) + if (!hfi_instance) + return; + +- if (!hfi_instance->hdr) ++ if (!hfi_instance->local_table.hdr) + return; + ++ hfi_disable_itd_classification(); ++ + mutex_lock(&hfi_instance_lock); + cpumask_clear_cpu(cpu, hfi_instance->cpus); + +@@ -557,44 +701,133 @@ static __init int hfi_parse_features(void) + /* The number of 4KB pages required by the table */ + hfi_features.nr_table_pages = edx.split.table_pages + 1; + ++ /* ++ * Capability fields of an HFI class are grouped together. Classes are ++ * contiguous in memory. Hence, use the number of supported features to ++ * locate a specific class. ++ */ ++ hfi_features.class_stride = nr_capabilities; ++ ++ if (cpu_feature_enabled(X86_FEATURE_ITD)) { ++ union cpuid6_ecx ecx; ++ ++ ecx.full = cpuid_ecx(CPUID_HFI_LEAF); ++ hfi_features.nr_classes = ecx.split.nr_classes; ++ } else { ++ hfi_features.nr_classes = 1; ++ } ++ + /* + * The header contains change indications for each supported feature. + * The size of the table header is rounded up to be a multiple of 8 + * bytes. + */ +- hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8; ++ hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities * ++ hfi_features.nr_classes, 8) * 8; + + /* + * Data of each logical processor is also rounded up to be a multiple + * of 8 bytes. + */ +- hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8; ++ hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities * ++ hfi_features.nr_classes, 8) * 8; + + return 0; + } + +-static void hfi_do_enable(void) ++/* ++ * If concurrency is not prevented by other means, the HFI enable/disable ++ * routines must be called under hfi_instance_lock." 
++ */ ++static void hfi_enable_instance(void *ptr) ++{ ++ hfi_set_hw_table(ptr); ++ hfi_enable(); ++} ++ ++static void hfi_disable_instance(void *ptr) ++{ ++ hfi_disable(); ++} ++ ++static void hfi_syscore_resume(void) + { + /* This code runs only on the boot CPU. */ + struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0); + struct hfi_instance *hfi_instance = info->hfi_instance; + + /* No locking needed. There is no concurrency with CPU online. */ +- hfi_set_hw_table(hfi_instance); +- hfi_enable(); ++ if (hfi_clients_nr > 0) { ++ hfi_set_hw_table(hfi_instance); ++ hfi_enable_instance(hfi_instance); ++ hfi_enable_itd_classification(); ++ } + } + +-static int hfi_do_disable(void) ++static int hfi_syscore_suspend(void) + { + /* No locking needed. There is no concurrency with CPU offline. */ ++ ++ hfi_disable_itd_classification(); ++ + hfi_disable(); + + return 0; + } + + static struct syscore_ops hfi_pm_ops = { +- .resume = hfi_do_enable, +- .suspend = hfi_do_disable, ++ .resume = hfi_syscore_resume, ++ .suspend = hfi_syscore_suspend, ++}; ++ ++static int hfi_thermal_notify(struct notifier_block *nb, unsigned long state, ++ void *_notify) ++{ ++ struct thermal_genl_notify *notify = _notify; ++ struct hfi_instance *hfi_instance; ++ smp_call_func_t func = NULL; ++ unsigned int cpu; ++ int i; ++ ++ if (notify->mcgrp != THERMAL_GENL_EVENT_GROUP) ++ return NOTIFY_DONE; ++ ++ if (state != THERMAL_NOTIFY_BIND && state != THERMAL_NOTIFY_UNBIND) ++ return NOTIFY_DONE; ++ ++ mutex_lock(&hfi_instance_lock); ++ ++ switch (state) { ++ case THERMAL_NOTIFY_BIND: ++ if (++hfi_clients_nr == 1) ++ func = hfi_enable_instance; ++ break; ++ case THERMAL_NOTIFY_UNBIND: ++ if (--hfi_clients_nr == 0) ++ func = hfi_disable_instance; ++ break; ++ } ++ ++ if (!func) ++ goto out; ++ ++ for (i = 0; i < max_hfi_instances; i++) { ++ hfi_instance = &hfi_instances[i]; ++ if (cpumask_empty(hfi_instance->cpus)) ++ continue; ++ ++ cpu = cpumask_any(hfi_instance->cpus); ++ smp_call_function_single(cpu, func, hfi_instance, true); ++ } ++ ++out: ++ mutex_unlock(&hfi_instance_lock); ++ ++ return NOTIFY_OK; ++} ++ ++static struct notifier_block hfi_thermal_nb = { ++ .notifier_call = hfi_thermal_notify, + }; + + void __init intel_hfi_init(void) +@@ -628,10 +861,28 @@ void __init intel_hfi_init(void) + if (!hfi_updates_wq) + goto err_nomem; + ++ /* ++ * Both thermal core and Intel HFI can not be build as modules. ++ * As kernel build-in drivers they are initialized before user-space ++ * starts, hence we can not miss BIND/UNBIND events when applications ++ * add/remove thermal multicast group to/from a netlink socket. 
++ */ ++ if (thermal_genl_register_notifier(&hfi_thermal_nb)) ++ goto err_nl_notif; ++ + register_syscore_ops(&hfi_pm_ops); + ++ if (alloc_hfi_ipcc_scores()) ++ goto err_ipcc; ++ + return; + ++err_nl_notif: ++ destroy_workqueue(hfi_updates_wq); ++ ++err_ipcc: ++ destroy_workqueue(hfi_updates_wq); ++ + err_nomem: + for (j = 0; j < i; ++j) { + hfi_instance = &hfi_instances[j]; +diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c +index 76a231a2965..bef14ce69ec 100644 +--- a/drivers/thermal/thermal_netlink.c ++++ b/drivers/thermal/thermal_netlink.c +@@ -7,17 +7,13 @@ + * Generic netlink for thermal management framework + */ + #include ++#include + #include + #include + #include + + #include "thermal_core.h" + +-enum thermal_genl_multicast_groups { +- THERMAL_GENL_SAMPLING_GROUP = 0, +- THERMAL_GENL_EVENT_GROUP = 1, +-}; +- + static const struct genl_multicast_group thermal_genl_mcgrps[] = { + [THERMAL_GENL_SAMPLING_GROUP] = { .name = THERMAL_GENL_SAMPLING_GROUP_NAME, }, + [THERMAL_GENL_EVENT_GROUP] = { .name = THERMAL_GENL_EVENT_GROUP_NAME, }, +@@ -74,11 +70,12 @@ struct param { + + typedef int (*cb_t)(struct param *); + +-static struct genl_family thermal_gnl_family; ++static struct genl_family thermal_genl_family; ++static BLOCKING_NOTIFIER_HEAD(thermal_genl_chain); + + static int thermal_group_has_listeners(enum thermal_genl_multicast_groups group) + { +- return genl_has_listeners(&thermal_gnl_family, &init_net, group); ++ return genl_has_listeners(&thermal_genl_family, &init_net, group); + } + + /************************** Sampling encoding *******************************/ +@@ -95,7 +92,7 @@ int thermal_genl_sampling_temp(int id, int temp) + if (!skb) + return -ENOMEM; + +- hdr = genlmsg_put(skb, 0, 0, &thermal_gnl_family, 0, ++ hdr = genlmsg_put(skb, 0, 0, &thermal_genl_family, 0, + THERMAL_GENL_SAMPLING_TEMP); + if (!hdr) + goto out_free; +@@ -108,7 +105,7 @@ int thermal_genl_sampling_temp(int id, int temp) + + genlmsg_end(skb, hdr); + +- genlmsg_multicast(&thermal_gnl_family, skb, 0, THERMAL_GENL_SAMPLING_GROUP, GFP_KERNEL); ++ genlmsg_multicast(&thermal_genl_family, skb, 0, THERMAL_GENL_SAMPLING_GROUP, GFP_KERNEL); + + return 0; + out_cancel: +@@ -282,7 +279,7 @@ static int thermal_genl_send_event(enum thermal_genl_event event, + return -ENOMEM; + p->msg = msg; + +- hdr = genlmsg_put(msg, 0, 0, &thermal_gnl_family, 0, event); ++ hdr = genlmsg_put(msg, 0, 0, &thermal_genl_family, 0, event); + if (!hdr) + goto out_free_msg; + +@@ -292,7 +289,7 @@ static int thermal_genl_send_event(enum thermal_genl_event event, + + genlmsg_end(msg, hdr); + +- genlmsg_multicast(&thermal_gnl_family, msg, 0, THERMAL_GENL_EVENT_GROUP, GFP_KERNEL); ++ genlmsg_multicast(&thermal_genl_family, msg, 0, THERMAL_GENL_EVENT_GROUP, GFP_KERNEL); + + return 0; + +@@ -593,7 +590,7 @@ static int thermal_genl_cmd_dumpit(struct sk_buff *skb, + int ret; + void *hdr; + +- hdr = genlmsg_put(skb, 0, 0, &thermal_gnl_family, 0, cmd); ++ hdr = genlmsg_put(skb, 0, 0, &thermal_genl_family, 0, cmd); + if (!hdr) + return -EMSGSIZE; + +@@ -625,7 +622,7 @@ static int thermal_genl_cmd_doit(struct sk_buff *skb, + return -ENOMEM; + p.msg = msg; + +- hdr = genlmsg_put_reply(msg, info, &thermal_gnl_family, 0, cmd); ++ hdr = genlmsg_put_reply(msg, info, &thermal_genl_family, 0, cmd); + if (!hdr) + goto out_free_msg; + +@@ -645,6 +642,27 @@ static int thermal_genl_cmd_doit(struct sk_buff *skb, + return ret; + } + ++static int thermal_genl_bind(int mcgrp) ++{ ++ struct thermal_genl_notify n = { .mcgrp = mcgrp }; 
++ ++ if (WARN_ON_ONCE(mcgrp > THERMAL_GENL_MAX_GROUP)) ++ return -EINVAL; ++ ++ blocking_notifier_call_chain(&thermal_genl_chain, THERMAL_NOTIFY_BIND, &n); ++ return 0; ++} ++ ++static void thermal_genl_unbind(int mcgrp) ++{ ++ struct thermal_genl_notify n = { .mcgrp = mcgrp }; ++ ++ if (WARN_ON_ONCE(mcgrp > THERMAL_GENL_MAX_GROUP)) ++ return; ++ ++ blocking_notifier_call_chain(&thermal_genl_chain, THERMAL_NOTIFY_UNBIND, &n); ++} ++ + static const struct genl_small_ops thermal_genl_ops[] = { + { + .cmd = THERMAL_GENL_CMD_TZ_GET_ID, +@@ -673,12 +691,14 @@ static const struct genl_small_ops thermal_genl_ops[] = { + }, + }; + +-static struct genl_family thermal_gnl_family __ro_after_init = { ++static struct genl_family thermal_genl_family __ro_after_init = { + .hdrsize = 0, + .name = THERMAL_GENL_FAMILY_NAME, + .version = THERMAL_GENL_VERSION, + .maxattr = THERMAL_GENL_ATTR_MAX, + .policy = thermal_genl_policy, ++ .bind = thermal_genl_bind, ++ .unbind = thermal_genl_unbind, + .small_ops = thermal_genl_ops, + .n_small_ops = ARRAY_SIZE(thermal_genl_ops), + .resv_start_op = THERMAL_GENL_CMD_CDEV_GET + 1, +@@ -686,12 +706,22 @@ static struct genl_family thermal_gnl_family __ro_after_init = { + .n_mcgrps = ARRAY_SIZE(thermal_genl_mcgrps), + }; + ++int thermal_genl_register_notifier(struct notifier_block *nb) ++{ ++ return blocking_notifier_chain_register(&thermal_genl_chain, nb); ++} ++ ++int thermal_genl_unregister_notifier(struct notifier_block *nb) ++{ ++ return blocking_notifier_chain_unregister(&thermal_genl_chain, nb); ++} ++ + int __init thermal_netlink_init(void) + { +- return genl_register_family(&thermal_gnl_family); ++ return genl_register_family(&thermal_genl_family); + } + + void __init thermal_netlink_exit(void) + { +- genl_unregister_family(&thermal_gnl_family); ++ genl_unregister_family(&thermal_genl_family); + } +diff --git a/drivers/thermal/thermal_netlink.h b/drivers/thermal/thermal_netlink.h +index 93a927e144d..e01221e8816 100644 +--- a/drivers/thermal/thermal_netlink.h ++++ b/drivers/thermal/thermal_netlink.h +@@ -10,6 +10,19 @@ struct thermal_genl_cpu_caps { + int efficiency; + }; + ++enum thermal_genl_multicast_groups { ++ THERMAL_GENL_SAMPLING_GROUP = 0, ++ THERMAL_GENL_EVENT_GROUP = 1, ++ THERMAL_GENL_MAX_GROUP = THERMAL_GENL_EVENT_GROUP, ++}; ++ ++#define THERMAL_NOTIFY_BIND 0 ++#define THERMAL_NOTIFY_UNBIND 1 ++ ++struct thermal_genl_notify { ++ int mcgrp; ++}; ++ + struct thermal_zone_device; + struct thermal_trip; + struct thermal_cooling_device; +@@ -18,6 +31,9 @@ struct thermal_cooling_device; + #ifdef CONFIG_THERMAL_NETLINK + int __init thermal_netlink_init(void); + void __init thermal_netlink_exit(void); ++int thermal_genl_register_notifier(struct notifier_block *nb); ++int thermal_genl_unregister_notifier(struct notifier_block *nb); ++ + int thermal_notify_tz_create(const struct thermal_zone_device *tz); + int thermal_notify_tz_delete(const struct thermal_zone_device *tz); + int thermal_notify_tz_enable(const struct thermal_zone_device *tz); +@@ -48,6 +64,16 @@ static inline int thermal_notify_tz_create(const struct thermal_zone_device *tz) + return 0; + } + ++static inline int thermal_genl_register_notifier(struct notifier_block *nb) ++{ ++ return 0; ++} ++ ++static inline int thermal_genl_unregister_notifier(struct notifier_block *nb) ++{ ++ return 0; ++} ++ + static inline int thermal_notify_tz_delete(const struct thermal_zone_device *tz) + { + return 0; +diff --git a/include/linux/sched.h b/include/linux/sched.h +index ffe8f618ab8..8d458554bae 100644 +--- 
a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -137,6 +137,8 @@ struct user_event_mm; + __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ + TASK_PARKED) + ++#define IPC_CLASS_UNCLASSIFIED 0 ++ + #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) + + #define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0) +@@ -301,7 +303,7 @@ enum { + TASK_COMM_LEN = 16, + }; + +-extern void scheduler_tick(void); ++extern void scheduler_tick(bool user_tick); + + #define MAX_SCHEDULE_TIMEOUT LONG_MAX + +@@ -1547,6 +1549,24 @@ struct task_struct { + struct user_event_mm *user_event_mm; + #endif + ++#ifdef CONFIG_IPC_CLASSES ++ /* ++ * A hardware-defined classification of task that reflects but is ++ * not identical to the number of instructions per cycle. ++ */ ++ unsigned int ipcc : 9; ++ /* ++ * A candidate classification that arch-specific implementations ++ * qualify for correctness. ++ */ ++ unsigned int ipcc_tmp : 9; ++ /* ++ * Counter to filter out transient candidate classifications ++ * of a task. ++ */ ++ unsigned int ipcc_cntr : 14; ++#endif ++ + /* + * New fields for task_struct should be added above here, so that + * they are included in the randomized portion of task_struct. +@@ -2183,4 +2203,6 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); } + + extern void sched_set_stop_task(int cpu, struct task_struct *stop); + ++extern bool sched_smt_siblings_idle(int cpu); ++ + #endif +diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h +index a6e04b4a21d..f32fce3fc8e 100644 +--- a/include/linux/sched/topology.h ++++ b/include/linux/sched/topology.h +@@ -292,4 +292,10 @@ static inline int task_node(const struct task_struct *p) + return cpu_to_node(task_cpu(p)); + } + ++#ifdef CONFIG_IPC_CLASSES ++extern void sched_enable_ipc_classes(void); ++#else ++static inline void sched_enable_ipc_classes(void) { } ++#endif ++ + #endif /* _LINUX_SCHED_TOPOLOGY_H */ +diff --git a/init/Kconfig b/init/Kconfig +index bee58f7468c..3447c10cbdd 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -849,6 +849,18 @@ config UCLAMP_BUCKETS_COUNT + + If in doubt, use the default value. + ++config IPC_CLASSES ++ bool "IPC classes of tasks" ++ depends on SMP ++ help ++ If selected, each task is assigned a classification value that ++ reflects the type of instructions that the task executes. This ++ classification reflects but is not equal to the number of ++ instructions retired per cycle. ++ ++ The scheduler uses the classification value to improve the placement ++ of tasks. ++ + endmenu + + # +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 9116bcc9034..5e07149813c 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -4515,6 +4515,11 @@ int wake_up_state(struct task_struct *p, unsigned int state) + */ + static void __sched_fork(unsigned long clone_flags, struct task_struct *p) + { ++#ifdef CONFIG_IPC_CLASSES ++ p->ipcc = IPC_CLASS_UNCLASSIFIED; ++ p->ipcc_tmp = IPC_CLASS_UNCLASSIFIED; ++ p->ipcc_cntr = 0; ++#endif + p->on_rq = 0; + + p->se.on_rq = 0; +@@ -5653,7 +5658,7 @@ static inline u64 cpu_resched_latency(struct rq *rq) { return 0; } + * This function gets called by the timer code, with HZ frequency. + * We call it with interrupts disabled. 
+ */ +-void scheduler_tick(void) ++void scheduler_tick(bool user_tick) + { + int cpu = smp_processor_id(); + struct rq *rq = cpu_rq(cpu); +@@ -5665,6 +5670,9 @@ void scheduler_tick(void) + if (housekeeping_cpu(cpu, HK_TYPE_TICK)) + arch_scale_freq_tick(); + ++ if (sched_ipcc_enabled() && user_tick) ++ arch_update_ipcc(curr); ++ + sched_clock_tick(); + + rq_lock(rq, &rf); +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 533547e3c90..38e0acfefb0 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -1305,7 +1305,14 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) + * Scheduling class queueing methods: + */ + +-static inline bool is_core_idle(int cpu) ++/** ++ * sched_smt_siblings_idle - Check whether SMT siblings of a CPU are idle ++ * @cpu: The CPU to check ++ * ++ * Returns true if all the SMT siblings of @cpu are idle or @cpu does not have ++ * SMT siblings. The idle state of @cpu is not considered. ++ */ ++bool sched_smt_siblings_idle(int cpu) + { + #ifdef CONFIG_SCHED_SMT + int sibling; +@@ -2008,7 +2015,7 @@ static inline int numa_idle_core(int idle_core, int cpu) + * Prefer cores instead of packing HT siblings + * and triggering future load balancing. + */ +- if (is_core_idle(cpu)) ++ if (sched_smt_siblings_idle(cpu)) + idle_core = cpu; + + return idle_core; +@@ -9449,6 +9456,13 @@ struct sg_lb_stats { + unsigned int nr_numa_running; + unsigned int nr_preferred_running; + #endif ++#ifdef CONFIG_IPC_CLASSES ++ unsigned long min_score; /* Min(score(rq->curr->ipcc)) */ ++ unsigned short min_ipcc; /* Class of the task with the minimum IPCC score in the rq */ ++ unsigned long sum_score; /* Sum(score(rq->curr->ipcc)) */ ++ long ipcc_score_after; /* Prospective IPCC score after load balancing */ ++ unsigned long ipcc_score_before; /* IPCC score before load balancing */ ++#endif + }; + + /* +@@ -9727,6 +9741,248 @@ group_type group_classify(unsigned int imbalance_pct, + return group_has_spare; + } + ++#ifdef CONFIG_IPC_CLASSES ++static void init_rq_ipcc_stats(struct sg_lb_stats *sgs) ++{ ++ /* All IPCC stats have been set to zero in update_sg_lb_stats(). */ ++ sgs->min_score = ULONG_MAX; ++} ++ ++static int rq_last_task_ipcc(int dst_cpu, struct rq *rq, unsigned short *ipcc) ++{ ++ struct list_head *tasks = &rq->cfs_tasks; ++ struct task_struct *p; ++ struct rq_flags rf; ++ int ret = -EINVAL; ++ ++ rq_lock_irqsave(rq, &rf); ++ if (list_empty(tasks)) ++ goto out; ++ ++ p = list_last_entry(tasks, struct task_struct, se.group_node); ++ if (p->flags & PF_EXITING || is_idle_task(p) || ++ !cpumask_test_cpu(dst_cpu, p->cpus_ptr)) ++ goto out; ++ ++ ret = 0; ++ *ipcc = p->ipcc; ++out: ++ rq_unlock(rq, &rf); ++ return ret; ++} ++ ++/* Called only if cpu_of(@rq) is not idle and has tasks running. */ ++static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs, ++ struct rq *rq) ++{ ++ unsigned short ipcc; ++ unsigned long score; ++ ++ if (!sched_ipcc_enabled()) ++ return; ++ ++ if (rq_last_task_ipcc(dst_cpu, rq, &ipcc)) ++ return; ++ ++ score = arch_get_ipcc_score(ipcc, cpu_of(rq)); ++ ++ /* ++ * Ignore tasks with invalid scores. When finding the busiest group, we ++ * prefer those with higher sum_score. This group will not be selected. 
++ */ ++ if (IS_ERR_VALUE(score)) ++ return; ++ ++ sgs->sum_score += score; ++ ++ if (score < sgs->min_score) { ++ sgs->min_score = score; ++ sgs->min_ipcc = ipcc; ++ } ++} ++ ++static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs, ++ struct sched_group *sg, ++ struct lb_env *env) ++{ ++ unsigned long score_on_dst_cpu, before; ++ int busy_cpus; ++ long after; ++ ++ if (!sched_ipcc_enabled()) ++ return; ++ ++ /* ++ * IPCC scores are only useful during idle load balancing. For now, ++ * only asym_packing uses IPCC scores. ++ */ ++ if (!(env->sd->flags & SD_ASYM_PACKING) || ++ env->idle == CPU_NOT_IDLE) ++ return; ++ ++ /* ++ * IPCC scores are used to break ties only between these types of ++ * groups. ++ */ ++ if (sgs->group_type != group_fully_busy && ++ sgs->group_type != group_asym_packing) ++ return; ++ ++ busy_cpus = sgs->group_weight - sgs->idle_cpus; ++ ++ /* No busy CPUs in the group. No tasks to move. */ ++ if (!busy_cpus) ++ return; ++ ++ score_on_dst_cpu = arch_get_ipcc_score(sgs->min_ipcc, env->dst_cpu); ++ ++ /* ++ * Do not use IPC scores. sgs::ipcc_score_{after, before} will be zero ++ * and not used. ++ */ ++ if (IS_ERR_VALUE(score_on_dst_cpu)) ++ return; ++ ++ before = sgs->sum_score; ++ after = before - sgs->min_score; ++ ++ /* SMT siblings share throughput. */ ++ if (busy_cpus > 1 && sg->flags & SD_SHARE_CPUCAPACITY) { ++ before /= busy_cpus; ++ /* One sibling will become idle after load balance. */ ++ after /= busy_cpus - 1; ++ } ++ ++ sgs->ipcc_score_after = after + score_on_dst_cpu; ++ sgs->ipcc_score_before = before; ++} ++ ++/** ++ * sched_asym_ipcc_prefer - Select a sched group based on its IPCC score ++ * @a: Load balancing statistics of a sched group ++ * @b: Load balancing statistics of a second sched group ++ * ++ * Returns: true if @a has a higher IPCC score than @b after load balance. ++ * False otherwise. ++ */ ++static bool sched_asym_ipcc_prefer(struct sg_lb_stats *a, ++ struct sg_lb_stats *b) ++{ ++ if (!sched_ipcc_enabled()) ++ return false; ++ ++ /* @a increases overall throughput after load balance. */ ++ if (a->ipcc_score_after > b->ipcc_score_after) ++ return true; ++ ++ /* ++ * If @a and @b yield the same overall throughput, pick @a if ++ * its current throughput is lower than that of @b. ++ */ ++ if (a->ipcc_score_after == b->ipcc_score_after) ++ return a->ipcc_score_before < b->ipcc_score_before; ++ ++ return false; ++} ++ ++/** ++ * sched_asym_ipcc_pick - Select a sched group based on its IPCC score ++ * @a: A scheduling group ++ * @b: A second scheduling group ++ * @a_stats: Load balancing statistics of @a ++ * @b_stats: Load balancing statistics of @b ++ * ++ * Returns: true if @a has the same priority and @a has tasks with IPC classes ++ * that yield higher overall throughput after load balance. False otherwise. ++ */ ++static bool sched_asym_ipcc_pick(struct sched_group *a, ++ struct sched_group *b, ++ struct sg_lb_stats *a_stats, ++ struct sg_lb_stats *b_stats) ++{ ++ /* ++ * Only use the class-specific preference selection if both sched ++ * groups have the same priority. ++ */ ++ if (arch_asym_cpu_priority(a->asym_prefer_cpu) != ++ arch_asym_cpu_priority(b->asym_prefer_cpu)) ++ return false; ++ ++ return sched_asym_ipcc_prefer(a_stats, b_stats); ++} ++ ++/** ++ * ipcc_score_delta - Get the IPCC score delta wrt the load balance's dst_cpu ++ * @rq: A runqueue ++ * @env: Load balancing environment ++ * ++ * Returns: The IPCC score delta that the last task enqueued in @rq would get ++ * if placed in the destination CPU of @env. 
LONG_MIN to indicate that the ++ * delta should not be used. ++ */ ++static long ipcc_score_delta(struct rq *rq, struct lb_env *env) ++{ ++ unsigned long score_src, score_dst; ++ unsigned short ipcc; ++ ++ if (!sched_ipcc_enabled()) ++ return LONG_MIN; ++ ++ /* Only asym_packing uses IPCC scores at the moment. */ ++ if (!(env->sd->flags & SD_ASYM_PACKING)) ++ return LONG_MIN; ++ ++ if (rq_last_task_ipcc(env->dst_cpu, rq, &ipcc)) ++ return LONG_MIN; ++ ++ score_dst = arch_get_ipcc_score(ipcc, env->dst_cpu); ++ if (IS_ERR_VALUE(score_dst)) ++ return LONG_MIN; ++ ++ score_src = arch_get_ipcc_score(ipcc, cpu_of(rq)); ++ if (IS_ERR_VALUE(score_src)) ++ return LONG_MIN; ++ ++ return score_dst - score_src; ++} ++ ++#else /* CONFIG_IPC_CLASSES */ ++static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs, ++ struct rq *rq) ++{ ++} ++ ++static void init_rq_ipcc_stats(struct sg_lb_stats *sgs) ++{ ++} ++ ++static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs, ++ struct sched_group *sg, ++ struct lb_env *env) ++{ ++} ++ ++static bool sched_asym_ipcc_prefer(struct sg_lb_stats *a, ++ struct sg_lb_stats *b) ++{ ++ return false; ++} ++ ++static bool sched_asym_ipcc_pick(struct sched_group *a, ++ struct sched_group *b, ++ struct sg_lb_stats *a_stats, ++ struct sg_lb_stats *b_stats) ++{ ++ return false; ++} ++ ++static long ipcc_score_delta(struct rq *rq, struct lb_env *env) ++{ ++ return LONG_MIN; ++} ++ ++#endif /* CONFIG_IPC_CLASSES */ ++ + /** + * sched_use_asym_prio - Check whether asym_packing priority must be used + * @sd: The scheduling domain of the load balancing +@@ -9743,7 +9999,7 @@ static bool sched_use_asym_prio(struct sched_domain *sd, int cpu) + if (!sched_smt_active()) + return true; + +- return sd->flags & SD_SHARE_CPUCAPACITY || is_core_idle(cpu); ++ return sd->flags & SD_SHARE_CPUCAPACITY || sched_smt_siblings_idle(cpu); + } + + /** +@@ -9882,6 +10138,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, + int i, nr_running, local_group; + + memset(sgs, 0, sizeof(*sgs)); ++ init_rq_ipcc_stats(sgs); + + local_group = group == sds->local; + +@@ -9931,6 +10188,8 @@ static inline void update_sg_lb_stats(struct lb_env *env, + if (sgs->group_misfit_task_load < load) + sgs->group_misfit_task_load = load; + } ++ ++ update_sg_lb_ipcc_stats(env->dst_cpu, sgs, rq); + } + + sgs->group_capacity = group->sgc->capacity; +@@ -9950,6 +10209,9 @@ static inline void update_sg_lb_stats(struct lb_env *env, + + sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs); + ++ if (!local_group) ++ update_sg_lb_stats_scores(sgs, group, env); ++ + /* Computing avg_load makes sense only when group is overloaded */ + if (sgs->group_type == group_overloaded) + sgs->avg_load = (sgs->group_load * SCHED_CAPACITY_SCALE) / +@@ -10021,6 +10283,16 @@ static bool update_sd_pick_busiest(struct lb_env *env, + /* Prefer to move from lowest priority CPU's work */ + if (sched_asym_prefer(sg->asym_prefer_cpu, sds->busiest->asym_prefer_cpu)) + return false; ++ ++ /* ++ * Unlike other callers of sched_asym_prefer(), here both @sg ++ * and @sds::busiest have tasks running. When they have equal ++ * priority, their IPC class scores can be used to select a ++ * better busiest. 
++ */ ++ if (sched_asym_ipcc_pick(sds->busiest, sg, &sds->busiest_stat, sgs)) ++ return false; ++ + break; + + case group_misfit_task: +@@ -10061,10 +10333,21 @@ static bool update_sd_pick_busiest(struct lb_env *env, + if (sgs->avg_load == busiest->avg_load) { + /* + * SMT sched groups need more help than non-SMT groups. +- * If @sg happens to also be SMT, either choice is good. + */ +- if (sds->busiest->flags & SD_SHARE_CPUCAPACITY) +- return false; ++ if (sds->busiest->flags & SD_SHARE_CPUCAPACITY) { ++ if (!(sg->flags & SD_SHARE_CPUCAPACITY)) ++ return false; ++ ++ /* ++ * Between two SMT groups, use IPCC scores to pick the ++ * one that would improve throughput the most (only ++ * asym_packing uses IPCC scores for now). ++ */ ++ if (sched_ipcc_enabled() && ++ env->sd->flags & SD_ASYM_PACKING && ++ sched_asym_ipcc_prefer(busiest, sgs)) ++ return false; ++ } + } + + break; +@@ -10981,6 +11264,7 @@ static struct rq *find_busiest_queue(struct lb_env *env, + { + struct rq *busiest = NULL, *rq; + unsigned long busiest_util = 0, busiest_load = 0, busiest_capacity = 1; ++ long busiest_ipcc_delta = LONG_MIN; + unsigned int busiest_nr = 0; + int i; + +@@ -11097,6 +11381,26 @@ static struct rq *find_busiest_queue(struct lb_env *env, + if (busiest_nr < nr_running) { + busiest_nr = nr_running; + busiest = rq; ++ ++ /* ++ * Remember the IPCC score of the busiest ++ * runqueue. We may need it to break a tie with ++ * other queues with equal nr_running. ++ */ ++ busiest_ipcc_delta = ipcc_score_delta(busiest, env); ++ /* ++ * For ties, select @rq if doing would give its last ++ * queued task a bigger IPC boost when migrated to ++ * dst_cpu. ++ */ ++ } else if (busiest_nr == nr_running) { ++ long delta = ipcc_score_delta(rq, env); ++ ++ if (busiest_ipcc_delta < delta) { ++ busiest_ipcc_delta = delta; ++ busiest_nr = nr_running; ++ busiest = rq; ++ } + } + break; + +@@ -11228,7 +11532,7 @@ static int should_we_balance(struct lb_env *env) + * balancing cores, but remember the first idle SMT CPU for + * later consideration. Find CPU on an idle core first. + */ +- if (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && !is_core_idle(cpu)) { ++ if (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && !sched_smt_siblings_idle(cpu)) { + if (idle_smt == -1) + idle_smt = cpu; + /* +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 001fe047bd5..b741fca335b 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -2622,6 +2622,72 @@ void arch_scale_freq_tick(void) + } + #endif + ++#ifdef CONFIG_IPC_CLASSES ++DECLARE_STATIC_KEY_FALSE(sched_ipcc); ++ ++static inline bool sched_ipcc_enabled(void) ++{ ++ return static_branch_unlikely(&sched_ipcc); ++} ++ ++#ifndef arch_update_ipcc ++/** ++ * arch_update_ipcc() - Update the IPC class of the current task ++ * @curr: The current task ++ * ++ * Request that the IPC classification of @curr is updated. ++ * ++ * Returns: none ++ */ ++static __always_inline ++void arch_update_ipcc(struct task_struct *curr) ++{ ++} ++#endif ++ ++#ifndef arch_get_ipcc_score ++ ++#define SCHED_IPCC_SCORE_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) ++/** ++ * arch_get_ipcc_score() - Get the IPC score of a class of task ++ * @ipcc: The IPC class ++ * @cpu: A CPU number ++ * ++ * The IPC performance scores reflects (but it is not identical to) the number ++ * of instructions retired per cycle for a given IPC class. It is a linear and ++ * abstract metric. Higher scores reflect better performance. 
++ * ++ * The IPC score can be normalized with respect to the class, i, with the ++ * highest IPC score on the CPU, c, with highest performance: ++ * ++ * IPC(i, c) ++ * ------------------------------------ * SCHED_IPCC_SCORE_SCALE ++ * max(IPC(i, c) : (i, c)) ++ * ++ * Scheduling schemes that want to use the IPC score along with other ++ * normalized metrics for scheduling (e.g., CPU capacity) may need to normalize ++ * it. ++ * ++ * Other scheduling schemes (e.g., asym_packing) do not need normalization. ++ * ++ * Returns the performance score of an IPC class, @ipcc, when running on @cpu. ++ * Error when either @ipcc or @cpu are invalid. ++ */ ++static __always_inline ++unsigned long arch_get_ipcc_score(unsigned short ipcc, int cpu) ++{ ++ return SCHED_IPCC_SCORE_SCALE; ++} ++#endif ++#else /* CONFIG_IPC_CLASSES */ ++ ++#define arch_get_ipcc_score(ipcc, cpu) (-EINVAL) ++#define arch_update_ipcc(curr) ++ ++static inline bool sched_ipcc_enabled(void) { return false; } ++ ++#endif /* CONFIG_IPC_CLASSES */ ++ + #ifndef arch_scale_freq_capacity + /** + * arch_scale_freq_capacity - get the frequency scale factor of a given CPU. +diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c +index 10d1391e741..da49c3c5162 100644 +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -677,6 +677,15 @@ DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity); + DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity); + DEFINE_STATIC_KEY_FALSE(sched_cluster_active); + ++#ifdef CONFIG_IPC_CLASSES ++DEFINE_STATIC_KEY_FALSE(sched_ipcc); ++ ++void sched_enable_ipc_classes(void) ++{ ++ static_branch_enable_cpuslocked(&sched_ipcc); ++} ++#endif ++ + static void update_top_cache_domain(int cpu) + { + struct sched_domain_shared *sds = NULL; +diff --git a/kernel/time/timer.c b/kernel/time/timer.c +index 352b161113c..f739cd5912d 100644 +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -2089,7 +2089,7 @@ void update_process_times(int user_tick) + if (in_irq()) + irq_work_tick(); + #endif +- scheduler_tick(); ++ scheduler_tick(user_tick); + if (IS_ENABLED(CONFIG_POSIX_TIMERS)) + run_posix_cpu_timers(); + } +-- +2.44.0 + + +From 6ac91be34077c54e9f7459098aff5b9d183de7f8 Mon Sep 17 00:00:00 2001 +From: Stanislaw Gruszka +Date: Mon, 12 Feb 2024 17:16:13 +0100 +Subject: [PATCH 2/2] genetlink: Add per family bind/unbind callbacks + +Add genetlink family bind()/unbind() callbacks when adding/removing +multicast group to/from netlink client socket via setsockopt() or +bind() syscall. + +They can be used to track if consumers of netlink multicast messages +emerge or disappear. Thus, a client implementing callbacks, can now +send events only when there are active consumers, preventing unnecessary +work when none exist. 
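As a rough usage sketch of the new callbacks (an editorial illustration, not part of this patch: the "example" family name, the single multicast group at index 0, and the listener counter are all hypothetical), a family could count bound listeners and skip building notifications while nobody is subscribed:

    #include <linux/atomic.h>
    #include <net/genetlink.h>

    /* Hypothetical family with one multicast group at index 0. */
    static atomic_t example_listeners = ATOMIC_INIT(0);

    static int example_bind(int mcgrp)
    {
            /* @mcgrp is the group index within this family. */
            if (mcgrp == 0)
                    atomic_inc(&example_listeners);
            return 0;
    }

    static void example_unbind(int mcgrp)
    {
            if (mcgrp == 0)
                    atomic_dec(&example_listeners);
    }

    static struct genl_family example_family = {
            .name    = "example",
            .version = 1,
            .bind    = example_bind,
            .unbind  = example_unbind,
            /* .module, .ops, .mcgrps, etc. omitted for brevity */
    };

The event path would then check atomic_read(&example_listeners) and return early when it is zero, which is the "send events only when there are active consumers" behaviour described above.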
+ +Suggested-by: Jakub Kicinski +Signed-off-by: Stanislaw Gruszka +Reviewed-by: Jiri Pirko +Link: https://lore.kernel.org/r/20240212161615.161935-2-stanislaw.gruszka@linux.intel.com +Signed-off-by: Jakub Kicinski +--- + include/net/genetlink.h | 4 ++++ + net/netlink/genetlink.c | 30 ++++++++++++++++++++++++++++++ + 2 files changed, 34 insertions(+) + +diff --git a/include/net/genetlink.h b/include/net/genetlink.h +index e6146912940..ecadba836ae 100644 +--- a/include/net/genetlink.h ++++ b/include/net/genetlink.h +@@ -41,6 +41,8 @@ struct genl_info; + * do additional, common, filtering and return an error + * @post_doit: called after an operation's doit callback, it may + * undo operations done by pre_doit, for example release locks ++ * @bind: called when family multicast group is added to a netlink socket ++ * @unbind: called when family multicast group is removed from a netlink socket + * @module: pointer to the owning module (set to THIS_MODULE) + * @mcgrps: multicast groups used by this family + * @n_mcgrps: number of multicast groups +@@ -84,6 +86,8 @@ struct genl_family { + void (*post_doit)(const struct genl_split_ops *ops, + struct sk_buff *skb, + struct genl_info *info); ++ int (*bind)(int mcgrp); ++ void (*unbind)(int mcgrp); + const struct genl_ops * ops; + const struct genl_small_ops *small_ops; + const struct genl_split_ops *split_ops; +diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c +index 8c7af02f845..50ec599a5cf 100644 +--- a/net/netlink/genetlink.c ++++ b/net/netlink/genetlink.c +@@ -1836,6 +1836,9 @@ static int genl_bind(struct net *net, int group) + !ns_capable(net->user_ns, CAP_SYS_ADMIN)) + ret = -EPERM; + ++ if (family->bind) ++ family->bind(i); ++ + break; + } + +@@ -1843,12 +1846,39 @@ static int genl_bind(struct net *net, int group) + return ret; + } + ++static void genl_unbind(struct net *net, int group) ++{ ++ const struct genl_family *family; ++ unsigned int id; ++ ++ down_read(&cb_lock); ++ ++ idr_for_each_entry(&genl_fam_idr, family, id) { ++ int i; ++ ++ if (family->n_mcgrps == 0) ++ continue; ++ ++ i = group - family->mcgrp_offset; ++ if (i < 0 || i >= family->n_mcgrps) ++ continue; ++ ++ if (family->unbind) ++ family->unbind(i); ++ ++ break; ++ } ++ ++ up_read(&cb_lock); ++} ++ + static int __net_init genl_pernet_init(struct net *net) + { + struct netlink_kernel_cfg cfg = { + .input = genl_rcv, + .flags = NL_CFG_F_NONROOT_RECV, + .bind = genl_bind, ++ .unbind = genl_unbind, + .release = genl_release, + }; + +-- +2.44.0 + +From 68a15ef01803c252261ebb47d86dfc1f2c68ae1e Mon Sep 17 00:00:00 2001 +From: Tim Chen +Date: Fri, 6 Oct 2023 15:58:56 -0700 +Subject: [PATCH] sched/fair: Don't force smt balancing when CPU has spare + capacity + +Currently group_smt_balance is picked whenever there are more +than two tasks on a core with two SMT. However, the utilization +of those tasks may be low and do not warrant a task +migration to a CPU of lower priority. + +Adjust sched group clssification and sibling_imbalance() +to reflect this consideration. Use sibling_imbalance() to +compute imbalance in calculate_imbalance() for the group_smt_balance +case. 
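To make the effect of the hunks below easier to follow, here is a condensed restatement (illustration only, not a separate implementation) of the tail of group_classify() after this change, with the earlier group_overloaded and group_imbalanced checks omitted: an SMT group is reported as group_smt_balance only when it also lacks spare capacity, so lightly loaded SMT groups now fall through to group_has_spare.

    /* Condensed sketch of the reordered checks in group_classify(). */
    if (sgs->group_asym_packing)
            return group_asym_packing;

    if (sgs->group_misfit_task_load)
            return group_misfit_task;

    if (!group_has_capacity(imbalance_pct, sgs))
            return sgs->group_smt_balance ? group_smt_balance
                                          : group_fully_busy;

    return group_has_spare;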
+ +Signed-off-by: Tim Chen + +--- + kernel/sched/fair.c | 23 +++++++++++------------ + 1 file changed, 11 insertions(+), 12 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index ef7490c4b8b4..7dd7c2d2367a 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -9460,14 +9460,15 @@ group_type group_classify(unsigned int imbalance_pct, + if (sgs->group_asym_packing) + return group_asym_packing; + +- if (sgs->group_smt_balance) +- return group_smt_balance; +- + if (sgs->group_misfit_task_load) + return group_misfit_task; + +- if (!group_has_capacity(imbalance_pct, sgs)) +- return group_fully_busy; ++ if (!group_has_capacity(imbalance_pct, sgs)) { ++ if (sgs->group_smt_balance) ++ return group_smt_balance; ++ else ++ return group_fully_busy; ++ } + + return group_has_spare; + } +@@ -9573,6 +9574,11 @@ static inline long sibling_imbalance(struct lb_env *env, + if (env->idle == CPU_NOT_IDLE || !busiest->sum_nr_running) + return 0; + ++ /* Do not pull tasks off preferred group with spare capacity */ ++ if (busiest->group_type == group_has_spare && ++ sched_asym_prefer(sds->busiest->asym_prefer_cpu, env->dst_cpu)) ++ return 0; ++ + ncores_busiest = sds->busiest->cores; + ncores_local = sds->local->cores; + +@@ -10411,13 +10417,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s + return; + } + +- if (busiest->group_type == group_smt_balance) { +- /* Reduce number of tasks sharing CPU capacity */ +- env->migration_type = migrate_task; +- env->imbalance = 1; +- return; +- } +- + if (busiest->group_type == group_imbalanced) { + /* + * In the group_imb case we cannot rely on group-wide averages +-- +2.32.0 \ No newline at end of file diff --git a/patches/series b/patches/series index e61b72f..50989b1 100644 --- a/patches/series +++ b/patches/series @@ -1,6 +1,8 @@ cachyos/0001-cachyos-base-all.patch cachyos/0001-bore-cachy.patch cachyos/0002-ntsync.patch +cachyos/0003-nvidia.patch +cachyos/0004-intel.patch nobara/0001-Allow-to-set-custom-USB-pollrate-for-specific-device.patch nobara/0001-Revert-PCI-Add-a-REBAR-size-quirk-for-Sapphire-RX-56.patch nobara/0001-Revert-nvme-pci-drop-redundant-pci_enable_pcie_error.patch diff --git a/scripts/source.sh b/scripts/source.sh index 9f46d4b..481f9af 100755 --- a/scripts/source.sh +++ b/scripts/source.sh @@ -2,10 +2,7 @@ echo "Pika Kernel - Getting source" -#wget -nv https://cdn.kernel.org/pub/linux/kernel/v"$(echo $(cat ./VERSION) | cut -f1 -d".")".x/linux-"$(cat ./VERSION)".tar.gz -#tar -xf ./linux-"$(cat ./VERSION)".tar.gz - -wget -nv https://git.kernel.org/torvalds/t/linux-6.8-rc6.tar.gz +wget -nv https://cdn.kernel.org/pub/linux/kernel/v"$(echo $(cat ./VERSION) | cut -f1 -d".")".x/linux-"$(cat ./VERSION)".tar.gz tar -xf ./linux-"$(cat ./VERSION)".tar.gz cd linux-"$(cat ./VERSION)"