diff --git a/config b/config index 9449157..7aa2860 100644 --- a/config +++ b/config @@ -7067,7 +7067,7 @@ CONFIG_SND_SERIAL_U16550=m CONFIG_SND_MPU401=m CONFIG_SND_PORTMAN2X4=m CONFIG_SND_AC97_POWER_SAVE=y -CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 +CONFIG_SND_AC97_POWER_SAVE_DEFAULT=10 CONFIG_SND_SB_COMMON=m CONFIG_SND_PCI=y CONFIG_SND_AD1889=m @@ -7172,7 +7172,7 @@ CONFIG_SND_HDA_CODEC_CA0132_DSP=y CONFIG_SND_HDA_CODEC_CMEDIA=m CONFIG_SND_HDA_CODEC_SI3054=m CONFIG_SND_HDA_GENERIC=m -CONFIG_SND_HDA_POWER_SAVE_DEFAULT=1 +CONFIG_SND_HDA_POWER_SAVE_DEFAULT=10 CONFIG_SND_HDA_INTEL_HDMI_SILENT_STREAM=y # CONFIG_SND_HDA_CTL_DEV_ID is not set # end of HD-Audio diff --git a/patches/0001-cachy-all.patch b/patches/0001-cachy-all.patch index bc7f710..aa7ecc2 100644 --- a/patches/0001-cachy-all.patch +++ b/patches/0001-cachy-all.patch @@ -1,4 +1,4 @@ -From e1cfa351424a722e33443e5c9a6a937034eb18bd Mon Sep 17 00:00:00 2001 +From 8b27eca196447f74bfa5a346df10212b900ce82a Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Tue, 9 May 2023 18:38:36 +0200 Subject: [PATCH 1/8] bbr2 @@ -51,7 +51,7 @@ index c2b15f7e5516..d85858efa571 100644 }; diff --git a/include/net/tcp.h b/include/net/tcp.h -index 18a038d16434..070d0aad5204 100644 +index 5066e4586cf0..b34661204315 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -375,6 +375,7 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, @@ -62,7 +62,7 @@ index 18a038d16434..070d0aad5204 100644 enum tcp_tw_status { TCP_TW_SUCCESS = 0, -@@ -823,6 +824,11 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) +@@ -824,6 +825,11 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) return max_t(s64, t1 - t0, 0); } @@ -74,7 +74,7 @@ index 18a038d16434..070d0aad5204 100644 static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) { return tcp_ns_to_ts(skb->skb_mstamp_ns); -@@ -898,9 +904,14 @@ struct tcp_skb_cb { +@@ -899,9 +905,14 @@ struct tcp_skb_cb { /* pkts S/ACKed so far upon tx of skb, incl retrans: */ __u32 delivered; /* start of send pipeline phase */ @@ -91,7 +91,7 @@ index 18a038d16434..070d0aad5204 100644 } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; -@@ -1026,7 +1037,11 @@ enum tcp_ca_ack_event_flags { +@@ -1027,7 +1038,11 @@ enum tcp_ca_ack_event_flags { #define TCP_CONG_NON_RESTRICTED 0x1 /* Requires ECN/ECT set on all packets */ #define TCP_CONG_NEEDS_ECN 0x2 @@ -104,7 +104,7 @@ index 18a038d16434..070d0aad5204 100644 union tcp_cc_info; -@@ -1046,8 +1061,11 @@ struct ack_sample { +@@ -1047,8 +1062,11 @@ struct ack_sample { */ struct rate_sample { u64 prior_mstamp; /* starting timestamp for interval */ @@ -116,7 +116,7 @@ index 18a038d16434..070d0aad5204 100644 s32 delivered; /* number of packets delivered over interval */ s32 delivered_ce; /* number of packets delivered w/ CE marks*/ long interval_us; /* time for tp->delivered to incr "delivered" */ -@@ -1061,6 +1079,7 @@ struct rate_sample { +@@ -1062,6 +1080,7 @@ struct rate_sample { bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ bool is_ack_delayed; /* is this (likely) a delayed ACK? 
*/ @@ -124,7 +124,7 @@ index 18a038d16434..070d0aad5204 100644 }; struct tcp_congestion_ops { -@@ -1084,8 +1103,11 @@ struct tcp_congestion_ops { +@@ -1085,8 +1104,11 @@ struct tcp_congestion_ops { /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); @@ -138,7 +138,7 @@ index 18a038d16434..070d0aad5204 100644 /* call when packets are delivered to update cwnd and pacing rate, * after all the ca_state processing. (optional) -@@ -1151,6 +1173,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer) +@@ -1152,6 +1174,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer) } #endif @@ -153,7 +153,7 @@ index 18a038d16434..070d0aad5204 100644 static inline bool tcp_ca_needs_ecn(const struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); -@@ -1170,6 +1200,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) +@@ -1171,6 +1201,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) void tcp_set_ca_state(struct sock *sk, const u8 ca_state); /* From tcp_rate.c */ @@ -268,10 +268,10 @@ index b18ba8ef93ad..b4e3dcb27a20 100644 obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index a60f6f4e7cd9..158d0ed5a7c4 100644 +index 8d20d9221238..99c2e0357dec 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c -@@ -3186,6 +3186,7 @@ int tcp_disconnect(struct sock *sk, int flags) +@@ -3192,6 +3192,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->rx_opt.dsack = 0; tp->rx_opt.num_sacks = 0; tp->rcv_ooopack = 0; @@ -3037,7 +3037,7 @@ index 1b34050a7538..66d40449b3f4 100644 icsk->icsk_ca_ops->init(sk); if (tcp_ca_needs_ecn(sk)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index 61b6710f337a..2efb52fbeee3 100644 +index bf8b22218dd4..3ae56b0676a8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -349,7 +349,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) @@ -3269,10 +3269,10 @@ index a8f6d9d06f2e..a8b4c9504570 100644 rs->interval_us = max(snd_us, ack_us); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c -index b839c2f91292..ae272ae2b707 100644 +index 39eb947fe392..61ab4ee55b22 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c -@@ -605,6 +605,7 @@ void tcp_write_timer_handler(struct sock *sk) +@@ -615,6 +615,7 @@ void tcp_write_timer_handler(struct sock *sk) return; } @@ -3283,7 +3283,7 @@ index b839c2f91292..ae272ae2b707 100644 -- 2.41.0 -From 2414bafa00ccf9c4dca1327546ff9cfa2f87676f Mon Sep 17 00:00:00 2001 +From 4b326373e0295ad142c417ef510d28cc491e0e73 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 1 Jun 2023 16:35:02 +0200 Subject: [PATCH 2/8] cachy @@ -3411,7 +3411,7 @@ index 3c399f132e2d..a62ad01e6d11 100644 vmlinuz voffset.h diff --git a/Makefile b/Makefile -index 836643eaefee..161c4a3c9e3a 100644 +index 0d3a9d3e73c1..f6ec2f6c57ca 100644 --- a/Makefile +++ b/Makefile @@ -818,6 +818,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) @@ -9419,7 +9419,7 @@ index 38ef6d06888e..0f78364efd4f 100644 config SCHED_HRTICK diff --git a/kernel/fork.c b/kernel/fork.c -index ed4e01daccaa..ee9b6e4cf16a 100644 +index 41c964104b58..915ad6dae416 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -100,6 +100,10 @@ @@ -9444,7 +9444,7 @@ index ed4e01daccaa..ee9b6e4cf16a 100644 /* * Thread groups must share signals as well, and detached threads * can only be started up within 
the thread group. -@@ -3422,6 +3430,12 @@ int ksys_unshare(unsigned long unshare_flags) +@@ -3419,6 +3427,12 @@ int ksys_unshare(unsigned long unshare_flags) if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; @@ -9758,7 +9758,7 @@ index ab0c5bd1a60f..f4989f706d7f 100644 -- 2.41.0 -From c6c9513db571d0b72d3a7c37aa010db70992b6a6 Mon Sep 17 00:00:00 2001 +From d66ae67f1a8580742fdd5cda2e6dcade3cc770e0 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Tue, 9 May 2023 18:39:03 +0200 Subject: [PATCH 3/8] ddcci @@ -12558,7 +12558,7 @@ index 000000000000..a219f031e584 -- 2.41.0 -From 7341d3f2d650ef7c81ace77bbaed7aeedf6d124b Mon Sep 17 00:00:00 2001 +From 72c060c5d2883853d8530a436380a788f74248b1 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 1 Jun 2023 16:35:21 +0200 Subject: [PATCH 4/8] fixes @@ -14250,7 +14250,7 @@ index b5210abb5141..4d8936e1f769 100644 -- 2.41.0 -From 2b82b34c90d5a0b7f64f438ae45a77777059a810 Mon Sep 17 00:00:00 2001 +From 5cce371c10c8c702dda5f1f0ca4428ff0d336662 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 1 Jun 2023 16:35:38 +0200 Subject: [PATCH 5/8] ksm @@ -14702,7 +14702,7 @@ index 860b2dcf3ac4..96fe36a6d0f5 100644 -- 2.41.0 -From d9705b7f78a157575856ee08474297f3abe38dfd Mon Sep 17 00:00:00 2001 +From 21d2dc84c885c791c47d30838a265cf6c532d567 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 1 Jun 2023 16:35:57 +0200 Subject: [PATCH 6/8] sched @@ -14806,7 +14806,7 @@ index 57bde66d95f7..fad77b5172e2 100644 /* * Prefer to place tasks in a sibling domain diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c -index 625d7483951c..b26ae200abef 100644 +index 245cf62ce85a..2d3d13e52333 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3877,6 +3877,14 @@ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, @@ -15515,9 +15515,9 @@ index 6682535e37c8..ca4472281c28 100644 -- 2.41.0 -From b205c598effc456c3c47800294a7e3c4f110e0c2 Mon Sep 17 00:00:00 2001 +From de932d02ee465828c5f4902165e38d9fb74f7758 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Thu, 1 Jun 2023 16:36:10 +0200 +Date: Wed, 14 Jun 2023 19:42:38 +0200 Subject: [PATCH 7/8] vma Signed-off-by: Peter Jung @@ -15526,20 +15526,24 @@ Signed-off-by: Peter Jung arch/powerpc/mm/fault.c | 3 ++- arch/s390/mm/fault.c | 3 ++- arch/x86/mm/fault.c | 3 ++- - include/linux/mm_types.h | 6 +++++- - include/linux/pagemap.h | 14 ++++++++++---- - mm/filemap.c | 37 +++++++++++++++++++++++-------------- - mm/memory.c | 39 ++++++++++++++++++++++++++++----------- - 8 files changed, 74 insertions(+), 34 deletions(-) + fs/userfaultfd.c | 42 ++++++++++++++++++----------------- + include/linux/mm_types.h | 7 +++++- + include/linux/pagemap.h | 14 ++++++++---- + mm/filemap.c | 37 +++++++++++++++++++------------ + mm/madvise.c | 4 ++-- + mm/memory.c | 48 ++++++++++++++++++++++------------------ + mm/swap.h | 1 - + mm/swap_state.c | 12 +++++----- + 12 files changed, 103 insertions(+), 74 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c -index cb21ccd7940d..92ecac055e4d 100644 +index 6045a5117ac1..8f59badbffb5 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c -@@ -602,7 +602,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, +@@ -601,7 +601,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, + goto lock_mmap; } - fault = handle_mm_fault(vma, addr & PAGE_MASK, - mm_flags | FAULT_FLAG_VMA_LOCK, regs); + fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs); - vma_end_read(vma); + if (!(fault & 
VM_FAULT_VMA_UNLOCKED)) + vma_end_read(vma); @@ -15588,19 +15592,119 @@ index e4399983c50c..ef62ab2fd211 100644 if (!(fault & VM_FAULT_RETRY)) { count_vm_vma_lock_event(VMA_LOCK_SUCCESS); +diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c +index 0fd96d6e39ce..23c3a4ce45d9 100644 +--- a/fs/userfaultfd.c ++++ b/fs/userfaultfd.c +@@ -277,17 +277,17 @@ static inline struct uffd_msg userfault_msg(unsigned long address, + * hugepmd ranges. + */ + static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, +- struct vm_area_struct *vma, +- unsigned long address, +- unsigned long flags, +- unsigned long reason) ++ struct vm_fault *vmf, ++ unsigned long reason) + { ++ struct vm_area_struct *vma = vmf->vma; + pte_t *ptep, pte; + bool ret = true; + +- mmap_assert_locked(ctx->mm); ++ if (!(vmf->flags & FAULT_FLAG_VMA_LOCK)) ++ mmap_assert_locked(ctx->mm); + +- ptep = hugetlb_walk(vma, address, vma_mmu_pagesize(vma)); ++ ptep = hugetlb_walk(vma, vmf->address, vma_mmu_pagesize(vma)); + if (!ptep) + goto out; + +@@ -308,10 +308,8 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, + } + #else + static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, +- struct vm_area_struct *vma, +- unsigned long address, +- unsigned long flags, +- unsigned long reason) ++ struct vm_fault *vmf, ++ unsigned long reason) + { + return false; /* should never get here */ + } +@@ -325,11 +323,11 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, + * threads. + */ + static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, +- unsigned long address, +- unsigned long flags, ++ struct vm_fault *vmf, + unsigned long reason) + { + struct mm_struct *mm = ctx->mm; ++ unsigned long address = vmf->address; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; +@@ -337,7 +335,8 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, + pte_t *pte; + bool ret = true; + +- mmap_assert_locked(mm); ++ if (!(vmf->flags & FAULT_FLAG_VMA_LOCK)) ++ mmap_assert_locked(mm); + + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) +@@ -445,7 +444,8 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) + * Coredumping runs without mmap_lock so we can only check that + * the mmap_lock is held, if PF_DUMPCORE was not set. 
+ */ +- mmap_assert_locked(mm); ++ if (!(vmf->flags & FAULT_FLAG_VMA_LOCK)) ++ mmap_assert_locked(mm); + + ctx = vma->vm_userfaultfd_ctx.ctx; + if (!ctx) +@@ -561,15 +561,17 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) + spin_unlock_irq(&ctx->fault_pending_wqh.lock); + + if (!is_vm_hugetlb_page(vma)) +- must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags, +- reason); ++ must_wait = userfaultfd_must_wait(ctx, vmf, reason); + else +- must_wait = userfaultfd_huge_must_wait(ctx, vma, +- vmf->address, +- vmf->flags, reason); ++ must_wait = userfaultfd_huge_must_wait(ctx, vmf, reason); + if (is_vm_hugetlb_page(vma)) + hugetlb_vma_unlock_read(vma); +- mmap_read_unlock(mm); ++ if (vmf->flags & FAULT_FLAG_VMA_LOCK) { ++ vma_end_read(vma); ++ /* WARNING: VMA can't be used after this */ ++ ret |= VM_FAULT_VMA_UNLOCKED; ++ } else ++ mmap_read_unlock(mm); + + if (likely(must_wait && !READ_ONCE(ctx->released))) { + wake_up_poll(&ctx->fd_wqh, EPOLLIN); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h -index 306a3d1a0fa6..b3b57c6da0e1 100644 +index 306a3d1a0fa6..bd6b95c82f7a 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h -@@ -1030,6 +1030,7 @@ typedef __bitwise unsigned int vm_fault_t; +@@ -1030,6 +1030,8 @@ typedef __bitwise unsigned int vm_fault_t; * fsync() to complete (for synchronous page faults * in DAX) * @VM_FAULT_COMPLETED: ->fault completed, meanwhile mmap lock released -+ * @VM_FAULT_VMA_UNLOCKED: VMA lock was released ++ * @VM_FAULT_VMA_UNLOCKED: VMA lock was released, vmf->vma should no longer ++ * be accessed * @VM_FAULT_HINDEX_MASK: mask HINDEX value * */ -@@ -1047,6 +1048,7 @@ enum vm_fault_reason { +@@ -1047,6 +1049,7 @@ enum vm_fault_reason { VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000, VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000, VM_FAULT_COMPLETED = (__force vm_fault_t)0x004000, @@ -15608,7 +15712,7 @@ index 306a3d1a0fa6..b3b57c6da0e1 100644 VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000, }; -@@ -1070,7 +1072,9 @@ enum vm_fault_reason { +@@ -1070,7 +1073,9 @@ enum vm_fault_reason { { VM_FAULT_RETRY, "RETRY" }, \ { VM_FAULT_FALLBACK, "FALLBACK" }, \ { VM_FAULT_DONE_COW, "DONE_COW" }, \ @@ -15726,8 +15830,30 @@ index b4c9bd368b7e..838955635fbc 100644 return true; } +diff --git a/mm/madvise.c b/mm/madvise.c +index b5ffbaf616f5..b1e8adf1234e 100644 +--- a/mm/madvise.c ++++ b/mm/madvise.c +@@ -215,7 +215,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start, + continue; + + page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE, +- vma, index, false, &splug); ++ vma, index, &splug); + if (page) + put_page(page); + } +@@ -252,7 +252,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma, + rcu_read_unlock(); + + page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE, +- NULL, 0, false, &splug); ++ NULL, 0, &splug); + if (page) + put_page(page); + diff --git a/mm/memory.c b/mm/memory.c -index f69fbc251198..e1cd39f00756 100644 +index f69fbc251198..b2ea015dcb87 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3568,6 +3568,7 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf) @@ -15750,12 +15876,11 @@ index f69fbc251198..e1cd39f00756 100644 return VM_FAULT_RETRY; } mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, -@@ -3704,27 +3707,40 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) +@@ -3704,27 +3707,39 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) bool exclusive = false; swp_entry_t entry; pte_t pte; - int locked; -+ bool locked; 
+ bool lock_dropped; vm_fault_t ret = 0; void *shadow = NULL; @@ -15779,7 +15904,7 @@ index f69fbc251198..e1cd39f00756 100644 + if (vmf->flags & FAULT_FLAG_VMA_LOCK) { + /* No need to hold VMA lock for migration */ + vma_end_read(vma); -+ /* CAUTION! VMA can't be used after this */ ++ /* WARNING: VMA can't be used after this */ + ret |= VM_FAULT_VMA_UNLOCKED; + } + migration_entry_wait(mm, vmf->pmd, vmf->address); @@ -15799,23 +15924,108 @@ index f69fbc251198..e1cd39f00756 100644 vmf->page = pfn_swap_entry_to_page(entry); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); -@@ -3825,9 +3841,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) +@@ -3825,9 +3840,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) goto out_release; } - locked = folio_lock_or_retry(folio, vma->vm_mm, vmf->flags); - -+ locked = folio_lock_or_retry(folio, vma, vmf->flags, &lock_dropped); - if (!locked) { +- if (!locked) { ++ if (!folio_lock_or_retry(folio, vma, vmf->flags, &lock_dropped)) { + if (lock_dropped && vmf->flags & FAULT_FLAG_VMA_LOCK) + ret |= VM_FAULT_VMA_UNLOCKED; ret |= VM_FAULT_RETRY; goto out_release; } +@@ -5291,15 +5306,6 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, + if (!vma_start_read(vma)) + goto inval; + +- /* +- * Due to the possibility of userfault handler dropping mmap_lock, avoid +- * it for now and fall back to page fault handling under mmap_lock. +- */ +- if (userfaultfd_armed(vma)) { +- vma_end_read(vma); +- goto inval; +- } +- + /* Check since vm_start/vm_end might change before we lock the VMA */ + if (unlikely(address < vma->vm_start || address >= vma->vm_end)) { + vma_end_read(vma); +diff --git a/mm/swap.h b/mm/swap.h +index 7c033d793f15..8a3c7a0ace4f 100644 +--- a/mm/swap.h ++++ b/mm/swap.h +@@ -46,7 +46,6 @@ struct folio *filemap_get_incore_folio(struct address_space *mapping, + struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, + struct vm_area_struct *vma, + unsigned long addr, +- bool do_poll, + struct swap_iocb **plug); + struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, + struct vm_area_struct *vma, +diff --git a/mm/swap_state.c b/mm/swap_state.c +index b76a65ac28b3..a3839de71f3f 100644 +--- a/mm/swap_state.c ++++ b/mm/swap_state.c +@@ -517,15 +517,14 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, + */ + struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, + struct vm_area_struct *vma, +- unsigned long addr, bool do_poll, +- struct swap_iocb **plug) ++ unsigned long addr, struct swap_iocb **plug) + { + bool page_was_allocated; + struct page *retpage = __read_swap_cache_async(entry, gfp_mask, + vma, addr, &page_was_allocated); + + if (page_was_allocated) +- swap_readpage(retpage, do_poll, plug); ++ swap_readpage(retpage, false, plug); + + return retpage; + } +@@ -620,7 +619,7 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask, + struct swap_info_struct *si = swp_swap_info(entry); + struct blk_plug plug; + struct swap_iocb *splug = NULL; +- bool do_poll = true, page_allocated; ++ bool page_allocated; + struct vm_area_struct *vma = vmf->vma; + unsigned long addr = vmf->address; + +@@ -628,7 +627,6 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask, + if (!mask) + goto skip; + +- do_poll = false; + /* Read a page_cluster sized and aligned cluster around offset. 
*/ + start_offset = offset & ~mask; + end_offset = offset | mask; +@@ -660,7 +658,7 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask, + lru_add_drain(); /* Push any new pages onto the LRU now */ + skip: + /* The page was likely read above, so no need for plugging here */ +- return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll, NULL); ++ return read_swap_cache_async(entry, gfp_mask, vma, addr, NULL); + } + + int init_swap_address_space(unsigned int type, unsigned long nr_pages) +@@ -825,7 +823,7 @@ static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask, + skip: + /* The page was likely read above, so no need for plugging here */ + return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address, +- ra_info.win == 1, NULL); ++ NULL); + } + + /** -- 2.41.0 -From 4c79598323457b04cefa717a2adbf82586477840 Mon Sep 17 00:00:00 2001 +From 8af5970aac59ebe84af695501c91e8c7f2a1b91d Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Tue, 9 May 2023 18:40:05 +0200 Subject: [PATCH 8/8] zstd 1.5.5 diff --git a/patches/0002-eevdf.patch b/patches/0002-eevdf.patch index ae65e0d..e9814d3 100644 --- a/patches/0002-eevdf.patch +++ b/patches/0002-eevdf.patch @@ -1,2230 +1,543 @@ -From 5e4ded34523fcaf5aea5c77d45239b6dd33f1c91 Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Thu, 1 Jun 2023 16:37:55 +0200 -Subject: [PATCH] EEVDF +From 74b3a8a51481e8c38adb4954398fc7325cf42634 Mon Sep 17 00:00:00 2001 +From: Piotr Gorski +Date: Thu, 8 Jun 2023 10:59:00 +0200 +Subject: [PATCH] bore-eevdf -Signed-off-by: Peter Jung +Signed-off-by: Piotr Gorski --- - Documentation/admin-guide/cgroup-v2.rst | 10 + - include/linux/rbtree_augmented.h | 26 + - include/linux/sched.h | 8 +- - include/uapi/linux/sched.h | 4 +- - include/uapi/linux/sched/types.h | 19 + - init/init_task.c | 3 +- - kernel/sched/core.c | 65 +- - kernel/sched/debug.c | 49 +- - kernel/sched/fair.c | 1152 +++++++++++------------ - kernel/sched/features.h | 24 +- - kernel/sched/sched.h | 22 +- - tools/include/uapi/linux/sched.h | 4 +- - 12 files changed, 726 insertions(+), 660 deletions(-) + include/linux/sched.h | 10 +++ + init/Kconfig | 20 +++++ + kernel/sched/core.c | 62 +++++++++++++ + kernel/sched/debug.c | 4 + + kernel/sched/fair.c | 193 ++++++++++++++++++++++++++++++++++++++-- + kernel/sched/features.h | 4 + + kernel/sched/sched.h | 1 + + 7 files changed, 286 insertions(+), 8 deletions(-) -diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst -index f67c0829350b..a39dfda3d032 100644 ---- a/Documentation/admin-guide/cgroup-v2.rst -+++ b/Documentation/admin-guide/cgroup-v2.rst -@@ -1121,6 +1121,16 @@ All time durations are in microseconds. - values similar to the sched_setattr(2). This maximum utilization - value is used to clamp the task specific maximum utilization clamp. - -+ cpu.latency.nice -+ A read-write single value file which exists on non-root -+ cgroups. The default is "0". -+ -+ The nice value is in the range [-20, 19]. -+ -+ This interface file allows reading and setting latency using the -+ same values used by sched_setattr(2). The latency_nice of a group is -+ used to limit the impact of the latency_nice of a task outside the -+ group. 
- - - Memory -diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h -index 7ee7ed5de722..6dbc5a1bf6a8 100644 ---- a/include/linux/rbtree_augmented.h -+++ b/include/linux/rbtree_augmented.h -@@ -60,6 +60,32 @@ rb_insert_augmented_cached(struct rb_node *node, - rb_insert_augmented(node, &root->rb_root, augment); - } - -+static __always_inline struct rb_node * -+rb_add_augmented_cached(struct rb_node *node, struct rb_root_cached *tree, -+ bool (*less)(struct rb_node *, const struct rb_node *), -+ const struct rb_augment_callbacks *augment) -+{ -+ struct rb_node **link = &tree->rb_root.rb_node; -+ struct rb_node *parent = NULL; -+ bool leftmost = true; -+ -+ while (*link) { -+ parent = *link; -+ if (less(node, parent)) { -+ link = &parent->rb_left; -+ } else { -+ link = &parent->rb_right; -+ leftmost = false; -+ } -+ } -+ -+ rb_link_node(node, parent, link); -+ augment->propagate(parent, NULL); /* suboptimal */ -+ rb_insert_augmented_cached(node, tree, leftmost, augment); -+ -+ return leftmost ? node : NULL; -+} -+ - /* - * Template for declaring augmented rbtree callbacks (generic case) - * diff --git a/include/linux/sched.h b/include/linux/sched.h -index eed5d65b8d1f..63ac38d66ec6 100644 +index 63ac38d66..63a2205a5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -550,13 +550,18 @@ struct sched_entity { - /* For load-balancing: */ - struct load_weight load; - struct rb_node run_node; -+ u64 deadline; -+ u64 min_deadline; -+ - struct list_head group_node; - unsigned int on_rq; - - u64 exec_start; +@@ -560,6 +560,12 @@ struct sched_entity { u64 sum_exec_runtime; -- u64 vruntime; u64 prev_sum_exec_runtime; -+ u64 vruntime; -+ s64 vlag; -+ u64 slice; + u64 vruntime; ++#ifdef CONFIG_SCHED_BORE ++ u64 prev_burst_time; ++ u64 burst_time; ++ u64 max_burst_time; ++ u8 penalty_score; ++#endif // CONFIG_SCHED_BORE + s64 vlag; + u64 slice; - u64 nr_migrations; +@@ -991,6 +997,10 @@ struct task_struct { + struct list_head children; + struct list_head sibling; + struct task_struct *group_leader; ++#ifdef CONFIG_SCHED_BORE ++ u64 child_burst_cache; ++ u64 child_burst_last_cached; ++#endif // CONFIG_SCHED_BORE -@@ -786,6 +791,7 @@ struct task_struct { - int static_prio; - int normal_prio; - unsigned int rt_priority; -+ int latency_prio; + /* + * 'ptraced' is the list of tasks this task is using ptrace() on. +diff --git a/init/Kconfig b/init/Kconfig +index 0147b4a33..4ab7e154b 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -1290,6 +1290,26 @@ config CHECKPOINT_RESTORE - struct sched_entity se; - struct sched_rt_entity rt; -diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h -index 3bac0a8ceab2..b2e932c25be6 100644 ---- a/include/uapi/linux/sched.h -+++ b/include/uapi/linux/sched.h -@@ -132,6 +132,7 @@ struct clone_args { - #define SCHED_FLAG_KEEP_PARAMS 0x10 - #define SCHED_FLAG_UTIL_CLAMP_MIN 0x20 - #define SCHED_FLAG_UTIL_CLAMP_MAX 0x40 -+#define SCHED_FLAG_LATENCY_NICE 0x80 + If unsure, say N here. 
- #define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \ - SCHED_FLAG_KEEP_PARAMS) -@@ -143,6 +144,7 @@ struct clone_args { - SCHED_FLAG_RECLAIM | \ - SCHED_FLAG_DL_OVERRUN | \ - SCHED_FLAG_KEEP_ALL | \ -- SCHED_FLAG_UTIL_CLAMP) -+ SCHED_FLAG_UTIL_CLAMP | \ -+ SCHED_FLAG_LATENCY_NICE) - - #endif /* _UAPI_LINUX_SCHED_H */ -diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h -index f2c4589d4dbf..db1e8199e8c8 100644 ---- a/include/uapi/linux/sched/types.h -+++ b/include/uapi/linux/sched/types.h -@@ -10,6 +10,7 @@ struct sched_param { - - #define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */ - #define SCHED_ATTR_SIZE_VER1 56 /* add: util_{min,max} */ -+#define SCHED_ATTR_SIZE_VER2 60 /* add: latency_nice */ - - /* - * Extended scheduling parameters data structure. -@@ -98,6 +99,22 @@ struct sched_param { - * scheduled on a CPU with no more capacity than the specified value. - * - * A task utilization boundary can be reset by setting the attribute to -1. -+ * -+ * Latency Tolerance Attributes -+ * =========================== -+ * -+ * A subset of sched_attr attributes allows to specify the relative latency -+ * requirements of a task with respect to the other tasks running/queued in the -+ * system. -+ * -+ * @ sched_latency_nice task's latency_nice value -+ * -+ * The latency_nice of a task can have any value in a range of -+ * [MIN_LATENCY_NICE..MAX_LATENCY_NICE]. -+ * -+ * A task with latency_nice with the value of LATENCY_NICE_MIN can be -+ * taken for a task requiring a lower latency as opposed to the task with -+ * higher latency_nice. - */ - struct sched_attr { - __u32 size; -@@ -120,6 +137,8 @@ struct sched_attr { - __u32 sched_util_min; - __u32 sched_util_max; - -+ /* latency requirement hints */ -+ __s32 sched_latency_nice; - }; - - #endif /* _UAPI_LINUX_SCHED_TYPES_H */ -diff --git a/init/init_task.c b/init/init_task.c -index ff6c4b9bfe6b..511cbcf3510d 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -78,6 +78,7 @@ struct task_struct init_task - .prio = MAX_PRIO - 20, - .static_prio = MAX_PRIO - 20, - .normal_prio = MAX_PRIO - 20, -+ .latency_prio = DEFAULT_PRIO, - .policy = SCHED_NORMAL, - .cpus_ptr = &init_task.cpus_mask, - .user_cpus_ptr = NULL, -@@ -89,7 +90,7 @@ struct task_struct init_task - .fn = do_no_restart_syscall, - }, - .se = { -- .group_node = LIST_HEAD_INIT(init_task.se.group_node), -+ .group_node = LIST_HEAD_INIT(init_task.se.group_node), - }, - .rt = { - .run_list = LIST_HEAD_INIT(init_task.rt.run_list), ++config SCHED_BORE ++ bool "Burst-Oriented Response Enhancer" ++ default y ++ help ++ In Desktop and Mobile computing, one might prefer interactive ++ tasks to keep responsive no matter what they run in the background. ++ ++ Enabling this kernel feature modifies the scheduler to discriminate ++ tasks by their burst time (runtime since it last went sleeping or ++ yielding state) and prioritize those that run less bursty. ++ Such tasks usually include window compositor, widgets backend, ++ terminal emulator, video playback, games and so on. ++ With a little impact to scheduling fairness, it may improve ++ responsiveness especially under heavy background workload. ++ ++ You can turn it off by setting the sysctl kernel.sched_bore = 0. ++ Enabling this feature implies NO_GENTLE_FAIR_SLEEPERS by default. ++ ++ If unsure say Y here. 
++ + config SCHED_AUTOGROUP + bool "Automatic process group scheduling" + select CGROUPS diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index bcb3a7e684ca..3bcb77b00e5b 100644 +index 3bcb77b00..c7ceffc14 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1305,6 +1305,12 @@ static void set_load_weight(struct task_struct *p, bool update_load) - } +@@ -4490,6 +4490,57 @@ int wake_up_state(struct task_struct *p, unsigned int state) + return try_to_wake_up(p, state, 0); } -+static inline void set_latency_prio(struct task_struct *p, int prio) -+{ -+ p->latency_prio = prio; -+ set_latency_fair(&p->se, prio - MAX_RT_PRIO); ++#ifdef CONFIG_SCHED_BORE ++#define CHILD_BURST_CUTOFF_BITS 9 ++extern unsigned int sched_burst_cache_lifetime; ++ ++void __init sched_init_bore(void) { ++ init_task.child_burst_cache = 0; ++ init_task.child_burst_last_cached = 0; ++ init_task.se.prev_burst_time = 0; ++ init_task.se.burst_time = 0; ++ init_task.se.max_burst_time = 0; +} + - #ifdef CONFIG_UCLAMP_TASK ++void inline __sched_fork_bore(struct task_struct *p) { ++ p->child_burst_cache = 0; ++ p->child_burst_last_cached = 0; ++ p->se.burst_time = 0; ++} ++ ++static inline void update_task_child_burst_time_cache(struct task_struct *p) { ++ u64 sum = 0, avg_burst_time = 0; ++ u32 cnt = 0; ++ struct task_struct *child; ++ ++ read_lock(&tasklist_lock); ++ list_for_each_entry(child, &p->children, sibling) { ++ cnt++; ++ sum += child->se.max_burst_time >> CHILD_BURST_CUTOFF_BITS; ++ } ++ read_unlock(&tasklist_lock); ++ ++ if (cnt) avg_burst_time = div_u64(sum, cnt) << CHILD_BURST_CUTOFF_BITS; ++ p->child_burst_cache = max(avg_burst_time, p->se.max_burst_time); ++} ++ ++static void update_task_initial_burst_time(struct task_struct *task) { ++ struct sched_entity *se = &task->se; ++ struct task_struct *par = task->real_parent; ++ u64 ktime = ktime_to_ns(ktime_get()); ++ ++ if (likely(par)) { ++ if (par->child_burst_last_cached + sched_burst_cache_lifetime < ktime) { ++ par->child_burst_last_cached = ktime; ++ update_task_child_burst_time_cache(par); ++ } ++ se->prev_burst_time = max(se->prev_burst_time, par->child_burst_cache); ++ } ++ ++ se->max_burst_time = se->prev_burst_time; ++} ++#endif // CONFIG_SCHED_BORE ++ /* - * Serializes updates of utilization clamp values -@@ -4500,8 +4506,11 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) + * Perform scheduler related setup for a newly forked process p. + * p is forked by current. +@@ -4506,6 +4557,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->se.prev_sum_exec_runtime = 0; p->se.nr_migrations = 0; p->se.vruntime = 0; -+ p->se.vlag = 0; ++#ifdef CONFIG_SCHED_BORE ++ __sched_fork_bore(p); ++#endif // CONFIG_SCHED_BORE + p->se.vlag = 0; INIT_LIST_HEAD(&p->se.group_node); -+ set_latency_prio(p, p->latency_prio); +@@ -4735,6 +4789,9 @@ late_initcall(sched_core_sysctl_init); + int sched_fork(unsigned long clone_flags, struct task_struct *p) + { + __sched_fork(clone_flags, p); ++#ifdef CONFIG_SCHED_BORE ++ update_task_initial_burst_time(p); ++#endif // CONFIG_SCHED_BORE + /* + * We mark the process as NEW here. 
This guarantees that + * nobody will actually run it, and a signal or other external +@@ -9955,6 +10012,11 @@ void __init sched_init(void) + BUG_ON(&dl_sched_class != &stop_sched_class + 1); + #endif + ++#ifdef CONFIG_SCHED_BORE ++ sched_init_bore(); ++ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 2.4.0 by Masahito Suzuki"); ++#endif // CONFIG_SCHED_BORE + + wait_bit_init(); + #ifdef CONFIG_FAIR_GROUP_SCHED - p->se.cfs_rq = NULL; - #endif -@@ -4753,6 +4762,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) - - p->prio = p->normal_prio = p->static_prio; - set_load_weight(p, false); -+ set_latency_prio(p, NICE_TO_PRIO(0)); - - /* - * We don't need the reset flag anymore after the fork. It has -@@ -7512,7 +7522,7 @@ static struct task_struct *find_process_by_pid(pid_t pid) - #define SETPARAM_POLICY -1 - - static void __setscheduler_params(struct task_struct *p, -- const struct sched_attr *attr) -+ const struct sched_attr *attr) - { - int policy = attr->sched_policy; - -@@ -7536,6 +7546,13 @@ static void __setscheduler_params(struct task_struct *p, - set_load_weight(p, true); - } - -+static void __setscheduler_latency(struct task_struct *p, -+ const struct sched_attr *attr) -+{ -+ if (attr->sched_flags & SCHED_FLAG_LATENCY_NICE) -+ set_latency_prio(p, NICE_TO_PRIO(attr->sched_latency_nice)); -+} -+ - /* - * Check the target process has a UID that matches the current process's: - */ -@@ -7676,6 +7693,13 @@ static int __sched_setscheduler(struct task_struct *p, - return retval; - } - -+ if (attr->sched_flags & SCHED_FLAG_LATENCY_NICE) { -+ if (attr->sched_latency_nice > MAX_NICE) -+ return -EINVAL; -+ if (attr->sched_latency_nice < MIN_NICE) -+ return -EINVAL; -+ } -+ - if (pi) - cpuset_read_lock(); - -@@ -7710,6 +7734,9 @@ static int __sched_setscheduler(struct task_struct *p, - goto change; - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) - goto change; -+ if (attr->sched_flags & SCHED_FLAG_LATENCY_NICE && -+ attr->sched_latency_nice != PRIO_TO_NICE(p->latency_prio)) -+ goto change; - - p->sched_reset_on_fork = reset_on_fork; - retval = 0; -@@ -7798,6 +7825,7 @@ static int __sched_setscheduler(struct task_struct *p, - __setscheduler_params(p, attr); - __setscheduler_prio(p, newprio); - } -+ __setscheduler_latency(p, attr); - __setscheduler_uclamp(p, attr); - - if (queued) { -@@ -8008,6 +8036,9 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a - size < SCHED_ATTR_SIZE_VER1) - return -EINVAL; - -+ if ((attr->sched_flags & SCHED_FLAG_LATENCY_NICE) && -+ size < SCHED_ATTR_SIZE_VER2) -+ return -EINVAL; - /* - * XXX: Do we want to be lenient like existing syscalls; or do we want - * to be strict and return an error on out-of-bounds values? 
-@@ -8245,6 +8276,8 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, - get_params(p, &kattr); - kattr.sched_flags &= SCHED_FLAG_ALL; - -+ kattr.sched_latency_nice = PRIO_TO_NICE(p->latency_prio); -+ - #ifdef CONFIG_UCLAMP_TASK - /* - * This could race with another potential updater, but this is fine -@@ -11181,6 +11214,25 @@ static int cpu_idle_write_s64(struct cgroup_subsys_state *css, - { - return sched_group_set_idle(css_tg(css), idle); - } -+ -+static s64 cpu_latency_nice_read_s64(struct cgroup_subsys_state *css, -+ struct cftype *cft) -+{ -+ return PRIO_TO_NICE(css_tg(css)->latency_prio); -+} -+ -+static int cpu_latency_nice_write_s64(struct cgroup_subsys_state *css, -+ struct cftype *cft, s64 nice) -+{ -+ int prio; -+ -+ if (nice < MIN_NICE || nice > MAX_NICE) -+ return -ERANGE; -+ -+ prio = NICE_TO_PRIO(nice); -+ -+ return sched_group_set_latency(css_tg(css), prio); -+} - #endif - - static struct cftype cpu_legacy_files[] = { -@@ -11195,6 +11247,11 @@ static struct cftype cpu_legacy_files[] = { - .read_s64 = cpu_idle_read_s64, - .write_s64 = cpu_idle_write_s64, - }, -+ { -+ .name = "latency.nice", -+ .read_s64 = cpu_latency_nice_read_s64, -+ .write_s64 = cpu_latency_nice_write_s64, -+ }, - #endif - #ifdef CONFIG_CFS_BANDWIDTH - { -@@ -11412,6 +11469,12 @@ static struct cftype cpu_files[] = { - .read_s64 = cpu_idle_read_s64, - .write_s64 = cpu_idle_write_s64, - }, -+ { -+ .name = "latency.nice", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .read_s64 = cpu_latency_nice_read_s64, -+ .write_s64 = cpu_latency_nice_write_s64, -+ }, - #endif - #ifdef CONFIG_CFS_BANDWIDTH - { diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index 066ff1c8ae4e..e7e83181fbb6 100644 +index e7e83181f..c29500314 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c -@@ -347,10 +347,7 @@ static __init int sched_init_debug(void) - debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); +@@ -348,6 +348,7 @@ static __init int sched_init_debug(void) #endif -- debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency); -- debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity); -- debugfs_create_u32("idle_min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_idle_min_granularity); -- debugfs_create_u32("wakeup_granularity_ns", 0644, debugfs_sched, &sysctl_sched_wakeup_granularity); -+ debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice); + debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice); ++ debugfs_create_u32("wakeup_granularity_ns", 0644, debugfs_sched, &sysctl_sched_wakeup_granularity); debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms); debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once); -@@ -581,9 +578,13 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) - else - SEQ_printf(m, " %c", task_state_to_char(p)); +@@ -594,6 +595,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) + SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)), + SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime))); -- SEQ_printf(m, " %15s %5d %9Ld.%06ld %9Ld %5d ", -+ SEQ_printf(m, "%15s %5d %9Ld.%06ld %c %9Ld.%06ld %9Ld.%06ld %9Ld.%06ld %9Ld %5d ", - p->comm, task_pid_nr(p), - SPLIT_NS(p->se.vruntime), -+ entity_eligible(cfs_rq_of(&p->se), &p->se) ? 
'E' : 'N', -+ SPLIT_NS(p->se.deadline), -+ SPLIT_NS(p->se.slice), -+ SPLIT_NS(p->se.sum_exec_runtime), - (long long)(p->nvcsw + p->nivcsw), - p->prio); - -@@ -626,10 +627,9 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) - - void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) - { -- s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, -- spread, rq0_min_vruntime, spread0; -+ s64 left_vruntime = -1, min_vruntime, right_vruntime = -1, spread; -+ struct sched_entity *last, *first; - struct rq *rq = cpu_rq(cpu); -- struct sched_entity *last; - unsigned long flags; - - #ifdef CONFIG_FAIR_GROUP_SCHED -@@ -643,26 +643,25 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) - SPLIT_NS(cfs_rq->exec_clock)); - - raw_spin_rq_lock_irqsave(rq, flags); -- if (rb_first_cached(&cfs_rq->tasks_timeline)) -- MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime; -+ first = __pick_first_entity(cfs_rq); -+ if (first) -+ left_vruntime = first->vruntime; - last = __pick_last_entity(cfs_rq); - if (last) -- max_vruntime = last->vruntime; -+ right_vruntime = last->vruntime; - min_vruntime = cfs_rq->min_vruntime; -- rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime; - raw_spin_rq_unlock_irqrestore(rq, flags); -- SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", -- SPLIT_NS(MIN_vruntime)); -+ -+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "left_vruntime", -+ SPLIT_NS(left_vruntime)); - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime", - SPLIT_NS(min_vruntime)); -- SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "max_vruntime", -- SPLIT_NS(max_vruntime)); -- spread = max_vruntime - MIN_vruntime; -- SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread", -- SPLIT_NS(spread)); -- spread0 = min_vruntime - rq0_min_vruntime; -- SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", -- SPLIT_NS(spread0)); -+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "avg_vruntime", -+ SPLIT_NS(avg_vruntime(cfs_rq))); -+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "right_vruntime", -+ SPLIT_NS(right_vruntime)); -+ spread = right_vruntime - left_vruntime; -+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread)); - SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", - cfs_rq->nr_spread_over); - SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running); -@@ -863,10 +862,7 @@ static void sched_debug_header(struct seq_file *m) - SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x)) - #define PN(x) \ - SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) -- PN(sysctl_sched_latency); -- PN(sysctl_sched_min_granularity); -- PN(sysctl_sched_idle_min_granularity); -- PN(sysctl_sched_wakeup_granularity); -+ PN(sysctl_sched_base_slice); - P(sysctl_sched_child_runs_first); - P(sysctl_sched_features); - #undef PN -@@ -1089,6 +1085,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, ++#ifdef CONFIG_SCHED_BORE ++ SEQ_printf(m, " %2d", p->se.penalty_score); ++#endif + #ifdef CONFIG_NUMA_BALANCING + SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); #endif - P(policy); - P(prio); -+ P(latency_prio); - if (task_has_dl_policy(p)) { - P(dl.runtime); - P(dl.deadline); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 9fe8288b1b1f..97678b9b4023 100644 +index 97678b9b4..b0acc7126 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -47,6 +47,7 @@ - #include - #include - #include -+#include - - #include - -@@ -56,26 +57,6 @@ - #include "stats.h" - #include "autogroup.h" - --/* -- * Targeted preemption latency for CPU-bound tasks: -- * -- * NOTE: this latency 
value is not the same as the concept of -- * 'timeslice length' - timeslices in CFS are of variable length -- * and have no persistent notion like in traditional, time-slice -- * based scheduling concepts. -- * -- * (to see the precise effective timeslice length of your workload, -- * run vmstat and monitor the context-switches (cs) field) -- * -- * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds) -- */ --#ifdef CONFIG_CACHY --unsigned int sysctl_sched_latency = 3000000ULL; --static unsigned int normalized_sysctl_sched_latency = 3000000ULL; --#else --unsigned int sysctl_sched_latency = 6000000ULL; --static unsigned int normalized_sysctl_sched_latency = 6000000ULL; --#endif - /* - * The initial- and re-scaling of tunables is configurable +@@ -19,6 +19,9 @@ * -@@ -94,26 +75,8 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; - * - * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) + * Adaptive scheduling granularity, math enhancements by Peter Zijlstra + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra ++ * ++ * Burst-Oriented Response Enhancer (BORE) CPU Scheduler ++ * Copyright (C) 2021-2023 Masahito Suzuki */ --#ifdef CONFIG_CACHY --unsigned int sysctl_sched_min_granularity = 400000ULL; --static unsigned int normalized_sysctl_sched_min_granularity = 400000ULL; --#else --unsigned int sysctl_sched_min_granularity = 750000ULL; --static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; --#endif -- --/* -- * Minimal preemption granularity for CPU-bound SCHED_IDLE tasks. -- * Applies only when SCHED_IDLE tasks compete with normal tasks. -- * -- * (default: 0.75 msec) -- */ --unsigned int sysctl_sched_idle_min_granularity = 750000ULL; -- --/* -- * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity -- */ --static unsigned int sched_nr_latency = 8; -+unsigned int sysctl_sched_base_slice = 750000ULL; -+static unsigned int normalized_sysctl_sched_base_slice = 750000ULL; + #include + #include +@@ -66,17 +69,17 @@ + * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus) + * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus + * +- * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) ++ * (default SCHED_TUNABLESCALING_NONE = *1) + */ +-unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; ++unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; + + /* + * Minimal preemption granularity for CPU-bound tasks: + * +- * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) ++ * (default: 3 msec * 1, units: nanoseconds) + */ +-unsigned int sysctl_sched_base_slice = 750000ULL; +-static unsigned int normalized_sysctl_sched_base_slice = 750000ULL; ++unsigned int sysctl_sched_base_slice = 3000000ULL; ++static unsigned int normalized_sysctl_sched_base_slice = 3000000ULL; /* * After fork, child runs first. If set to 0 (default) then -@@ -121,23 +84,6 @@ static unsigned int sched_nr_latency = 8; +@@ -84,8 +87,75 @@ static unsigned int normalized_sysctl_sched_base_slice = 750000ULL; */ unsigned int sysctl_sched_child_runs_first __read_mostly; --/* -- * SCHED_OTHER wake-up granularity. -- * -- * This option delays the preemption effects of decoupled workloads -- * and reduces their over-scheduling. Synchronous workloads will still -- * have immediate wakeup/sleep latencies. 
-- * -- * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) -- */ --#ifdef CONFIG_CACHY --unsigned int sysctl_sched_wakeup_granularity = 500000UL; --static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL; --#else --unsigned int sysctl_sched_wakeup_granularity = 1000000UL; --static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; --#endif -- ++/* ++ * SCHED_OTHER wake-up granularity. ++ * ++ * This option delays the preemption effects of decoupled workloads ++ * and reduces their over-scheduling. Synchronous workloads will still ++ * have immediate wakeup/sleep latencies. ++ * ++ * (default: 3.2 msec * 1, units: nanoseconds) ++ */ ++unsigned int sysctl_sched_wakeup_granularity = 3200000UL; ++static unsigned int normalized_sysctl_sched_wakeup_granularity = 3200000UL; ++ const_debug unsigned int sysctl_sched_migration_cost = 500000UL; ++#ifdef CONFIG_SCHED_BORE ++unsigned int __read_mostly sched_bore = 1; ++unsigned int __read_mostly sched_burst_cache_lifetime = 15000000; ++unsigned int __read_mostly sched_burst_penalty_offset = 12; ++unsigned int __read_mostly sched_burst_penalty_scale = 1292; ++unsigned int __read_mostly sched_burst_smoothness = 1; ++static int three = 3; ++static int sixty_four = 64; ++static int maxval_12_bits = 4095; ++ ++#define FIXED_SHIFT 10 ++#define FIXED_ONE (1 << FIXED_SHIFT) ++typedef u32 fixed; ++ ++static void update_burst_score(struct sched_entity *se) { ++ u64 burst_time = se->max_burst_time; ++ ++ int msb = fls64(burst_time); ++ fixed integer_part = msb << FIXED_SHIFT; ++ fixed fractional_part = burst_time << (64 - msb) << 1 >> (64 - FIXED_SHIFT); ++ fixed greed = integer_part | fractional_part; ++ ++ fixed tolerance = sched_burst_penalty_offset << FIXED_SHIFT; ++ fixed penalty = max(0, (s32)greed - (s32)tolerance); ++ fixed scaled_penalty = penalty * sched_burst_penalty_scale >> 10; ++ ++ u8 score = min(39U, scaled_penalty >> FIXED_SHIFT); ++ se->penalty_score = score; ++} ++ ++static inline u64 penalty_scale(u64 delta, struct sched_entity *se) { ++ return mul_u64_u32_shr(delta, sched_prio_to_wmult[se->penalty_score], 22); ++} ++ ++static inline u64 __binary_smooth(u64 new, u64 old, unsigned int smoothness) { ++ return (new + old * ((1 << smoothness) - 1)) >> smoothness; ++} ++ ++void restart_burst(struct sched_entity *se) { ++ se->max_burst_time = se->prev_burst_time = __binary_smooth( ++ se->burst_time, se->prev_burst_time, sched_burst_smoothness); ++ se->burst_time = 0; ++} ++ ++#define calc_delta_fair(delta, se) __calc_delta_fair(delta, se, true) ++#define calc_delta_fair_unscaled(delta, se) __calc_delta_fair(delta, se, false) ++static inline u64 ++__calc_delta_fair(u64 delta, struct sched_entity *se, bool bscale); ++ ++static s64 wakeup_preempt_backstep_delta(u64 rtime, struct sched_entity *se) { ++ u64 delta = calc_delta_fair_unscaled(rtime, se); ++ return delta - penalty_scale(delta, se); ++} ++#endif // CONFIG_SCHED_BORE ++ int sched_thermal_decay_shift; -@@ -189,12 +135,8 @@ int __weak arch_asym_cpu_priority(int cpu) - * - * (default: 5 msec, units: microseconds) - */ --#ifdef CONFIG_CACHY --static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; --#else - static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; - #endif --#endif - - #ifdef CONFIG_NUMA_BALANCING - /* Restrict the NUMA promotion throughput (MB/s) for each target node. 
*/ -@@ -295,9 +237,7 @@ static void update_sysctl(void) + static int __init setup_sched_thermal_decay_shift(char *str) + { +@@ -145,6 +215,51 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536; + #ifdef CONFIG_SYSCTL + static struct ctl_table sched_fair_sysctls[] = { ++#ifdef CONFIG_SCHED_BORE ++ { ++ .procname = "sched_bore", ++ .data = &sched_bore, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_ONE, ++ }, ++ { ++ .procname = "sched_burst_cache_lifetime", ++ .data = &sched_burst_cache_lifetime, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++ { ++ .procname = "sched_burst_penalty_offset", ++ .data = &sched_burst_penalty_offset, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = &sixty_four, ++ }, ++ { ++ .procname = "sched_burst_penalty_scale", ++ .data = &sched_burst_penalty_scale, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = &maxval_12_bits, ++ }, ++ { ++ .procname = "sched_burst_smoothness", ++ .data = &sched_burst_smoothness, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = &three, ++ }, ++#endif // CONFIG_SCHED_BORE + { + .procname = "sched_child_runs_first", + .data = &sysctl_sched_child_runs_first, +@@ -238,6 +353,7 @@ static void update_sysctl(void) #define SET_SYSCTL(name) \ (sysctl_##name = (factor) * normalized_sysctl_##name) -- SET_SYSCTL(sched_min_granularity); -- SET_SYSCTL(sched_latency); -- SET_SYSCTL(sched_wakeup_granularity); -+ SET_SYSCTL(sched_base_slice); + SET_SYSCTL(sched_base_slice); ++ SET_SYSCTL(sched_wakeup_granularity); #undef SET_SYSCTL } -@@ -365,6 +305,16 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight - return mul_u64_u32_shr(delta_exec, fact, shift); - } - -+/* -+ * delta /= w -+ */ -+static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se) -+{ -+ if (unlikely(se->load.weight != NICE_0_LOAD)) -+ delta = __calc_delta(delta, NICE_0_LOAD, &se->load); -+ -+ return delta; -+} - - const struct sched_class fair_sched_class; - -@@ -619,13 +569,200 @@ static inline bool entity_before(const struct sched_entity *a, - return (s64)(a->vruntime - b->vruntime) < 0; - } - -+static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se) -+{ -+ return (s64)(se->vruntime - cfs_rq->min_vruntime); -+} -+ - #define __node_2_se(node) \ - rb_entry((node), struct sched_entity, run_node) - -+/* -+ * Compute virtual time from the per-task service numbers: -+ * -+ * Fair schedulers conserve lag: -+ * -+ * \Sum lag_i = 0 -+ * -+ * Where lag_i is given by: -+ * -+ * lag_i = S - s_i = w_i * (V - v_i) -+ * -+ * Where S is the ideal service time and V is it's virtual time counterpart. -+ * Therefore: -+ * -+ * \Sum lag_i = 0 -+ * \Sum w_i * (V - v_i) = 0 -+ * \Sum w_i * V - w_i * v_i = 0 -+ * -+ * From which we can solve an expression for V in v_i (which we have in -+ * se->vruntime): -+ * -+ * \Sum v_i * w_i \Sum v_i * w_i -+ * V = -------------- = -------------- -+ * \Sum w_i W -+ * -+ * Specifically, this is the weighted average of all entity virtual runtimes. 
-+ * -+ * [[ NOTE: this is only equal to the ideal scheduler under the condition -+ * that join/leave operations happen at lag_i = 0, otherwise the -+ * virtual time has non-continguous motion equivalent to: -+ * -+ * V +-= lag_i / W -+ * -+ * Also see the comment in place_entity() that deals with this. ]] -+ * -+ * However, since v_i is u64, and the multiplcation could easily overflow -+ * transform it into a relative form that uses smaller quantities: -+ * -+ * Substitute: v_i == (v_i - v0) + v0 -+ * -+ * \Sum ((v_i - v0) + v0) * w_i \Sum (v_i - v0) * w_i -+ * V = ---------------------------- = --------------------- + v0 -+ * W W -+ * -+ * Which we track using: -+ * -+ * v0 := cfs_rq->min_vruntime -+ * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime -+ * \Sum w_i := cfs_rq->avg_load -+ * -+ * Since min_vruntime is a monotonic increasing variable that closely tracks -+ * the per-task service, these deltas: (v_i - v), will be in the order of the -+ * maximal (virtual) lag induced in the system due to quantisation. -+ * -+ * Also, we use scale_load_down() to reduce the size. -+ * -+ * As measured, the max (key * weight) value was ~44 bits for a kernel build. -+ */ -+static void -+avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se) -+{ -+ unsigned long weight = scale_load_down(se->load.weight); -+ s64 key = entity_key(cfs_rq, se); -+ -+ cfs_rq->avg_vruntime += key * weight; -+ cfs_rq->avg_slice += se->slice * weight; -+ cfs_rq->avg_load += weight; -+} -+ -+static void -+avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se) -+{ -+ unsigned long weight = scale_load_down(se->load.weight); -+ s64 key = entity_key(cfs_rq, se); -+ -+ cfs_rq->avg_vruntime -= key * weight; -+ cfs_rq->avg_slice -= se->slice * weight; -+ cfs_rq->avg_load -= weight; -+} -+ -+static inline -+void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta) -+{ -+ /* -+ * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load -+ */ -+ cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta; -+} -+ -+u64 avg_vruntime(struct cfs_rq *cfs_rq) -+{ -+ struct sched_entity *curr = cfs_rq->curr; -+ s64 avg = cfs_rq->avg_vruntime; -+ long load = cfs_rq->avg_load; -+ -+ if (curr && curr->on_rq) { -+ unsigned long weight = scale_load_down(curr->load.weight); -+ -+ avg += entity_key(cfs_rq, curr) * weight; -+ load += weight; -+ } -+ -+ if (load) -+ avg = div_s64(avg, load); -+ -+ return cfs_rq->min_vruntime + avg; -+} -+ -+/* -+ * lag_i = S - s_i = w_i * (V - v_i) -+ * -+ * However, since V is approximated by the weighted average of all entities it -+ * is possible -- by addition/removal/reweight to the tree -- to move V around -+ * and end up with a larger lag than we started with. -+ * -+ * Limit this to either double the slice length with a minimum of TICK_NSEC -+ * since that is the timing granularity. -+ * -+ * EEVDF gives the following limit for a steady state system: -+ * -+ * -r_max < lag < max(r_max, q) -+ * -+ * XXX could add max_slice to the augmented data to track this. -+ */ -+void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) -+{ -+ s64 lag, limit; -+ -+ SCHED_WARN_ON(!se->on_rq); -+ lag = avg_vruntime(cfs_rq) - se->vruntime; -+ -+ limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); -+ se->vlag = clamp(lag, -limit, limit); -+} -+ -+/* -+ * Entity is eligible once it received less service than it ought to have, -+ * eg. lag >= 0. 
-+ * -+ * lag_i = S - s_i = w_i*(V - v_i) -+ * -+ * lag_i >= 0 -> V >= v_i -+ * -+ * \Sum (v_i - v)*w_i -+ * V = ------------------ + v -+ * \Sum w_i -+ * -+ * lag_i >= 0 -> \Sum (v_i - v)*w_i >= (v_i - v)*(\Sum w_i) -+ * -+ * Note: using 'avg_vruntime() > se->vruntime' is inacurate due -+ * to the loss in precision caused by the division. -+ */ -+int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se) -+{ -+ struct sched_entity *curr = cfs_rq->curr; -+ s64 avg = cfs_rq->avg_vruntime; -+ long load = cfs_rq->avg_load; -+ -+ if (curr && curr->on_rq) { -+ unsigned long weight = scale_load_down(curr->load.weight); -+ -+ avg += entity_key(cfs_rq, curr) * weight; -+ load += weight; -+ } -+ -+ return avg >= entity_key(cfs_rq, se) * load; -+} -+ -+static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime) -+{ -+ u64 min_vruntime = cfs_rq->min_vruntime; -+ /* -+ * open coded max_vruntime() to allow updating avg_vruntime -+ */ -+ s64 delta = (s64)(vruntime - min_vruntime); -+ if (delta > 0) { -+ avg_vruntime_update(cfs_rq, delta); -+ min_vruntime = vruntime; -+ } -+ return min_vruntime; -+} -+ - static void update_min_vruntime(struct cfs_rq *cfs_rq) - { -+ struct sched_entity *se = __pick_first_entity(cfs_rq); - struct sched_entity *curr = cfs_rq->curr; -- struct rb_node *leftmost = rb_first_cached(&cfs_rq->tasks_timeline); - - u64 vruntime = cfs_rq->min_vruntime; - -@@ -636,9 +773,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) - curr = NULL; - } - -- if (leftmost) { /* non-empty tree */ -- struct sched_entity *se = __node_2_se(leftmost); -- -+ if (se) { - if (!curr) - vruntime = se->vruntime; - else -@@ -647,7 +782,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) - - /* ensure we never gain time by being placed backwards. 
*/ - u64_u32_store(cfs_rq->min_vruntime, -- max_vruntime(cfs_rq->min_vruntime, vruntime)); -+ __update_min_vruntime(cfs_rq, vruntime)); - } - - static inline bool __entity_less(struct rb_node *a, const struct rb_node *b) -@@ -655,17 +790,51 @@ static inline bool __entity_less(struct rb_node *a, const struct rb_node *b) - return entity_before(__node_2_se(a), __node_2_se(b)); - } - -+#define deadline_gt(field, lse, rse) ({ (s64)((lse)->field - (rse)->field) > 0; }) -+ -+static inline void __update_min_deadline(struct sched_entity *se, struct rb_node *node) -+{ -+ if (node) { -+ struct sched_entity *rse = __node_2_se(node); -+ if (deadline_gt(min_deadline, se, rse)) -+ se->min_deadline = rse->min_deadline; -+ } -+} -+ -+/* -+ * se->min_deadline = min(se->deadline, left->min_deadline, right->min_deadline) -+ */ -+static inline bool min_deadline_update(struct sched_entity *se, bool exit) -+{ -+ u64 old_min_deadline = se->min_deadline; -+ struct rb_node *node = &se->run_node; -+ -+ se->min_deadline = se->deadline; -+ __update_min_deadline(se, node->rb_right); -+ __update_min_deadline(se, node->rb_left); -+ -+ return se->min_deadline == old_min_deadline; -+} -+ -+RB_DECLARE_CALLBACKS(static, min_deadline_cb, struct sched_entity, -+ run_node, min_deadline, min_deadline_update); -+ +@@ -308,11 +424,19 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight /* - * Enqueue an entity into the rb-tree: + * delta /= w */ - static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) ++#ifdef CONFIG_SCHED_BORE ++static inline u64 ++__calc_delta_fair(u64 delta, struct sched_entity *se, bool bscale) ++#else // CONFIG_SCHED_BORE + static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se) ++#endif // CONFIG_SCHED_BORE { -- rb_add_cached(&se->run_node, &cfs_rq->tasks_timeline, __entity_less); -+ avg_vruntime_add(cfs_rq, se); -+ se->min_deadline = se->deadline; -+ rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline, -+ __entity_less, &min_deadline_cb); + if (unlikely(se->load.weight != NICE_0_LOAD)) + delta = __calc_delta(delta, NICE_0_LOAD, &se->load); + ++#ifdef CONFIG_SCHED_BORE ++ if (bscale && sched_bore) delta = penalty_scale(delta, se); ++#endif // CONFIG_SCHED_BORE + return delta; } - static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) - { -- rb_erase_cached(&se->run_node, &cfs_rq->tasks_timeline); -+ rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline, -+ &min_deadline_cb); -+ avg_vruntime_sub(cfs_rq, se); +@@ -708,7 +832,11 @@ void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) + SCHED_WARN_ON(!se->on_rq); + lag = avg_vruntime(cfs_rq) - se->vruntime; + ++#ifdef CONFIG_SCHED_BORE ++ limit = calc_delta_fair_unscaled(max_t(u64, 2*se->slice, TICK_NSEC), se); ++#else // CONFIG_SCHED_BORE + limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); ++#endif // CONFIG_SCHED_BORE + se->vlag = clamp(lag, -limit, limit); } - struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) -@@ -678,14 +847,81 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) - return __node_2_se(left); - } - --static struct sched_entity *__pick_next_entity(struct sched_entity *se) -+/* -+ * Earliest Eligible Virtual Deadline First -+ * -+ * In order to provide latency guarantees for different request sizes -+ * EEVDF selects the best runnable task from two criteria: -+ * -+ * 1) the task must be eligible (must be owed service) -+ * -+ * 2) from those tasks that meet 1), we select the 
one -+ * with the earliest virtual deadline. -+ * -+ * We can do this in O(log n) time due to an augmented RB-tree. The -+ * tree keeps the entries sorted on service, but also functions as a -+ * heap based on the deadline by keeping: -+ * -+ * se->min_deadline = min(se->deadline, se->{left,right}->min_deadline) -+ * -+ * Which allows an EDF like search on (sub)trees. -+ */ -+static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) - { -- struct rb_node *next = rb_next(&se->run_node); -+ struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node; -+ struct sched_entity *curr = cfs_rq->curr; -+ struct sched_entity *best = NULL; - -- if (!next) -- return NULL; -+ if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr))) -+ curr = NULL; -+ -+ while (node) { -+ struct sched_entity *se = __node_2_se(node); - -- return __node_2_se(next); -+ /* -+ * If this entity is not eligible, try the left subtree. -+ */ -+ if (!entity_eligible(cfs_rq, se)) { -+ node = node->rb_left; -+ continue; -+ } -+ -+ /* -+ * If this entity has an earlier deadline than the previous -+ * best, take this one. If it also has the earliest deadline -+ * of its subtree, we're done. -+ */ -+ if (!best || deadline_gt(deadline, best, se)) { -+ best = se; -+ if (best->deadline == best->min_deadline) -+ break; -+ } -+ -+ /* -+ * If the earlest deadline in this subtree is in the fully -+ * eligible left half of our space, go there. -+ */ -+ if (node->rb_left && -+ __node_2_se(node->rb_left)->min_deadline == se->min_deadline) { -+ node = node->rb_left; -+ continue; -+ } -+ -+ node = node->rb_right; -+ } -+ -+ if (!best || (curr && deadline_gt(deadline, best, curr))) -+ best = curr; -+ -+ if (unlikely(!best)) { -+ struct sched_entity *left = __pick_first_entity(cfs_rq); -+ if (left) { -+ pr_err("EEVDF scheduling fail, picking leftmost\n"); -+ return left; -+ } -+ } -+ -+ return best; - } - - #ifdef CONFIG_SCHED_DEBUG -@@ -707,104 +943,53 @@ int sched_update_scaling(void) - { - unsigned int factor = get_update_sysctl_factor(); - -- sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency, -- sysctl_sched_min_granularity); -- +@@ -946,6 +1074,7 @@ int sched_update_scaling(void) #define WRT_SYSCTL(name) \ (normalized_sysctl_##name = sysctl_##name / (factor)) -- WRT_SYSCTL(sched_min_granularity); -- WRT_SYSCTL(sched_latency); -- WRT_SYSCTL(sched_wakeup_granularity); -+ WRT_SYSCTL(sched_base_slice); + WRT_SYSCTL(sched_base_slice); ++ WRT_SYSCTL(sched_wakeup_granularity); #undef WRT_SYSCTL return 0; - } - #endif - --/* -- * delta /= w -- */ --static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se) -+void set_latency_fair(struct sched_entity *se, int prio) - { -- if (unlikely(se->load.weight != NICE_0_LOAD)) -- delta = __calc_delta(delta, NICE_0_LOAD, &se->load); -+ u32 weight = sched_prio_to_weight[prio]; -+ u64 base = sysctl_sched_base_slice; - -- return delta; --} -- --/* -- * The idea is to set a period in which each task runs once. -- * -- * When there are too many tasks (sched_nr_latency) we have to stretch -- * this period because otherwise the slices get too small. -- * -- * p = (nr <= nl) ? l : l*nr/nl -- */ --static u64 __sched_period(unsigned long nr_running) --{ -- if (unlikely(nr_running > sched_nr_latency)) -- return nr_running * sysctl_sched_min_granularity; -- else -- return sysctl_sched_latency; -+ /* -+ * For EEVDF the virtual time slope is determined by w_i (iow. -+ * nice) while the request time r_i is determined by -+ * latency-nice. -+ * -+ * Smaller request gets better latency. 
-+ */ -+ se->slice = div_u64(base << SCHED_FIXEDPOINT_SHIFT, weight); - } - --static bool sched_idle_cfs_rq(struct cfs_rq *cfs_rq); -+static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se); - - /* -- * We calculate the wall-time slice from the period by taking a part -- * proportional to the weight. -- * -- * s = p*P[w/rw] -+ * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i -+ * this is probably good enough. - */ --static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) -+static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se) - { -- unsigned int nr_running = cfs_rq->nr_running; -- struct sched_entity *init_se = se; -- unsigned int min_gran; -- u64 slice; -- -- if (sched_feat(ALT_PERIOD)) -- nr_running = rq_of(cfs_rq)->cfs.h_nr_running; -- -- slice = __sched_period(nr_running + !se->on_rq); -- -- for_each_sched_entity(se) { -- struct load_weight *load; -- struct load_weight lw; -- struct cfs_rq *qcfs_rq; -- -- qcfs_rq = cfs_rq_of(se); -- load = &qcfs_rq->load; -- -- if (unlikely(!se->on_rq)) { -- lw = qcfs_rq->load; -- -- update_load_add(&lw, se->load.weight); -- load = &lw; -- } -- slice = __calc_delta(slice, se->load.weight, load); -- } -+ if ((s64)(se->vruntime - se->deadline) < 0) -+ return; - -- if (sched_feat(BASE_SLICE)) { -- if (se_is_idle(init_se) && !sched_idle_cfs_rq(cfs_rq)) -- min_gran = sysctl_sched_idle_min_granularity; -- else -- min_gran = sysctl_sched_min_granularity; -+ /* -+ * EEVDF: vd_i = ve_i + r_i / w_i -+ */ -+ se->deadline = se->vruntime + calc_delta_fair(se->slice, se); - -- slice = max_t(u64, slice, min_gran); -+ /* -+ * The task has consumed its request, reschedule. -+ */ -+ if (cfs_rq->nr_running > 1) { -+ resched_curr(rq_of(cfs_rq)); -+ clear_buddies(cfs_rq, se); - } -- -- return slice; --} -- --/* -- * We calculate the vruntime slice of a to-be-inserted task. -- * -- * vs = s/w -- */ --static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) --{ -- return calc_delta_fair(sched_slice(cfs_rq, se), se); - } - - #include "pelt.h" -@@ -939,6 +1124,7 @@ static void update_curr(struct cfs_rq *cfs_rq) +@@ -1123,6 +1252,11 @@ static void update_curr(struct cfs_rq *cfs_rq) + curr->sum_exec_runtime += delta_exec; schedstat_add(cfs_rq->exec_clock, delta_exec); ++#ifdef CONFIG_SCHED_BORE ++ curr->burst_time += delta_exec; ++ curr->max_burst_time = max(curr->max_burst_time, curr->burst_time); ++ update_burst_score(curr); ++#endif // CONFIG_SCHED_BORE curr->vruntime += calc_delta_fair(delta_exec, curr); -+ update_deadline(cfs_rq, curr); + update_deadline(cfs_rq, curr); update_min_vruntime(cfs_rq); - - if (entity_is_task(curr)) { -@@ -3393,16 +3579,36 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } - static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, - unsigned long weight) - { -+ unsigned long old_weight = se->load.weight; -+ - if (se->on_rq) { - /* commit outstanding execution time */ - if (cfs_rq->curr == se) - update_curr(cfs_rq); -+ else -+ avg_vruntime_sub(cfs_rq, se); - update_load_sub(&cfs_rq->load, se->load.weight); - } - dequeue_load_avg(cfs_rq, se); - - update_load_set(&se->load, weight); - -+ if (!se->on_rq) { -+ /* -+ * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i), -+ * we need to scale se->vlag when w_i changes. 
-+ */ -+ se->vlag = div_s64(se->vlag * old_weight, weight); -+ } else { -+ s64 deadline = se->deadline - se->vruntime; -+ /* -+ * When the weight changes, the virtual time slope changes and -+ * we should adjust the relative virtual deadline accordingly. -+ */ -+ deadline = div_s64(deadline * old_weight, weight); -+ se->deadline = se->vruntime + deadline; -+ } -+ - #ifdef CONFIG_SMP - do { - u32 divider = get_pelt_divider(&se->avg); -@@ -3412,9 +3618,11 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, - #endif - - enqueue_load_avg(cfs_rq, se); -- if (se->on_rq) -+ if (se->on_rq) { - update_load_add(&cfs_rq->load, se->load.weight); -- -+ if (cfs_rq->curr != se) -+ avg_vruntime_add(cfs_rq, se); -+ } - } - - void reweight_task(struct task_struct *p, int prio) -@@ -4710,158 +4918,151 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {} - - #endif /* CONFIG_SMP */ - --static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) --{ --#ifdef CONFIG_SCHED_DEBUG -- s64 d = se->vruntime - cfs_rq->min_vruntime; -- -- if (d < 0) -- d = -d; -- -- if (d > 3*sysctl_sched_latency) -- schedstat_inc(cfs_rq->nr_spread_over); --#endif --} -- --static inline bool entity_is_long_sleeper(struct sched_entity *se) -+static inline bool -+entity_has_slept(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - { -- struct cfs_rq *cfs_rq; -- u64 sleep_time; -+ u64 now; - -- if (se->exec_start == 0) -+ if (!(flags & ENQUEUE_WAKEUP)) - return false; - -- cfs_rq = cfs_rq_of(se); -- -- sleep_time = rq_clock_task(rq_of(cfs_rq)); -- -- /* Happen while migrating because of clock task divergence */ -- if (sleep_time <= se->exec_start) -- return false; -- -- sleep_time -= se->exec_start; -- if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD))) -+ if (flags & ENQUEUE_MIGRATED) - return true; - -- return false; -+ now = rq_clock_task(rq_of(cfs_rq)); -+ return (s64)(se->exec_start - now) >= se->slice; - } - - static void --place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) -+place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - { -- u64 vruntime = cfs_rq->min_vruntime; -+ u64 vslice = calc_delta_fair(se->slice, se); -+ u64 vruntime = avg_vruntime(cfs_rq); -+ s64 lag = 0; - - /* -- * The 'current' period is already promised to the current tasks, -- * however the extra weight of the new task will slow them down a -- * little, place the new task so that it fits in the slot that -- * stays open at the end. -+ * Due to how V is constructed as the weighted average of entities, -+ * adding tasks with positive lag, or removing tasks with negative lag -+ * will move 'time' backwards, this can screw around with the lag of -+ * other tasks. -+ * -+ * EEVDF: placement strategy #1 / #2 - */ -- if (initial && sched_feat(START_DEBIT)) -- vruntime += sched_vslice(cfs_rq, se); -+ if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) { -+ struct sched_entity *curr = cfs_rq->curr; -+ unsigned long load; - -- /* sleeps up to a single latency don't count. */ -- if (!initial) { -- unsigned long thresh; -+ lag = se->vlag; - -- if (se_is_idle(se)) -- thresh = sysctl_sched_min_granularity; -- else -- thresh = sysctl_sched_latency; -+ /* -+ * For latency sensitive tasks; those that have a shorter than -+ * average slice and do not fully consume the slice, transition -+ * to EEVDF placement strategy #2. 
-+ */ -+ if (sched_feat(PLACE_FUDGE) && -+ (cfs_rq->avg_slice > se->slice * cfs_rq->avg_load) && -+ entity_has_slept(cfs_rq, se, flags)) { -+ lag += vslice; -+ if (lag > 0) -+ lag = 0; -+ } - - /* -- * Halve their sleep time's effect, to allow -- * for a gentler effect of sleepers: -+ * If we want to place a task and preserve lag, we have to -+ * consider the effect of the new entity on the weighted -+ * average and compensate for this, otherwise lag can quickly -+ * evaporate. -+ * -+ * Lag is defined as: -+ * -+ * lag_i = S - s_i = w_i * (V - v_i) -+ * -+ * To avoid the 'w_i' term all over the place, we only track -+ * the virtual lag: -+ * -+ * vl_i = V - v_i <=> v_i = V - vl_i -+ * -+ * And we take V to be the weighted average of all v: -+ * -+ * V = (\Sum w_j*v_j) / W -+ * -+ * Where W is: \Sum w_j -+ * -+ * Then, the weighted average after adding an entity with lag -+ * vl_i is given by: -+ * -+ * V' = (\Sum w_j*v_j + w_i*v_i) / (W + w_i) -+ * = (W*V + w_i*(V - vl_i)) / (W + w_i) -+ * = (W*V + w_i*V - w_i*vl_i) / (W + w_i) -+ * = (V*(W + w_i) - w_i*l) / (W + w_i) -+ * = V - w_i*vl_i / (W + w_i) -+ * -+ * And the actual lag after adding an entity with vl_i is: -+ * -+ * vl'_i = V' - v_i -+ * = V - w_i*vl_i / (W + w_i) - (V - vl_i) -+ * = vl_i - w_i*vl_i / (W + w_i) -+ * -+ * Which is strictly less than vl_i. So in order to preserve lag -+ * we should inflate the lag before placement such that the -+ * effective lag after placement comes out right. -+ * -+ * As such, invert the above relation for vl'_i to get the vl_i -+ * we need to use such that the lag after placement is the lag -+ * we computed before dequeue. -+ * -+ * vl'_i = vl_i - w_i*vl_i / (W + w_i) -+ * = ((W + w_i)*vl_i - w_i*vl_i) / (W + w_i) -+ * -+ * (W + w_i)*vl'_i = (W + w_i)*vl_i - w_i*vl_i -+ * = W*vl_i -+ * -+ * vl_i = (W + w_i)*vl'_i / W - */ -- if (sched_feat(GENTLE_FAIR_SLEEPERS)) -- thresh >>= 1; -- -- vruntime -= thresh; -- } -- -- /* -- * Pull vruntime of the entity being placed to the base level of -- * cfs_rq, to prevent boosting it if placed backwards. -- * However, min_vruntime can advance much faster than real time, with -- * the extreme being when an entity with the minimal weight always runs -- * on the cfs_rq. If the waking entity slept for a long time, its -- * vruntime difference from min_vruntime may overflow s64 and their -- * comparison may get inversed, so ignore the entity's original -- * vruntime in that case. -- * The maximal vruntime speedup is given by the ratio of normal to -- * minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES. -- * When placing a migrated waking entity, its exec_start has been set -- * from a different rq. In order to take into account a possible -- * divergence between new and prev rq's clocks task because of irq and -- * stolen time, we take an additional margin. -- * So, cutting off on the sleep time of -- * 2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days -- * should be safe. -- */ -- if (entity_is_long_sleeper(se)) -- se->vruntime = vruntime; -- else -- se->vruntime = max_vruntime(se->vruntime, vruntime); -+ load = cfs_rq->avg_load; -+ if (curr && curr->on_rq) -+ load += scale_load_down(curr->load.weight); -+ -+ lag *= load + scale_load_down(se->load.weight); -+ if (WARN_ON_ONCE(!load)) -+ load = 1; -+ lag = div_s64(lag, load); -+ } -+ -+ se->vruntime = vruntime - lag; -+ -+ /* -+ * When joining the competition; the exisiting tasks will be, -+ * on average, halfway through their slice, as such start tasks -+ * off with half a slice to ease into the competition. 
-+ */ -+ if (sched_feat(PLACE_DEADLINE_INITIAL) && (flags & ENQUEUE_INITIAL)) -+ vslice /= 2; -+ -+ /* -+ * EEVDF: vd_i = ve_i + r_i/w_i -+ */ -+ se->deadline = se->vruntime + vslice; - } - - static void check_enqueue_throttle(struct cfs_rq *cfs_rq); - - static inline bool cfs_bandwidth_used(void); - --/* -- * MIGRATION -- * -- * dequeue -- * update_curr() -- * update_min_vruntime() -- * vruntime -= min_vruntime -- * -- * enqueue -- * update_curr() -- * update_min_vruntime() -- * vruntime += min_vruntime -- * -- * this way the vruntime transition between RQs is done when both -- * min_vruntime are up-to-date. -- * -- * WAKEUP (remote) -- * -- * ->migrate_task_rq_fair() (p->state == TASK_WAKING) -- * vruntime -= min_vruntime -- * -- * enqueue -- * update_curr() -- * update_min_vruntime() -- * vruntime += min_vruntime -- * -- * this way we don't have the most up-to-date min_vruntime on the originating -- * CPU and an up-to-date min_vruntime on the destination CPU. -- */ -- - static void - enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - { -- bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED); - bool curr = cfs_rq->curr == se; - - /* - * If we're the current task, we must renormalise before calling - * update_curr(). - */ -- if (renorm && curr) -- se->vruntime += cfs_rq->min_vruntime; -+ if (curr) -+ place_entity(cfs_rq, se, flags); - - update_curr(cfs_rq); - -- /* -- * Otherwise, renormalise after, such that we're placed at the current -- * moment in time, instead of some random moment in the past. Being -- * placed in the past could significantly boost this task to the -- * fairness detriment of existing tasks. -- */ -- if (renorm && !curr) -- se->vruntime += cfs_rq->min_vruntime; -- - /* - * When enqueuing a sched_entity, we must: - * - Update loads to have both entity and cfs_rq synced with now. -@@ -4873,18 +5074,28 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - */ - update_load_avg(cfs_rq, se, UPDATE_TG | DO_ATTACH); - se_update_runnable(se); -+ /* -+ * XXX update_load_avg() above will have attached us to the pelt sum; -+ * but update_cfs_group() here will re-adjust the weight and have to -+ * undo/redo all that. Seems wasteful. -+ */ - update_cfs_group(se); -+ -+ /* -+ * XXX now that the entity has been re-weighted, and it's lag adjusted, -+ * we can place the entity. 
-+ */ -+ if (!curr) -+ place_entity(cfs_rq, se, flags); -+ - account_entity_enqueue(cfs_rq, se); - -- if (flags & ENQUEUE_WAKEUP) -- place_entity(cfs_rq, se, 0); - /* Entity has migrated, no longer consider this task hot */ - if (flags & ENQUEUE_MIGRATED) - se->exec_start = 0; - - check_schedstat_required(); - update_stats_enqueue_fair(cfs_rq, se, flags); -- check_spread(cfs_rq, se); - if (!curr) - __enqueue_entity(cfs_rq, se); - se->on_rq = 1; -@@ -4896,17 +5107,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - } - } - --static void __clear_buddies_last(struct sched_entity *se) --{ -- for_each_sched_entity(se) { -- struct cfs_rq *cfs_rq = cfs_rq_of(se); -- if (cfs_rq->last != se) -- break; -- -- cfs_rq->last = NULL; -- } --} -- - static void __clear_buddies_next(struct sched_entity *se) - { - for_each_sched_entity(se) { -@@ -4918,27 +5118,10 @@ static void __clear_buddies_next(struct sched_entity *se) - } - } - --static void __clear_buddies_skip(struct sched_entity *se) --{ -- for_each_sched_entity(se) { -- struct cfs_rq *cfs_rq = cfs_rq_of(se); -- if (cfs_rq->skip != se) -- break; -- -- cfs_rq->skip = NULL; -- } --} -- - static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) - { -- if (cfs_rq->last == se) -- __clear_buddies_last(se); -- - if (cfs_rq->next == se) - __clear_buddies_next(se); -- -- if (cfs_rq->skip == se) -- __clear_buddies_skip(se); - } - - static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); -@@ -4972,20 +5155,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - - clear_buddies(cfs_rq, se); - -+ update_entity_lag(cfs_rq, se); - if (se != cfs_rq->curr) - __dequeue_entity(cfs_rq, se); - se->on_rq = 0; - account_entity_dequeue(cfs_rq, se); - -- /* -- * Normalize after update_curr(); which will also have moved -- * min_vruntime if @se is the one holding it back. But before doing -- * update_min_vruntime() again, which will discount @se's position and -- * can move min_vruntime forward still more. -- */ -- if (!(flags & DEQUEUE_SLEEP)) -- se->vruntime -= cfs_rq->min_vruntime; -- - /* return excess runtime on last dequeue */ - return_cfs_rq_runtime(cfs_rq); - -@@ -5004,52 +5179,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - update_idle_cfs_rq_clock_pelt(cfs_rq); - } - --/* -- * Preempt the current task with a newly woken task if needed: -- */ --static void --check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) --{ -- unsigned long ideal_runtime, delta_exec; -- struct sched_entity *se; -- s64 delta; -- -- /* -- * When many tasks blow up the sched_period; it is possible that -- * sched_slice() reports unusually large results (when many tasks are -- * very light for example). Therefore impose a maximum. -- */ -- ideal_runtime = min_t(u64, sched_slice(cfs_rq, curr), sysctl_sched_latency); -- -- delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; -- if (delta_exec > ideal_runtime) { -- resched_curr(rq_of(cfs_rq)); -- /* -- * The current task ran long enough, ensure it doesn't get -- * re-elected due to buddy favours. -- */ -- clear_buddies(cfs_rq, curr); -- return; -- } -- -- /* -- * Ensure that a task that missed wakeup preemption by a -- * narrow margin doesn't have to wait for a full slice. -- * This also mitigates buddy induced latencies under load. 
-- */ -- if (delta_exec < sysctl_sched_min_granularity) -- return; -- -- se = __pick_first_entity(cfs_rq); -- delta = curr->vruntime - se->vruntime; -- -- if (delta < 0) -- return; -- -- if (delta > ideal_runtime) -- resched_curr(rq_of(cfs_rq)); --} -- - static void - set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) - { -@@ -5088,9 +5217,6 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) +@@ -5217,6 +5351,9 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) se->prev_sum_exec_runtime = se->sum_exec_runtime; } --static int --wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); -- ++static int ++wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); ++ /* * Pick the next process, keeping these things in mind, in this order: * 1) keep things fair between processes/task groups -@@ -5101,50 +5227,14 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); +@@ -5227,14 +5364,16 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) static struct sched_entity * pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) { -- struct sched_entity *left = __pick_first_entity(cfs_rq); -- struct sched_entity *se; -- ++ struct sched_entity *candidate = pick_eevdf(cfs_rq); /* -- * If curr is set we have to see if its left of the leftmost entity -- * still in the tree, provided there was anything in the tree at all. -+ * Enabling NEXT_BUDDY will affect latency but not fairness. + * Enabling NEXT_BUDDY will affect latency but not fairness. */ -- if (!left || (curr && entity_before(curr, left))) -- left = curr; -- -- se = left; /* ideally we run the leftmost entity */ -- -- /* -- * Avoid running the skip buddy, if running something else can -- * be done without getting too unfair. -- */ -- if (cfs_rq->skip && cfs_rq->skip == se) { -- struct sched_entity *second; -- -- if (se == curr) { -- second = __pick_first_entity(cfs_rq); -- } else { -- second = __pick_next_entity(se); -- if (!second || (curr && entity_before(curr, second))) -- second = curr; -- } -- -- if (second && wakeup_preempt_entity(second, left) < 1) -- se = second; -- } -- -- if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) { -- /* -- * Someone really wants this to run. If it's not unfair, run it. -- */ -- se = cfs_rq->next; -- } else if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) { -- /* -- * Prefer last buddy, try to return the CPU to a preempted task. -- */ -- se = cfs_rq->last; -- } -+ if (sched_feat(NEXT_BUDDY) && -+ cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next)) -+ return cfs_rq->next; + if (sched_feat(NEXT_BUDDY) && +- cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next)) ++ cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next) && ++ wakeup_preempt_entity(cfs_rq->next, candidate) < 1) + return cfs_rq->next; -- return se; -+ return pick_eevdf(cfs_rq); +- return pick_eevdf(cfs_rq); ++ return candidate; } static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq); -@@ -5161,8 +5251,6 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) - /* throttle cfs_rqs exceeding runtime */ - check_cfs_rq_runtime(cfs_rq); - -- check_spread(cfs_rq, prev); -- - if (prev->on_rq) { - update_stats_wait_start_fair(cfs_rq, prev); - /* Put 'current' back into the tree. 
*/ -@@ -5203,9 +5291,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) - hrtimer_active(&rq_of(cfs_rq)->hrtick_timer)) - return; - #endif -- -- if (cfs_rq->nr_running > 1) -- check_preempt_tick(cfs_rq, curr); +@@ -6464,6 +6603,38 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) + hrtick_update(rq); } - -@@ -6210,13 +6295,12 @@ static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {} - static void hrtick_start_fair(struct rq *rq, struct task_struct *p) - { - struct sched_entity *se = &p->se; -- struct cfs_rq *cfs_rq = cfs_rq_of(se); - - SCHED_WARN_ON(task_rq(p) != rq); - - if (rq->cfs.h_nr_running > 1) { -- u64 slice = sched_slice(cfs_rq, se); - u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime; -+ u64 slice = se->slice; - s64 delta = slice - ran; - - if (delta < 0) { -@@ -6240,8 +6324,7 @@ static void hrtick_update(struct rq *rq) - if (!hrtick_enabled_fair(rq) || curr->sched_class != &fair_sched_class) - return; - -- if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency) -- hrtick_start_fair(rq, curr); -+ hrtick_start_fair(rq, curr); - } - #else /* !CONFIG_SCHED_HRTICK */ - static inline void -@@ -6282,17 +6365,6 @@ static int sched_idle_rq(struct rq *rq) - rq->nr_running); - } - --/* -- * Returns true if cfs_rq only has SCHED_IDLE entities enqueued. Note the use -- * of idle_nr_running, which does not consider idle descendants of normal -- * entities. -- */ --static bool sched_idle_cfs_rq(struct cfs_rq *cfs_rq) --{ -- return cfs_rq->nr_running && -- cfs_rq->nr_running == cfs_rq->idle_nr_running; --} -- - #ifdef CONFIG_SMP - static int sched_idle_cpu(int cpu) - { -@@ -7778,18 +7850,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) - { - struct sched_entity *se = &p->se; - -- /* -- * As blocked tasks retain absolute vruntime the migration needs to -- * deal with this by subtracting the old and adding the new -- * min_vruntime -- the latter is done by enqueue_entity() when placing -- * the task on the new runqueue. -- */ -- if (READ_ONCE(p->__state) == TASK_WAKING) { -- struct cfs_rq *cfs_rq = cfs_rq_of(se); -- -- se->vruntime -= u64_u32_load(cfs_rq->min_vruntime); -- } -- - if (!task_on_rq_migrating(p)) { - remove_entity_load_avg(se); - -@@ -7827,66 +7887,6 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) - } - #endif /* CONFIG_SMP */ - --static unsigned long wakeup_gran(struct sched_entity *se) --{ -- unsigned long gran = sysctl_sched_wakeup_granularity; -- -- /* -- * Since its curr running now, convert the gran from real-time -- * to virtual-time in his units. -- * -- * By using 'se' instead of 'curr' we penalize light tasks, so -- * they get preempted easier. That is, if 'se' < 'curr' then -- * the resulting gran will be larger, therefore penalizing the -- * lighter, if otoh 'se' > 'curr' then the resulting gran will -- * be smaller, again penalizing the lighter task. -- * -- * This is especially important for buddies when the leftmost -- * task is higher priority than the buddy. -- */ -- return calc_delta_fair(gran, se); --} -- --/* -- * Should 'se' preempt 'curr'. 
-- * -- * |s1 -- * |s2 -- * |s3 -- * g -- * |<--->|c -- * -- * w(c, s1) = -1 -- * w(c, s2) = 0 -- * w(c, s3) = 1 -- * -- */ --static int --wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) --{ -- s64 gran, vdiff = curr->vruntime - se->vruntime; -- -- if (vdiff <= 0) -- return -1; -- -- gran = wakeup_gran(se); -- if (vdiff > gran) -- return 1; -- -- return 0; --} -- --static void set_last_buddy(struct sched_entity *se) --{ -- for_each_sched_entity(se) { -- if (SCHED_WARN_ON(!se->on_rq)) -- return; -- if (se_is_idle(se)) -- return; -- cfs_rq_of(se)->last = se; -- } --} -- - static void set_next_buddy(struct sched_entity *se) - { - for_each_sched_entity(se) { -@@ -7898,12 +7898,6 @@ static void set_next_buddy(struct sched_entity *se) - } - } - --static void set_skip_buddy(struct sched_entity *se) --{ -- for_each_sched_entity(se) -- cfs_rq_of(se)->skip = se; --} -- - /* - * Preempt the current task with a newly woken task if needed: - */ -@@ -7912,7 +7906,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ - struct task_struct *curr = rq->curr; - struct sched_entity *se = &curr->se, *pse = &p->se; - struct cfs_rq *cfs_rq = task_cfs_rq(curr); -- int scale = cfs_rq->nr_running >= sched_nr_latency; - int next_buddy_marked = 0; - int cse_is_idle, pse_is_idle; - -@@ -7928,7 +7921,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ - if (unlikely(throttled_hierarchy(cfs_rq_of(pse)))) - return; - -- if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) { -+ if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) { - set_next_buddy(pse); - next_buddy_marked = 1; - } -@@ -7973,35 +7966,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ - if (cse_is_idle != pse_is_idle) - return; - -- update_curr(cfs_rq_of(se)); -- if (wakeup_preempt_entity(se, pse) == 1) { -- /* -- * Bias pick_next to pick the sched entity that is -- * triggering this preemption. -- */ -- if (!next_buddy_marked) -- set_next_buddy(pse); -+ cfs_rq = cfs_rq_of(se); -+ update_curr(cfs_rq); -+ -+ /* -+ * XXX pick_eevdf(cfs_rq) != se ? -+ */ -+ if (pick_eevdf(cfs_rq) == pse) - goto preempt; -- } - - return; - - preempt: - resched_curr(rq); -- /* -- * Only set the backward buddy when the current task is still -- * on the rq. This can happen when a wakeup gets interleaved -- * with schedule on the ->pre_schedule() or idle_balance() -- * point, either of which can * drop the rq lock. -- * -- * Also, during early boot the idle thread is in the fair class, -- * for obvious reasons its a bad idea to schedule back to it. -- */ -- if (unlikely(!se->on_rq || curr == rq->idle)) -- return; -- -- if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) -- set_last_buddy(se); - } - - #ifdef CONFIG_SMP -@@ -8202,8 +8179,6 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev) - - /* - * sched_yield() is very simple -- * -- * The magic of dealing with the ->skip buddy is in pick_next_entity. - */ - static void yield_task_fair(struct rq *rq) - { -@@ -8219,21 +8194,19 @@ static void yield_task_fair(struct rq *rq) - - clear_buddies(cfs_rq, se); - -- if (curr->policy != SCHED_BATCH) { -- update_rq_clock(rq); -- /* -- * Update run-time statistics of the 'current'. -- */ -- update_curr(cfs_rq); -- /* -- * Tell update_rq_clock() that we've just updated, -- * so we don't do microscopic update in schedule() -- * and double the fastpath cost. 
-- */ -- rq_clock_skip_update(rq); -- } -+ update_rq_clock(rq); -+ /* -+ * Update run-time statistics of the 'current'. -+ */ -+ update_curr(cfs_rq); -+ /* -+ * Tell update_rq_clock() that we've just updated, -+ * so we don't do microscopic update in schedule() -+ * and double the fastpath cost. -+ */ -+ rq_clock_skip_update(rq); - -- set_skip_buddy(se); -+ se->deadline += calc_delta_fair(se->slice, se); - } - - static bool yield_to_task_fair(struct rq *rq, struct task_struct *p) -@@ -8476,8 +8449,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env) - * Buddy candidates are cache hot: - */ - if (sched_feat(CACHE_HOT_BUDDY) && env->dst_rq->nr_running && -- (&p->se == cfs_rq_of(&p->se)->next || -- &p->se == cfs_rq_of(&p->se)->last)) -+ (&p->se == cfs_rq_of(&p->se)->next)) - return 1; - - if (sysctl_sched_migration_cost == -1) -@@ -11987,8 +11959,8 @@ static void rq_offline_fair(struct rq *rq) - static inline bool - __entity_slice_used(struct sched_entity *se, int min_nr_tasks) - { -- u64 slice = sched_slice(cfs_rq_of(se), se); - u64 rtime = se->sum_exec_runtime - se->prev_sum_exec_runtime; -+ u64 slice = se->slice; - - return (rtime * min_nr_tasks > slice); - } -@@ -12144,8 +12116,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) - */ - static void task_fork_fair(struct task_struct *p) - { -- struct cfs_rq *cfs_rq; - struct sched_entity *se = &p->se, *curr; -+ struct cfs_rq *cfs_rq; - struct rq *rq = this_rq(); - struct rq_flags rf; - -@@ -12154,22 +12126,9 @@ static void task_fork_fair(struct task_struct *p) - - cfs_rq = task_cfs_rq(current); - curr = cfs_rq->curr; -- if (curr) { -+ if (curr) - update_curr(cfs_rq); -- se->vruntime = curr->vruntime; -- } -- place_entity(cfs_rq, se, 1); -- -- if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) { -- /* -- * Upon rescheduling, sched_class::put_prev_task() will place -- * 'current' within the tree based on its new key value. -- */ -- swap(curr->vruntime, se->vruntime); -- resched_curr(rq); -- } -- -- se->vruntime -= cfs_rq->min_vruntime; -+ place_entity(cfs_rq, se, ENQUEUE_INITIAL); - rq_unlock(rq, &rf); - } - -@@ -12198,34 +12157,6 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) - check_preempt_curr(rq, p, 0); - } - --static inline bool vruntime_normalized(struct task_struct *p) --{ -- struct sched_entity *se = &p->se; -- -- /* -- * In both the TASK_ON_RQ_QUEUED and TASK_ON_RQ_MIGRATING cases, -- * the dequeue_entity(.flags=0) will already have normalized the -- * vruntime. -- */ -- if (p->on_rq) -- return true; -- -- /* -- * When !on_rq, vruntime of the task has usually NOT been normalized. -- * But there are some cases where it has already been normalized: -- * -- * - A forked child which is waiting for being woken up by -- * wake_up_new_task(). -- * - A task which has been woken up by try_to_wake_up() and -- * waiting for actually being woken up by sched_ttwu_pending(). 
-- */ -- if (!se->sum_exec_runtime || -- (READ_ONCE(p->__state) == TASK_WAKING && p->sched_remote_wakeup)) -- return true; -- -- return false; --} -- - #ifdef CONFIG_FAIR_GROUP_SCHED - /* - * Propagate the changes of the sched_entity across the tg tree to make it -@@ -12296,16 +12227,6 @@ static void attach_entity_cfs_rq(struct sched_entity *se) - static void detach_task_cfs_rq(struct task_struct *p) - { - struct sched_entity *se = &p->se; -- struct cfs_rq *cfs_rq = cfs_rq_of(se); -- -- if (!vruntime_normalized(p)) { -- /* -- * Fix up our vruntime so that the current sleep doesn't -- * cause 'unlimited' sleep bonus. -- */ -- place_entity(cfs_rq, se, 0); -- se->vruntime -= cfs_rq->min_vruntime; -- } - - detach_entity_cfs_rq(se); - } -@@ -12313,12 +12234,8 @@ static void detach_task_cfs_rq(struct task_struct *p) - static void attach_task_cfs_rq(struct task_struct *p) - { - struct sched_entity *se = &p->se; -- struct cfs_rq *cfs_rq = cfs_rq_of(se); - - attach_entity_cfs_rq(se); -- -- if (!vruntime_normalized(p)) -- se->vruntime += cfs_rq->min_vruntime; - } - - static void switched_from_fair(struct rq *rq, struct task_struct *p) -@@ -12429,6 +12346,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) - goto err; - - tg->shares = NICE_0_LOAD; -+ tg->latency_prio = DEFAULT_PRIO; - - init_cfs_bandwidth(tg_cfs_bandwidth(tg)); - -@@ -12527,6 +12445,9 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, - } - - se->my_q = cfs_rq; -+ -+ set_latency_fair(se, tg->latency_prio - MAX_RT_PRIO); -+ - /* guarantee group entities always have weight */ - update_load_set(&se->load, NICE_0_LOAD); - se->parent = parent; -@@ -12657,6 +12578,29 @@ int sched_group_set_idle(struct task_group *tg, long idle) - return 0; - } - -+int sched_group_set_latency(struct task_group *tg, int prio) ++static unsigned long wakeup_gran(struct sched_entity *se) +{ -+ int i; ++ unsigned long gran = sysctl_sched_wakeup_granularity; ++#ifdef CONFIG_SCHED_BORE ++ return calc_delta_fair_unscaled(gran, se); ++#else // CONFIG_SCHED_BORE ++ return calc_delta_fair(gran, se); ++#endif // CONFIG_SCHED_BORE ++} + -+ if (tg == &root_task_group) -+ return -EINVAL; -+ -+ mutex_lock(&shares_mutex); -+ -+ if (tg->latency_prio == prio) { -+ mutex_unlock(&shares_mutex); -+ return 0; ++static int ++wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) ++{ ++ s64 gran, vdiff = curr->vruntime - se->vruntime; ++#ifdef CONFIG_SCHED_BORE ++ if (sched_bore) { ++ u64 rtime = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; ++ vdiff += wakeup_preempt_backstep_delta(rtime, curr) ++ - wakeup_preempt_backstep_delta(rtime, se); + } ++#endif // CONFIG_SCHED_BORE + -+ tg->latency_prio = prio; ++ if (vdiff <= 0) ++ return -1; + -+ for_each_possible_cpu(i) -+ set_latency_fair(tg->se[i], prio - MAX_RT_PRIO); ++ gran = wakeup_gran(se); ++ if (vdiff > gran) ++ return 1; + -+ mutex_unlock(&shares_mutex); + return 0; +} + - #else /* CONFIG_FAIR_GROUP_SCHED */ + static void set_next_buddy(struct sched_entity *se); - void free_fair_sched_group(struct task_group *tg) { } -@@ -12683,7 +12627,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task - * idle runqueue: + /* +@@ -6482,6 +6653,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) + util_est_dequeue(&rq->cfs, p); + + for_each_sched_entity(se) { ++#ifdef CONFIG_SCHED_BORE ++ if (task_sleep) restart_burst(se); ++#endif // CONFIG_SCHED_BORE + cfs_rq = cfs_rq_of(se); + 
dequeue_entity(cfs_rq, se, flags); + +@@ -7972,7 +8146,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ + /* + * XXX pick_eevdf(cfs_rq) != se ? */ - if (rq->cfs.load.weight) -- rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se)); -+ rr_interval = NS_TO_JIFFIES(se->slice); +- if (pick_eevdf(cfs_rq) == pse) ++ if ((pick_eevdf(cfs_rq) == pse) && (wakeup_preempt_entity(se, pse) == 1)) + goto preempt; - return rr_interval; - } + return; +@@ -8185,6 +8359,9 @@ static void yield_task_fair(struct rq *rq) + struct task_struct *curr = rq->curr; + struct cfs_rq *cfs_rq = task_cfs_rq(curr); + struct sched_entity *se = &curr->se; ++#ifdef CONFIG_SCHED_BORE ++ restart_burst(se); ++#endif // CONFIG_SCHED_BORE + + /* + * Are we the only task in the tree? diff --git a/kernel/sched/features.h b/kernel/sched/features.h -index 9e390eb82e38..ca95044a7479 100644 +index ca95044a7..a7d34d1b2 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h -@@ -1,16 +1,12 @@ - /* SPDX-License-Identifier: GPL-2.0 */ --/* -- * Only give sleepers 50% of their service deficit. This allows -- * them to run sooner, but does not allow tons of sleepers to -- * rip the spread apart. -- */ --SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true) - - /* -- * Place new tasks ahead so that they do not starve already running -- * tasks -+ * Using the avg_vruntime, do the right thing and preserve lag across -+ * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled. - */ --SCHED_FEAT(START_DEBIT, true) -+SCHED_FEAT(PLACE_LAG, true) -+SCHED_FEAT(PLACE_FUDGE, true) -+SCHED_FEAT(PLACE_DEADLINE_INITIAL, true) - - /* - * Prefer to schedule the task we woke last (assuming it failed -@@ -19,13 +15,6 @@ SCHED_FEAT(START_DEBIT, true) +@@ -13,7 +13,11 @@ SCHED_FEAT(PLACE_DEADLINE_INITIAL, true) + * wakeup-preemption), since its likely going to consume data we + * touched, increases cache locality. */ ++#ifdef CONFIG_SCHED_BORE ++SCHED_FEAT(NEXT_BUDDY, true) ++#else // CONFIG_SCHED_BORE SCHED_FEAT(NEXT_BUDDY, false) ++#endif // CONFIG_SCHED_BORE --/* -- * Prefer to schedule the task that ran last (when we did -- * wake-preempt) as that likely will touch the same data, increases -- * cache locality. -- */ --SCHED_FEAT(LAST_BUDDY, true) -- /* * Consider buddies to be cache hot, decreases the likeliness of a - * cache buddy being migrated away, increases cache locality. -@@ -99,6 +88,3 @@ SCHED_FEAT(UTIL_EST, true) - SCHED_FEAT(UTIL_EST_FASTUP, true) - - SCHED_FEAT(LATENCY_WARN, false) -- --SCHED_FEAT(ALT_PERIOD, true) --SCHED_FEAT(BASE_SLICE, true) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index d8ba81c66579..0ea13cfac95b 100644 +index 0ea13cfac..34cb2fbbb 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -372,6 +372,8 @@ struct task_group { - - /* A positive value indicates that this is a SCHED_IDLE group. */ - int idle; -+ /* latency priority of the group. 
*/ -+ int latency_prio; - - #ifdef CONFIG_SMP - /* -@@ -482,6 +484,8 @@ extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); - - extern int sched_group_set_idle(struct task_group *tg, long idle); - -+extern int sched_group_set_latency(struct task_group *tg, int prio); -+ - #ifdef CONFIG_SMP - extern void set_task_rq_fair(struct sched_entity *se, - struct cfs_rq *prev, struct cfs_rq *next); -@@ -548,6 +552,10 @@ struct cfs_rq { - unsigned int idle_nr_running; /* SCHED_IDLE */ - unsigned int idle_h_nr_running; /* SCHED_IDLE */ - -+ s64 avg_vruntime; -+ u64 avg_slice; -+ u64 avg_load; -+ - u64 exec_clock; - u64 min_vruntime; - #ifdef CONFIG_SCHED_CORE -@@ -567,8 +575,6 @@ struct cfs_rq { - */ - struct sched_entity *curr; - struct sched_entity *next; -- struct sched_entity *last; -- struct sched_entity *skip; - - #ifdef CONFIG_SCHED_DEBUG - unsigned int nr_spread_over; -@@ -2167,6 +2173,7 @@ extern const u32 sched_prio_to_wmult[40]; - #else - #define ENQUEUE_MIGRATED 0x00 - #endif -+#define ENQUEUE_INITIAL 0x80 - - #define RETRY_TASK ((void *)-1UL) - -@@ -2471,11 +2478,9 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags); - extern const_debug unsigned int sysctl_sched_nr_migrate; +@@ -2479,6 +2479,7 @@ extern const_debug unsigned int sysctl_sched_nr_migrate; extern const_debug unsigned int sysctl_sched_migration_cost; -+extern unsigned int sysctl_sched_base_slice; -+ + extern unsigned int sysctl_sched_base_slice; ++extern unsigned int sysctl_sched_wakeup_granularity; + #ifdef CONFIG_SCHED_DEBUG --extern unsigned int sysctl_sched_latency; --extern unsigned int sysctl_sched_min_granularity; --extern unsigned int sysctl_sched_idle_min_granularity; --extern unsigned int sysctl_sched_wakeup_granularity; extern int sysctl_resched_latency_warn_ms; - extern int sysctl_resched_latency_warn_once; - -@@ -2488,6 +2493,8 @@ extern unsigned int sysctl_numa_balancing_scan_size; - extern unsigned int sysctl_numa_balancing_hot_threshold; - #endif - -+extern void set_latency_fair(struct sched_entity *se, int prio); -+ - #ifdef CONFIG_SCHED_HRTICK - - /* -@@ -3496,4 +3503,7 @@ static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { } - static inline void init_sched_mm_cid(struct task_struct *t) { } - #endif - -+extern u64 avg_vruntime(struct cfs_rq *cfs_rq); -+extern int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se); -+ - #endif /* _KERNEL_SCHED_SCHED_H */ -diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h -index 3bac0a8ceab2..b2e932c25be6 100644 ---- a/tools/include/uapi/linux/sched.h -+++ b/tools/include/uapi/linux/sched.h -@@ -132,6 +132,7 @@ struct clone_args { - #define SCHED_FLAG_KEEP_PARAMS 0x10 - #define SCHED_FLAG_UTIL_CLAMP_MIN 0x20 - #define SCHED_FLAG_UTIL_CLAMP_MAX 0x40 -+#define SCHED_FLAG_LATENCY_NICE 0x80 - - #define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \ - SCHED_FLAG_KEEP_PARAMS) -@@ -143,6 +144,7 @@ struct clone_args { - SCHED_FLAG_RECLAIM | \ - SCHED_FLAG_DL_OVERRUN | \ - SCHED_FLAG_KEEP_ALL | \ -- SCHED_FLAG_UTIL_CLAMP) -+ SCHED_FLAG_UTIL_CLAMP | \ -+ SCHED_FLAG_LATENCY_NICE) - - #endif /* _UAPI_LINUX_SCHED_H */ -- -2.41.0 +2.41.0.rc2 diff --git a/patches/0003-bore.patch b/patches/0003-bore.patch new file mode 100644 index 0000000..64bf802 --- /dev/null +++ b/patches/0003-bore.patch @@ -0,0 +1,421 @@ +From 32c617afc05751783be3eb0f5a1d15e31dfc7919 Mon Sep 17 00:00:00 2001 +From: Piotr Gorski +Date: Thu, 8 Jun 2023 11:14:23 +0200 +Subject: [PATCH] 
bore-cachy + +Signed-off-by: Piotr Gorski +--- + include/linux/sched.h | 10 +++ + init/Kconfig | 20 ++++++ + kernel/sched/core.c | 62 ++++++++++++++++++ + kernel/sched/debug.c | 3 + + kernel/sched/fair.c | 136 ++++++++++++++++++++++++++++++++++++++++ + kernel/sched/features.h | 8 +++ + 6 files changed, 239 insertions(+) + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index eed5d65b8..38fbebe4d 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -557,6 +557,12 @@ struct sched_entity { + u64 sum_exec_runtime; + u64 vruntime; + u64 prev_sum_exec_runtime; ++#ifdef CONFIG_SCHED_BORE ++ u64 prev_burst_time; ++ u64 burst_time; ++ u64 max_burst_time; ++ u8 penalty_score; ++#endif // CONFIG_SCHED_BORE + + u64 nr_migrations; + +@@ -985,6 +991,10 @@ struct task_struct { + struct list_head children; + struct list_head sibling; + struct task_struct *group_leader; ++#ifdef CONFIG_SCHED_BORE ++ u64 child_burst_cache; ++ u64 child_burst_last_cached; ++#endif // CONFIG_SCHED_BORE + + /* + * 'ptraced' is the list of tasks this task is using ptrace() on. +diff --git a/init/Kconfig b/init/Kconfig +index 0147b4a33..4ab7e154b 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -1290,6 +1290,26 @@ config CHECKPOINT_RESTORE + + If unsure, say N here. + ++config SCHED_BORE ++ bool "Burst-Oriented Response Enhancer" ++ default y ++ help ++ In Desktop and Mobile computing, one might prefer interactive ++ tasks to keep responsive no matter what they run in the background. ++ ++ Enabling this kernel feature modifies the scheduler to discriminate ++ tasks by their burst time (runtime since it last went sleeping or ++ yielding state) and prioritize those that run less bursty. ++ Such tasks usually include window compositor, widgets backend, ++ terminal emulator, video playback, games and so on. ++ With a little impact to scheduling fairness, it may improve ++ responsiveness especially under heavy background workload. ++ ++ You can turn it off by setting the sysctl kernel.sched_bore = 0. ++ Enabling this feature implies NO_GENTLE_FAIR_SLEEPERS by default. ++ ++ If unsure say Y here. 
++ + config SCHED_AUTOGROUP + bool "Automatic process group scheduling" + select CGROUPS +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index bcb3a7e68..a0f227344 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -4484,6 +4484,57 @@ int wake_up_state(struct task_struct *p, unsigned int state) + return try_to_wake_up(p, state, 0); + } + ++#ifdef CONFIG_SCHED_BORE ++#define CHILD_BURST_CUTOFF_BITS 9 ++extern unsigned int sched_burst_cache_lifetime; ++ ++void __init sched_init_bore(void) { ++ init_task.child_burst_cache = 0; ++ init_task.child_burst_last_cached = 0; ++ init_task.se.prev_burst_time = 0; ++ init_task.se.burst_time = 0; ++ init_task.se.max_burst_time = 0; ++} ++ ++void inline __sched_fork_bore(struct task_struct *p) { ++ p->child_burst_cache = 0; ++ p->child_burst_last_cached = 0; ++ p->se.burst_time = 0; ++} ++ ++static inline void update_task_child_burst_time_cache(struct task_struct *p) { ++ u64 sum = 0, avg_burst_time = 0; ++ u32 cnt = 0; ++ struct task_struct *child; ++ ++ read_lock(&tasklist_lock); ++ list_for_each_entry(child, &p->children, sibling) { ++ cnt++; ++ sum += child->se.max_burst_time >> CHILD_BURST_CUTOFF_BITS; ++ } ++ read_unlock(&tasklist_lock); ++ ++ if (cnt) avg_burst_time = div_u64(sum, cnt) << CHILD_BURST_CUTOFF_BITS; ++ p->child_burst_cache = max(avg_burst_time, p->se.max_burst_time); ++} ++ ++static void update_task_initial_burst_time(struct task_struct *task) { ++ struct sched_entity *se = &task->se; ++ struct task_struct *par = task->real_parent; ++ u64 ktime = ktime_to_ns(ktime_get()); ++ ++ if (likely(par)) { ++ if (par->child_burst_last_cached + sched_burst_cache_lifetime < ktime) { ++ par->child_burst_last_cached = ktime; ++ update_task_child_burst_time_cache(par); ++ } ++ se->prev_burst_time = max(se->prev_burst_time, par->child_burst_cache); ++ } ++ ++ se->max_burst_time = se->prev_burst_time; ++} ++#endif // CONFIG_SCHED_BORE ++ + /* + * Perform scheduler related setup for a newly forked process p. + * p is forked by current. +@@ -4500,6 +4551,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) + p->se.prev_sum_exec_runtime = 0; + p->se.nr_migrations = 0; + p->se.vruntime = 0; ++#ifdef CONFIG_SCHED_BORE ++ __sched_fork_bore(p); ++#endif // CONFIG_SCHED_BORE + INIT_LIST_HEAD(&p->se.group_node); + + #ifdef CONFIG_FAIR_GROUP_SCHED +@@ -4726,6 +4780,9 @@ late_initcall(sched_core_sysctl_init); + int sched_fork(unsigned long clone_flags, struct task_struct *p) + { + __sched_fork(clone_flags, p); ++#ifdef CONFIG_SCHED_BORE ++ update_task_initial_burst_time(p); ++#endif // CONFIG_SCHED_BORE + /* + * We mark the process as NEW here. 
This guarantees that
+ * nobody will actually run it, and a signal or other external
+@@ -9922,6 +9979,11 @@ void __init sched_init(void)
+ BUG_ON(&dl_sched_class != &stop_sched_class + 1);
+ #endif
+ 
++#ifdef CONFIG_SCHED_BORE
++ sched_init_bore();
++ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 2.4.0 by Masahito Suzuki");
++#endif // CONFIG_SCHED_BORE
++
+ wait_bit_init();
+ 
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
+index 066ff1c8a..4bc07d405 100644
+--- a/kernel/sched/debug.c
++++ b/kernel/sched/debug.c
+@@ -593,6 +593,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
+ SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
+ SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));
+ 
++#ifdef CONFIG_SCHED_BORE
++ SEQ_printf(m, " %2d", p->se.penalty_score);
++#endif
+ #ifdef CONFIG_NUMA_BALANCING
+ SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
+ #endif
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 9fe8288b1..ac29ac350 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -19,6 +19,9 @@
+ *
+ * Adaptive scheduling granularity, math enhancements by Peter Zijlstra
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
++ *
++ * Burst-Oriented Response Enhancer (BORE) CPU Scheduler
++ * Copyright (C) 2021-2023 Masahito Suzuki
+ */
+ #include
+ #include
+@@ -140,6 +143,61 @@ static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
+ 
+ const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+ 
++#ifdef CONFIG_SCHED_BORE
++unsigned int __read_mostly sched_bore = 1;
++unsigned int __read_mostly sched_burst_cache_lifetime = 15000000;
++unsigned int __read_mostly sched_burst_penalty_offset = 12;
++unsigned int __read_mostly sched_burst_penalty_scale = 1292;
++unsigned int __read_mostly sched_burst_smoothness = 1;
++static int three = 3;
++static int sixty_four = 64;
++static int maxval_12_bits = 4095;
++
++#define FIXED_SHIFT 10
++#define FIXED_ONE (1 << FIXED_SHIFT)
++typedef u32 fixed;
++
++static void update_burst_score(struct sched_entity *se) {
++ u64 burst_time = se->max_burst_time;
++
++ int msb = fls64(burst_time);
++ fixed integer_part = msb << FIXED_SHIFT;
++ fixed fractional_part = burst_time << (64 - msb) << 1 >> (64 - FIXED_SHIFT);
++ fixed greed = integer_part | fractional_part;
++
++ fixed tolerance = sched_burst_penalty_offset << FIXED_SHIFT;
++ fixed penalty = max(0, (s32)greed - (s32)tolerance);
++ fixed scaled_penalty = penalty * sched_burst_penalty_scale >> 10;
++
++ u8 score = min(39U, scaled_penalty >> FIXED_SHIFT);
++ se->penalty_score = score;
++}
++
++static inline u64 penalty_scale(u64 delta, struct sched_entity *se) {
++ return mul_u64_u32_shr(delta, sched_prio_to_wmult[se->penalty_score], 22);
++}
++
++static inline u64 __binary_smooth(u64 new, u64 old, unsigned int smoothness) {
++ return (new + old * ((1 << smoothness) - 1)) >> smoothness;
++}
++
++void restart_burst(struct sched_entity *se) {
++ se->max_burst_time = se->prev_burst_time = __binary_smooth(
++ se->burst_time, se->prev_burst_time, sched_burst_smoothness);
++ se->burst_time = 0;
++}
++
++#define calc_delta_fair(delta, se) __calc_delta_fair(delta, se, true)
++#define calc_delta_fair_unscaled(delta, se) __calc_delta_fair(delta, se, false)
++static inline u64
++__calc_delta_fair(u64 delta, struct sched_entity *se, bool bscale);
++
++static s64 wakeup_preempt_backstep_delta(u64 rtime, struct sched_entity *se) {
++ u64 delta = calc_delta_fair_unscaled(rtime, se);
++ return delta - penalty_scale(delta, se);
++}
++#endif // CONFIG_SCHED_BORE
++
+ int sched_thermal_decay_shift;
+ static int __init setup_sched_thermal_decay_shift(char *str)
+ {
+@@ -203,6 +261,51 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536;
+ 
+ #ifdef CONFIG_SYSCTL
+ static struct ctl_table sched_fair_sysctls[] = {
++#ifdef CONFIG_SCHED_BORE
++ {
++ .procname = "sched_bore",
++ .data = &sched_bore,
++ .maxlen = sizeof(unsigned int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_minmax,
++ .extra1 = SYSCTL_ZERO,
++ .extra2 = SYSCTL_ONE,
++ },
++ {
++ .procname = "sched_burst_cache_lifetime",
++ .data = &sched_burst_cache_lifetime,
++ .maxlen = sizeof(unsigned int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec,
++ },
++ {
++ .procname = "sched_burst_penalty_offset",
++ .data = &sched_burst_penalty_offset,
++ .maxlen = sizeof(unsigned int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_minmax,
++ .extra1 = SYSCTL_ZERO,
++ .extra2 = &sixty_four,
++ },
++ {
++ .procname = "sched_burst_penalty_scale",
++ .data = &sched_burst_penalty_scale,
++ .maxlen = sizeof(unsigned int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_minmax,
++ .extra1 = SYSCTL_ZERO,
++ .extra2 = &maxval_12_bits,
++ },
++ {
++ .procname = "sched_burst_smoothness",
++ .data = &sched_burst_smoothness,
++ .maxlen = sizeof(unsigned int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_minmax,
++ .extra1 = SYSCTL_ZERO,
++ .extra2 = &three,
++ },
++#endif // CONFIG_SCHED_BORE
+ {
+ .procname = "sched_child_runs_first",
+ .data = &sysctl_sched_child_runs_first,
+@@ -724,11 +827,19 @@ int sched_update_scaling(void)
+ /*
+ * delta /= w
+ */
++#ifdef CONFIG_SCHED_BORE
++static inline u64
++__calc_delta_fair(u64 delta, struct sched_entity *se, bool bscale)
++#else // CONFIG_SCHED_BORE
+ static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
++#endif // CONFIG_SCHED_BORE
+ {
+ if (unlikely(se->load.weight != NICE_0_LOAD))
+ delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
+ 
++#ifdef CONFIG_SCHED_BORE
++ if (bscale && sched_bore) delta = penalty_scale(delta, se);
++#endif // CONFIG_SCHED_BORE
+ return delta;
+ }
+ 
+@@ -938,6 +1049,14 @@ static void update_curr(struct cfs_rq *cfs_rq)
+ curr->sum_exec_runtime += delta_exec;
+ schedstat_add(cfs_rq->exec_clock, delta_exec);
+ 
++#ifdef CONFIG_SCHED_BORE
++ curr->burst_time += delta_exec;
++ curr->max_burst_time = max(curr->max_burst_time, curr->burst_time);
++ update_burst_score(curr);
++ if (sched_bore)
++ curr->vruntime += penalty_scale(calc_delta_fair(delta_exec, curr), curr);
++ else
++#endif // CONFIG_SCHED_BORE
+ curr->vruntime += calc_delta_fair(delta_exec, curr);
+ update_min_vruntime(cfs_rq);
+ 
+@@ -6410,6 +6529,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+ util_est_dequeue(&rq->cfs, p);
+ 
+ for_each_sched_entity(se) {
++#ifdef CONFIG_SCHED_BORE
++ if (task_sleep) restart_burst(se);
++#endif // CONFIG_SCHED_BORE
+ cfs_rq = cfs_rq_of(se);
+ dequeue_entity(cfs_rq, se, flags);
+ 
+@@ -7844,7 +7966,11 @@ static unsigned long wakeup_gran(struct sched_entity *se)
+ * This is especially important for buddies when the leftmost
+ * task is higher priority than the buddy.
+ */
++#ifdef CONFIG_SCHED_BORE
++ return calc_delta_fair_unscaled(gran, se);
++#else // CONFIG_SCHED_BORE
+ return calc_delta_fair(gran, se);
++#endif // CONFIG_SCHED_BORE
+ }
+ 
+ /*
+@@ -7865,6 +7991,13 @@ static int
+ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
+ {
+ s64 gran, vdiff = curr->vruntime - se->vruntime;
++#ifdef CONFIG_SCHED_BORE
++ if (sched_bore) {
++ u64 rtime = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
++ vdiff += wakeup_preempt_backstep_delta(rtime, curr)
++ - wakeup_preempt_backstep_delta(rtime, se);
++ }
++#endif // CONFIG_SCHED_BORE
+ 
+ if (vdiff <= 0)
+ return -1;
+@@ -8210,6 +8343,9 @@ static void yield_task_fair(struct rq *rq)
+ struct task_struct *curr = rq->curr;
+ struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+ struct sched_entity *se = &curr->se;
++#ifdef CONFIG_SCHED_BORE
++ restart_burst(se);
++#endif // CONFIG_SCHED_BORE
+ 
+ /*
+ * Are we the only task in the tree?
+diff --git a/kernel/sched/features.h b/kernel/sched/features.h
+index 9e390eb82..696ea7081 100644
+--- a/kernel/sched/features.h
++++ b/kernel/sched/features.h
+@@ -4,7 +4,11 @@
+ * them to run sooner, but does not allow tons of sleepers to
+ * rip the spread apart.
+ */
++#ifdef CONFIG_SCHED_BORE
++SCHED_FEAT(GENTLE_FAIR_SLEEPERS, false)
++#else // CONFIG_SCHED_BORE
+ SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
++#endif // CONFIG_SCHED_BORE
+ 
+ /*
+ * Place new tasks ahead so that they do not starve already running
+@@ -17,7 +21,11 @@ SCHED_FEAT(START_DEBIT, true)
+ * wakeup-preemption), since its likely going to consume data we
+ * touched, increases cache locality.
+ */
++#ifdef CONFIG_SCHED_BORE
++SCHED_FEAT(NEXT_BUDDY, true)
++#else // CONFIG_SCHED_BORE
+ SCHED_FEAT(NEXT_BUDDY, false)
++#endif // CONFIG_SCHED_BORE
+ 
+ /*
+ * Prefer to schedule the task that ran last (when we did
+-- 
+2.41.0.rc2
diff --git a/patches/0003-Allow-to-set-custom-USB-pollrate-for-specific-device.patch b/patches/0004-Allow-to-set-custom-USB-pollrate-for-specific-device.patch
similarity index 100%
rename from patches/0003-Allow-to-set-custom-USB-pollrate-for-specific-device.patch
rename to patches/0004-Allow-to-set-custom-USB-pollrate-for-specific-device.patch
diff --git a/patches/0004-amdgpu-si-cik-default.patch b/patches/0005-amdgpu-si-cik-default.patch
similarity index 100%
rename from patches/0004-amdgpu-si-cik-default.patch
rename to patches/0005-amdgpu-si-cik-default.patch
diff --git a/scripts/build.sh b/scripts/build.sh
index 7b8b207..c4c2314 100755
--- a/scripts/build.sh
+++ b/scripts/build.sh
@@ -2,4 +2,4 @@
 
 echo "Pika Kernel - Building"
 
-make -j`nproc` bindeb-pkg LOCALVERSION=-pikaos KDEB_PKGVERSION=$(make kernelversion)-5
+make -j`nproc` bindeb-pkg LOCALVERSION=-pikaos KDEB_PKGVERSION=$(make kernelversion)-1
diff --git a/scripts/patch.sh b/scripts/patch.sh
index 9a9e251..cd2c0b3 100755
--- a/scripts/patch.sh
+++ b/scripts/patch.sh
@@ -7,8 +7,10 @@ echo "Pika Kernel - Applying patches"
 patch -Np1 < "../patches/0001-cachy-all.patch"
 # orig patch from cachy
 patch -Np1 < "../patches/0002-eevdf.patch"
+# orig patch from cachy
+patch -Np1 < "../patches/0003-bore.patch"
 # Nobara patches are here: https://github.com/sammilucia/nobara-kernel-fork
 # Allow setting custom pollrates for usb devices
-patch -Np1 < "../patches/0003-Allow-to-set-custom-USB-pollrate-for-specific-device.patch"
+patch -Np1 < "../patches/0004-Allow-to-set-custom-USB-pollrate-for-specific-device.patch"
 # Allow pre polaris cards to use the amdgpu kernel module
-patch -Np1 < "../patches/0004-amdgpu-si-cik-default.patch"
+patch -Np1 < "../patches/0005-amdgpu-si-cik-default.patch"
diff --git a/scripts/source.sh b/scripts/source.sh
index 6156c9f..43f38ab 100755
--- a/scripts/source.sh
+++ b/scripts/source.sh
@@ -2,7 +2,7 @@
 
 echo "Pika Kernel - Getting source"
 
-wget -nv https://git.kernel.org/torvalds/t/linux-6.4-rc5.tar.gz
-tar -xf ./linux-6.4-rc5.tar.gz
+wget -nv https://git.kernel.org/torvalds/t/linux-6.4-rc6.tar.gz
+tar -xf ./linux-6.4-rc6.tar.gz
 
-cd linux-6.4-rc5
+cd linux-6.4-rc6