6.4-rc6 + Add BORE/EEVDF

ferrreo 2023-06-14 20:49:41 +01:00
parent f2b5283ae1
commit 3ad8571d9c
9 changed files with 1114 additions and 2168 deletions

config

@@ -7067,7 +7067,7 @@ CONFIG_SND_SERIAL_U16550=m
CONFIG_SND_MPU401=m
CONFIG_SND_PORTMAN2X4=m
CONFIG_SND_AC97_POWER_SAVE=y
CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0
CONFIG_SND_AC97_POWER_SAVE_DEFAULT=10
CONFIG_SND_SB_COMMON=m
CONFIG_SND_PCI=y
CONFIG_SND_AD1889=m
@@ -7172,7 +7172,7 @@ CONFIG_SND_HDA_CODEC_CA0132_DSP=y
CONFIG_SND_HDA_CODEC_CMEDIA=m
CONFIG_SND_HDA_CODEC_SI3054=m
CONFIG_SND_HDA_GENERIC=m
CONFIG_SND_HDA_POWER_SAVE_DEFAULT=1
CONFIG_SND_HDA_POWER_SAVE_DEFAULT=10
CONFIG_SND_HDA_INTEL_HDMI_SILENT_STREAM=y
# CONFIG_SND_HDA_CTL_DEV_ID is not set
# end of HD-Audio

patches/0001-cachy-all.patch

@@ -1,4 +1,4 @@
From e1cfa351424a722e33443e5c9a6a937034eb18bd Mon Sep 17 00:00:00 2001
From 8b27eca196447f74bfa5a346df10212b900ce82a Mon Sep 17 00:00:00 2001
From: Peter Jung <admin@ptr1337.dev>
Date: Tue, 9 May 2023 18:38:36 +0200
Subject: [PATCH 1/8] bbr2
@@ -51,7 +51,7 @@ index c2b15f7e5516..d85858efa571 100644
};
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 18a038d16434..070d0aad5204 100644
index 5066e4586cf0..b34661204315 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -375,6 +375,7 @@ static inline void tcp_dec_quickack_mode(struct sock *sk,
@@ -62,7 +62,7 @@ index 18a038d16434..070d0aad5204 100644
enum tcp_tw_status {
TCP_TW_SUCCESS = 0,
@@ -823,6 +824,11 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
@@ -824,6 +825,11 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
return max_t(s64, t1 - t0, 0);
}
@@ -74,7 +74,7 @@ index 18a038d16434..070d0aad5204 100644
static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
{
return tcp_ns_to_ts(skb->skb_mstamp_ns);
@@ -898,9 +904,14 @@ struct tcp_skb_cb {
@@ -899,9 +905,14 @@ struct tcp_skb_cb {
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
__u32 delivered;
/* start of send pipeline phase */
@@ -91,7 +91,7 @@ index 18a038d16434..070d0aad5204 100644
} tx; /* only used for outgoing skbs */
union {
struct inet_skb_parm h4;
@@ -1026,7 +1037,11 @@ enum tcp_ca_ack_event_flags {
@@ -1027,7 +1038,11 @@ enum tcp_ca_ack_event_flags {
#define TCP_CONG_NON_RESTRICTED 0x1
/* Requires ECN/ECT set on all packets */
#define TCP_CONG_NEEDS_ECN 0x2
@@ -104,7 +104,7 @@ index 18a038d16434..070d0aad5204 100644
union tcp_cc_info;
@@ -1046,8 +1061,11 @@ struct ack_sample {
@@ -1047,8 +1062,11 @@ struct ack_sample {
*/
struct rate_sample {
u64 prior_mstamp; /* starting timestamp for interval */
@@ -116,7 +116,7 @@ index 18a038d16434..070d0aad5204 100644
s32 delivered; /* number of packets delivered over interval */
s32 delivered_ce; /* number of packets delivered w/ CE marks*/
long interval_us; /* time for tp->delivered to incr "delivered" */
@@ -1061,6 +1079,7 @@ struct rate_sample {
@@ -1062,6 +1080,7 @@ struct rate_sample {
bool is_app_limited; /* is sample from packet with bubble in pipe? */
bool is_retrans; /* is sample from retransmission? */
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
@@ -124,7 +124,7 @@ index 18a038d16434..070d0aad5204 100644
};
struct tcp_congestion_ops {
@@ -1084,8 +1103,11 @@ struct tcp_congestion_ops {
@@ -1085,8 +1104,11 @@ struct tcp_congestion_ops {
/* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
@@ -138,7 +138,7 @@ index 18a038d16434..070d0aad5204 100644
/* call when packets are delivered to update cwnd and pacing rate,
* after all the ca_state processing. (optional)
@@ -1151,6 +1173,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer)
@@ -1152,6 +1174,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer)
}
#endif
@@ -153,7 +153,7 @@ index 18a038d16434..070d0aad5204 100644
static inline bool tcp_ca_needs_ecn(const struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1170,6 +1200,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
@@ -1171,6 +1201,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
void tcp_set_ca_state(struct sock *sk, const u8 ca_state);
/* From tcp_rate.c */
@@ -268,10 +268,10 @@ index b18ba8ef93ad..b4e3dcb27a20 100644
obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o
obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a60f6f4e7cd9..158d0ed5a7c4 100644
index 8d20d9221238..99c2e0357dec 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3186,6 +3186,7 @@ int tcp_disconnect(struct sock *sk, int flags)
@@ -3192,6 +3192,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->rx_opt.dsack = 0;
tp->rx_opt.num_sacks = 0;
tp->rcv_ooopack = 0;
@ -3037,7 +3037,7 @@ index 1b34050a7538..66d40449b3f4 100644
icsk->icsk_ca_ops->init(sk);
if (tcp_ca_needs_ecn(sk))
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 61b6710f337a..2efb52fbeee3 100644
index bf8b22218dd4..3ae56b0676a8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -349,7 +349,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
@@ -3269,10 +3269,10 @@ index a8f6d9d06f2e..a8b4c9504570 100644
rs->interval_us = max(snd_us, ack_us);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b839c2f91292..ae272ae2b707 100644
index 39eb947fe392..61ab4ee55b22 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -605,6 +605,7 @@ void tcp_write_timer_handler(struct sock *sk)
@@ -615,6 +615,7 @@ void tcp_write_timer_handler(struct sock *sk)
return;
}
@@ -3283,7 +3283,7 @@ index b839c2f91292..ae272ae2b707 100644
--
2.41.0
From 2414bafa00ccf9c4dca1327546ff9cfa2f87676f Mon Sep 17 00:00:00 2001
From 4b326373e0295ad142c417ef510d28cc491e0e73 Mon Sep 17 00:00:00 2001
From: Peter Jung <admin@ptr1337.dev>
Date: Thu, 1 Jun 2023 16:35:02 +0200
Subject: [PATCH 2/8] cachy
@@ -3411,7 +3411,7 @@ index 3c399f132e2d..a62ad01e6d11 100644
vmlinuz
voffset.h
diff --git a/Makefile b/Makefile
index 836643eaefee..161c4a3c9e3a 100644
index 0d3a9d3e73c1..f6ec2f6c57ca 100644
--- a/Makefile
+++ b/Makefile
@@ -818,6 +818,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
@@ -9419,7 +9419,7 @@ index 38ef6d06888e..0f78364efd4f 100644
config SCHED_HRTICK
diff --git a/kernel/fork.c b/kernel/fork.c
index ed4e01daccaa..ee9b6e4cf16a 100644
index 41c964104b58..915ad6dae416 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -100,6 +100,10 @@
@@ -9444,7 +9444,7 @@ index ed4e01daccaa..ee9b6e4cf16a 100644
/*
* Thread groups must share signals as well, and detached threads
* can only be started up within the thread group.
@@ -3422,6 +3430,12 @@ int ksys_unshare(unsigned long unshare_flags)
@@ -3419,6 +3427,12 @@ int ksys_unshare(unsigned long unshare_flags)
if (unshare_flags & CLONE_NEWNS)
unshare_flags |= CLONE_FS;
@@ -9758,7 +9758,7 @@ index ab0c5bd1a60f..f4989f706d7f 100644
--
2.41.0
From c6c9513db571d0b72d3a7c37aa010db70992b6a6 Mon Sep 17 00:00:00 2001
From d66ae67f1a8580742fdd5cda2e6dcade3cc770e0 Mon Sep 17 00:00:00 2001
From: Peter Jung <admin@ptr1337.dev>
Date: Tue, 9 May 2023 18:39:03 +0200
Subject: [PATCH 3/8] ddcci
@@ -12558,7 +12558,7 @@ index 000000000000..a219f031e584
--
2.41.0
From 7341d3f2d650ef7c81ace77bbaed7aeedf6d124b Mon Sep 17 00:00:00 2001
From 72c060c5d2883853d8530a436380a788f74248b1 Mon Sep 17 00:00:00 2001
From: Peter Jung <admin@ptr1337.dev>
Date: Thu, 1 Jun 2023 16:35:21 +0200
Subject: [PATCH 4/8] fixes
@@ -14250,7 +14250,7 @@ index b5210abb5141..4d8936e1f769 100644
--
2.41.0
From 2b82b34c90d5a0b7f64f438ae45a77777059a810 Mon Sep 17 00:00:00 2001
From 5cce371c10c8c702dda5f1f0ca4428ff0d336662 Mon Sep 17 00:00:00 2001
From: Peter Jung <admin@ptr1337.dev>
Date: Thu, 1 Jun 2023 16:35:38 +0200
Subject: [PATCH 5/8] ksm
@@ -14702,7 +14702,7 @@ index 860b2dcf3ac4..96fe36a6d0f5 100644
--
2.41.0
From d9705b7f78a157575856ee08474297f3abe38dfd Mon Sep 17 00:00:00 2001
From 21d2dc84c885c791c47d30838a265cf6c532d567 Mon Sep 17 00:00:00 2001
From: Peter Jung <admin@ptr1337.dev>
Date: Thu, 1 Jun 2023 16:35:57 +0200
Subject: [PATCH 6/8] sched
@@ -14806,7 +14806,7 @@ index 57bde66d95f7..fad77b5172e2 100644
/*
* Prefer to place tasks in a sibling domain
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 625d7483951c..b26ae200abef 100644
index 245cf62ce85a..2d3d13e52333 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3877,6 +3877,14 @@ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
@@ -15515,9 +15515,9 @@ index 6682535e37c8..ca4472281c28 100644
--
2.41.0
From b205c598effc456c3c47800294a7e3c4f110e0c2 Mon Sep 17 00:00:00 2001
From de932d02ee465828c5f4902165e38d9fb74f7758 Mon Sep 17 00:00:00 2001
From: Peter Jung <admin@ptr1337.dev>
Date: Thu, 1 Jun 2023 16:36:10 +0200
Date: Wed, 14 Jun 2023 19:42:38 +0200
Subject: [PATCH 7/8] vma
Signed-off-by: Peter Jung <admin@ptr1337.dev>
@@ -15526,20 +15526,24 @@ Signed-off-by: Peter Jung <admin@ptr1337.dev>
arch/powerpc/mm/fault.c | 3 ++-
arch/s390/mm/fault.c | 3 ++-
arch/x86/mm/fault.c | 3 ++-
include/linux/mm_types.h | 6 +++++-
include/linux/pagemap.h | 14 ++++++++++----
mm/filemap.c | 37 +++++++++++++++++++++++--------------
mm/memory.c | 39 ++++++++++++++++++++++++++++-----------
8 files changed, 74 insertions(+), 34 deletions(-)
fs/userfaultfd.c | 42 ++++++++++++++++++-----------------
include/linux/mm_types.h | 7 +++++-
include/linux/pagemap.h | 14 ++++++++----
mm/filemap.c | 37 +++++++++++++++++++------------
mm/madvise.c | 4 ++--
mm/memory.c | 48 ++++++++++++++++++++++------------------
mm/swap.h | 1 -
mm/swap_state.c | 12 +++++-----
12 files changed, 103 insertions(+), 74 deletions(-)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index cb21ccd7940d..92ecac055e4d 100644
index 6045a5117ac1..8f59badbffb5 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -602,7 +602,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
@@ -601,7 +601,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
goto lock_mmap;
}
fault = handle_mm_fault(vma, addr & PAGE_MASK,
mm_flags | FAULT_FLAG_VMA_LOCK, regs);
fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
- vma_end_read(vma);
+ if (!(fault & VM_FAULT_VMA_UNLOCKED))
+ vma_end_read(vma);
@@ -15588,19 +15592,119 @@ index e4399983c50c..ef62ab2fd211 100644
if (!(fault & VM_FAULT_RETRY)) {
count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 0fd96d6e39ce..23c3a4ce45d9 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -277,17 +277,17 @@ static inline struct uffd_msg userfault_msg(unsigned long address,
* hugepmd ranges.
*/
static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
- struct vm_area_struct *vma,
- unsigned long address,
- unsigned long flags,
- unsigned long reason)
+ struct vm_fault *vmf,
+ unsigned long reason)
{
+ struct vm_area_struct *vma = vmf->vma;
pte_t *ptep, pte;
bool ret = true;
- mmap_assert_locked(ctx->mm);
+ if (!(vmf->flags & FAULT_FLAG_VMA_LOCK))
+ mmap_assert_locked(ctx->mm);
- ptep = hugetlb_walk(vma, address, vma_mmu_pagesize(vma));
+ ptep = hugetlb_walk(vma, vmf->address, vma_mmu_pagesize(vma));
if (!ptep)
goto out;
@@ -308,10 +308,8 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
}
#else
static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
- struct vm_area_struct *vma,
- unsigned long address,
- unsigned long flags,
- unsigned long reason)
+ struct vm_fault *vmf,
+ unsigned long reason)
{
return false; /* should never get here */
}
@@ -325,11 +323,11 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
* threads.
*/
static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
- unsigned long address,
- unsigned long flags,
+ struct vm_fault *vmf,
unsigned long reason)
{
struct mm_struct *mm = ctx->mm;
+ unsigned long address = vmf->address;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
@@ -337,7 +335,8 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
pte_t *pte;
bool ret = true;
- mmap_assert_locked(mm);
+ if (!(vmf->flags & FAULT_FLAG_VMA_LOCK))
+ mmap_assert_locked(mm);
pgd = pgd_offset(mm, address);
if (!pgd_present(*pgd))
@@ -445,7 +444,8 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
* Coredumping runs without mmap_lock so we can only check that
* the mmap_lock is held, if PF_DUMPCORE was not set.
*/
- mmap_assert_locked(mm);
+ if (!(vmf->flags & FAULT_FLAG_VMA_LOCK))
+ mmap_assert_locked(mm);
ctx = vma->vm_userfaultfd_ctx.ctx;
if (!ctx)
@@ -561,15 +561,17 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
spin_unlock_irq(&ctx->fault_pending_wqh.lock);
if (!is_vm_hugetlb_page(vma))
- must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
- reason);
+ must_wait = userfaultfd_must_wait(ctx, vmf, reason);
else
- must_wait = userfaultfd_huge_must_wait(ctx, vma,
- vmf->address,
- vmf->flags, reason);
+ must_wait = userfaultfd_huge_must_wait(ctx, vmf, reason);
if (is_vm_hugetlb_page(vma))
hugetlb_vma_unlock_read(vma);
- mmap_read_unlock(mm);
+ if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+ vma_end_read(vma);
+ /* WARNING: VMA can't be used after this */
+ ret |= VM_FAULT_VMA_UNLOCKED;
+ } else
+ mmap_read_unlock(mm);
if (likely(must_wait && !READ_ONCE(ctx->released))) {
wake_up_poll(&ctx->fd_wqh, EPOLLIN);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 306a3d1a0fa6..b3b57c6da0e1 100644
index 306a3d1a0fa6..bd6b95c82f7a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1030,6 +1030,7 @@ typedef __bitwise unsigned int vm_fault_t;
@@ -1030,6 +1030,8 @@ typedef __bitwise unsigned int vm_fault_t;
* fsync() to complete (for synchronous page faults
* in DAX)
* @VM_FAULT_COMPLETED: ->fault completed, meanwhile mmap lock released
+ * @VM_FAULT_VMA_UNLOCKED: VMA lock was released
+ * @VM_FAULT_VMA_UNLOCKED: VMA lock was released, vmf->vma should no longer
+ * be accessed
* @VM_FAULT_HINDEX_MASK: mask HINDEX value
*
*/
@@ -1047,6 +1048,7 @@ enum vm_fault_reason {
@@ -1047,6 +1049,7 @@ enum vm_fault_reason {
VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000,
VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000,
VM_FAULT_COMPLETED = (__force vm_fault_t)0x004000,
@ -15608,7 +15712,7 @@ index 306a3d1a0fa6..b3b57c6da0e1 100644
VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000,
};
@@ -1070,7 +1072,9 @@ enum vm_fault_reason {
@@ -1070,7 +1073,9 @@ enum vm_fault_reason {
{ VM_FAULT_RETRY, "RETRY" }, \
{ VM_FAULT_FALLBACK, "FALLBACK" }, \
{ VM_FAULT_DONE_COW, "DONE_COW" }, \
@@ -15726,8 +15830,30 @@ index b4c9bd368b7e..838955635fbc 100644
return true;
}
diff --git a/mm/madvise.c b/mm/madvise.c
index b5ffbaf616f5..b1e8adf1234e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -215,7 +215,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
continue;
page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
- vma, index, false, &splug);
+ vma, index, &splug);
if (page)
put_page(page);
}
@@ -252,7 +252,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
rcu_read_unlock();
page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
- NULL, 0, false, &splug);
+ NULL, 0, &splug);
if (page)
put_page(page);
diff --git a/mm/memory.c b/mm/memory.c
index f69fbc251198..e1cd39f00756 100644
index f69fbc251198..b2ea015dcb87 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3568,6 +3568,7 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
@@ -15750,12 +15876,11 @@ index f69fbc251198..e1cd39f00756 100644
return VM_FAULT_RETRY;
}
mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0,
@@ -3704,27 +3707,40 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
@@ -3704,27 +3707,39 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
bool exclusive = false;
swp_entry_t entry;
pte_t pte;
- int locked;
+ bool locked;
+ bool lock_dropped;
vm_fault_t ret = 0;
void *shadow = NULL;
@ -15779,7 +15904,7 @@ index f69fbc251198..e1cd39f00756 100644
+ if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+ /* No need to hold VMA lock for migration */
+ vma_end_read(vma);
+ /* CAUTION! VMA can't be used after this */
+ /* WARNING: VMA can't be used after this */
+ ret |= VM_FAULT_VMA_UNLOCKED;
+ }
+ migration_entry_wait(mm, vmf->pmd, vmf->address);
@ -15799,23 +15924,108 @@ index f69fbc251198..e1cd39f00756 100644
vmf->page = pfn_swap_entry_to_page(entry);
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);
@@ -3825,9 +3841,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
@@ -3825,9 +3840,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
goto out_release;
}
- locked = folio_lock_or_retry(folio, vma->vm_mm, vmf->flags);
-
+ locked = folio_lock_or_retry(folio, vma, vmf->flags, &lock_dropped);
if (!locked) {
- if (!locked) {
+ if (!folio_lock_or_retry(folio, vma, vmf->flags, &lock_dropped)) {
+ if (lock_dropped && vmf->flags & FAULT_FLAG_VMA_LOCK)
+ ret |= VM_FAULT_VMA_UNLOCKED;
ret |= VM_FAULT_RETRY;
goto out_release;
}
@@ -5291,15 +5306,6 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
if (!vma_start_read(vma))
goto inval;
- /*
- * Due to the possibility of userfault handler dropping mmap_lock, avoid
- * it for now and fall back to page fault handling under mmap_lock.
- */
- if (userfaultfd_armed(vma)) {
- vma_end_read(vma);
- goto inval;
- }
-
/* Check since vm_start/vm_end might change before we lock the VMA */
if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
vma_end_read(vma);
diff --git a/mm/swap.h b/mm/swap.h
index 7c033d793f15..8a3c7a0ace4f 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -46,7 +46,6 @@ struct folio *filemap_get_incore_folio(struct address_space *mapping,
struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
struct vm_area_struct *vma,
unsigned long addr,
- bool do_poll,
struct swap_iocb **plug);
struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
struct vm_area_struct *vma,
diff --git a/mm/swap_state.c b/mm/swap_state.c
index b76a65ac28b3..a3839de71f3f 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -517,15 +517,14 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
*/
struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
struct vm_area_struct *vma,
- unsigned long addr, bool do_poll,
- struct swap_iocb **plug)
+ unsigned long addr, struct swap_iocb **plug)
{
bool page_was_allocated;
struct page *retpage = __read_swap_cache_async(entry, gfp_mask,
vma, addr, &page_was_allocated);
if (page_was_allocated)
- swap_readpage(retpage, do_poll, plug);
+ swap_readpage(retpage, false, plug);
return retpage;
}
@@ -620,7 +619,7 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
struct swap_info_struct *si = swp_swap_info(entry);
struct blk_plug plug;
struct swap_iocb *splug = NULL;
- bool do_poll = true, page_allocated;
+ bool page_allocated;
struct vm_area_struct *vma = vmf->vma;
unsigned long addr = vmf->address;
@@ -628,7 +627,6 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
if (!mask)
goto skip;
- do_poll = false;
/* Read a page_cluster sized and aligned cluster around offset. */
start_offset = offset & ~mask;
end_offset = offset | mask;
@@ -660,7 +658,7 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
lru_add_drain(); /* Push any new pages onto the LRU now */
skip:
/* The page was likely read above, so no need for plugging here */
- return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll, NULL);
+ return read_swap_cache_async(entry, gfp_mask, vma, addr, NULL);
}
int init_swap_address_space(unsigned int type, unsigned long nr_pages)
@@ -825,7 +823,7 @@ static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
skip:
/* The page was likely read above, so no need for plugging here */
return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
- ra_info.win == 1, NULL);
+ NULL);
}
/**
--
2.41.0
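Taken together, the arch hunks above and the new VM_FAULT_VMA_UNLOCKED bit establish a single ownership rule for the per-VMA read lock: whichever side drops the lock reports it. A consolidated sketch of the caller side, as the arm64/powerpc/s390/x86 fault handlers now implement it (kernel-context fragment for illustration, not standalone code):

	/* Attempt the fault while holding only the per-VMA read lock. */
	fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);

	/*
	 * The fault path may drop the VMA lock itself -- the userfaultfd
	 * wait, the swap-migration wait and folio_lock_or_retry() above all
	 * do -- and reports that via VM_FAULT_VMA_UNLOCKED.  Unlock only if
	 * the flag is clear; if it is set, the VMA may already be reused and
	 * must not be touched again.
	 */
	if (!(fault & VM_FAULT_VMA_UNLOCKED))
		vma_end_read(vma);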
From 4c79598323457b04cefa717a2adbf82586477840 Mon Sep 17 00:00:00 2001
From 8af5970aac59ebe84af695501c91e8c7f2a1b91d Mon Sep 17 00:00:00 2001
From: Peter Jung <admin@ptr1337.dev>
Date: Tue, 9 May 2023 18:40:05 +0200
Subject: [PATCH 8/8] zstd 1.5.5

File diff suppressed because it is too large

patches/0003-bore.patch Normal file

@@ -0,0 +1,421 @@
From 32c617afc05751783be3eb0f5a1d15e31dfc7919 Mon Sep 17 00:00:00 2001
From: Piotr Gorski <lucjan.lucjanov@gmail.com>
Date: Thu, 8 Jun 2023 11:14:23 +0200
Subject: [PATCH] bore-cachy
Signed-off-by: Piotr Gorski <lucjan.lucjanov@gmail.com>
---
include/linux/sched.h | 10 +++
init/Kconfig | 20 ++++++
kernel/sched/core.c | 62 ++++++++++++++++++
kernel/sched/debug.c | 3 +
kernel/sched/fair.c | 136 ++++++++++++++++++++++++++++++++++++++++
kernel/sched/features.h | 8 +++
6 files changed, 239 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index eed5d65b8..38fbebe4d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -557,6 +557,12 @@ struct sched_entity {
u64 sum_exec_runtime;
u64 vruntime;
u64 prev_sum_exec_runtime;
+#ifdef CONFIG_SCHED_BORE
+ u64 prev_burst_time;
+ u64 burst_time;
+ u64 max_burst_time;
+ u8 penalty_score;
+#endif // CONFIG_SCHED_BORE
u64 nr_migrations;
@@ -985,6 +991,10 @@ struct task_struct {
struct list_head children;
struct list_head sibling;
struct task_struct *group_leader;
+#ifdef CONFIG_SCHED_BORE
+ u64 child_burst_cache;
+ u64 child_burst_last_cached;
+#endif // CONFIG_SCHED_BORE
/*
* 'ptraced' is the list of tasks this task is using ptrace() on.
diff --git a/init/Kconfig b/init/Kconfig
index 0147b4a33..4ab7e154b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1290,6 +1290,26 @@ config CHECKPOINT_RESTORE
If unsure, say N here.
+config SCHED_BORE
+ bool "Burst-Oriented Response Enhancer"
+ default y
+ help
+ In desktop and mobile computing, one might prefer interactive
+ tasks to stay responsive no matter what runs in the background.
+
+ Enabling this kernel feature modifies the scheduler to distinguish
+ tasks by their burst time (runtime since the task last slept or
+ yielded) and to prioritize those that are less bursty.
+ Such tasks usually include the window compositor, widget backends,
+ terminal emulators, video playback, games, and so on.
+ At a small cost to scheduling fairness, this may improve
+ responsiveness, especially under heavy background workloads.
+
+ You can turn it off by setting the sysctl kernel.sched_bore = 0.
+ Enabling this feature implies NO_GENTLE_FAIR_SLEEPERS by default.
+
+ If unsure, say Y here.
+
config SCHED_AUTOGROUP
bool "Automatic process group scheduling"
select CGROUPS
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bcb3a7e68..a0f227344 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4484,6 +4484,57 @@ int wake_up_state(struct task_struct *p, unsigned int state)
return try_to_wake_up(p, state, 0);
}
+#ifdef CONFIG_SCHED_BORE
+#define CHILD_BURST_CUTOFF_BITS 9
+extern unsigned int sched_burst_cache_lifetime;
+
+void __init sched_init_bore(void) {
+ init_task.child_burst_cache = 0;
+ init_task.child_burst_last_cached = 0;
+ init_task.se.prev_burst_time = 0;
+ init_task.se.burst_time = 0;
+ init_task.se.max_burst_time = 0;
+}
+
+void inline __sched_fork_bore(struct task_struct *p) {
+ p->child_burst_cache = 0;
+ p->child_burst_last_cached = 0;
+ p->se.burst_time = 0;
+}
+
+static inline void update_task_child_burst_time_cache(struct task_struct *p) {
+ u64 sum = 0, avg_burst_time = 0;
+ u32 cnt = 0;
+ struct task_struct *child;
+
+ read_lock(&tasklist_lock);
+ list_for_each_entry(child, &p->children, sibling) {
+ cnt++;
+ sum += child->se.max_burst_time >> CHILD_BURST_CUTOFF_BITS;
+ }
+ read_unlock(&tasklist_lock);
+
+ if (cnt) avg_burst_time = div_u64(sum, cnt) << CHILD_BURST_CUTOFF_BITS;
+ p->child_burst_cache = max(avg_burst_time, p->se.max_burst_time);
+}
+
+static void update_task_initial_burst_time(struct task_struct *task) {
+ struct sched_entity *se = &task->se;
+ struct task_struct *par = task->real_parent;
+ u64 ktime = ktime_to_ns(ktime_get());
+
+ if (likely(par)) {
+ if (par->child_burst_last_cached + sched_burst_cache_lifetime < ktime) {
+ par->child_burst_last_cached = ktime;
+ update_task_child_burst_time_cache(par);
+ }
+ se->prev_burst_time = max(se->prev_burst_time, par->child_burst_cache);
+ }
+
+ se->max_burst_time = se->prev_burst_time;
+}
+#endif // CONFIG_SCHED_BORE
+
/*
* Perform scheduler related setup for a newly forked process p.
* p is forked by current.
@@ -4500,6 +4551,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.prev_sum_exec_runtime = 0;
p->se.nr_migrations = 0;
p->se.vruntime = 0;
+#ifdef CONFIG_SCHED_BORE
+ __sched_fork_bore(p);
+#endif // CONFIG_SCHED_BORE
INIT_LIST_HEAD(&p->se.group_node);
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -4726,6 +4780,9 @@ late_initcall(sched_core_sysctl_init);
int sched_fork(unsigned long clone_flags, struct task_struct *p)
{
__sched_fork(clone_flags, p);
+#ifdef CONFIG_SCHED_BORE
+ update_task_initial_burst_time(p);
+#endif // CONFIG_SCHED_BORE
/*
* We mark the process as NEW here. This guarantees that
* nobody will actually run it, and a signal or other external
@@ -9922,6 +9979,11 @@ void __init sched_init(void)
BUG_ON(&dl_sched_class != &stop_sched_class + 1);
#endif
+#ifdef CONFIG_SCHED_BORE
+ sched_init_bore();
+ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 2.4.0 by Masahito Suzuki");
+#endif // CONFIG_SCHED_BORE
+
wait_bit_init();
#ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 066ff1c8a..4bc07d405 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -593,6 +593,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));
+#ifdef CONFIG_SCHED_BORE
+ SEQ_printf(m, " %2d", p->se.penalty_score);
+#endif
#ifdef CONFIG_NUMA_BALANCING
SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
#endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9fe8288b1..ac29ac350 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -19,6 +19,9 @@
*
* Adaptive scheduling granularity, math enhancements by Peter Zijlstra
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
+ *
+ * Burst-Oriented Response Enhancer (BORE) CPU Scheduler
+ * Copyright (C) 2021-2023 Masahito Suzuki <firelzrd@gmail.com>
*/
#include <linux/energy_model.h>
#include <linux/mmap_lock.h>
@@ -140,6 +143,61 @@ static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+#ifdef CONFIG_SCHED_BORE
+unsigned int __read_mostly sched_bore = 1;
+unsigned int __read_mostly sched_burst_cache_lifetime = 15000000;
+unsigned int __read_mostly sched_burst_penalty_offset = 12;
+unsigned int __read_mostly sched_burst_penalty_scale = 1292;
+unsigned int __read_mostly sched_burst_smoothness = 1;
+static int three = 3;
+static int sixty_four = 64;
+static int maxval_12_bits = 4095;
+
+#define FIXED_SHIFT 10
+#define FIXED_ONE (1 << FIXED_SHIFT)
+typedef u32 fixed;
+
+static void update_burst_score(struct sched_entity *se) {
+ u64 burst_time = se->max_burst_time;
+
+ int msb = fls64(burst_time);
+ fixed integer_part = msb << FIXED_SHIFT;
+ fixed fractional_part = burst_time << (64 - msb) << 1 >> (64 - FIXED_SHIFT);
+ fixed greed = integer_part | fractional_part;
+
+ fixed tolerance = sched_burst_penalty_offset << FIXED_SHIFT;
+ fixed penalty = max(0, (s32)greed - (s32)tolerance);
+ fixed scaled_penalty = penalty * sched_burst_penalty_scale >> 10;
+
+ u8 score = min(39U, scaled_penalty >> FIXED_SHIFT);
+ se->penalty_score = score;
+}
+
+static inline u64 penalty_scale(u64 delta, struct sched_entity *se) {
+ return mul_u64_u32_shr(delta, sched_prio_to_wmult[se->penalty_score], 22);
+}
+
+static inline u64 __binary_smooth(u64 new, u64 old, unsigned int smoothness) {
+ return (new + old * ((1 << smoothness) - 1)) >> smoothness;
+}
+
+void restart_burst(struct sched_entity *se) {
+ se->max_burst_time = se->prev_burst_time = __binary_smooth(
+ se->burst_time, se->prev_burst_time, sched_burst_smoothness);
+ se->burst_time = 0;
+}
+
+#define calc_delta_fair(delta, se) __calc_delta_fair(delta, se, true)
+#define calc_delta_fair_unscaled(delta, se) __calc_delta_fair(delta, se, false)
+static inline u64
+__calc_delta_fair(u64 delta, struct sched_entity *se, bool bscale);
+
+static s64 wakeup_preempt_backstep_delta(u64 rtime, struct sched_entity *se) {
+ u64 delta = calc_delta_fair_unscaled(rtime, se);
+ return delta - penalty_scale(delta, se);
+}
+#endif // CONFIG_SCHED_BORE
+
int sched_thermal_decay_shift;
static int __init setup_sched_thermal_decay_shift(char *str)
{
@@ -203,6 +261,51 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536;
#ifdef CONFIG_SYSCTL
static struct ctl_table sched_fair_sysctls[] = {
+#ifdef CONFIG_SCHED_BORE
+ {
+ .procname = "sched_bore",
+ .data = &sched_bore,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "sched_burst_cache_lifetime",
+ .data = &sched_burst_cache_lifetime,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_burst_penalty_offset",
+ .data = &sched_burst_penalty_offset,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &sixty_four,
+ },
+ {
+ .procname = "sched_burst_penalty_scale",
+ .data = &sched_burst_penalty_scale,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &maxval_12_bits,
+ },
+ {
+ .procname = "sched_burst_smoothness",
+ .data = &sched_burst_smoothness,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &three,
+ },
+#endif // CONFIG_SCHED_BORE
{
.procname = "sched_child_runs_first",
.data = &sysctl_sched_child_runs_first,
@@ -724,11 +827,19 @@ int sched_update_scaling(void)
/*
* delta /= w
*/
+#ifdef CONFIG_SCHED_BORE
+static inline u64
+__calc_delta_fair(u64 delta, struct sched_entity *se, bool bscale)
+#else // CONFIG_SCHED_BORE
static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
+#endif // CONFIG_SCHED_BORE
{
if (unlikely(se->load.weight != NICE_0_LOAD))
delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
+#ifdef CONFIG_SCHED_BORE
+ if (bscale && sched_bore) delta = penalty_scale(delta, se);
+#endif // CONFIG_SCHED_BORE
return delta;
}
@@ -938,6 +1049,14 @@ static void update_curr(struct cfs_rq *cfs_rq)
curr->sum_exec_runtime += delta_exec;
schedstat_add(cfs_rq->exec_clock, delta_exec);
+#ifdef CONFIG_SCHED_BORE
+ curr->burst_time += delta_exec;
+ curr->max_burst_time = max(curr->max_burst_time, curr->burst_time);
+ update_burst_score(curr);
+ if (sched_bore)
+ curr->vruntime += penalty_scale(calc_delta_fair(delta_exec, curr), curr);
+ else
+#endif // CONFIG_SCHED_BORE
curr->vruntime += calc_delta_fair(delta_exec, curr);
update_min_vruntime(cfs_rq);
@@ -6410,6 +6529,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
util_est_dequeue(&rq->cfs, p);
for_each_sched_entity(se) {
+#ifdef CONFIG_SCHED_BORE
+ if (task_sleep) restart_burst(se);
+#endif // CONFIG_SCHED_BORE
cfs_rq = cfs_rq_of(se);
dequeue_entity(cfs_rq, se, flags);
@@ -7844,7 +7966,11 @@ static unsigned long wakeup_gran(struct sched_entity *se)
* This is especially important for buddies when the leftmost
* task is higher priority than the buddy.
*/
+#ifdef CONFIG_SCHED_BORE
+ return calc_delta_fair_unscaled(gran, se);
+#else // CONFIG_SCHED_BORE
return calc_delta_fair(gran, se);
+#endif // CONFIG_SCHED_BORE
}
/*
@@ -7865,6 +7991,13 @@ static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
{
s64 gran, vdiff = curr->vruntime - se->vruntime;
+#ifdef CONFIG_SCHED_BORE
+ if (sched_bore) {
+ u64 rtime = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
+ vdiff += wakeup_preempt_backstep_delta(rtime, curr)
+ - wakeup_preempt_backstep_delta(rtime, se);
+ }
+#endif // CONFIG_SCHED_BORE
if (vdiff <= 0)
return -1;
@@ -8210,6 +8343,9 @@ static void yield_task_fair(struct rq *rq)
struct task_struct *curr = rq->curr;
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
struct sched_entity *se = &curr->se;
+#ifdef CONFIG_SCHED_BORE
+ restart_burst(se);
+#endif // CONFIG_SCHED_BORE
/*
* Are we the only task in the tree?
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 9e390eb82..696ea7081 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -4,7 +4,11 @@
* them to run sooner, but does not allow tons of sleepers to
* rip the spread apart.
*/
+#ifdef CONFIG_SCHED_BORE
+SCHED_FEAT(GENTLE_FAIR_SLEEPERS, false)
+#else // CONFIG_SCHED_BORE
SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
+#endif // CONFIG_SCHED_BORE
/*
* Place new tasks ahead so that they do not starve already running
@@ -17,7 +21,11 @@ SCHED_FEAT(START_DEBIT, true)
* wakeup-preemption), since its likely going to consume data we
* touched, increases cache locality.
*/
+#ifdef CONFIG_SCHED_BORE
+SCHED_FEAT(NEXT_BUDDY, true)
+#else // CONFIG_SCHED_BORE
SCHED_FEAT(NEXT_BUDDY, false)
+#endif // CONFIG_SCHED_BORE
/*
* Prefer to schedule the task that ran last (when we did
--
2.41.0.rc2
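The scoring in update_burst_score() above is a base-2 logarithm in 10-bit fixed point: fls64() supplies the integer part, the bits just below the most significant bit supply the fraction, sched_burst_penalty_offset is subtracted as a tolerance, and sched_burst_penalty_scale (1292/1024 ≈ 1.26) stretches the remainder onto the 0..39 range used to index sched_prio_to_wmult[]. A minimal userspace re-derivation of that arithmetic, with the patch's default tunables hard-coded (illustrative sketch only, not kernel code; uses a GCC/Clang builtin in place of fls64()):

/* bore_score.c -- userspace sketch of BORE's burst-score arithmetic.
 * Mirrors update_burst_score() from the patch above with the default
 * tunables hard-coded; illustrative only, not kernel code.
 * Build: cc -O2 -o bore_score bore_score.c
 */
#include <stdint.h>
#include <stdio.h>

#define FIXED_SHIFT 10                /* 10 fractional bits, as in the patch */

static const uint32_t penalty_offset = 12;   /* sched_burst_penalty_offset */
static const uint32_t penalty_scale  = 1292; /* sched_burst_penalty_scale  */

/* fls64(): 1-based index of the most significant set bit, 0 for 0 */
static int fls64_user(uint64_t x)
{
	return x ? 64 - __builtin_clzll(x) : 0;
}

/* log2(burst_time) in fixed point, minus the offset, scaled to 0..39 */
static unsigned int burst_score(uint64_t burst_time_ns)
{
	if (!burst_time_ns)
		return 0;                 /* avoid the shift-by-64 edge case */

	int msb = fls64_user(burst_time_ns);
	uint32_t integer_part = (uint32_t)msb << FIXED_SHIFT;
	/* the FIXED_SHIFT bits right below the MSB approximate the fraction */
	uint32_t fractional_part =
		(uint32_t)(burst_time_ns << (64 - msb) << 1 >> (64 - FIXED_SHIFT));
	uint32_t greed = integer_part | fractional_part;

	uint32_t tolerance = penalty_offset << FIXED_SHIFT;
	int32_t penalty = (int32_t)greed - (int32_t)tolerance;
	if (penalty < 0)
		penalty = 0;
	uint32_t scaled = (uint32_t)penalty * penalty_scale >> 10;

	uint32_t score = scaled >> FIXED_SHIFT;
	return score < 39 ? score : 39;
}

int main(void)
{
	/* e.g. a 100 ms burst: fixed-point log2 ~= 27.5, minus offset 12,
	 * times 1292/1024 ~= 19.5 -> score 19 */
	for (uint64_t ns = 1000; ns <= 10000000000ULL; ns *= 10)
		printf("burst %11llu ns -> penalty score %u\n",
		       (unsigned long long)ns, burst_score(ns));
	return 0;
}

Run, this prints a score of 0 for sub-microsecond bursts (inside the tolerance), rising into the high 20s for multi-second bursts; that is the curve the sysctls above tune. At runtime the knobs appear under /proc/sys/kernel (sched_bore, sched_burst_cache_lifetime, sched_burst_penalty_offset, sched_burst_penalty_scale, sched_burst_smoothness), so per the Kconfig help, sysctl kernel.sched_bore=0 switches the scaling off without a rebuild.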


@@ -2,4 +2,4 @@
echo "Pika Kernel - Building"
make -j`nproc` bindeb-pkg LOCALVERSION=-pikaos KDEB_PKGVERSION=$(make kernelversion)-5
make -j`nproc` bindeb-pkg LOCALVERSION=-pikaos KDEB_PKGVERSION=$(make kernelversion)-1


@@ -7,8 +7,10 @@ echo "Pika Kernel - Applying patches"
patch -Np1 < "../patches/0001-cachy-all.patch"
# orig patch from cachy
patch -Np1 < "../patches/0002-eevdf.patch"
# orig patch from cachy
patch -Np1 < "../patches/0003-bore.patch"
# Nobara patches are here: https://github.com/sammilucia/nobara-kernel-fork
# Allow setting custom pollrates for usb devices
patch -Np1 < "../patches/0003-Allow-to-set-custom-USB-pollrate-for-specific-device.patch"
patch -Np1 < "../patches/0004-Allow-to-set-custom-USB-pollrate-for-specific-device.patch"
# Allow pre polaris cards to use the amdgpu kernel module
patch -Np1 < "../patches/0004-amdgpu-si-cik-default.patch"
patch -Np1 < "../patches/0005-amdgpu-si-cik-default.patch"


@@ -2,7 +2,7 @@
echo "Pika Kernel - Getting source"
wget -nv https://git.kernel.org/torvalds/t/linux-6.4-rc5.tar.gz
tar -xf ./linux-6.4-rc5.tar.gz
wget -nv https://git.kernel.org/torvalds/t/linux-6.4-rc6.tar.gz
tar -xf ./linux-6.4-rc6.tar.gz
cd linux-6.4-rc5
cd linux-6.4-rc6