728 lines
24 KiB
Diff
728 lines
24 KiB
Diff
|
From 58a9cf052bf4f9bca16b7a4626b00643d35a2c2a Mon Sep 17 00:00:00 2001
|
||
|
From: Peter Jung <admin@ptr1337.dev>
|
||
|
Date: Mon, 11 Nov 2024 09:25:43 +0100
|
||
|
Subject: [PATCH] preempt-lazy
|
||
|
|
||
|
Signed-off-by: Peter Jung <admin@ptr1337.dev>
|
||
|
---
|
||
|
arch/x86/Kconfig | 1 +
|
||
|
arch/x86/include/asm/thread_info.h | 6 +-
|
||
|
include/linux/entry-common.h | 3 +-
|
||
|
include/linux/entry-kvm.h | 5 +-
|
||
|
include/linux/preempt.h | 8 ++-
|
||
|
include/linux/sched.h | 3 +-
|
||
|
include/linux/thread_info.h | 21 ++++--
|
||
|
include/linux/trace_events.h | 8 +--
|
||
|
kernel/Kconfig.preempt | 25 +++++--
|
||
|
kernel/entry/common.c | 2 +-
|
||
|
kernel/entry/kvm.c | 4 +-
|
||
|
kernel/rcu/srcutiny.c | 2 +-
|
||
|
kernel/sched/core.c | 112 +++++++++++++++++++++++++----
|
||
|
kernel/sched/debug.c | 7 +-
|
||
|
kernel/sched/fair.c | 6 +-
|
||
|
kernel/sched/sched.h | 1 +
|
||
|
kernel/trace/trace.c | 2 +
|
||
|
kernel/trace/trace_output.c | 16 ++++-
|
||
|
18 files changed, 186 insertions(+), 46 deletions(-)
|
||
|
|
||
|
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
|
||
|
index 89b8fc452a7c..b7316721ebf2 100644
|
||
|
--- a/arch/x86/Kconfig
|
||
|
+++ b/arch/x86/Kconfig
|
||
|
@@ -93,6 +93,7 @@ config X86
|
||
|
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
|
||
|
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
|
||
|
select ARCH_HAS_PMEM_API if X86_64
|
||
|
+ select ARCH_HAS_PREEMPT_LAZY
|
||
|
select ARCH_HAS_PTE_DEVMAP if X86_64
|
||
|
select ARCH_HAS_PTE_SPECIAL
|
||
|
select ARCH_HAS_HW_PTE_YOUNG
|
||
|
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
|
||
|
index 12da7dfd5ef1..75bb390f7baf 100644
|
||
|
--- a/arch/x86/include/asm/thread_info.h
|
||
|
+++ b/arch/x86/include/asm/thread_info.h
|
||
|
@@ -87,8 +87,9 @@ struct thread_info {
|
||
|
#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
|
||
|
#define TIF_SIGPENDING 2 /* signal pending */
|
||
|
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
|
||
|
-#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
|
||
|
-#define TIF_SSBD 5 /* Speculative store bypass disable */
|
||
|
+#define TIF_NEED_RESCHED_LAZY 4 /* lazy rescheduling requested */
|
||
|
+#define TIF_SINGLESTEP 5 /* reenable singlestep on user return*/
|
||
|
+#define TIF_SSBD 6 /* Speculative store bypass disable */
|
||
|
#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
|
||
|
#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */
|
||
|
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
|
||
|
@@ -110,6 +111,7 @@ struct thread_info {
|
||
|
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
|
||
|
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
|
||
|
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
|
||
|
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
|
||
|
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
|
||
|
#define _TIF_SSBD (1 << TIF_SSBD)
|
||
|
#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
|
||
|
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
|
||
|
index 1e50cdb83ae5..fc61d0205c97 100644
|
||
|
--- a/include/linux/entry-common.h
|
||
|
+++ b/include/linux/entry-common.h
|
||
|
@@ -64,7 +64,8 @@
|
||
|
|
||
|
#define EXIT_TO_USER_MODE_WORK \
|
||
|
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
|
||
|
- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
|
||
|
+ _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
|
||
|
+ _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
|
||
|
ARCH_EXIT_TO_USER_MODE_WORK)
|
||
|
|
||
|
/**
|
||
|
diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h
|
||
|
index 6813171afccb..16149f6625e4 100644
|
||
|
--- a/include/linux/entry-kvm.h
|
||
|
+++ b/include/linux/entry-kvm.h
|
||
|
@@ -17,8 +17,9 @@
|
||
|
#endif
|
||
|
|
||
|
#define XFER_TO_GUEST_MODE_WORK \
|
||
|
- (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \
|
||
|
- _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK)
|
||
|
+ (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_SIGPENDING | \
|
||
|
+ _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \
|
||
|
+ ARCH_XFER_TO_GUEST_MODE_WORK)
|
||
|
|
||
|
struct kvm_vcpu;
|
||
|
|
||
|
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
|
||
|
index ce76f1a45722..ca86235ac15c 100644
|
||
|
--- a/include/linux/preempt.h
|
||
|
+++ b/include/linux/preempt.h
|
||
|
@@ -486,6 +486,7 @@ DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
|
||
|
extern bool preempt_model_none(void);
|
||
|
extern bool preempt_model_voluntary(void);
|
||
|
extern bool preempt_model_full(void);
|
||
|
+extern bool preempt_model_lazy(void);
|
||
|
|
||
|
#else
|
||
|
|
||
|
@@ -502,6 +503,11 @@ static inline bool preempt_model_full(void)
|
||
|
return IS_ENABLED(CONFIG_PREEMPT);
|
||
|
}
|
||
|
|
||
|
+static inline bool preempt_model_lazy(void)
|
||
|
+{
|
||
|
+ return IS_ENABLED(CONFIG_PREEMPT_LAZY);
|
||
|
+}
|
||
|
+
|
||
|
#endif
|
||
|
|
||
|
static inline bool preempt_model_rt(void)
|
||
|
@@ -519,7 +525,7 @@ static inline bool preempt_model_rt(void)
|
||
|
*/
|
||
|
static inline bool preempt_model_preemptible(void)
|
||
|
{
|
||
|
- return preempt_model_full() || preempt_model_rt();
|
||
|
+ return preempt_model_full() || preempt_model_lazy() || preempt_model_rt();
|
||
|
}
|
||
|
|
||
|
#endif /* __LINUX_PREEMPT_H */
|
||
|
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
||
|
index bb343136ddd0..ade641760900 100644
|
||
|
--- a/include/linux/sched.h
|
||
|
+++ b/include/linux/sched.h
|
||
|
@@ -2002,7 +2002,8 @@ static inline void set_tsk_need_resched(struct task_struct *tsk)
|
||
|
|
||
|
static inline void clear_tsk_need_resched(struct task_struct *tsk)
|
||
|
{
|
||
|
- clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
|
||
|
+ atomic_long_andnot(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY,
|
||
|
+ (atomic_long_t *)&task_thread_info(tsk)->flags);
|
||
|
}
|
||
|
|
||
|
static inline int test_tsk_need_resched(struct task_struct *tsk)
|
||
|
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
|
||
|
index 9ea0b28068f4..cf2446c9c30d 100644
|
||
|
--- a/include/linux/thread_info.h
|
||
|
+++ b/include/linux/thread_info.h
|
||
|
@@ -59,6 +59,14 @@ enum syscall_work_bit {
|
||
|
|
||
|
#include <asm/thread_info.h>
|
||
|
|
||
|
+#ifndef TIF_NEED_RESCHED_LAZY
|
||
|
+#ifdef CONFIG_ARCH_HAS_PREEMPT_LAZY
|
||
|
+#error Inconsistent PREEMPT_LAZY
|
||
|
+#endif
|
||
|
+#define TIF_NEED_RESCHED_LAZY TIF_NEED_RESCHED
|
||
|
+#define _TIF_NEED_RESCHED_LAZY _TIF_NEED_RESCHED
|
||
|
+#endif
|
||
|
+
|
||
|
#ifdef __KERNEL__
|
||
|
|
||
|
#ifndef arch_set_restart_data
|
||
|
@@ -179,22 +187,27 @@ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti
|
||
|
|
||
|
#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
|
||
|
|
||
|
-static __always_inline bool tif_need_resched(void)
|
||
|
+static __always_inline bool tif_test_bit(int bit)
|
||
|
{
|
||
|
- return arch_test_bit(TIF_NEED_RESCHED,
|
||
|
+ return arch_test_bit(bit,
|
||
|
(unsigned long *)(&current_thread_info()->flags));
|
||
|
}
|
||
|
|
||
|
#else
|
||
|
|
||
|
-static __always_inline bool tif_need_resched(void)
|
||
|
+static __always_inline bool tif_test_bit(int bit)
|
||
|
{
|
||
|
- return test_bit(TIF_NEED_RESCHED,
|
||
|
+ return test_bit(bit,
|
||
|
(unsigned long *)(&current_thread_info()->flags));
|
||
|
}
|
||
|
|
||
|
#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */
|
||
|
|
||
|
+static __always_inline bool tif_need_resched(void)
|
||
|
+{
|
||
|
+ return tif_test_bit(TIF_NEED_RESCHED);
|
||
|
+}
|
||
|
+
|
||
|
#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
|
||
|
static inline int arch_within_stack_frames(const void * const stack,
|
||
|
const void * const stackend,
|
||
|
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
|
||
|
index 42bedcddd511..4cae6f258137 100644
|
||
|
--- a/include/linux/trace_events.h
|
||
|
+++ b/include/linux/trace_events.h
|
||
|
@@ -184,8 +184,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status);
|
||
|
|
||
|
enum trace_flag_type {
|
||
|
TRACE_FLAG_IRQS_OFF = 0x01,
|
||
|
- TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
|
||
|
- TRACE_FLAG_NEED_RESCHED = 0x04,
|
||
|
+ TRACE_FLAG_NEED_RESCHED = 0x02,
|
||
|
+ TRACE_FLAG_NEED_RESCHED_LAZY = 0x04,
|
||
|
TRACE_FLAG_HARDIRQ = 0x08,
|
||
|
TRACE_FLAG_SOFTIRQ = 0x10,
|
||
|
TRACE_FLAG_PREEMPT_RESCHED = 0x20,
|
||
|
@@ -211,11 +211,11 @@ static inline unsigned int tracing_gen_ctx(void)
|
||
|
|
||
|
static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags)
|
||
|
{
|
||
|
- return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT);
|
||
|
+ return tracing_gen_ctx_irq_test(0);
|
||
|
}
|
||
|
static inline unsigned int tracing_gen_ctx(void)
|
||
|
{
|
||
|
- return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT);
|
||
|
+ return tracing_gen_ctx_irq_test(0);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
|
||
|
index fe782cd77388..7c1b29a3a491 100644
|
||
|
--- a/kernel/Kconfig.preempt
|
||
|
+++ b/kernel/Kconfig.preempt
|
||
|
@@ -11,12 +11,16 @@ config PREEMPT_BUILD
|
||
|
select PREEMPTION
|
||
|
select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
|
||
|
|
||
|
+config ARCH_HAS_PREEMPT_LAZY
|
||
|
+ bool
|
||
|
+
|
||
|
choice
|
||
|
prompt "Preemption Model"
|
||
|
default PREEMPT_NONE
|
||
|
|
||
|
config PREEMPT_NONE
|
||
|
bool "No Forced Preemption (Server)"
|
||
|
+ depends on !PREEMPT_RT
|
||
|
select PREEMPT_NONE_BUILD if !PREEMPT_DYNAMIC
|
||
|
help
|
||
|
This is the traditional Linux preemption model, geared towards
|
||
|
@@ -32,6 +36,7 @@ config PREEMPT_NONE
|
||
|
config PREEMPT_VOLUNTARY
|
||
|
bool "Voluntary Kernel Preemption (Desktop)"
|
||
|
depends on !ARCH_NO_PREEMPT
|
||
|
+ depends on !PREEMPT_RT
|
||
|
select PREEMPT_VOLUNTARY_BUILD if !PREEMPT_DYNAMIC
|
||
|
help
|
||
|
This option reduces the latency of the kernel by adding more
|
||
|
@@ -51,7 +56,7 @@ config PREEMPT_VOLUNTARY
|
||
|
config PREEMPT
|
||
|
bool "Preemptible Kernel (Low-Latency Desktop)"
|
||
|
depends on !ARCH_NO_PREEMPT
|
||
|
- select PREEMPT_BUILD
|
||
|
+ select PREEMPT_BUILD if !PREEMPT_DYNAMIC
|
||
|
help
|
||
|
This option reduces the latency of the kernel by making
|
||
|
all kernel code (that is not executing in a critical section)
|
||
|
@@ -67,6 +72,20 @@ config PREEMPT
|
||
|
embedded system with latency requirements in the milliseconds
|
||
|
range.
|
||
|
|
||
|
+config PREEMPT_LAZY
|
||
|
+ bool "Scheduler controlled preemption model"
|
||
|
+ depends on !ARCH_NO_PREEMPT
|
||
|
+ depends on ARCH_HAS_PREEMPT_LAZY
|
||
|
+ select PREEMPT_BUILD if !PREEMPT_DYNAMIC
|
||
|
+ help
|
||
|
+ This option provides a scheduler driven preemption model that
|
||
|
+ is fundamentally similar to full preemption, but is less
|
||
|
+ eager to preempt SCHED_NORMAL tasks in an attempt to
|
||
|
+ reduce lock holder preemption and recover some of the performance
|
||
|
+ gains seen from using Voluntary preemption.
|
||
|
+
|
||
|
+endchoice
|
||
|
+
|
||
|
config PREEMPT_RT
|
||
|
bool "Fully Preemptible Kernel (Real-Time)"
|
||
|
depends on EXPERT && ARCH_SUPPORTS_RT
|
||
|
@@ -84,8 +103,6 @@ config PREEMPT_RT
|
||
|
Select this if you are building a kernel for systems which
|
||
|
require real-time guarantees.
|
||
|
|
||
|
-endchoice
|
||
|
-
|
||
|
config PREEMPT_COUNT
|
||
|
bool
|
||
|
|
||
|
@@ -95,7 +112,7 @@ config PREEMPTION
|
||
|
|
||
|
config PREEMPT_DYNAMIC
|
||
|
bool "Preemption behaviour defined on boot"
|
||
|
- depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT
|
||
|
+ depends on HAVE_PREEMPT_DYNAMIC
|
||
|
select JUMP_LABEL if HAVE_PREEMPT_DYNAMIC_KEY
|
||
|
select PREEMPT_BUILD
|
||
|
default y if HAVE_PREEMPT_DYNAMIC_CALL
|
||
|
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
|
||
|
index 5b6934e23c21..e33691d5adf7 100644
|
||
|
--- a/kernel/entry/common.c
|
||
|
+++ b/kernel/entry/common.c
|
||
|
@@ -98,7 +98,7 @@ __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
|
||
|
|
||
|
local_irq_enable_exit_to_user(ti_work);
|
||
|
|
||
|
- if (ti_work & _TIF_NEED_RESCHED)
|
||
|
+ if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
|
||
|
schedule();
|
||
|
|
||
|
if (ti_work & _TIF_UPROBE)
|
||
|
diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c
|
||
|
index 2e0f75bcb7fd..8485f63863af 100644
|
||
|
--- a/kernel/entry/kvm.c
|
||
|
+++ b/kernel/entry/kvm.c
|
||
|
@@ -13,7 +13,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work)
|
||
|
return -EINTR;
|
||
|
}
|
||
|
|
||
|
- if (ti_work & _TIF_NEED_RESCHED)
|
||
|
+ if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
|
||
|
schedule();
|
||
|
|
||
|
if (ti_work & _TIF_NOTIFY_RESUME)
|
||
|
@@ -24,7 +24,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work)
|
||
|
return ret;
|
||
|
|
||
|
ti_work = read_thread_flags();
|
||
|
- } while (ti_work & XFER_TO_GUEST_MODE_WORK || need_resched());
|
||
|
+ } while (ti_work & XFER_TO_GUEST_MODE_WORK);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
|
||
|
index 549c03336ee9..4dcbf8aa80ff 100644
|
||
|
--- a/kernel/rcu/srcutiny.c
|
||
|
+++ b/kernel/rcu/srcutiny.c
|
||
|
@@ -122,8 +122,8 @@ void srcu_drive_gp(struct work_struct *wp)
|
||
|
ssp = container_of(wp, struct srcu_struct, srcu_work);
|
||
|
preempt_disable(); // Needed for PREEMPT_AUTO
|
||
|
if (ssp->srcu_gp_running || ULONG_CMP_GE(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) {
|
||
|
- return; /* Already running or nothing to do. */
|
||
|
preempt_enable();
|
||
|
+ return; /* Already running or nothing to do. */
|
||
|
}
|
||
|
|
||
|
/* Remove recently arrived callbacks and wait for readers. */
|
||
|
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
|
||
|
index b35752fdbcc0..a1b01cd25e6d 100644
|
||
|
--- a/kernel/sched/core.c
|
||
|
+++ b/kernel/sched/core.c
|
||
|
@@ -941,10 +941,9 @@ static inline void hrtick_rq_init(struct rq *rq)
|
||
|
* this avoids any races wrt polling state changes and thereby avoids
|
||
|
* spurious IPIs.
|
||
|
*/
|
||
|
-static inline bool set_nr_and_not_polling(struct task_struct *p)
|
||
|
+static inline bool set_nr_and_not_polling(struct thread_info *ti, int tif)
|
||
|
{
|
||
|
- struct thread_info *ti = task_thread_info(p);
|
||
|
- return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
|
||
|
+ return !(fetch_or(&ti->flags, 1 << tif) & _TIF_POLLING_NRFLAG);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
@@ -969,9 +968,9 @@ static bool set_nr_if_polling(struct task_struct *p)
|
||
|
}
|
||
|
|
||
|
#else
|
||
|
-static inline bool set_nr_and_not_polling(struct task_struct *p)
|
||
|
+static inline bool set_nr_and_not_polling(struct thread_info *ti, int tif)
|
||
|
{
|
||
|
- set_tsk_need_resched(p);
|
||
|
+ atomic_long_or(1 << tif, (atomic_long_t *)&ti->flags);
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
@@ -1076,28 +1075,66 @@ void wake_up_q(struct wake_q_head *head)
|
||
|
* might also involve a cross-CPU call to trigger the scheduler on
|
||
|
* the target CPU.
|
||
|
*/
|
||
|
-void resched_curr(struct rq *rq)
|
||
|
+static void __resched_curr(struct rq *rq, int tif)
|
||
|
{
|
||
|
struct task_struct *curr = rq->curr;
|
||
|
+ struct thread_info *cti = task_thread_info(curr);
|
||
|
int cpu;
|
||
|
|
||
|
lockdep_assert_rq_held(rq);
|
||
|
|
||
|
- if (test_tsk_need_resched(curr))
|
||
|
+ if (is_idle_task(curr) && tif == TIF_NEED_RESCHED_LAZY)
|
||
|
+ tif = TIF_NEED_RESCHED;
|
||
|
+
|
||
|
+ if (cti->flags & ((1 << tif) | _TIF_NEED_RESCHED))
|
||
|
return;
|
||
|
|
||
|
cpu = cpu_of(rq);
|
||
|
|
||
|
if (cpu == smp_processor_id()) {
|
||
|
- set_tsk_need_resched(curr);
|
||
|
- set_preempt_need_resched();
|
||
|
+ set_ti_thread_flag(cti, tif);
|
||
|
+ if (tif == TIF_NEED_RESCHED)
|
||
|
+ set_preempt_need_resched();
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
- if (set_nr_and_not_polling(curr))
|
||
|
- smp_send_reschedule(cpu);
|
||
|
- else
|
||
|
+ if (set_nr_and_not_polling(cti, tif)) {
|
||
|
+ if (tif == TIF_NEED_RESCHED)
|
||
|
+ smp_send_reschedule(cpu);
|
||
|
+ } else {
|
||
|
trace_sched_wake_idle_without_ipi(cpu);
|
||
|
+ }
|
||
|
+}
|
||
|
+
|
||
|
+void resched_curr(struct rq *rq)
|
||
|
+{
|
||
|
+ __resched_curr(rq, TIF_NEED_RESCHED);
|
||
|
+}
|
||
|
+
|
||
|
+#ifdef CONFIG_PREEMPT_DYNAMIC
|
||
|
+static DEFINE_STATIC_KEY_FALSE(sk_dynamic_preempt_lazy);
|
||
|
+static __always_inline bool dynamic_preempt_lazy(void)
|
||
|
+{
|
||
|
+ return static_branch_unlikely(&sk_dynamic_preempt_lazy);
|
||
|
+}
|
||
|
+#else
|
||
|
+static __always_inline bool dynamic_preempt_lazy(void)
|
||
|
+{
|
||
|
+ return IS_ENABLED(CONFIG_PREEMPT_LAZY); /* build-time answer when !CONFIG_PREEMPT_DYNAMIC */
|
||
|
+}
|
||
|
+#endif
|
||
|
+
|
||
|
+static __always_inline int tif_need_resched_lazy(void)
|
||
|
+{
|
||
|
+ if (dynamic_preempt_lazy())
|
||
|
+ return TIF_NEED_RESCHED_LAZY;
|
||
|
+
|
||
|
+ return TIF_NEED_RESCHED;
|
||
|
+}
|
||
|
+
|
||
|
+void resched_curr_lazy(struct rq *rq)
|
||
|
+{
|
||
|
+ __resched_curr(rq, tif_need_resched_lazy());
|
||
|
}
|
||
|
|
||
|
void resched_cpu(int cpu)
|
||
|
@@ -1192,7 +1229,7 @@ static void wake_up_idle_cpu(int cpu)
|
||
|
* and testing of the above solutions didn't appear to report
|
||
|
* much benefits.
|
||
|
*/
|
||
|
- if (set_nr_and_not_polling(rq->idle))
|
||
|
+ if (set_nr_and_not_polling(task_thread_info(rq->idle), TIF_NEED_RESCHED))
|
||
|
smp_send_reschedule(cpu);
|
||
|
else
|
||
|
trace_sched_wake_idle_without_ipi(cpu);
|
||
|
@@ -5613,6 +5650,10 @@ void sched_tick(void)
|
||
|
update_rq_clock(rq);
|
||
|
hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
|
||
|
update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);
|
||
|
+
|
||
|
+ if (dynamic_preempt_lazy() && tif_test_bit(TIF_NEED_RESCHED_LAZY))
|
||
|
+ resched_curr(rq);
|
||
|
+
|
||
|
curr->sched_class->task_tick(rq, curr, 0);
|
||
|
if (sched_feat(LATENCY_WARN))
|
||
|
resched_latency = cpu_resched_latency(rq);
|
||
|
@@ -7358,6 +7399,7 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write);
|
||
|
* preempt_schedule <- NOP
|
||
|
* preempt_schedule_notrace <- NOP
|
||
|
* irqentry_exit_cond_resched <- NOP
|
||
|
+ * dynamic_preempt_lazy <- false
|
||
|
*
|
||
|
* VOLUNTARY:
|
||
|
* cond_resched <- __cond_resched
|
||
|
@@ -7365,6 +7407,7 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write);
|
||
|
* preempt_schedule <- NOP
|
||
|
* preempt_schedule_notrace <- NOP
|
||
|
* irqentry_exit_cond_resched <- NOP
|
||
|
+ * dynamic_preempt_lazy <- false
|
||
|
*
|
||
|
* FULL:
|
||
|
* cond_resched <- RET0
|
||
|
@@ -7372,6 +7415,15 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write);
|
||
|
* preempt_schedule <- preempt_schedule
|
||
|
* preempt_schedule_notrace <- preempt_schedule_notrace
|
||
|
* irqentry_exit_cond_resched <- irqentry_exit_cond_resched
|
||
|
+ * dynamic_preempt_lazy <- false
|
||
|
+ *
|
||
|
+ * LAZY:
|
||
|
+ * cond_resched <- RET0
|
||
|
+ * might_resched <- RET0
|
||
|
+ * preempt_schedule <- preempt_schedule
|
||
|
+ * preempt_schedule_notrace <- preempt_schedule_notrace
|
||
|
+ * irqentry_exit_cond_resched <- irqentry_exit_cond_resched
|
||
|
+ * dynamic_preempt_lazy <- true
|
||
|
*/
|
||
|
|
||
|
enum {
|
||
|
@@ -7379,30 +7431,41 @@ enum {
|
||
|
preempt_dynamic_none,
|
||
|
preempt_dynamic_voluntary,
|
||
|
preempt_dynamic_full,
|
||
|
+ preempt_dynamic_lazy,
|
||
|
};
|
||
|
|
||
|
int preempt_dynamic_mode = preempt_dynamic_undefined;
|
||
|
|
||
|
int sched_dynamic_mode(const char *str)
|
||
|
{
|
||
|
+#ifndef CONFIG_PREEMPT_RT
|
||
|
if (!strcmp(str, "none"))
|
||
|
return preempt_dynamic_none;
|
||
|
|
||
|
if (!strcmp(str, "voluntary"))
|
||
|
return preempt_dynamic_voluntary;
|
||
|
+#endif
|
||
|
|
||
|
if (!strcmp(str, "full"))
|
||
|
return preempt_dynamic_full;
|
||
|
|
||
|
+#ifdef CONFIG_ARCH_HAS_PREEMPT_LAZY
|
||
|
+ if (!strcmp(str, "lazy"))
|
||
|
+ return preempt_dynamic_lazy;
|
||
|
+#endif
|
||
|
+
|
||
|
return -EINVAL;
|
||
|
}
|
||
|
|
||
|
+#define preempt_dynamic_key_enable(f) static_key_enable(&sk_dynamic_##f.key)
|
||
|
+#define preempt_dynamic_key_disable(f) static_key_disable(&sk_dynamic_##f.key)
|
||
|
+
|
||
|
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||
|
#define preempt_dynamic_enable(f) static_call_update(f, f##_dynamic_enabled)
|
||
|
#define preempt_dynamic_disable(f) static_call_update(f, f##_dynamic_disabled)
|
||
|
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||
|
-#define preempt_dynamic_enable(f) static_key_enable(&sk_dynamic_##f.key)
|
||
|
-#define preempt_dynamic_disable(f) static_key_disable(&sk_dynamic_##f.key)
|
||
|
+#define preempt_dynamic_enable(f) preempt_dynamic_key_enable(f)
|
||
|
+#define preempt_dynamic_disable(f) preempt_dynamic_key_disable(f)
|
||
|
#else
|
||
|
#error "Unsupported PREEMPT_DYNAMIC mechanism"
|
||
|
#endif
|
||
|
@@ -7422,6 +7485,7 @@ static void __sched_dynamic_update(int mode)
|
||
|
preempt_dynamic_enable(preempt_schedule);
|
||
|
preempt_dynamic_enable(preempt_schedule_notrace);
|
||
|
preempt_dynamic_enable(irqentry_exit_cond_resched);
|
||
|
+ preempt_dynamic_key_disable(preempt_lazy);
|
||
|
|
||
|
switch (mode) {
|
||
|
case preempt_dynamic_none:
|
||
|
@@ -7431,6 +7495,7 @@ static void __sched_dynamic_update(int mode)
|
||
|
preempt_dynamic_disable(preempt_schedule);
|
||
|
preempt_dynamic_disable(preempt_schedule_notrace);
|
||
|
preempt_dynamic_disable(irqentry_exit_cond_resched);
|
||
|
+ preempt_dynamic_key_disable(preempt_lazy);
|
||
|
if (mode != preempt_dynamic_mode)
|
||
|
pr_info("Dynamic Preempt: none\n");
|
||
|
break;
|
||
|
@@ -7442,6 +7507,7 @@ static void __sched_dynamic_update(int mode)
|
||
|
preempt_dynamic_disable(preempt_schedule);
|
||
|
preempt_dynamic_disable(preempt_schedule_notrace);
|
||
|
preempt_dynamic_disable(irqentry_exit_cond_resched);
|
||
|
+ preempt_dynamic_key_disable(preempt_lazy);
|
||
|
if (mode != preempt_dynamic_mode)
|
||
|
pr_info("Dynamic Preempt: voluntary\n");
|
||
|
break;
|
||
|
@@ -7453,9 +7519,22 @@ static void __sched_dynamic_update(int mode)
|
||
|
preempt_dynamic_enable(preempt_schedule);
|
||
|
preempt_dynamic_enable(preempt_schedule_notrace);
|
||
|
preempt_dynamic_enable(irqentry_exit_cond_resched);
|
||
|
+ preempt_dynamic_key_disable(preempt_lazy);
|
||
|
if (mode != preempt_dynamic_mode)
|
||
|
pr_info("Dynamic Preempt: full\n");
|
||
|
break;
|
||
|
+
|
||
|
+ case preempt_dynamic_lazy:
|
||
|
+ if (!klp_override)
|
||
|
+ preempt_dynamic_disable(cond_resched);
|
||
|
+ preempt_dynamic_disable(might_resched);
|
||
|
+ preempt_dynamic_enable(preempt_schedule);
|
||
|
+ preempt_dynamic_enable(preempt_schedule_notrace);
|
||
|
+ preempt_dynamic_enable(irqentry_exit_cond_resched);
|
||
|
+ preempt_dynamic_key_enable(preempt_lazy);
|
||
|
+ if (mode != preempt_dynamic_mode)
|
||
|
+ pr_info("Dynamic Preempt: lazy\n");
|
||
|
+ break;
|
||
|
}
|
||
|
|
||
|
preempt_dynamic_mode = mode;
|
||
|
@@ -7518,6 +7597,8 @@ static void __init preempt_dynamic_init(void)
|
||
|
sched_dynamic_update(preempt_dynamic_none);
|
||
|
} else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
|
||
|
sched_dynamic_update(preempt_dynamic_voluntary);
|
||
|
+ } else if (IS_ENABLED(CONFIG_PREEMPT_LAZY)) {
|
||
|
+ sched_dynamic_update(preempt_dynamic_lazy);
|
||
|
} else {
|
||
|
/* Default static call setting, nothing to do */
|
||
|
WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
|
||
|
@@ -7538,6 +7619,7 @@ static void __init preempt_dynamic_init(void)
|
||
|
PREEMPT_MODEL_ACCESSOR(none);
|
||
|
PREEMPT_MODEL_ACCESSOR(voluntary);
|
||
|
PREEMPT_MODEL_ACCESSOR(full);
|
||
|
+PREEMPT_MODEL_ACCESSOR(lazy);
|
||
|
|
||
|
#else /* !CONFIG_PREEMPT_DYNAMIC: */
|
||
|
|
||
|
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
|
||
|
index f4035c7a0fa1..a48b2a701ec2 100644
|
||
|
--- a/kernel/sched/debug.c
|
||
|
+++ b/kernel/sched/debug.c
|
||
|
@@ -245,11 +245,12 @@ static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf,
|
||
|
static int sched_dynamic_show(struct seq_file *m, void *v)
|
||
|
{
|
||
|
static const char * preempt_modes[] = {
|
||
|
- "none", "voluntary", "full"
|
||
|
+ "none", "voluntary", "full", "lazy",
|
||
|
};
|
||
|
- int i;
|
||
|
+ int j = ARRAY_SIZE(preempt_modes) - !IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY);
|
||
|
+ int i = IS_ENABLED(CONFIG_PREEMPT_RT) * 2;
|
||
|
|
||
|
- for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) {
|
||
|
+ for (; i < j; i++) {
|
||
|
if (preempt_dynamic_mode == i)
|
||
|
seq_puts(m, "(");
|
||
|
seq_puts(m, preempt_modes[i]);
|
||
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
||
|
index 54e7c4c3e2c5..10e9484d1d43 100644
|
||
|
--- a/kernel/sched/fair.c
|
||
|
+++ b/kernel/sched/fair.c
|
||
|
@@ -1264,7 +1264,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
|
||
|
return;
|
||
|
|
||
|
if (resched || did_preempt_short(cfs_rq, curr)) {
|
||
|
- resched_curr(rq);
|
||
|
+ resched_curr_lazy(rq);
|
||
|
clear_buddies(cfs_rq, curr);
|
||
|
}
|
||
|
}
|
||
|
@@ -5691,7 +5691,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
|
||
|
* validating it and just reschedule.
|
||
|
*/
|
||
|
if (queued) {
|
||
|
- resched_curr(rq_of(cfs_rq));
|
||
|
+ resched_curr_lazy(rq_of(cfs_rq));
|
||
|
return;
|
||
|
}
|
||
|
/*
|
||
|
@@ -8855,7 +8855,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
|
||
|
return;
|
||
|
|
||
|
preempt:
|
||
|
- resched_curr(rq);
|
||
|
+ resched_curr_lazy(rq);
|
||
|
}
|
||
|
|
||
|
static struct task_struct *pick_task_fair(struct rq *rq)
|
||
|
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
|
||
|
index e7fbb1d0f316..1f2b6a50c344 100644
|
||
|
--- a/kernel/sched/sched.h
|
||
|
+++ b/kernel/sched/sched.h
|
||
|
@@ -2700,6 +2700,7 @@ extern void init_sched_rt_class(void);
|
||
|
extern void init_sched_fair_class(void);
|
||
|
|
||
|
extern void resched_curr(struct rq *rq);
|
||
|
+extern void resched_curr_lazy(struct rq *rq);
|
||
|
extern void resched_cpu(int cpu);
|
||
|
|
||
|
extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
|
||
|
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
|
||
|
index 2b64b3ec67d9..7466b8713076 100644
|
||
|
--- a/kernel/trace/trace.c
|
||
|
+++ b/kernel/trace/trace.c
|
||
|
@@ -2544,6 +2544,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
|
||
|
trace_flags |= TRACE_FLAG_NEED_RESCHED;
|
||
|
if (test_preempt_need_resched())
|
||
|
trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
|
||
|
+ if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
|
||
|
+ trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
|
||
|
return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
|
||
|
(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
|
||
|
}
|
||
|
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
|
||
|
index 868f2f912f28..23ca2155306b 100644
|
||
|
--- a/kernel/trace/trace_output.c
|
||
|
+++ b/kernel/trace/trace_output.c
|
||
|
@@ -460,20 +460,32 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
|
||
|
(entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' :
|
||
|
(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
|
||
|
bh_off ? 'b' :
|
||
|
- (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' :
|
||
|
+ !IS_ENABLED(CONFIG_TRACE_IRQFLAGS_SUPPORT) ? 'X' :
|
||
|
'.';
|
||
|
|
||
|
- switch (entry->flags & (TRACE_FLAG_NEED_RESCHED |
|
||
|
+ switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY |
|
||
|
TRACE_FLAG_PREEMPT_RESCHED)) {
|
||
|
+ case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY | TRACE_FLAG_PREEMPT_RESCHED:
|
||
|
+ need_resched = 'B';
|
||
|
+ break;
|
||
|
case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED:
|
||
|
need_resched = 'N';
|
||
|
break;
|
||
|
+ case TRACE_FLAG_NEED_RESCHED_LAZY | TRACE_FLAG_PREEMPT_RESCHED:
|
||
|
+ need_resched = 'L';
|
||
|
+ break;
|
||
|
+ case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY:
|
||
|
+ need_resched = 'b';
|
||
|
+ break;
|
||
|
case TRACE_FLAG_NEED_RESCHED:
|
||
|
need_resched = 'n';
|
||
|
break;
|
||
|
case TRACE_FLAG_PREEMPT_RESCHED:
|
||
|
need_resched = 'p';
|
||
|
break;
|
||
|
+ case TRACE_FLAG_NEED_RESCHED_LAZY:
|
||
|
+ need_resched = 'l';
|
||
|
+ break;
|
||
|
default:
|
||
|
need_resched = '.';
|
||
|
break;
|
||
|
--
|
||
|
2.47.0
|
||
|
|