From 368db37d4b1b6aabf546b7a0213a46ec390e2f6b Mon Sep 17 00:00:00 2001 From: ferreo Date: Sun, 12 Jan 2025 15:26:26 +0100 Subject: [PATCH] Delete patches/0003-lazy.patch --- patches/0003-lazy.patch | 958 ---------------------------------------- 1 file changed, 958 deletions(-) delete mode 100644 patches/0003-lazy.patch diff --git a/patches/0003-lazy.patch b/patches/0003-lazy.patch deleted file mode 100644 index 7654052..0000000 --- a/patches/0003-lazy.patch +++ /dev/null @@ -1,958 +0,0 @@ -From 5ddf15cb65a8c14868cdc743474bd0a4fa9b586f Mon Sep 17 00:00:00 2001 -From: Eric Naim -Date: Fri, 13 Dec 2024 23:03:09 +0800 -Subject: [PATCH] preempt-lazy - -Signed-off-by: Eric Naim ---- - arch/x86/Kconfig | 1 + - arch/x86/include/asm/thread_info.h | 6 +- - include/linux/entry-common.h | 3 +- - include/linux/entry-kvm.h | 5 +- - include/linux/preempt.h | 8 +- - include/linux/rcupdate.h | 2 +- - include/linux/rcutree.h | 2 +- - include/linux/sched.h | 3 +- - include/linux/srcutiny.h | 2 +- - include/linux/thread_info.h | 21 +++++- - include/linux/trace_events.h | 8 +- - kernel/Kconfig.preempt | 25 ++++++- - kernel/entry/common.c | 2 +- - kernel/entry/kvm.c | 4 +- - kernel/rcu/Kconfig | 4 +- - kernel/rcu/srcutiny.c | 14 ++-- - kernel/rcu/tree_plugin.h | 22 ++++-- - kernel/sched/core.c | 116 +++++++++++++++++++++++++---- - kernel/sched/debug.c | 7 +- - kernel/sched/fair.c | 6 +- - kernel/sched/sched.h | 1 + - kernel/trace/trace.c | 2 + - kernel/trace/trace_osnoise.c | 32 ++++---- - kernel/trace/trace_output.c | 16 +++- - 24 files changed, 232 insertions(+), 80 deletions(-) - -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index f127d0f1024e..4b28c191ae31 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -93,6 +93,7 @@ config X86 - select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS - select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE - select ARCH_HAS_PMEM_API if X86_64 -+ select ARCH_HAS_PREEMPT_LAZY - select ARCH_HAS_PTE_DEVMAP if X86_64 - select ARCH_HAS_PTE_SPECIAL - select 
ARCH_HAS_HW_PTE_YOUNG -diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h -index 12da7dfd5ef1..a55c214f3ba6 100644 ---- a/arch/x86/include/asm/thread_info.h -+++ b/arch/x86/include/asm/thread_info.h -@@ -87,8 +87,9 @@ struct thread_info { - #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ - #define TIF_SIGPENDING 2 /* signal pending */ - #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ --#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ --#define TIF_SSBD 5 /* Speculative store bypass disable */ -+#define TIF_NEED_RESCHED_LAZY 4 /* Lazy rescheduling needed */ -+#define TIF_SINGLESTEP 5 /* reenable singlestep on user return*/ -+#define TIF_SSBD 6 /* Speculative store bypass disable */ - #define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ - #define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */ - #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ -@@ -110,6 +111,7 @@ struct thread_info { - #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) - #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) - #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) - #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) - #define _TIF_SSBD (1 << TIF_SSBD) - #define _TIF_SPEC_IB (1 << TIF_SPEC_IB) -diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h -index 1e50cdb83ae5..fc61d0205c97 100644 ---- a/include/linux/entry-common.h -+++ b/include/linux/entry-common.h -@@ -64,7 +64,8 @@ - - #define EXIT_TO_USER_MODE_WORK \ - (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ -- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ -+ _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ -+ _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ - ARCH_EXIT_TO_USER_MODE_WORK) - - /** -diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h -index 6813171afccb..16149f6625e4 100644 ---- 
a/include/linux/entry-kvm.h -+++ b/include/linux/entry-kvm.h -@@ -17,8 +17,9 @@ - #endif - - #define XFER_TO_GUEST_MODE_WORK \ -- (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \ -- _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK) -+ (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_SIGPENDING | \ -+ _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \ -+ ARCH_XFER_TO_GUEST_MODE_WORK) - - struct kvm_vcpu; - -diff --git a/include/linux/preempt.h b/include/linux/preempt.h -index ce76f1a45722..ca86235ac15c 100644 ---- a/include/linux/preempt.h -+++ b/include/linux/preempt.h -@@ -486,6 +486,7 @@ DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) - extern bool preempt_model_none(void); - extern bool preempt_model_voluntary(void); - extern bool preempt_model_full(void); -+extern bool preempt_model_lazy(void); - - #else - -@@ -502,6 +503,11 @@ static inline bool preempt_model_full(void) - return IS_ENABLED(CONFIG_PREEMPT); - } - -+static inline bool preempt_model_lazy(void) -+{ -+ return IS_ENABLED(CONFIG_PREEMPT_LAZY); -+} -+ - #endif - - static inline bool preempt_model_rt(void) -@@ -519,7 +525,7 @@ static inline bool preempt_model_rt(void) - */ - static inline bool preempt_model_preemptible(void) - { -- return preempt_model_full() || preempt_model_rt(); -+ return preempt_model_full() || preempt_model_lazy() || preempt_model_rt(); - } - - #endif /* __LINUX_PREEMPT_H */ -diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h -index 48e5c03df1dd..257e9ae34414 100644 ---- a/include/linux/rcupdate.h -+++ b/include/linux/rcupdate.h -@@ -95,9 +95,9 @@ static inline void __rcu_read_lock(void) - - static inline void __rcu_read_unlock(void) - { -- preempt_enable(); - if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) - rcu_read_unlock_strict(); -+ preempt_enable(); - } - - static inline int rcu_preempt_depth(void) -diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h -index 90a684f94776..ae8b5cb475a3 100644 ---- a/include/linux/rcutree.h 
-+++ b/include/linux/rcutree.h -@@ -104,7 +104,7 @@ extern int rcu_scheduler_active; - void rcu_end_inkernel_boot(void); - bool rcu_inkernel_boot_has_ended(void); - bool rcu_is_watching(void); --#ifndef CONFIG_PREEMPTION -+#ifndef CONFIG_PREEMPT_RCU - void rcu_all_qs(void); - #endif - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index bb343136ddd0..ade641760900 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -2002,7 +2002,8 @@ static inline void set_tsk_need_resched(struct task_struct *tsk) - - static inline void clear_tsk_need_resched(struct task_struct *tsk) - { -- clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); -+ atomic_long_andnot(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY, -+ (atomic_long_t *)&task_thread_info(tsk)->flags); - } - - static inline int test_tsk_need_resched(struct task_struct *tsk) -diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h -index 4d96bbdb45f0..1635c5e2662f 100644 ---- a/include/linux/srcutiny.h -+++ b/include/linux/srcutiny.h -@@ -64,7 +64,7 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp) - { - int idx; - -- preempt_disable(); // Needed for PREEMPT_AUTO -+ preempt_disable(); // Needed for PREEMPT_LAZY - idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1; - WRITE_ONCE(ssp->srcu_lock_nesting[idx], READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1); - preempt_enable(); -diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h -index 9ea0b28068f4..cf2446c9c30d 100644 ---- a/include/linux/thread_info.h -+++ b/include/linux/thread_info.h -@@ -59,6 +59,14 @@ enum syscall_work_bit { - - #include <asm/thread_info.h> - -+#ifndef TIF_NEED_RESCHED_LAZY -+#ifdef CONFIG_ARCH_HAS_PREEMPT_LAZY -+#error Inconsistent PREEMPT_LAZY -+#endif -+#define TIF_NEED_RESCHED_LAZY TIF_NEED_RESCHED -+#define _TIF_NEED_RESCHED_LAZY _TIF_NEED_RESCHED -+#endif -+ - #ifdef __KERNEL__ - - #ifndef arch_set_restart_data -@@ -179,22 +187,27 @@ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti -
- #ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H - --static __always_inline bool tif_need_resched(void) -+static __always_inline bool tif_test_bit(int bit) - { -- return arch_test_bit(TIF_NEED_RESCHED, -+ return arch_test_bit(bit, - (unsigned long *)(&current_thread_info()->flags)); - } - - #else - --static __always_inline bool tif_need_resched(void) -+static __always_inline bool tif_test_bit(int bit) - { -- return test_bit(TIF_NEED_RESCHED, -+ return test_bit(bit, - (unsigned long *)(&current_thread_info()->flags)); - } - - #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ - -+static __always_inline bool tif_need_resched(void) -+{ -+ return tif_test_bit(TIF_NEED_RESCHED); -+} -+ - #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES - static inline int arch_within_stack_frames(const void * const stack, - const void * const stackend, -diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h -index 42bedcddd511..4cae6f258137 100644 ---- a/include/linux/trace_events.h -+++ b/include/linux/trace_events.h -@@ -184,8 +184,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); - - enum trace_flag_type { - TRACE_FLAG_IRQS_OFF = 0x01, -- TRACE_FLAG_IRQS_NOSUPPORT = 0x02, -- TRACE_FLAG_NEED_RESCHED = 0x04, -+ TRACE_FLAG_NEED_RESCHED = 0x02, -+ TRACE_FLAG_NEED_RESCHED_LAZY = 0x04, - TRACE_FLAG_HARDIRQ = 0x08, - TRACE_FLAG_SOFTIRQ = 0x10, - TRACE_FLAG_PREEMPT_RESCHED = 0x20, -@@ -211,11 +211,11 @@ static inline unsigned int tracing_gen_ctx(void) - - static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) - { -- return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); -+ return tracing_gen_ctx_irq_test(0); - } - static inline unsigned int tracing_gen_ctx(void) - { -- return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); -+ return tracing_gen_ctx_irq_test(0); - } - #endif - -diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt -index fe782cd77388..7c1b29a3a491 100644 ---- a/kernel/Kconfig.preempt -+++
b/kernel/Kconfig.preempt -@@ -11,12 +11,16 @@ config PREEMPT_BUILD - select PREEMPTION - select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK - -+config ARCH_HAS_PREEMPT_LAZY -+ bool -+ - choice - prompt "Preemption Model" - default PREEMPT_NONE - - config PREEMPT_NONE - bool "No Forced Preemption (Server)" -+ depends on !PREEMPT_RT - select PREEMPT_NONE_BUILD if !PREEMPT_DYNAMIC - help - This is the traditional Linux preemption model, geared towards -@@ -32,6 +36,7 @@ config PREEMPT_NONE - config PREEMPT_VOLUNTARY - bool "Voluntary Kernel Preemption (Desktop)" - depends on !ARCH_NO_PREEMPT -+ depends on !PREEMPT_RT - select PREEMPT_VOLUNTARY_BUILD if !PREEMPT_DYNAMIC - help - This option reduces the latency of the kernel by adding more -@@ -51,7 +56,7 @@ config PREEMPT_VOLUNTARY - config PREEMPT - bool "Preemptible Kernel (Low-Latency Desktop)" - depends on !ARCH_NO_PREEMPT -- select PREEMPT_BUILD -+ select PREEMPT_BUILD if !PREEMPT_DYNAMIC - help - This option reduces the latency of the kernel by making - all kernel code (that is not executing in a critical section) -@@ -67,6 +72,20 @@ config PREEMPT - embedded system with latency requirements in the milliseconds - range. - -+config PREEMPT_LAZY -+ bool "Scheduler controlled preemption model" -+ depends on !ARCH_NO_PREEMPT -+ depends on ARCH_HAS_PREEMPT_LAZY -+ select PREEMPT_BUILD if !PREEMPT_DYNAMIC -+ help -+ This option provides a scheduler driven preemption model that -+ is fundamentally similar to full preemption, but is less -+ eager to preempt SCHED_NORMAL tasks in an attempt to -+ reduce lock holder preemption and recover some of the performance -+ gains seen from using Voluntary preemption. -+ -+endchoice -+ - config PREEMPT_RT - bool "Fully Preemptible Kernel (Real-Time)" - depends on EXPERT && ARCH_SUPPORTS_RT -@@ -84,8 +103,6 @@ config PREEMPT_RT - Select this if you are building a kernel for systems which - require real-time guarantees. 
- --endchoice -- - config PREEMPT_COUNT - bool - -@@ -95,7 +112,7 @@ config PREEMPTION - - config PREEMPT_DYNAMIC - bool "Preemption behaviour defined on boot" -- depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT -+ depends on HAVE_PREEMPT_DYNAMIC - select JUMP_LABEL if HAVE_PREEMPT_DYNAMIC_KEY - select PREEMPT_BUILD - default y if HAVE_PREEMPT_DYNAMIC_CALL -diff --git a/kernel/entry/common.c b/kernel/entry/common.c -index 5b6934e23c21..e33691d5adf7 100644 ---- a/kernel/entry/common.c -+++ b/kernel/entry/common.c -@@ -98,7 +98,7 @@ __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs, - - local_irq_enable_exit_to_user(ti_work); - -- if (ti_work & _TIF_NEED_RESCHED) -+ if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) - schedule(); - - if (ti_work & _TIF_UPROBE) -diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c -index 2e0f75bcb7fd..8485f63863af 100644 ---- a/kernel/entry/kvm.c -+++ b/kernel/entry/kvm.c -@@ -13,7 +13,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) - return -EINTR; - } - -- if (ti_work & _TIF_NEED_RESCHED) -+ if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) - schedule(); - - if (ti_work & _TIF_NOTIFY_RESUME) -@@ -24,7 +24,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) - return ret; - - ti_work = read_thread_flags(); -- } while (ti_work & XFER_TO_GUEST_MODE_WORK || need_resched()); -+ } while (ti_work & XFER_TO_GUEST_MODE_WORK); - return 0; - } - -diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig -index 3e079de0f5b4..9d52f87fac27 100644 ---- a/kernel/rcu/Kconfig -+++ b/kernel/rcu/Kconfig -@@ -18,7 +18,7 @@ config TREE_RCU - - config PREEMPT_RCU - bool -- default y if PREEMPTION -+ default y if (PREEMPT || PREEMPT_RT || PREEMPT_DYNAMIC) - select TREE_RCU - help - This option selects the RCU implementation that is -@@ -91,7 +91,7 @@ config NEED_TASKS_RCU - - config TASKS_RCU - bool -- default NEED_TASKS_RCU && (PREEMPTION || 
PREEMPT_AUTO) -+ default NEED_TASKS_RCU && PREEMPTION - select IRQ_WORK - - config FORCE_TASKS_RUDE_RCU -diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c -index 4dcbf8aa80ff..f688bdad293e 100644 ---- a/kernel/rcu/srcutiny.c -+++ b/kernel/rcu/srcutiny.c -@@ -98,7 +98,7 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx) - { - int newval; - -- preempt_disable(); // Needed for PREEMPT_AUTO -+ preempt_disable(); // Needed for PREEMPT_LAZY - newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1; - WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval); - preempt_enable(); -@@ -120,7 +120,7 @@ void srcu_drive_gp(struct work_struct *wp) - struct srcu_struct *ssp; - - ssp = container_of(wp, struct srcu_struct, srcu_work); -- preempt_disable(); // Needed for PREEMPT_AUTO -+ preempt_disable(); // Needed for PREEMPT_LAZY - if (ssp->srcu_gp_running || ULONG_CMP_GE(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) { - preempt_enable(); - return; /* Already running or nothing to do. */ -@@ -138,7 +138,7 @@ void srcu_drive_gp(struct work_struct *wp) - WRITE_ONCE(ssp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ - preempt_enable(); - swait_event_exclusive(ssp->srcu_wq, !READ_ONCE(ssp->srcu_lock_nesting[idx])); -- preempt_disable(); // Needed for PREEMPT_AUTO -+ preempt_disable(); // Needed for PREEMPT_LAZY - WRITE_ONCE(ssp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ - WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); - preempt_enable(); -@@ -159,7 +159,7 @@ void srcu_drive_gp(struct work_struct *wp) - * at interrupt level, but the ->srcu_gp_running checks will - * straighten that out. 
- */ -- preempt_disable(); // Needed for PREEMPT_AUTO -+ preempt_disable(); // Needed for PREEMPT_LAZY - WRITE_ONCE(ssp->srcu_gp_running, false); - idx = ULONG_CMP_LT(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max)); - preempt_enable(); -@@ -172,7 +172,7 @@ static void srcu_gp_start_if_needed(struct srcu_struct *ssp) - { - unsigned long cookie; - -- preempt_disable(); // Needed for PREEMPT_AUTO -+ preempt_disable(); // Needed for PREEMPT_LAZY - cookie = get_state_synchronize_srcu(ssp); - if (ULONG_CMP_GE(READ_ONCE(ssp->srcu_idx_max), cookie)) { - preempt_enable(); -@@ -199,7 +199,7 @@ void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, - - rhp->func = func; - rhp->next = NULL; -- preempt_disable(); // Needed for PREEMPT_AUTO -+ preempt_disable(); // Needed for PREEMPT_LAZY - local_irq_save(flags); - *ssp->srcu_cb_tail = rhp; - ssp->srcu_cb_tail = &rhp->next; -@@ -261,7 +261,7 @@ unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp) - { - unsigned long ret; - -- preempt_disable(); // Needed for PREEMPT_AUTO -+ preempt_disable(); // Needed for PREEMPT_LAZY - ret = get_state_synchronize_srcu(ssp); - srcu_gp_start_if_needed(ssp); - preempt_enable(); -diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h -index 1c7cbd145d5e..304e3405e6ec 100644 ---- a/kernel/rcu/tree_plugin.h -+++ b/kernel/rcu/tree_plugin.h -@@ -832,8 +832,17 @@ void rcu_read_unlock_strict(void) - { - struct rcu_data *rdp; - -- if (irqs_disabled() || preempt_count() || !rcu_state.gp_kthread) -+ if (irqs_disabled() || in_atomic_preempt_off() || !rcu_state.gp_kthread) - return; -+ -+ /* -+ * rcu_report_qs_rdp() can only be invoked with a stable rdp and -+ * from the local CPU. -+ * -+ * The in_atomic_preempt_off() check ensures that we come here holding -+ * the last preempt_count (which will get dropped once we return to -+ * __rcu_read_unlock(). 
-+ */ - rdp = this_cpu_ptr(&rcu_data); - rdp->cpu_no_qs.b.norm = false; - rcu_report_qs_rdp(rdp); -@@ -974,13 +983,16 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) - */ - static void rcu_flavor_sched_clock_irq(int user) - { -- if (user || rcu_is_cpu_rrupt_from_idle()) { -+ if (user || rcu_is_cpu_rrupt_from_idle() || -+ (IS_ENABLED(CONFIG_PREEMPT_COUNT) && -+ (preempt_count() == HARDIRQ_OFFSET))) { - - /* - * Get here if this CPU took its interrupt from user -- * mode or from the idle loop, and if this is not a -- * nested interrupt. In this case, the CPU is in -- * a quiescent state, so note it. -+ * mode, from the idle loop without this being a nested -+ * interrupt, or while not holding the task preempt count -+ * (with PREEMPT_COUNT=y). In this case, the CPU is in a -+ * quiescent state, so note it. - * - * No memory barrier is required here because rcu_qs() - * references only CPU-local variables that other CPUs -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 76b27b2a9c56..e82948e247c1 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -941,10 +941,9 @@ static inline void hrtick_rq_init(struct rq *rq) - * this avoids any races wrt polling state changes and thereby avoids - * spurious IPIs. 
- */ --static inline bool set_nr_and_not_polling(struct task_struct *p) -+static inline bool set_nr_and_not_polling(struct thread_info *ti, int tif) - { -- struct thread_info *ti = task_thread_info(p); -- return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); -+ return !(fetch_or(&ti->flags, 1 << tif) & _TIF_POLLING_NRFLAG); - } - - /* -@@ -969,9 +968,9 @@ static bool set_nr_if_polling(struct task_struct *p) - } - - #else --static inline bool set_nr_and_not_polling(struct task_struct *p) -+static inline bool set_nr_and_not_polling(struct thread_info *ti, int tif) - { -- set_tsk_need_resched(p); -+ atomic_long_or(1 << tif, (atomic_long_t *)&ti->flags); - return true; - } - -@@ -1076,28 +1075,66 @@ void wake_up_q(struct wake_q_head *head) - * might also involve a cross-CPU call to trigger the scheduler on - * the target CPU. - */ --void resched_curr(struct rq *rq) -+static void __resched_curr(struct rq *rq, int tif) - { - struct task_struct *curr = rq->curr; -+ struct thread_info *cti = task_thread_info(curr); - int cpu; - - lockdep_assert_rq_held(rq); - -- if (test_tsk_need_resched(curr)) -+ if (is_idle_task(curr) && tif == TIF_NEED_RESCHED_LAZY) -+ tif = TIF_NEED_RESCHED; -+ -+ if (cti->flags & ((1 << tif) | _TIF_NEED_RESCHED)) - return; - - cpu = cpu_of(rq); - - if (cpu == smp_processor_id()) { -- set_tsk_need_resched(curr); -- set_preempt_need_resched(); -+ set_ti_thread_flag(cti, tif); -+ if (tif == TIF_NEED_RESCHED) -+ set_preempt_need_resched(); - return; - } - -- if (set_nr_and_not_polling(curr)) -- smp_send_reschedule(cpu); -- else -+ if (set_nr_and_not_polling(cti, tif)) { -+ if (tif == TIF_NEED_RESCHED) -+ smp_send_reschedule(cpu); -+ } else { - trace_sched_wake_idle_without_ipi(cpu); -+ } -+} -+ -+void resched_curr(struct rq *rq) -+{ -+ __resched_curr(rq, TIF_NEED_RESCHED); -+} -+ -+#ifdef CONFIG_PREEMPT_DYNAMIC -+static DEFINE_STATIC_KEY_FALSE(sk_dynamic_preempt_lazy); -+static __always_inline bool dynamic_preempt_lazy(void) -+{ -+ 
return static_branch_unlikely(&sk_dynamic_preempt_lazy); -+} -+#else -+static __always_inline bool dynamic_preempt_lazy(void) -+{ -+ return IS_ENABLED(CONFIG_PREEMPT_LAZY); -+} -+#endif -+ -+static __always_inline int tif_need_resched_lazy(void) -+{ -+ if (dynamic_preempt_lazy()) -+ return TIF_NEED_RESCHED_LAZY; -+ -+ return TIF_NEED_RESCHED; -+} -+ -+void resched_curr_lazy(struct rq *rq) -+{ -+ __resched_curr(rq, tif_need_resched_lazy()); - } - - void resched_cpu(int cpu) -@@ -1192,7 +1229,7 @@ static void wake_up_idle_cpu(int cpu) - * and testing of the above solutions didn't appear to report - * much benefits. - */ -- if (set_nr_and_not_polling(rq->idle)) -+ if (set_nr_and_not_polling(task_thread_info(rq->idle), TIF_NEED_RESCHED)) - smp_send_reschedule(cpu); - else - trace_sched_wake_idle_without_ipi(cpu); -@@ -5604,6 +5641,10 @@ void sched_tick(void) - update_rq_clock(rq); - hw_pressure = arch_scale_hw_pressure(cpu_of(rq)); - update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure); -+ -+ if (dynamic_preempt_lazy() && tif_test_bit(TIF_NEED_RESCHED_LAZY)) -+ resched_curr(rq); -+ - curr->sched_class->task_tick(rq, curr, 0); - if (sched_feat(LATENCY_WARN)) - resched_latency = cpu_resched_latency(rq); -@@ -7219,7 +7260,7 @@ int __sched __cond_resched(void) - return 1; - } - /* -- * In preemptible kernels, ->rcu_read_lock_nesting tells the tick -+ * In PREEMPT_RCU kernels, ->rcu_read_lock_nesting tells the tick - * whether the current CPU is in an RCU read-side critical section, - * so the tick can report quiescent states even for CPUs looping - * in kernel context. In contrast, in non-preemptible kernels, -@@ -7228,6 +7269,8 @@ int __sched __cond_resched(void) - * RCU quiescent state. Therefore, the following code causes - * cond_resched() to report a quiescent state, but only when RCU - * is in urgent need of one. -+ * A third case, preemptible, but non-PREEMPT_RCU provides for -+ * urgently needed quiescent states via rcu_flavor_sched_clock_irq(). 
- */ - #ifndef CONFIG_PREEMPT_RCU - rcu_all_qs(); -@@ -7352,6 +7395,7 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write); - * preempt_schedule <- NOP - * preempt_schedule_notrace <- NOP - * irqentry_exit_cond_resched <- NOP -+ * dynamic_preempt_lazy <- false - * - * VOLUNTARY: - * cond_resched <- __cond_resched -@@ -7359,6 +7403,7 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write); - * preempt_schedule <- NOP - * preempt_schedule_notrace <- NOP - * irqentry_exit_cond_resched <- NOP -+ * dynamic_preempt_lazy <- false - * - * FULL: - * cond_resched <- RET0 -@@ -7366,6 +7411,15 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write); - * preempt_schedule <- preempt_schedule - * preempt_schedule_notrace <- preempt_schedule_notrace - * irqentry_exit_cond_resched <- irqentry_exit_cond_resched -+ * dynamic_preempt_lazy <- false -+ * -+ * LAZY: -+ * cond_resched <- RET0 -+ * might_resched <- RET0 -+ * preempt_schedule <- preempt_schedule -+ * preempt_schedule_notrace <- preempt_schedule_notrace -+ * irqentry_exit_cond_resched <- irqentry_exit_cond_resched -+ * dynamic_preempt_lazy <- true - */ - - enum { -@@ -7373,30 +7427,41 @@ enum { - preempt_dynamic_none, - preempt_dynamic_voluntary, - preempt_dynamic_full, -+ preempt_dynamic_lazy, - }; - - int preempt_dynamic_mode = preempt_dynamic_undefined; - - int sched_dynamic_mode(const char *str) - { -+#ifndef CONFIG_PREEMPT_RT - if (!strcmp(str, "none")) - return preempt_dynamic_none; - - if (!strcmp(str, "voluntary")) - return preempt_dynamic_voluntary; -+#endif - - if (!strcmp(str, "full")) - return preempt_dynamic_full; - -+#ifdef CONFIG_ARCH_HAS_PREEMPT_LAZY -+ if (!strcmp(str, "lazy")) -+ return preempt_dynamic_lazy; -+#endif -+ - return -EINVAL; - } - -+#define preempt_dynamic_key_enable(f) static_key_enable(&sk_dynamic_##f.key) -+#define preempt_dynamic_key_disable(f) static_key_disable(&sk_dynamic_##f.key) -+ - #if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) - #define preempt_dynamic_enable(f) static_call_update(f, f##_dynamic_enabled) - 
#define preempt_dynamic_disable(f) static_call_update(f, f##_dynamic_disabled) - #elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) --#define preempt_dynamic_enable(f) static_key_enable(&sk_dynamic_##f.key) --#define preempt_dynamic_disable(f) static_key_disable(&sk_dynamic_##f.key) -+#define preempt_dynamic_enable(f) preempt_dynamic_key_enable(f) -+#define preempt_dynamic_disable(f) preempt_dynamic_key_disable(f) - #else - #error "Unsupported PREEMPT_DYNAMIC mechanism" - #endif -@@ -7416,6 +7481,7 @@ static void __sched_dynamic_update(int mode) - preempt_dynamic_enable(preempt_schedule); - preempt_dynamic_enable(preempt_schedule_notrace); - preempt_dynamic_enable(irqentry_exit_cond_resched); -+ preempt_dynamic_key_disable(preempt_lazy); - - switch (mode) { - case preempt_dynamic_none: -@@ -7425,6 +7491,7 @@ static void __sched_dynamic_update(int mode) - preempt_dynamic_disable(preempt_schedule); - preempt_dynamic_disable(preempt_schedule_notrace); - preempt_dynamic_disable(irqentry_exit_cond_resched); -+ preempt_dynamic_key_disable(preempt_lazy); - if (mode != preempt_dynamic_mode) - pr_info("Dynamic Preempt: none\n"); - break; -@@ -7436,6 +7503,7 @@ static void __sched_dynamic_update(int mode) - preempt_dynamic_disable(preempt_schedule); - preempt_dynamic_disable(preempt_schedule_notrace); - preempt_dynamic_disable(irqentry_exit_cond_resched); -+ preempt_dynamic_key_disable(preempt_lazy); - if (mode != preempt_dynamic_mode) - pr_info("Dynamic Preempt: voluntary\n"); - break; -@@ -7447,9 +7515,22 @@ static void __sched_dynamic_update(int mode) - preempt_dynamic_enable(preempt_schedule); - preempt_dynamic_enable(preempt_schedule_notrace); - preempt_dynamic_enable(irqentry_exit_cond_resched); -+ preempt_dynamic_key_disable(preempt_lazy); - if (mode != preempt_dynamic_mode) - pr_info("Dynamic Preempt: full\n"); - break; -+ -+ case preempt_dynamic_lazy: -+ if (!klp_override) -+ preempt_dynamic_disable(cond_resched); -+ preempt_dynamic_disable(might_resched); -+ 
preempt_dynamic_enable(preempt_schedule); -+ preempt_dynamic_enable(preempt_schedule_notrace); -+ preempt_dynamic_enable(irqentry_exit_cond_resched); -+ preempt_dynamic_key_enable(preempt_lazy); -+ if (mode != preempt_dynamic_mode) -+ pr_info("Dynamic Preempt: lazy\n"); -+ break; - } - - preempt_dynamic_mode = mode; -@@ -7512,6 +7593,8 @@ static void __init preempt_dynamic_init(void) - sched_dynamic_update(preempt_dynamic_none); - } else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) { - sched_dynamic_update(preempt_dynamic_voluntary); -+ } else if (IS_ENABLED(CONFIG_PREEMPT_LAZY)) { -+ sched_dynamic_update(preempt_dynamic_lazy); - } else { - /* Default static call setting, nothing to do */ - WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)); -@@ -7532,6 +7615,7 @@ static void __init preempt_dynamic_init(void) - PREEMPT_MODEL_ACCESSOR(none); - PREEMPT_MODEL_ACCESSOR(voluntary); - PREEMPT_MODEL_ACCESSOR(full); -+PREEMPT_MODEL_ACCESSOR(lazy); - - #else /* !CONFIG_PREEMPT_DYNAMIC: */ - -diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index f4035c7a0fa1..a48b2a701ec2 100644 ---- a/kernel/sched/debug.c -+++ b/kernel/sched/debug.c -@@ -245,11 +245,12 @@ static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf, - static int sched_dynamic_show(struct seq_file *m, void *v) - { - static const char * preempt_modes[] = { -- "none", "voluntary", "full" -+ "none", "voluntary", "full", "lazy", - }; -- int i; -+ int j = ARRAY_SIZE(preempt_modes) - !IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY); -+ int i = IS_ENABLED(CONFIG_PREEMPT_RT) * 2; - -- for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) { -+ for (; i < j; i++) { - if (preempt_dynamic_mode == i) - seq_puts(m, "("); - seq_puts(m, preempt_modes[i]); -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 54e7c4c3e2c5..10e9484d1d43 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -1264,7 +1264,7 @@ static void update_curr(struct cfs_rq *cfs_rq) - return; - - if (resched || 
did_preempt_short(cfs_rq, curr)) { -- resched_curr(rq); -+ resched_curr_lazy(rq); - clear_buddies(cfs_rq, curr); - } - } -@@ -5691,7 +5691,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) - * validating it and just reschedule. - */ - if (queued) { -- resched_curr(rq_of(cfs_rq)); -+ resched_curr_lazy(rq_of(cfs_rq)); - return; - } - /* -@@ -8855,7 +8855,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int - return; - - preempt: -- resched_curr(rq); -+ resched_curr_lazy(rq); - } - - static struct task_struct *pick_task_fair(struct rq *rq) -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index c5d6012794de..b5f3890f3050 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2696,6 +2696,7 @@ extern void init_sched_rt_class(void); - extern void init_sched_fair_class(void); - - extern void resched_curr(struct rq *rq); -+extern void resched_curr_lazy(struct rq *rq); - extern void resched_cpu(int cpu); - - extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); -diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c -index 6a891e00aa7f..acbed0ffe083 100644 ---- a/kernel/trace/trace.c -+++ b/kernel/trace/trace.c -@@ -2563,6 +2563,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) - trace_flags |= TRACE_FLAG_NEED_RESCHED; - if (test_preempt_need_resched()) - trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; -+ if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY)) -+ trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; - return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | - (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; - } -diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c -index a50ed23bee77..4a9087112526 100644 ---- a/kernel/trace/trace_osnoise.c -+++ b/kernel/trace/trace_osnoise.c -@@ -1537,27 +1537,25 @@ static int run_osnoise(void) - - /* - * In some cases, notably when 
running on a nohz_full CPU with -- * a stopped tick PREEMPT_RCU has no way to account for QSs. -- * This will eventually cause unwarranted noise as PREEMPT_RCU -- * will force preemption as the means of ending the current -- * grace period. We avoid this problem by calling -- * rcu_momentary_eqs(), which performs a zero duration -- * EQS allowing PREEMPT_RCU to end the current grace period. -- * This call shouldn't be wrapped inside an RCU critical -- * section. -+ * a stopped tick PREEMPT_RCU or PREEMPT_LAZY have no way to -+ * account for QSs. This will eventually cause unwarranted -+ * noise as RCU forces preemption as the means of ending the -+ * current grace period. We avoid this by calling -+ * rcu_momentary_eqs(), which performs a zero duration EQS -+ * allowing RCU to end the current grace period. This call -+ * shouldn't be wrapped inside an RCU critical section. - * -- * Note that in non PREEMPT_RCU kernels QSs are handled through -- * cond_resched() -+ * Normally QSs for other cases are handled through cond_resched(). -+ * For simplicity, however, we call rcu_momentary_eqs() for all -+ * configurations here. - */ -- if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { -- if (!disable_irq) -- local_irq_disable(); -+ if (!disable_irq) -+ local_irq_disable(); - -- rcu_momentary_eqs(); -+ rcu_momentary_eqs(); - -- if (!disable_irq) -- local_irq_enable(); -- } -+ if (!disable_irq) -+ local_irq_enable(); - - /* - * For the non-preemptive kernel config: let threads runs, if -diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c -index 868f2f912f28..23ca2155306b 100644 ---- a/kernel/trace/trace_output.c -+++ b/kernel/trace/trace_output.c -@@ -460,20 +460,32 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) - (entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' : - (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : - bh_off ? 'b' : -- (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : -+ !IS_ENABLED(CONFIG_TRACE_IRQFLAGS_SUPPORT) ? 
'X' : - '.'; - -- switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | -+ switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY | - TRACE_FLAG_PREEMPT_RESCHED)) { -+ case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY | TRACE_FLAG_PREEMPT_RESCHED: -+ need_resched = 'B'; -+ break; - case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED: - need_resched = 'N'; - break; -+ case TRACE_FLAG_NEED_RESCHED_LAZY | TRACE_FLAG_PREEMPT_RESCHED: -+ need_resched = 'L'; -+ break; -+ case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY: -+ need_resched = 'b'; -+ break; - case TRACE_FLAG_NEED_RESCHED: - need_resched = 'n'; - break; - case TRACE_FLAG_PREEMPT_RESCHED: - need_resched = 'p'; - break; -+ case TRACE_FLAG_NEED_RESCHED_LAZY: -+ need_resched = 'l'; -+ break; - default: - need_resched = '.'; - break; --- -2.47.1 -