From 6e71e0f1db613dd1c78aa1af75028c8b89e5c3c1 Mon Sep 17 00:00:00 2001 From: ferrreo Date: Wed, 26 Jul 2023 10:45:05 +0100 Subject: [PATCH] 6.4.6 --- patches/0001-cachy-all.patch | 2452 +++++++++++++++++++++++++++++++--- patches/0002-eevdf.patch | 80 +- patches/0002-eevdfbore.patch | 193 ++- patches/0003-bcachefs.patch | 1340 ++++++++++--------- scripts/build.sh | 2 +- scripts/source.sh | 6 +- 6 files changed, 3161 insertions(+), 912 deletions(-) diff --git a/patches/0001-cachy-all.patch b/patches/0001-cachy-all.patch index ee56662..1f68942 100644 --- a/patches/0001-cachy-all.patch +++ b/patches/0001-cachy-all.patch @@ -1,7 +1,7 @@ -From a2168c50c2c846ad624b028bbca121f11b732a95 Mon Sep 17 00:00:00 2001 +From fa0fa964d3d7500eedd2d76639075887583968bc Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Tue, 11 Jul 2023 19:24:11 +0200 -Subject: [PATCH 1/7] bbr2 +Subject: [PATCH 1/8] bbr2 Signed-off-by: Peter Jung --- @@ -3037,7 +3037,7 @@ index 1b34050a7538..66d40449b3f4 100644 icsk->icsk_ca_ops->init(sk); if (tcp_ca_needs_ecn(sk)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index bf8b22218dd4..3ae56b0676a8 100644 +index 57f1e4883b76..3879af9b5b69 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -349,7 +349,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) @@ -3089,7 +3089,7 @@ index bf8b22218dd4..3ae56b0676a8 100644 /* When we're adding to gso_segs == 1, gso_size will be zero, * in theory this shouldn't be necessary but as long as DSACK * code can come after this skb later on it's better to keep -@@ -3813,6 +3829,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) +@@ -3819,6 +3835,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una; rs.prior_in_flight = tcp_packets_in_flight(tp); @@ -3097,7 +3097,7 @@ index bf8b22218dd4..3ae56b0676a8 100644 /* ts_recent update must be made after we are sure that the packet * is in window. -@@ -3911,6 +3928,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) +@@ -3917,6 +3934,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) delivered = tcp_newly_delivered(sk, delivered, flag); lost = tp->lost - lost; /* freshly marked lost */ rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); @@ -3105,7 +3105,7 @@ index bf8b22218dd4..3ae56b0676a8 100644 tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); -@@ -5521,13 +5539,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) +@@ -5527,13 +5545,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) /* More than one full frame received... */ if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && @@ -3283,10 +3283,10 @@ index 39eb947fe392..61ab4ee55b22 100644 -- 2.41.0 -From 583c46f67e8db3fb6478523ff297ab3f469186ba Mon Sep 17 00:00:00 2001 +From 3af499ab9158feb0393935535517c2a38f6017ed Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Tue, 11 Jul 2023 19:24:37 +0200 -Subject: [PATCH 2/7] cachy +Date: Wed, 19 Jul 2023 18:48:44 +0200 +Subject: [PATCH 2/8] cachy Signed-off-by: Peter Jung --- @@ -3381,7 +3381,7 @@ index 9e5bab29685f..794e7a91219a 100644 Safety option to keep boot IRQs enabled. This should never be necessary. 
diff --git a/Makefile b/Makefile -index 56abbcac061d..c7cd86bb99e4 100644 +index c324529158cc..e83e1d3420f9 100644 --- a/Makefile +++ b/Makefile @@ -818,6 +818,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) @@ -4505,10 +4505,10 @@ index 00476e94db90..c3a219218fac 100644 This driver adds a CPUFreq driver which utilizes a fine grain processor performance frequency control range instead of legacy diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c -index 2548ec92faa2..381ddedcf693 100644 +index f29182512b98..873e46f8c459 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c -@@ -3485,6 +3485,8 @@ static int __init intel_pstate_setup(char *str) +@@ -3487,6 +3487,8 @@ static int __init intel_pstate_setup(char *str) if (!strcmp(str, "disable")) no_load = 1; @@ -5720,7 +5720,7 @@ index 000000000000..e105e6f5cc91 +MODULE_AUTHOR("Daniel Drake "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index c525867760bf..9232b0fcb945 100644 +index b7c65193e786..1b55bec81999 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3625,6 +3625,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) @@ -5830,7 +5830,7 @@ index c525867760bf..9232b0fcb945 100644 /* * Some NVIDIA GPU devices do not work with bus reset, SBR needs to be * prevented for those affected devices. -@@ -5015,6 +5115,7 @@ static const struct pci_dev_acs_enabled { +@@ -5017,6 +5117,7 @@ static const struct pci_dev_acs_enabled { { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, /* Wangxun nics */ { PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs }, @@ -9398,7 +9398,7 @@ index 8103ffd217e9..f405763e06ae 100644 if (err) goto bad_unshare_out; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 373ff5f55884..9671df93d1f5 100644 +index 4da5f3541762..6742b1e1a359 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -69,9 +69,13 @@ @@ -9519,7 +9519,7 @@ index e3454087fd31..bc617f00f97c 100644 # diff --git a/mm/page-writeback.c b/mm/page-writeback.c -index db7943999007..97a00017793a 100644 +index 6faa09f1783b..bc1f17fd658e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -71,7 +71,11 @@ static long ratelimit_pages = 32; @@ -9584,7 +9584,7 @@ index b52644771cc4..11a4b0e3b583 100644 /* diff --git a/mm/vmscan.c b/mm/vmscan.c -index 5bf98d0a22c9..28f6d5cd362e 100644 +index 6114a1fc6c68..6aa2d0a0b1d6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -186,7 +186,11 @@ struct scan_control { @@ -9614,10 +9614,10 @@ index 5bf98d0a22c9..28f6d5cd362e 100644 -- 2.41.0 -From 3a68ae439de252da49e718998385b91b69809642 Mon Sep 17 00:00:00 2001 +From 9be3158c9f2e0568c8f532aebd5a19cde1aaa806 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Tue, 11 Jul 2023 19:25:14 +0200 -Subject: [PATCH 3/7] fixes +Date: Wed, 19 Jul 2023 18:49:32 +0200 +Subject: [PATCH 3/8] fixes Signed-off-by: Peter Jung --- @@ -9625,7 +9625,7 @@ Signed-off-by: Peter Jung .../testing/sysfs-class-led-trigger-blkdev | 78 ++ Documentation/leds/index.rst | 1 + Documentation/leds/ledtrig-blkdev.rst | 158 +++ - drivers/bluetooth/btusb.c | 2 +- + drivers/bluetooth/btusb.c | 3 +- drivers/leds/trigger/Kconfig | 9 + drivers/leds/trigger/Makefile | 1 + drivers/leds/trigger/ledtrig-blkdev.c | 1221 +++++++++++++++++ @@ -9634,11 +9634,9 @@ Signed-off-by: Peter Jung lib/decompress_inflate.c | 2 +- lib/decompress_unxz.c | 2 + lib/decompress_unzstd.c | 2 + - mm/mmap.c | 9 +- - net/netfilter/nf_tables_api.c | 2 + 
scripts/Makefile.vmlinux_o | 2 +- sound/pci/hda/cs35l41_hda.c | 2 +- - 17 files changed, 1496 insertions(+), 11 deletions(-) + 15 files changed, 1490 insertions(+), 7 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-class-led-trigger-blkdev create mode 100644 Documentation/leds/ledtrig-blkdev.rst create mode 100644 drivers/leds/trigger/ledtrig-blkdev.c @@ -9925,7 +9923,7 @@ index 000000000000..9ff5b99de451 + A device can be associated with multiple LEDs, and an LED can be associated + with multiple devices. diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c -index 2a8e2bb038f5..c13dd79720f7 100644 +index 2a8e2bb038f5..15b79f558a16 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -940,7 +940,7 @@ static void btusb_qca_cmd_timeout(struct hci_dev *hdev) @@ -9937,6 +9935,14 @@ index 2a8e2bb038f5..c13dd79720f7 100644 gpiod_set_value_cansleep(reset_gpio, 1); return; +@@ -4099,6 +4099,7 @@ static int btusb_probe(struct usb_interface *intf, + BT_DBG("intf %p id %p", intf, id); + + if ((id->driver_info & BTUSB_IFNUM_2) && ++ (intf->cur_altsetting->desc.bInterfaceNumber != 0) && + (intf->cur_altsetting->desc.bInterfaceNumber != 2)) + return -ENODEV; + diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig index 2a57328eca20..05e80cfd0ed8 100644 --- a/drivers/leds/trigger/Kconfig @@ -11265,49 +11271,6 @@ index a512b99ae16a..bba2c0bb10cb 100644 #endif #include -diff --git a/mm/mmap.c b/mm/mmap.c -index 30bf7772d4ac..5c5a917b261e 100644 ---- a/mm/mmap.c -+++ b/mm/mmap.c -@@ -2480,7 +2480,8 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, - } - vma_start_write(next); - mas_set_range(&mas_detach, next->vm_start, next->vm_end - 1); -- if (mas_store_gfp(&mas_detach, next, GFP_KERNEL)) -+ error = mas_store_gfp(&mas_detach, next, GFP_KERNEL); -+ if (error) - goto munmap_gather_failed; - vma_mark_detached(next, true); - if (next->vm_flags & VM_LOCKED) -@@ -2529,12 +2530,12 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, - BUG_ON(count != test_count); - } - #endif -- /* Point of no return */ -- error = -ENOMEM; - vma_iter_set(vmi, start); -- if (vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL)) -+ error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL); -+ if (error) - goto clear_tree_failed; - -+ /* Point of no return */ - mm->locked_vm -= locked_vm; - mm->map_count -= count; - /* -diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c -index 4c7937fd803f..1d64c163076a 100644 ---- a/net/netfilter/nf_tables_api.c -+++ b/net/netfilter/nf_tables_api.c -@@ -5343,6 +5343,8 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, - nft_set_trans_unbind(ctx, set); - if (nft_set_is_anonymous(set)) - nft_deactivate_next(ctx->net, set); -+ else -+ list_del_rcu(&binding->list); - - set->use--; - break; diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 0edfdb40364b..ae52d3b3f063 100644 --- a/scripts/Makefile.vmlinux_o @@ -11337,10 +11300,2199 @@ index b5210abb5141..4d8936e1f769 100644 -- 2.41.0 -From cf10d13e4dc6b189366dd15713486e50d71aa718 Mon Sep 17 00:00:00 2001 +From b26229592714772a1f76c9b86f0651568efdc2c9 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Tue, 11 Jul 2023 19:25:28 +0200 -Subject: [PATCH 4/7] ksm +Date: Wed, 19 Jul 2023 18:52:10 +0200 +Subject: [PATCH 4/8] HDR + +Signed-off-by: Peter Jung +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 125 ++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 69 ++ + 
.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 28 +-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 110 +++-
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 612 ++++++++++++++++--
 .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 72 ++-
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 213 +++++-
 drivers/gpu/drm/amd/display/dc/core/dc.c | 49 +-
 drivers/gpu/drm/amd/display/dc/dc.h | 8 +
 .../amd/display/dc/dcn10/dcn10_cm_common.c | 109 +++-
 .../drm/amd/display/dc/dcn20/dcn20_hwseq.c | 5 +-
 .../drm/amd/display/dc/dcn30/dcn30_hwseq.c | 9 +-
 .../amd/display/dc/dcn301/dcn301_resource.c | 26 +-
 .../gpu/drm/amd/display/include/fixed31_32.h | 12 +
 drivers/gpu/drm/arm/malidp_crtc.c | 2 +-
 drivers/gpu/drm/drm_atomic.c | 1 +
 drivers/gpu/drm/drm_atomic_state_helper.c | 1 +
 drivers/gpu/drm/drm_atomic_uapi.c | 43 +-
 drivers/gpu/drm/drm_property.c | 49 ++
 include/drm/drm_mode_object.h | 2 +-
 include/drm/drm_plane.h | 7 +
 include/drm/drm_property.h | 6 +
 22 files changed, 1417 insertions(+), 141 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index d60fe7eb5579..708866da7863 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -1248,6 +1248,127 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev,
 return &amdgpu_fb->base;
 }
 
+static const struct drm_prop_enum_list drm_transfer_function_enum_list[] = {
+ { DRM_TRANSFER_FUNCTION_DEFAULT, "Default" },
+ { DRM_TRANSFER_FUNCTION_SRGB, "sRGB" },
+ { DRM_TRANSFER_FUNCTION_BT709, "BT.709" },
+ { DRM_TRANSFER_FUNCTION_PQ, "PQ (Perceptual Quantizer)" },
+ { DRM_TRANSFER_FUNCTION_LINEAR, "Linear" },
+ { DRM_TRANSFER_FUNCTION_UNITY, "Unity" },
+ { DRM_TRANSFER_FUNCTION_HLG, "HLG (Hybrid Log Gamma)" },
+ { DRM_TRANSFER_FUNCTION_GAMMA22, "Gamma 2.2" },
+ { DRM_TRANSFER_FUNCTION_GAMMA24, "Gamma 2.4" },
+ { DRM_TRANSFER_FUNCTION_GAMMA26, "Gamma 2.6" },
+};
+
+#ifdef AMD_PRIVATE_COLOR
+static int
+amdgpu_display_create_color_properties(struct amdgpu_device *adev)
+{
+ struct drm_property *prop;
+
+ prop = drm_property_create_enum(adev_to_drm(adev),
+ DRM_MODE_PROP_ENUM,
+ "AMD_REGAMMA_TF",
+ drm_transfer_function_enum_list,
+ ARRAY_SIZE(drm_transfer_function_enum_list));
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.regamma_tf_property = prop;
+
+ prop = drm_property_create(adev_to_drm(adev),
+ DRM_MODE_PROP_BLOB,
+ "AMD_PLANE_DEGAMMA_LUT", 0);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_degamma_lut_property = prop;
+
+ prop = drm_property_create_range(adev_to_drm(adev),
+ DRM_MODE_PROP_IMMUTABLE,
+ "AMD_PLANE_DEGAMMA_LUT_SIZE", 0, UINT_MAX);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_degamma_lut_size_property = prop;
+
+ prop = drm_property_create_enum(adev_to_drm(adev),
+ DRM_MODE_PROP_ENUM,
+ "AMD_PLANE_DEGAMMA_TF",
+ drm_transfer_function_enum_list,
+ ARRAY_SIZE(drm_transfer_function_enum_list));
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_degamma_tf_property = prop;
+
+ prop = drm_property_create_range(adev_to_drm(adev),
+ 0, "AMD_PLANE_HDR_MULT", 0, U64_MAX);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_hdr_mult_property = prop;
+
+ prop = drm_property_create(adev_to_drm(adev),
+ DRM_MODE_PROP_BLOB,
+ "AMD_PLANE_SHAPER_LUT", 0);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_shaper_lut_property = prop;
+
+ prop = drm_property_create_range(adev_to_drm(adev),
+ DRM_MODE_PROP_IMMUTABLE,
+ "AMD_PLANE_SHAPER_LUT_SIZE", 0, UINT_MAX);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_shaper_lut_size_property = prop;
+
+ prop = drm_property_create_enum(adev_to_drm(adev),
+ DRM_MODE_PROP_ENUM,
+ "AMD_PLANE_SHAPER_TF",
+ drm_transfer_function_enum_list,
+ ARRAY_SIZE(drm_transfer_function_enum_list));
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_shaper_tf_property = prop;
+
+ prop = drm_property_create(adev_to_drm(adev),
+ DRM_MODE_PROP_BLOB,
+ "AMD_PLANE_LUT3D", 0);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_lut3d_property = prop;
+
+ prop = drm_property_create_range(adev_to_drm(adev),
+ DRM_MODE_PROP_IMMUTABLE,
+ "AMD_PLANE_LUT3D_SIZE", 0, UINT_MAX);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_lut3d_size_property = prop;
+
+ prop = drm_property_create(adev_to_drm(adev),
+ DRM_MODE_PROP_BLOB,
+ "AMD_PLANE_BLEND_LUT", 0);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_blend_lut_property = prop;
+
+ prop = drm_property_create_range(adev_to_drm(adev),
+ DRM_MODE_PROP_IMMUTABLE,
+ "AMD_PLANE_BLEND_LUT_SIZE", 0, UINT_MAX);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_blend_lut_size_property = prop;
+
+ prop = drm_property_create_enum(adev_to_drm(adev),
+ DRM_MODE_PROP_ENUM,
+ "AMD_PLANE_BLEND_TF",
+ drm_transfer_function_enum_list,
+ ARRAY_SIZE(drm_transfer_function_enum_list));
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_blend_tf_property = prop;
+
+ return 0;
+}
+#endif
+
 const struct drm_mode_config_funcs amdgpu_mode_funcs = {
 .fb_create = amdgpu_display_user_framebuffer_create,
 };
@@ -1324,6 +1445,10 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
 return -ENOMEM;
 }
 
+#ifdef AMD_PRIVATE_COLOR
+ if (amdgpu_display_create_color_properties(adev))
+ return -ENOMEM;
+#endif
 return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 32fe05c810c6..34291cd134a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -343,6 +343,75 @@ struct amdgpu_mode_info {
 int disp_priority;
 const struct amdgpu_display_funcs *funcs;
 const enum drm_plane_type *plane_type;
+
+ /* Driver-private color mgmt props */
+
+ /* @regamma_tf_property: Transfer function for CRTC regamma
+ * (post-blending). Possible values are defined by `enum
+ * drm_transfer_function`.
+ */
+ struct drm_property *regamma_tf_property;
+ /* @plane_degamma_lut_property: Plane property to set a degamma LUT to
+ * convert color space before blending.
+ */
+ struct drm_property *plane_degamma_lut_property;
+ /* @plane_degamma_lut_size_property: Plane property to define the max
+ * size of degamma LUT as supported by the driver (read-only).
+ */
+ struct drm_property *plane_degamma_lut_size_property;
+ /**
+ * @plane_degamma_tf_property: Predefined transfer function to
+ * linearize content with or without LUT.
+ */
+ struct drm_property *plane_degamma_tf_property;
+ /**
+ * @plane_hdr_mult_property:
+ */
+ struct drm_property *plane_hdr_mult_property;
+ /**
+ * @shaper_lut_property: Plane property to set pre-blending shaper LUT
+ * that converts color content before 3D LUT.
+ */
+ struct drm_property *plane_shaper_lut_property;
+ /**
+ * @shaper_lut_size_property: Plane property for the size of
+ * pre-blending shaper LUT as supported by the driver (read-only).
++ */ ++ struct drm_property *plane_shaper_lut_size_property; ++ /** ++ * @plane_shaper_tf_property: Plane property to set a predefined ++ * transfer function for pre-blending shaper (before applying 3D LUT) ++ * with or without LUT. ++ */ ++ struct drm_property *plane_shaper_tf_property; ++ /** ++ * @plane_lut3d_property: Plane property for gamma correction using a ++ * 3D LUT (pre-blending). ++ */ ++ struct drm_property *plane_lut3d_property; ++ /** ++ * @plane_degamma_lut_size_property: Plane property to define the max ++ * size of 3D LUT as supported by the driver (read-only). ++ */ ++ struct drm_property *plane_lut3d_size_property; ++ /** ++ * @plane_blend_lut_property: Plane property for output gamma before ++ * blending. Userspace set a blend LUT to convert colors after 3D LUT ++ * conversion. It works as a post-3D LUT 1D LUT, with shaper LUT, they ++ * are sandwiching 3D LUT with two 1D LUT. ++ */ ++ struct drm_property *plane_blend_lut_property; ++ /** ++ * @plane_blend_lut_size_property: Plane property to define the max ++ * size of blend LUT as supported by the driver (read-only). ++ */ ++ struct drm_property *plane_blend_lut_size_property; ++ /** ++ * @plane_blend_tf_property: Plane property to set a predefined ++ * transfer function for pre-blending blend (before applying 3D LUT) ++ * with or without LUT. ++ */ ++ struct drm_property *plane_blend_tf_property; + }; + + #define AMDGPU_MAX_BL_LEVEL 0xFF +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +index 44f4c7441974..0e6bae54f6fe 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -5046,7 +5046,9 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, + * Always set input transfer function, since plane state is refreshed + * every time. 
+ */ +- ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state); ++ ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, ++ plane_state, ++ dc_plane_state); + if (ret) + return ret; + +@@ -7993,6 +7995,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, + bundle->surface_updates[planes_count].gamma = dc_plane->gamma_correction; + bundle->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func; + bundle->surface_updates[planes_count].gamut_remap_matrix = &dc_plane->gamut_remap_matrix; ++ bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult; ++ bundle->surface_updates[planes_count].func_shaper = dc_plane->in_shaper_func; ++ bundle->surface_updates[planes_count].lut3d_func = dc_plane->lut3d_func; ++ bundle->surface_updates[planes_count].blend_tf = dc_plane->blend_tf; + } + + amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state, +@@ -8194,6 +8200,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, + &acrtc_state->stream->csc_color_matrix; + bundle->stream_update.out_transfer_func = + acrtc_state->stream->out_transfer_func; ++ bundle->stream_update.lut3d_func = ++ (struct dc_3dlut *) acrtc_state->stream->lut3d_func; ++ bundle->stream_update.func_shaper = ++ (struct dc_transfer_func *) acrtc_state->stream->func_shaper; + } + + acrtc_state->stream->abm_level = acrtc_state->abm_level; +@@ -9384,7 +9394,12 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, + */ + if (dm_new_crtc_state->base.color_mgmt_changed || + drm_atomic_crtc_needs_modeset(new_crtc_state)) { +- ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state); ++ if (!dm_state) { ++ ret = dm_atomic_get_state(state, &dm_state); ++ if (ret) ++ goto fail; ++ } ++ ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state, dm_state->context); + if (ret) + goto fail; + } +@@ -9440,6 +9455,9 @@ static bool should_reset_plane(struct drm_atomic_state *state, + if (drm_atomic_crtc_needs_modeset(new_crtc_state)) + return true; + ++ if (new_plane_state->color_mgmt_changed) ++ return true; ++ + /* + * If there are any new primary or overlay planes being added or + * removed then the z-order can potentially change. 
To ensure +@@ -9945,6 +9963,12 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, + goto fail; + } + ++ ret = amdgpu_dm_verify_lut3d_size(adev, new_crtc_state); ++ if (ret) { ++ drm_dbg_driver(dev, "amdgpu_dm_verify_lut_sizes() failed\n"); ++ goto fail; ++ } ++ + if (!new_crtc_state->enable) + continue; + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +index 2e2413fd73a4..bf4a1d6be99e 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +@@ -51,6 +51,7 @@ + + #define AMDGPU_DMUB_NOTIFICATION_MAX 5 + ++#define AMDGPU_HDR_MULT_DEFAULT (0x100000000LL) + /* + #include "include/amdgpu_dal_power_if.h" + #include "amdgpu_dm_irq.h" +@@ -699,9 +700,78 @@ static inline void amdgpu_dm_set_mst_status(uint8_t *status, + + extern const struct amdgpu_ip_block_version dm_ip_block; + ++enum drm_transfer_function { ++ DRM_TRANSFER_FUNCTION_DEFAULT, ++ DRM_TRANSFER_FUNCTION_SRGB, ++ DRM_TRANSFER_FUNCTION_BT709, ++ DRM_TRANSFER_FUNCTION_PQ, ++ DRM_TRANSFER_FUNCTION_LINEAR, ++ DRM_TRANSFER_FUNCTION_UNITY, ++ DRM_TRANSFER_FUNCTION_HLG, ++ DRM_TRANSFER_FUNCTION_GAMMA22, ++ DRM_TRANSFER_FUNCTION_GAMMA24, ++ DRM_TRANSFER_FUNCTION_GAMMA26, ++ DRM_TRANSFER_FUNCTION_MAX, ++}; ++ + struct dm_plane_state { + struct drm_plane_state base; + struct dc_plane_state *dc_state; ++ ++ /* Plane color mgmt */ ++ /** ++ * @degamma_lut: ++ * ++ * LUT for converting plane pixel data before going into plane merger. ++ * The blob (if not NULL) is an array of &struct drm_color_lut. ++ */ ++ struct drm_property_blob *degamma_lut; ++ /** ++ * @degamma_tf: ++ * ++ * Predefined transfer function to tell DC driver the input space to ++ * linearize. ++ */ ++ enum drm_transfer_function degamma_tf; ++ /** ++ * @hdr_mult: ++ * ++ * Multiplier to 'gain' the plane. When PQ is decoded using the fixed ++ * func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on ++ * AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously. ++ * Therefore, 1.0 multiplier = 80 nits for SDR content. So if you ++ * want, 203 nits for SDR content, pass in (203.0 / 80.0). Format is ++ * S31.32 sign-magnitude. ++ */ ++ __u64 hdr_mult; ++ /** ++ * @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an ++ * array of &struct drm_color_lut. ++ */ ++ struct drm_property_blob *shaper_lut; ++ /** ++ * @shaper_tf: ++ * ++ * Predefined transfer function to delinearize color space. ++ */ ++ enum drm_transfer_function shaper_tf; ++ /** ++ * @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of ++ * &struct drm_color_lut. ++ */ ++ struct drm_property_blob *lut3d; ++ /** ++ * @blend_lut: blend lut lookup table blob. The blob (if not NULL) is an ++ * array of &struct drm_color_lut. ++ */ ++ struct drm_property_blob *blend_lut; ++ /** ++ * @blend_tf: ++ * ++ * Pre-defined transfer function for converting plane pixel data before ++ * applying blend LUT. ++ */ ++ enum drm_transfer_function blend_tf; + }; + + struct dm_crtc_state { +@@ -726,6 +796,36 @@ struct dm_crtc_state { + struct dc_info_packet vrr_infopacket; + + int abm_level; ++ ++ /* AMD driver-private CRTC color management ++ * ++ * DRM provides CRTC degamma/ctm/gamma color mgmt features, but AMD HW ++ * has a larger set of post-blending color calibration. Here, DC MPC ++ * color caps are wired up to DM CRTC state: ++ */ ++ /** ++ * @shaper_lut: ++ * ++ * Post-blending 1D Lookup table used to de-linearize pixel data for 3D ++ * LUT. 
The blob (if not NULL) is an array of &struct drm_color_lut. ++ */ ++ struct drm_property_blob *shaper_lut; ++ /** ++ * @lut3d: ++ * ++ * Post-blending 3D Lookup table for converting pixel data. When ++ * supported by HW (DCN 3+), it is positioned just before post-blending ++ * regamma and always assumes a preceding shaper LUT. The blob (if not ++ * NULL) is an array of &struct drm_color_lut. ++ */ ++ struct drm_property_blob *lut3d; ++ /** ++ * @regamma_tf: ++ * ++ * Pre-defined transfer function for converting internal FB -> wire ++ * encoding. ++ */ ++ enum drm_transfer_function regamma_tf; + }; + + #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base) +@@ -787,14 +887,22 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, + + void amdgpu_dm_trigger_timing_sync(struct drm_device *dev); + ++/* 3D LUT max size is 17x17x17 */ ++#define MAX_COLOR_3DLUT_ENTRIES 4913 ++#define MAX_COLOR_3DLUT_BITDEPTH 12 ++int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev, ++ const struct drm_crtc_state *crtc_state); ++/* 1D LUT size */ + #define MAX_COLOR_LUT_ENTRIES 4096 + /* Legacy gamm LUT users such as X doesn't like large LUT sizes */ + #define MAX_COLOR_LEGACY_LUT_ENTRIES 256 + + void amdgpu_dm_init_color_mod(void); + int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state); +-int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc); ++int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc, ++ struct dc_state *ctx); + int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, ++ struct drm_plane_state *plane_state, + struct dc_plane_state *dc_plane_state); + + void amdgpu_dm_update_connector_after_detect( +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +index a4cb23d059bd..714f07bb9c9c 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +@@ -72,6 +72,7 @@ + */ + + #define MAX_DRM_LUT_VALUE 0xFFFF ++#define SDR_WHITE_LEVEL_INIT_VALUE 80 + + /** + * amdgpu_dm_init_color_mod - Initialize the color module. +@@ -182,7 +183,6 @@ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut, + static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, + struct fixed31_32 *matrix) + { +- int64_t val; + int i; + + /* +@@ -201,12 +201,7 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, + } + + /* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */ +- val = ctm->matrix[i - (i / 4)]; +- /* If negative, convert to 2's complement. */ +- if (val & (1ULL << 63)) +- val = -(val & ~(1ULL << 63)); +- +- matrix[i].value = val; ++ matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i - (i / 4)]); + } + } + +@@ -268,16 +263,18 @@ static int __set_output_tf(struct dc_transfer_func *func, + struct calculate_buffer cal_buffer = {0}; + bool res; + +- ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES); +- + cal_buffer.buffer_index = -1; + +- gamma = dc_create_gamma(); +- if (!gamma) +- return -ENOMEM; ++ if (lut_size) { ++ ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES); + +- gamma->num_entries = lut_size; +- __drm_lut_to_dc_gamma(lut, gamma, false); ++ gamma = dc_create_gamma(); ++ if (!gamma) ++ return -ENOMEM; ++ ++ gamma->num_entries = lut_size; ++ __drm_lut_to_dc_gamma(lut, gamma, false); ++ } + + if (func->tf == TRANSFER_FUNCTION_LINEAR) { + /* +@@ -285,27 +282,63 @@ static int __set_output_tf(struct dc_transfer_func *func, + * on top of a linear input. 
But degamma params can be used + * instead to simulate this. + */ +- gamma->type = GAMMA_CUSTOM; ++ if (gamma) ++ gamma->type = GAMMA_CUSTOM; + res = mod_color_calculate_degamma_params(NULL, func, +- gamma, true); ++ gamma, gamma != NULL); + } else { + /* + * Assume sRGB. The actual mapping will depend on whether the + * input was legacy or not. + */ +- gamma->type = GAMMA_CS_TFM_1D; +- res = mod_color_calculate_regamma_params(func, gamma, false, ++ if (gamma) ++ gamma->type = GAMMA_CS_TFM_1D; ++ res = mod_color_calculate_regamma_params(func, gamma, gamma != NULL, + has_rom, NULL, &cal_buffer); + } + +- dc_gamma_release(&gamma); ++ if (gamma) ++ dc_gamma_release(&gamma); + + return res ? 0 : -ENOMEM; + } + ++static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream, ++ const struct drm_color_lut *regamma_lut, ++ uint32_t regamma_size, bool has_rom, ++ enum dc_transfer_func_predefined tf) ++{ ++ struct dc_transfer_func *out_tf = stream->out_transfer_func; ++ int ret = 0; ++ ++ if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) { ++ /* CRTC RGM goes into RGM LUT. ++ * ++ * Note: there is no implicit sRGB regamma here. We are using ++ * degamma calculation from color module to calculate the curve ++ * from a linear base. ++ */ ++ out_tf->type = TF_TYPE_DISTRIBUTED_POINTS; ++ out_tf->tf = tf; ++ out_tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; ++ ++ ret = __set_output_tf(out_tf, regamma_lut, regamma_size, has_rom); ++ } else { ++ /* ++ * No CRTC RGM means we can just put the block into bypass ++ * since we don't have any plane level adjustments using it. ++ */ ++ out_tf->type = TF_TYPE_BYPASS; ++ out_tf->tf = TRANSFER_FUNCTION_LINEAR; ++ } ++ ++ return ret; ++} ++ + /** + * __set_input_tf - calculates the input transfer function based on expected + * input space. ++ * @caps: dc color capabilities + * @func: transfer function + * @lut: lookup table that defines the color space + * @lut_size: size of respective lut. +@@ -313,27 +346,288 @@ static int __set_output_tf(struct dc_transfer_func *func, + * Returns: + * 0 in case of success. -ENOMEM if fails. + */ +-static int __set_input_tf(struct dc_transfer_func *func, ++static int __set_input_tf(struct dc_color_caps *caps, struct dc_transfer_func *func, + const struct drm_color_lut *lut, uint32_t lut_size) + { + struct dc_gamma *gamma = NULL; + bool res; + +- gamma = dc_create_gamma(); +- if (!gamma) +- return -ENOMEM; ++ if (lut_size) { ++ gamma = dc_create_gamma(); ++ if (!gamma) ++ return -ENOMEM; + +- gamma->type = GAMMA_CUSTOM; +- gamma->num_entries = lut_size; ++ gamma->type = GAMMA_CUSTOM; ++ gamma->num_entries = lut_size; + +- __drm_lut_to_dc_gamma(lut, gamma, false); ++ __drm_lut_to_dc_gamma(lut, gamma, false); ++ } + +- res = mod_color_calculate_degamma_params(NULL, func, gamma, true); +- dc_gamma_release(&gamma); ++ res = mod_color_calculate_degamma_params(caps, func, gamma, gamma != NULL); ++ ++ if (gamma) ++ dc_gamma_release(&gamma); + + return res ? 
0 : -ENOMEM; + } + ++static enum dc_transfer_func_predefined drm_tf_to_dc_tf(enum drm_transfer_function drm_tf) ++{ ++ switch (drm_tf) ++ { ++ default: ++ case DRM_TRANSFER_FUNCTION_DEFAULT: return TRANSFER_FUNCTION_LINEAR; ++ case DRM_TRANSFER_FUNCTION_SRGB: return TRANSFER_FUNCTION_SRGB; ++ case DRM_TRANSFER_FUNCTION_BT709: return TRANSFER_FUNCTION_BT709; ++ case DRM_TRANSFER_FUNCTION_PQ: return TRANSFER_FUNCTION_PQ; ++ case DRM_TRANSFER_FUNCTION_LINEAR: return TRANSFER_FUNCTION_LINEAR; ++ case DRM_TRANSFER_FUNCTION_UNITY: return TRANSFER_FUNCTION_UNITY; ++ case DRM_TRANSFER_FUNCTION_HLG: return TRANSFER_FUNCTION_HLG; ++ case DRM_TRANSFER_FUNCTION_GAMMA22: return TRANSFER_FUNCTION_GAMMA22; ++ case DRM_TRANSFER_FUNCTION_GAMMA24: return TRANSFER_FUNCTION_GAMMA24; ++ case DRM_TRANSFER_FUNCTION_GAMMA26: return TRANSFER_FUNCTION_GAMMA26; ++ } ++} ++ ++static void __to_dc_lut3d_color(struct dc_rgb *rgb, ++ const struct drm_color_lut lut, ++ int bit_precision) ++{ ++ rgb->red = drm_color_lut_extract(lut.red, bit_precision); ++ rgb->green = drm_color_lut_extract(lut.green, bit_precision); ++ rgb->blue = drm_color_lut_extract(lut.blue, bit_precision); ++} ++ ++static void __drm_3dlut_to_dc_3dlut(const struct drm_color_lut *lut, ++ uint32_t lut3d_size, ++ struct tetrahedral_params *params, ++ bool use_tetrahedral_9, ++ int bit_depth) ++{ ++ struct dc_rgb *lut0; ++ struct dc_rgb *lut1; ++ struct dc_rgb *lut2; ++ struct dc_rgb *lut3; ++ int lut_i, i; ++ ++ ++ if (use_tetrahedral_9) { ++ lut0 = params->tetrahedral_9.lut0; ++ lut1 = params->tetrahedral_9.lut1; ++ lut2 = params->tetrahedral_9.lut2; ++ lut3 = params->tetrahedral_9.lut3; ++ } else { ++ lut0 = params->tetrahedral_17.lut0; ++ lut1 = params->tetrahedral_17.lut1; ++ lut2 = params->tetrahedral_17.lut2; ++ lut3 = params->tetrahedral_17.lut3; ++ } ++ ++ for (lut_i = 0, i = 0; i < lut3d_size - 4; lut_i++, i += 4) { ++ /* We should consider the 3dlut RGB values are distributed ++ * along four arrays lut0-3 where the first sizes 1229 and the ++ * other 1228. The bit depth supported for 3dlut channel is ++ * 12-bit, but DC also supports 10-bit. ++ * ++ * TODO: improve color pipeline API to enable the userspace set ++ * bit depth and 3D LUT size/stride, as specified by VA-API. ++ */ ++ __to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth); ++ __to_dc_lut3d_color(&lut1[lut_i], lut[i + 1], bit_depth); ++ __to_dc_lut3d_color(&lut2[lut_i], lut[i + 2], bit_depth); ++ __to_dc_lut3d_color(&lut3[lut_i], lut[i + 3], bit_depth); ++ } ++ /* lut0 has 1229 points (lut_size/4 + 1) */ ++ __to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth); ++} ++ ++/* amdgpu_dm_atomic_lut3d - set DRM 3D LUT to DC stream ++ * @drm_lut3d: DRM CRTC (user) 3D LUT ++ * @drm_lut3d_size: size of 3D LUT ++ * @lut3d: DC 3D LUT ++ * ++ * Map DRM CRTC 3D LUT to DC 3D LUT and all necessary bits to program it ++ * on DCN MPC accordingly. ++ */ ++static void amdgpu_dm_atomic_lut3d(const struct drm_color_lut *drm_lut, ++ uint32_t drm_lut3d_size, ++ struct dc_3dlut *lut) ++{ ++ if (!drm_lut3d_size) { ++ lut->state.bits.initialized = 0; ++ } else { ++ /* Stride and bit depth are not programmable by API yet. ++ * Therefore, only supports 17x17x17 3D LUT (12-bit). 
++ */ ++ lut->lut_3d.use_tetrahedral_9 = false; ++ lut->lut_3d.use_12bits = true; ++ lut->state.bits.initialized = 1; ++ __drm_3dlut_to_dc_3dlut(drm_lut, drm_lut3d_size, &lut->lut_3d, ++ lut->lut_3d.use_tetrahedral_9, ++ MAX_COLOR_3DLUT_BITDEPTH); ++ } ++} ++ ++static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut, ++ bool has_rom, ++ enum dc_transfer_func_predefined tf, ++ uint32_t shaper_size, ++ struct dc_transfer_func *func_shaper) ++{ ++ int ret = 0; ++ ++ if (shaper_size || tf != TRANSFER_FUNCTION_LINEAR) { ++ /* If DRM shaper LUT is set, we assume a linear color space ++ * (linearized by DRM degamma 1D LUT or not) ++ */ ++ func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS; ++ func_shaper->tf = tf; ++ func_shaper->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; ++ ++ ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, has_rom); ++ } else { ++ func_shaper->type = TF_TYPE_BYPASS; ++ func_shaper->tf = TRANSFER_FUNCTION_LINEAR; ++ } ++ ++ return ret; ++} ++ ++static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut, ++ bool has_rom, ++ enum dc_transfer_func_predefined tf, ++ uint32_t blend_size, ++ struct dc_transfer_func *func_blend) ++{ ++ int ret = 0; ++ ++ if (blend_size || tf != TRANSFER_FUNCTION_LINEAR) { ++ /* DRM plane gamma LUT or TF means we are linearizing color ++ * space before blending (similar to degamma programming). As ++ * we don't have hardcoded curve support, or we use AMD color ++ * module to fill the parameters that will be translated to HW ++ * points. ++ */ ++ func_blend->type = TF_TYPE_DISTRIBUTED_POINTS; ++ func_blend->tf = tf; ++ func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; ++ ++ ret = __set_input_tf(NULL, func_blend, blend_lut, blend_size); ++ } else { ++ func_blend->type = TF_TYPE_BYPASS; ++ func_blend->tf = TRANSFER_FUNCTION_LINEAR; ++ } ++ ++ return ret; ++} ++ ++/* amdgpu_dm_atomic_shaper_lut3d - set DRM CRTC shaper LUT and 3D LUT to DC ++ * interface ++ * @dc: Display Core control structure ++ * @ctx: new DC state information ++ * @stream: DC stream state to set shaper LUT and 3D LUT ++ * @drm_shaper_lut: DRM CRTC (user) shaper LUT ++ * @drm_shaper_size: size of shaper LUT ++ * @drm_lut3d: DRM CRTC (user) 3D LUT ++ * @drm_lut3d_size: size of 3D LUT ++ * ++ * Returns: ++ * 0 on success. 
++ */ ++static int amdgpu_dm_atomic_shaper_lut3d(struct dc *dc, ++ struct dc_state *ctx, ++ struct dc_stream_state *stream, ++ const struct drm_color_lut *drm_shaper_lut, ++ uint32_t drm_shaper_size, ++ bool has_rom, ++ enum dc_transfer_func_predefined tf, ++ const struct drm_color_lut *drm_lut3d, ++ uint32_t drm_lut3d_size) ++{ ++ struct dc_3dlut *lut3d_func; ++ struct dc_transfer_func *func_shaper; ++ bool acquire = drm_shaper_size || drm_lut3d_size; ++ ++ lut3d_func = (struct dc_3dlut *)stream->lut3d_func; ++ func_shaper = (struct dc_transfer_func *)stream->func_shaper; ++ ++ ASSERT((lut3d_func && func_shaper) || (!lut3d_func && !func_shaper)); ++ if ((acquire && !lut3d_func && !func_shaper) || ++ (!acquire && lut3d_func && func_shaper)) ++ { ++ if (!dc_acquire_release_mpc_3dlut_for_ctx(dc, acquire, ctx, stream, ++ &lut3d_func, &func_shaper)) ++ return DC_ERROR_UNEXPECTED; ++ } ++ ++ stream->func_shaper = func_shaper; ++ stream->lut3d_func = lut3d_func; ++ ++ if (!acquire) ++ return 0; ++ ++ amdgpu_dm_atomic_lut3d(drm_lut3d, drm_lut3d_size, lut3d_func); ++ ++ return amdgpu_dm_atomic_shaper_lut(drm_shaper_lut, has_rom, tf, ++ drm_shaper_size, func_shaper); ++} ++ ++/** ++ * amdgpu_dm_lut3d_size - get expected size according to hw color caps ++ * @adev: amdgpu device ++ * @lut_size: default size ++ * ++ * Return: ++ * lut_size if DC 3D LUT is supported, zero otherwise. ++ */ ++static uint32_t amdgpu_dm_get_lut3d_size(struct amdgpu_device *adev, ++ uint32_t lut_size) ++{ ++ return adev->dm.dc->caps.color.mpc.num_3dluts ? lut_size : 0; ++} ++ ++/** ++ * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if DRM 3D ++ * LUT matches the hw supported size ++ * @adev: amdgpu device ++ * @crtc_state: the DRM CRTC state ++ * ++ * Verifies if post-blending (MPC) 3D LUT is supported by the HW (DCN 3.0 or ++ * newer) and if the DRM 3D LUT matches the supported size. ++ * ++ * Returns: ++ * 0 on success. -EINVAL if lut size are invalid. ++ */ ++int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev, ++ const struct drm_crtc_state *crtc_state) ++{ ++ struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc_state); ++ const struct drm_color_lut *shaper = NULL, *lut3d = NULL; ++ uint32_t exp_size, size; ++ ++ /* shaper LUT is only available if 3D LUT color caps*/ ++ exp_size = amdgpu_dm_get_lut3d_size(adev, MAX_COLOR_LUT_ENTRIES); ++ shaper = __extract_blob_lut(acrtc_state->shaper_lut, &size); ++ ++ if (shaper && size != exp_size) { ++ drm_dbg(&adev->ddev, ++ "Invalid Shaper LUT size. Should be %u but got %u.\n", ++ exp_size, size); ++ return -EINVAL; ++ } ++ ++ exp_size = amdgpu_dm_get_lut3d_size(adev, MAX_COLOR_3DLUT_ENTRIES); ++ lut3d = __extract_blob_lut(acrtc_state->lut3d, &size); ++ ++ if (lut3d && size != exp_size) { ++ drm_dbg(&adev->ddev, "Invalid 3D LUT size. Should be %u but got %u.\n", ++ exp_size, size); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ + /** + * amdgpu_dm_verify_lut_sizes - verifies if DRM luts match the hw supported sizes + * @crtc_state: the DRM CRTC state +@@ -373,6 +667,7 @@ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state) + /** + * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream. 
+ * @crtc: amdgpu_dm crtc state ++ * @ctx: new DC state information + * + * With no plane level color management properties we're free to use any + * of the HW blocks as long as the CRTC CTM always comes before the +@@ -392,7 +687,8 @@ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state) + * Returns: + * 0 on success. Error code if setup fails. + */ +-int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) ++int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc, ++ struct dc_state *ctx) + { + struct dc_stream_state *stream = crtc->stream; + struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev); +@@ -401,9 +697,20 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) + const struct drm_color_lut *degamma_lut, *regamma_lut; + uint32_t degamma_size, regamma_size; + bool has_regamma, has_degamma; ++ enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_LINEAR; + bool is_legacy; ++ const struct drm_color_lut *shaper_lut, *lut3d; ++ uint32_t shaper_size, lut3d_size; + int r; + ++ r = amdgpu_dm_verify_lut3d_size(adev, &crtc->base); ++ if (r) ++ return r; ++ ++ lut3d = __extract_blob_lut(crtc->lut3d, &lut3d_size); ++ shaper_lut = __extract_blob_lut(crtc->shaper_lut, &shaper_size); ++ tf = drm_tf_to_dc_tf(crtc->regamma_tf); ++ + r = amdgpu_dm_verify_lut_sizes(&crtc->base); + if (r) + return r; +@@ -440,26 +747,41 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) + stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; + stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB; + ++ /* Note: although we pass has_rom as parameter here, we never ++ * actually use ROM because the color module only takes the ROM ++ * path if transfer_func->type == PREDEFINED. ++ * ++ * See more in mod_color_calculate_regamma_params() ++ */ + r = __set_legacy_tf(stream->out_transfer_func, regamma_lut, + regamma_size, has_rom); + if (r) + return r; +- } else if (has_regamma) { +- /* If atomic regamma, CRTC RGM goes into RGM LUT. */ +- stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; +- stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; ++ } else { ++ /* We are not exposing CRTC 3D LUT properties yet, so DC 3D LUT ++ * programming is expected to be set to bypass mode, since ++ * there is no user-blob. ++ */ ++ lut3d_size = lut3d != NULL ? lut3d_size : 0; ++ shaper_size = shaper_lut != NULL ? shaper_size : 0; ++ r = amdgpu_dm_atomic_shaper_lut3d(adev->dm.dc, ctx, stream, ++ shaper_lut, shaper_size, ++ has_rom, tf, ++ lut3d, lut3d_size); ++ if (r) { ++ drm_dbg(&adev->ddev, "Failed on shaper/3D LUTs setup\n"); ++ return r; ++ } + +- r = __set_output_tf(stream->out_transfer_func, regamma_lut, +- regamma_size, has_rom); ++ /* Note: OGAM is disabled if 3D LUT is successfully programmed. ++ * See params and set_output_gamma in ++ * dcn30_set_output_transfer_func() ++ */ ++ regamma_size = has_regamma ? regamma_size : 0; ++ r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut, ++ regamma_size, has_rom, tf); + if (r) + return r; +- } else { +- /* +- * No CRTC RGM means we can just put the block into bypass +- * since we don't have any plane level adjustments using it. +- */ +- stream->out_transfer_func->type = TF_TYPE_BYPASS; +- stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; + } + + /* +@@ -495,20 +817,10 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) + return 0; + } + +-/** +- * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane. 
+- * @crtc: amdgpu_dm crtc state +- * @dc_plane_state: target DC surface +- * +- * Update the underlying dc_stream_state's input transfer function (ITF) in +- * preparation for hardware commit. The transfer function used depends on +- * the preparation done on the stream for color management. +- * +- * Returns: +- * 0 on success. -ENOMEM if mem allocation fails. +- */ +-int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, +- struct dc_plane_state *dc_plane_state) ++static int ++map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, ++ struct dc_plane_state *dc_plane_state, ++ struct dc_color_caps *caps) + { + const struct drm_color_lut *degamma_lut; + enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; +@@ -531,8 +843,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, + °amma_size); + ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES); + +- dc_plane_state->in_transfer_func->type = +- TF_TYPE_DISTRIBUTED_POINTS; ++ dc_plane_state->in_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; + + /* + * This case isn't fully correct, but also fairly +@@ -564,11 +875,11 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, + dc_plane_state->in_transfer_func->tf = + TRANSFER_FUNCTION_LINEAR; + +- r = __set_input_tf(dc_plane_state->in_transfer_func, ++ r = __set_input_tf(caps, dc_plane_state->in_transfer_func, + degamma_lut, degamma_size); + if (r) + return r; +- } else if (crtc->cm_is_degamma_srgb) { ++ } else { + /* + * For legacy gamma support we need the regamma input + * in linear space. Assume that the input is sRGB. +@@ -577,14 +888,183 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, + dc_plane_state->in_transfer_func->tf = tf; + + if (tf != TRANSFER_FUNCTION_SRGB && +- !mod_color_calculate_degamma_params(NULL, +- dc_plane_state->in_transfer_func, NULL, false)) ++ !mod_color_calculate_degamma_params(caps, ++ dc_plane_state->in_transfer_func, ++ NULL, false)) ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++static int ++__set_dm_plane_degamma(struct drm_plane_state *plane_state, ++ struct dc_plane_state *dc_plane_state, ++ struct dc_color_caps *color_caps) ++{ ++ struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); ++ const struct drm_color_lut *degamma_lut; ++ enum drm_transfer_function drm_tf = DRM_TRANSFER_FUNCTION_DEFAULT; ++ uint32_t degamma_size; ++ bool has_degamma_lut; ++ int ret; ++ ++ if (dc_plane_state->ctx && dc_plane_state->ctx->dc) ++ color_caps = &dc_plane_state->ctx->dc->caps.color; ++ ++ degamma_lut = __extract_blob_lut(dm_plane_state->degamma_lut, ++ °amma_size); ++ ++ has_degamma_lut = degamma_lut && ++ !__is_lut_linear(degamma_lut, degamma_size); ++ ++ drm_tf = dm_plane_state->degamma_tf; ++ ++ /* If we don't have plane degamma LUT nor TF to set on DC, we have ++ * nothing to do here, return. 
++ */ ++ if (!has_degamma_lut && drm_tf == DRM_TRANSFER_FUNCTION_DEFAULT) ++ return -EINVAL; ++ ++ dc_plane_state->in_transfer_func->tf = drm_tf_to_dc_tf(drm_tf); ++ ++ if (has_degamma_lut) { ++ ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES); ++ ++ dc_plane_state->in_transfer_func->type = ++ TF_TYPE_DISTRIBUTED_POINTS; ++ ++ ret = __set_input_tf(color_caps, dc_plane_state->in_transfer_func, ++ degamma_lut, degamma_size); ++ if (ret) ++ return ret; ++ } else { ++ dc_plane_state->in_transfer_func->type = ++ TF_TYPE_PREDEFINED; ++ ++ if (!mod_color_calculate_degamma_params(NULL, ++ dc_plane_state->in_transfer_func, NULL, false)) + return -ENOMEM; +- } else { +- /* ...Otherwise we can just bypass the DGM block. */ +- dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS; +- dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; ++ } ++ return 0; ++} ++ ++static int ++amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, ++ struct dc_plane_state *dc_plane_state, ++ struct dc_color_caps *color_caps) ++{ ++ struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); ++ enum drm_transfer_function shaper_tf = DRM_TRANSFER_FUNCTION_DEFAULT; ++ enum drm_transfer_function blend_tf = DRM_TRANSFER_FUNCTION_DEFAULT; ++ const struct drm_color_lut *shaper_lut, *lut3d, *blend_lut; ++ uint32_t lut3d_size, shaper_size, blend_size; ++ int ret; ++ ++ /* We have nothing to do here, return */ ++ if (!plane_state->color_mgmt_changed) ++ return 0; ++ ++ dc_plane_state->hdr_mult = dc_fixpt_from_s3132(dm_plane_state->hdr_mult); ++ ++ shaper_tf = dm_plane_state->shaper_tf; ++ shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, &shaper_size); ++ lut3d = __extract_blob_lut(dm_plane_state->lut3d, &lut3d_size); ++ lut3d_size = lut3d != NULL ? lut3d_size : 0; ++ shaper_size = shaper_lut != NULL ? shaper_size : 0; ++ ++ amdgpu_dm_atomic_lut3d(lut3d, lut3d_size, dc_plane_state->lut3d_func); ++ ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, false, ++ drm_tf_to_dc_tf(shaper_tf), ++ shaper_size, dc_plane_state->in_shaper_func); ++ if (ret) { ++ drm_dbg_kms(plane_state->plane->dev, ++ "setting plane %d shaper/3d lut failed.\n", ++ plane_state->plane->index); ++ ++ return ret; ++ } ++ ++ blend_tf = dm_plane_state->blend_tf; ++ blend_lut = __extract_blob_lut(dm_plane_state->blend_lut, &blend_size); ++ blend_size = blend_lut != NULL ? blend_size : 0; ++ ++ ret = amdgpu_dm_atomic_blend_lut(blend_lut, false, ++ drm_tf_to_dc_tf(blend_tf), ++ blend_size, dc_plane_state->blend_tf); ++ if (ret) { ++ drm_dbg_kms(plane_state->plane->dev, ++ "setting plane %d gamma lut failed.\n", ++ plane_state->plane->index); ++ ++ return ret; + } + + return 0; + } ++ ++/** ++ * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane. ++ * @crtc: amdgpu_dm crtc state ++ * @plane_state: DRM plane state ++ * @dc_plane_state: target DC surface ++ * ++ * Update the underlying dc_stream_state's input transfer function (ITF) in ++ * preparation for hardware commit. The transfer function used depends on ++ * the preparation done on the stream for color management. ++ * ++ * Returns: ++ * 0 on success. -ENOMEM if mem allocation fails. 
++ */ ++int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, ++ struct drm_plane_state *plane_state, ++ struct dc_plane_state *dc_plane_state) ++{ ++ struct dc_color_caps *color_caps = NULL; ++ bool has_crtc_cm_degamma; ++ int ret; ++ ++ if (dc_plane_state->ctx && dc_plane_state->ctx->dc) ++ color_caps = &dc_plane_state->ctx->dc->caps.color; ++ ++ /* Initially, we can just bypass the DGM block. */ ++ dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS; ++ dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; ++ ++ /* After, we start to update values according to color props */ ++ has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb); ++ ++ ret = __set_dm_plane_degamma(plane_state, dc_plane_state, color_caps); ++ if (ret == -ENOMEM) ++ return ret; ++ ++ /* We only have one degamma block available (pre-blending) for the ++ * whole color correction pipeline, so that we can't actually perform ++ * plane and CRTC degamma at the same time. Explicitly reject atomic ++ * updates when userspace sets both plane and CRTC degamma properties. ++ */ ++ if (has_crtc_cm_degamma && ret != -EINVAL){ ++ drm_dbg_kms(crtc->base.crtc->dev, ++ "doesn't support plane and CRTC degamma at the same time\n"); ++ return -EINVAL; ++ } ++ ++ /* If we are here, it means we don't have plane degamma settings, check ++ * if we have CRTC degamma waiting for mapping to pre-blending degamma ++ * block ++ */ ++ if (has_crtc_cm_degamma) { ++ /* AMD HW doesn't have post-blending degamma caps. When DRM ++ * CRTC atomic degamma is set, we maps it to DPP degamma block ++ * (pre-blending) or, on legacy gamma, we use DPP degamma to ++ * linearize (implicit degamma) from sRGB/BT709 according to ++ * the input space. ++ */ ++ ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state, color_caps); ++ if (ret) ++ return ret; ++ } ++ ++ return amdgpu_dm_plane_set_color_properties(plane_state, ++ dc_plane_state, color_caps); ++} +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +index 440fc0869a34..4a725aeef3e8 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +@@ -219,7 +219,6 @@ static void dm_crtc_destroy_state(struct drm_crtc *crtc, + if (cur->stream) + dc_stream_release(cur->stream); + +- + __drm_atomic_helper_crtc_destroy_state(state); + + +@@ -253,6 +252,7 @@ static struct drm_crtc_state *dm_crtc_duplicate_state(struct drm_crtc *crtc) + state->freesync_config = cur->freesync_config; + state->cm_has_degamma = cur->cm_has_degamma; + state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb; ++ state->regamma_tf = cur->regamma_tf; + state->crc_skip_count = cur->crc_skip_count; + state->mpo_requested = cur->mpo_requested; + /* TODO Duplicate dc_stream after objects are stream object is flattened */ +@@ -289,6 +289,69 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) + } + #endif + ++#ifdef AMD_PRIVATE_COLOR ++/** ++ * drm_crtc_additional_color_mgmt - enable additional color properties ++ * @crtc: DRM CRTC ++ * ++ * This function lets the driver enable the 3D LUT color correction property ++ * on a CRTC. This includes shaper LUT, 3D LUT and regamma TF. The shaper ++ * LUT and 3D LUT property is only attached if its size is not 0. 
++ */ ++static void ++dm_crtc_additional_color_mgmt(struct drm_crtc *crtc) ++{ ++ struct amdgpu_device *adev = drm_to_adev(crtc->dev); ++ ++ if(adev->dm.dc->caps.color.mpc.ogam_ram) ++ drm_object_attach_property(&crtc->base, ++ adev->mode_info.regamma_tf_property, ++ DRM_TRANSFER_FUNCTION_DEFAULT); ++} ++ ++static int ++amdgpu_dm_atomic_crtc_set_property(struct drm_crtc *crtc, ++ struct drm_crtc_state *state, ++ struct drm_property *property, ++ uint64_t val) ++{ ++ struct amdgpu_device *adev = drm_to_adev(crtc->dev); ++ struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state); ++ ++ if (property == adev->mode_info.regamma_tf_property) { ++ if (acrtc_state->regamma_tf != val) { ++ acrtc_state->regamma_tf = val; ++ acrtc_state->base.color_mgmt_changed |= 1; ++ } ++ } else { ++ drm_dbg_atomic(crtc->dev, ++ "[CRTC:%d:%s] unknown property [PROP:%d:%s]]\n", ++ crtc->base.id, crtc->name, ++ property->base.id, property->name); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int ++amdgpu_dm_atomic_crtc_get_property(struct drm_crtc *crtc, ++ const struct drm_crtc_state *state, ++ struct drm_property *property, ++ uint64_t *val) ++{ ++ struct amdgpu_device *adev = drm_to_adev(crtc->dev); ++ struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state); ++ ++ if (property == adev->mode_info.regamma_tf_property) ++ *val = acrtc_state->regamma_tf; ++ else ++ return -EINVAL; ++ ++ return 0; ++} ++#endif ++ + /* Implemented only the options currently available for the driver */ + static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { + .reset = dm_crtc_reset_state, +@@ -307,6 +370,10 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { + #if defined(CONFIG_DEBUG_FS) + .late_register = amdgpu_dm_crtc_late_register, + #endif ++#ifdef AMD_PRIVATE_COLOR ++ .atomic_set_property = amdgpu_dm_atomic_crtc_set_property, ++ .atomic_get_property = amdgpu_dm_atomic_crtc_get_property, ++#endif + }; + + static void dm_crtc_helper_disable(struct drm_crtc *crtc) +@@ -470,6 +537,9 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, + + drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); + ++#ifdef AMD_PRIVATE_COLOR ++ dm_crtc_additional_color_mgmt(&acrtc->base); ++#endif + return 0; + + fail: +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +index 322668973747..ea13b49fa021 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +@@ -1317,8 +1317,14 @@ static void dm_drm_plane_reset(struct drm_plane *plane) + amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL); + WARN_ON(amdgpu_state == NULL); + +- if (amdgpu_state) +- __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base); ++ if (!amdgpu_state) ++ return; ++ ++ __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base); ++ amdgpu_state->degamma_tf = DRM_TRANSFER_FUNCTION_DEFAULT; ++ amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT; ++ amdgpu_state->shaper_tf = DRM_TRANSFER_FUNCTION_DEFAULT; ++ amdgpu_state->blend_tf = DRM_TRANSFER_FUNCTION_DEFAULT; + } + + static struct drm_plane_state * +@@ -1338,6 +1344,20 @@ dm_drm_plane_duplicate_state(struct drm_plane *plane) + dc_plane_state_retain(dm_plane_state->dc_state); + } + ++ if (dm_plane_state->degamma_lut) ++ drm_property_blob_get(dm_plane_state->degamma_lut); ++ if (dm_plane_state->shaper_lut) ++ drm_property_blob_get(dm_plane_state->shaper_lut); ++ if (dm_plane_state->lut3d) ++ 
drm_property_blob_get(dm_plane_state->lut3d); ++ if (dm_plane_state->blend_lut) ++ drm_property_blob_get(dm_plane_state->blend_lut); ++ ++ dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf; ++ dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult; ++ dm_plane_state->shaper_tf = old_dm_plane_state->shaper_tf; ++ dm_plane_state->blend_tf = old_dm_plane_state->blend_tf; ++ + return &dm_plane_state->base; + } + +@@ -1405,12 +1425,194 @@ static void dm_drm_plane_destroy_state(struct drm_plane *plane, + { + struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); + ++ if (dm_plane_state->degamma_lut) ++ drm_property_blob_put(dm_plane_state->degamma_lut); ++ if (dm_plane_state->lut3d) ++ drm_property_blob_put(dm_plane_state->lut3d); ++ if (dm_plane_state->shaper_lut) ++ drm_property_blob_put(dm_plane_state->shaper_lut); ++ if (dm_plane_state->blend_lut) ++ drm_property_blob_put(dm_plane_state->blend_lut); ++ + if (dm_plane_state->dc_state) + dc_plane_state_release(dm_plane_state->dc_state); + + drm_atomic_helper_plane_destroy_state(plane, state); + } + ++static const struct drm_prop_enum_list drm_transfer_function_enum_list[] = { ++ { DRM_TRANSFER_FUNCTION_DEFAULT, "Default" }, ++ { DRM_TRANSFER_FUNCTION_SRGB, "sRGB" }, ++ { DRM_TRANSFER_FUNCTION_BT709, "BT.709" }, ++ { DRM_TRANSFER_FUNCTION_PQ, "PQ (Perceptual Quantizer)" }, ++ { DRM_TRANSFER_FUNCTION_LINEAR, "Linear" }, ++ { DRM_TRANSFER_FUNCTION_UNITY, "Unity" }, ++ { DRM_TRANSFER_FUNCTION_HLG, "HLG (Hybrid Log Gamma)" }, ++ { DRM_TRANSFER_FUNCTION_GAMMA22, "Gamma 2.2" }, ++ { DRM_TRANSFER_FUNCTION_GAMMA24, "Gamma 2.4" }, ++ { DRM_TRANSFER_FUNCTION_GAMMA26, "Gamma 2.6" }, ++}; ++ ++#ifdef AMD_PRIVATE_COLOR ++static void ++dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, ++ struct drm_plane *plane) ++{ ++ if (dm->dc->caps.color.dpp.dgam_ram || dm->dc->caps.color.dpp.gamma_corr ) { ++ drm_object_attach_property(&plane->base, ++ dm->adev->mode_info.plane_degamma_lut_property, 0); ++ drm_object_attach_property(&plane->base, ++ dm->adev->mode_info.plane_degamma_lut_size_property, ++ MAX_COLOR_LUT_ENTRIES); ++ drm_object_attach_property(&plane->base, ++ dm->adev->mode_info.plane_degamma_tf_property, ++ DRM_TRANSFER_FUNCTION_DEFAULT); ++ } ++ /* HDR MULT is always available */ ++ drm_object_attach_property(&plane->base, ++ dm->adev->mode_info.plane_hdr_mult_property, ++ AMDGPU_HDR_MULT_DEFAULT); ++ ++ if (dm->dc->caps.color.dpp.hw_3d_lut) { ++ drm_object_attach_property(&plane->base, ++ dm->adev->mode_info.plane_shaper_lut_property, 0); ++ drm_object_attach_property(&plane->base, ++ dm->adev->mode_info.plane_shaper_lut_size_property, ++ MAX_COLOR_LUT_ENTRIES); ++ drm_object_attach_property(&plane->base, ++ dm->adev->mode_info.plane_shaper_tf_property, ++ DRM_TRANSFER_FUNCTION_DEFAULT); ++ drm_object_attach_property(&plane->base, ++ dm->adev->mode_info.plane_lut3d_property, 0); ++ drm_object_attach_property(&plane->base, ++ dm->adev->mode_info.plane_lut3d_size_property, ++ MAX_COLOR_3DLUT_ENTRIES); ++ } ++ ++ if (dm->dc->caps.color.dpp.ogam_ram) { ++ drm_object_attach_property(&plane->base, ++ dm->adev->mode_info.plane_blend_lut_property, 0); ++ drm_object_attach_property(&plane->base, ++ dm->adev->mode_info.plane_blend_lut_size_property, ++ MAX_COLOR_LUT_ENTRIES); ++ drm_object_attach_property(&plane->base, ++ dm->adev->mode_info.plane_blend_tf_property, ++ DRM_TRANSFER_FUNCTION_DEFAULT); ++ } ++} ++ ++static int ++dm_atomic_plane_set_property(struct drm_plane *plane, ++ struct drm_plane_state 
*state,
++ struct drm_property *property,
++ uint64_t val)
++{
++ struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
++ struct amdgpu_device *adev = drm_to_adev(plane->dev);
++ bool replaced = false;
++ int ret;
++
++ if (property == adev->mode_info.plane_degamma_lut_property) {
++ ret = drm_property_replace_blob_from_id(plane->dev,
++ &dm_plane_state->degamma_lut,
++ val,
++ -1, sizeof(struct drm_color_lut),
++ &replaced);
++ dm_plane_state->base.color_mgmt_changed |= replaced;
++ return ret;
++ } else if (property == adev->mode_info.plane_degamma_tf_property) {
++ if (dm_plane_state->degamma_tf != val) {
++ dm_plane_state->degamma_tf = val;
++ dm_plane_state->base.color_mgmt_changed = 1;
++ }
++ } else if (property == adev->mode_info.plane_hdr_mult_property) {
++ if (dm_plane_state->hdr_mult != val) {
++ dm_plane_state->hdr_mult = val;
++ dm_plane_state->base.color_mgmt_changed = 1;
++ }
++ } else if (property == adev->mode_info.plane_shaper_lut_property) {
++ ret = drm_property_replace_blob_from_id(plane->dev,
++ &dm_plane_state->shaper_lut,
++ val, -1,
++ sizeof(struct drm_color_lut),
++ &replaced);
++ dm_plane_state->base.color_mgmt_changed |= replaced;
++ return ret;
++ } else if (property == adev->mode_info.plane_shaper_tf_property) {
++ if (dm_plane_state->shaper_tf != val) {
++ dm_plane_state->shaper_tf = val;
++ dm_plane_state->base.color_mgmt_changed = 1;
++ }
++ } else if (property == adev->mode_info.plane_lut3d_property) {
++ ret = drm_property_replace_blob_from_id(plane->dev,
++ &dm_plane_state->lut3d,
++ val, -1,
++ sizeof(struct drm_color_lut),
++ &replaced);
++ dm_plane_state->base.color_mgmt_changed |= replaced;
++ return ret;
++ } else if (property == adev->mode_info.plane_blend_lut_property) {
++ ret = drm_property_replace_blob_from_id(plane->dev,
++ &dm_plane_state->blend_lut,
++ val, -1,
++ sizeof(struct drm_color_lut),
++ &replaced);
++ dm_plane_state->base.color_mgmt_changed |= replaced;
++ return ret;
++ } else if (property == adev->mode_info.plane_blend_tf_property) {
++ if (dm_plane_state->blend_tf != val) {
++ dm_plane_state->blend_tf = val;
++ dm_plane_state->base.color_mgmt_changed = 1;
++ }
++ } else {
++ drm_dbg_atomic(plane->dev,
++ "[PLANE:%d:%s] unknown property [PROP:%d:%s]\n",
++ plane->base.id, plane->name,
++ property->base.id, property->name);
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
++static int
++dm_atomic_plane_get_property(struct drm_plane *plane,
++ const struct drm_plane_state *state,
++ struct drm_property *property,
++ uint64_t *val)
++{
++ struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
++ struct amdgpu_device *adev = drm_to_adev(plane->dev);
++
++ if (property == adev->mode_info.plane_degamma_lut_property) {
++ *val = (dm_plane_state->degamma_lut) ?
++ dm_plane_state->degamma_lut->base.id : 0;
++ } else if (property == adev->mode_info.plane_degamma_tf_property) {
++ *val = dm_plane_state->degamma_tf;
++ } else if (property == adev->mode_info.plane_hdr_mult_property) {
++ *val = dm_plane_state->hdr_mult;
++ } else if (property == adev->mode_info.plane_shaper_lut_property) {
++ *val = (dm_plane_state->shaper_lut) ?
++ dm_plane_state->shaper_lut->base.id : 0;
++ } else if (property == adev->mode_info.plane_shaper_tf_property) {
++ *val = dm_plane_state->shaper_tf;
++ } else if (property == adev->mode_info.plane_lut3d_property) {
++ *val = (dm_plane_state->lut3d) ? 
++ dm_plane_state->lut3d->base.id : 0; ++ } else if (property == adev->mode_info.plane_blend_lut_property) { ++ *val = (dm_plane_state->blend_lut) ? ++ dm_plane_state->blend_lut->base.id : 0; ++ } else if (property == adev->mode_info.plane_blend_tf_property) { ++ *val = dm_plane_state->blend_tf; ++ ++ } else { ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++#endif ++ + static const struct drm_plane_funcs dm_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, +@@ -1419,6 +1621,10 @@ static const struct drm_plane_funcs dm_plane_funcs = { + .atomic_duplicate_state = dm_drm_plane_duplicate_state, + .atomic_destroy_state = dm_drm_plane_destroy_state, + .format_mod_supported = dm_plane_format_mod_supported, ++#ifdef AMD_PRIVATE_COLOR ++ .atomic_set_property = dm_atomic_plane_set_property, ++ .atomic_get_property = dm_atomic_plane_get_property, ++#endif + }; + + int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, +@@ -1489,6 +1695,9 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, + + drm_plane_helper_add(plane, &dm_plane_helper_funcs); + ++#ifdef AMD_PRIVATE_COLOR ++ dm_atomic_plane_attach_color_mgmt_properties(dm, plane); ++#endif + /* Create (reset) the plane state */ + if (plane->funcs->reset) + plane->funcs->reset(plane); +diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c +index 6eace83c9c6f..35a8b8ca87de 100644 +--- a/drivers/gpu/drm/amd/display/dc/core/dc.c ++++ b/drivers/gpu/drm/amd/display/dc/core/dc.c +@@ -2126,6 +2126,45 @@ bool dc_acquire_release_mpc_3dlut( + return ret; + } + ++bool ++dc_acquire_release_mpc_3dlut_for_ctx(struct dc *dc, ++ bool acquire, ++ struct dc_state *state, ++ struct dc_stream_state *stream, ++ struct dc_3dlut **lut, ++ struct dc_transfer_func **shaper) ++{ ++ int pipe_idx; ++ bool ret = false; ++ bool found_pipe_idx = false; ++ const struct resource_pool *pool = dc->res_pool; ++ struct resource_context *res_ctx = &state->res_ctx; ++ int mpcc_id = 0; ++ ++ if (pool && res_ctx) { ++ if (acquire) { ++ /*find pipe idx for the given stream*/ ++ for (pipe_idx = 0; pipe_idx < pool->pipe_count; pipe_idx++) { ++ if (res_ctx->pipe_ctx[pipe_idx].stream == stream) { ++ found_pipe_idx = true; ++ mpcc_id = res_ctx->pipe_ctx[pipe_idx].plane_res.hubp->inst; ++ break; ++ } ++ } ++ } else ++ found_pipe_idx = true;/*for release pipe_idx is not required*/ ++ ++ if (found_pipe_idx) { ++ if (acquire && pool->funcs->acquire_post_bldn_3dlut) ++ ret = pool->funcs->acquire_post_bldn_3dlut(res_ctx, pool, mpcc_id, lut, shaper); ++ else if (!acquire && pool->funcs->release_post_bldn_3dlut) ++ ret = pool->funcs->release_post_bldn_3dlut(res_ctx, pool, lut, shaper); ++ } ++ } ++ return ret; ++} ++ ++ + static bool is_flip_pending_in_pipes(struct dc *dc, struct dc_state *context) + { + int i; +@@ -2606,7 +2645,7 @@ static enum surface_update_type check_update_surfaces_for_stream( + stream_update->integer_scaling_update) + su_flags->bits.scaling = 1; + +- if (stream_update->out_transfer_func) ++ if (stream_update->out_transfer_func || stream_update->lut3d_func) + su_flags->bits.out_tf = 1; + + if (stream_update->abm_level) +@@ -2955,6 +2994,14 @@ static void copy_stream_update_to_stream(struct dc *dc, + sizeof(struct dc_transfer_func_distributed_points)); + } + ++ if (update->func_shaper && ++ stream->func_shaper != update->func_shaper) ++ stream->func_shaper = update->func_shaper; ++ ++ if (update->lut3d_func && ++ stream->lut3d_func != update->lut3d_func) ++ 
stream->lut3d_func = update->lut3d_func;
++
+ if (update->hdr_static_metadata)
+ stream->hdr_static_metadata = *update->hdr_static_metadata;
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
+index 4d93ca9c627b..2fd65f84dc5d 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc.h
++++ b/drivers/gpu/drm/amd/display/dc/dc.h
+@@ -1348,6 +1348,14 @@ bool dc_acquire_release_mpc_3dlut(
+ struct dc_3dlut **lut,
+ struct dc_transfer_func **shaper);
+
++bool
++dc_acquire_release_mpc_3dlut_for_ctx(struct dc *dc,
++ bool acquire,
++ struct dc_state *state,
++ struct dc_stream_state *stream,
++ struct dc_3dlut **lut,
++ struct dc_transfer_func **shaper);
++
+ void dc_resource_state_copy_construct(
+ const struct dc_state *src_ctx,
+ struct dc_state *dst_ctx);
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
+index 7a00fe525dfb..efa6cee649d0 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
+@@ -346,20 +346,37 @@ bool cm_helper_translate_curve_to_hw_format(
+ * segment is from 2^-10 to 2^1
+ * There are less than 256 points, for optimization
+ */
+- seg_distr[0] = 3;
+- seg_distr[1] = 4;
+- seg_distr[2] = 4;
+- seg_distr[3] = 4;
+- seg_distr[4] = 4;
+- seg_distr[5] = 4;
+- seg_distr[6] = 4;
+- seg_distr[7] = 4;
+- seg_distr[8] = 4;
+- seg_distr[9] = 4;
+- seg_distr[10] = 1;
+-
+- region_start = -10;
+- region_end = 1;
++ if (output_tf->tf == TRANSFER_FUNCTION_LINEAR) {
++ seg_distr[0] = 0; /* 1 */
++ seg_distr[1] = 1; /* 2 */
++ seg_distr[2] = 2; /* 4 */
++ seg_distr[3] = 3; /* 8 */
++ seg_distr[4] = 4; /* 16 */
++ seg_distr[5] = 5; /* 32 */
++ seg_distr[6] = 6; /* 64 */
++ seg_distr[7] = 7; /* 128 */
++
++ region_start = -8;
++ region_end = 1;
++ } else {
++ seg_distr[0] = 3; /* 8 */
++ seg_distr[1] = 4; /* 16 */
++ seg_distr[2] = 4;
++ seg_distr[3] = 4;
++ seg_distr[4] = 4;
++ seg_distr[5] = 4;
++ seg_distr[6] = 4;
++ seg_distr[7] = 4;
++ seg_distr[8] = 4;
++ seg_distr[9] = 4;
++ seg_distr[10] = 1; /* 2 */
++ /* total = 8 + 9 * 16 + 2 = 154 */
++
++ region_start = -10;
++ region_end = 1;
++ }
++
++
+ }
+
+ for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++)
+@@ -372,16 +389,56 @@ bool cm_helper_translate_curve_to_hw_format(
+
+ j = 0;
+ for (k = 0; k < (region_end - region_start); k++) {
+- increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]);
++ /*
++ * We're using an ugly-ish hack here. Our HW allows for
++ * 256 segments per region but SW_SEGMENTS is 16.
++ * SW_SEGMENTS has some undocumented relationship to
++ * the number of points in the tf_pts struct, which
++ * is 512, unlike what TRANSFER_FUNC_POINTS suggests.
++ *
++ * In order to work past this dilemma we'll scale our
++ * increment by (1 << 4) and then shift right by 4
++ * when accessing the elements in tf_pts.
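++ *
++ * Worked example (editorial note, assuming SW_SEGMENTS == 16
++ * as stated above): for seg_distr[k] == 7 the increment is
++ * (16 << 4) / (1 << 7) = 2, so the scaled index i advances
++ * by 2; (i >> 4) selects the tf_pts sample and t = i & 0xf
++ * weights the interpolation below by t/16 between that
++ * sample and the next one.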
++ * ++ * TODO: find a better way using SW_SEGMENTS and ++ * TRANSFER_FUNC_POINTS definitions ++ */ ++ increment = (NUMBER_SW_SEGMENTS << 4) / (1 << seg_distr[k]); + start_index = (region_start + k + MAX_LOW_POINT) * + NUMBER_SW_SEGMENTS; +- for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS; ++ for (i = (start_index << 4); i < (start_index << 4) + (NUMBER_SW_SEGMENTS << 4); + i += increment) { ++ struct fixed31_32 in_plus_one, in; ++ struct fixed31_32 value, red_value, green_value, blue_value; ++ uint32_t t = i & 0xf; ++ + if (j == hw_points - 1) + break; +- rgb_resulted[j].red = output_tf->tf_pts.red[i]; +- rgb_resulted[j].green = output_tf->tf_pts.green[i]; +- rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; ++ ++ in_plus_one = output_tf->tf_pts.red[(i >> 4) + 1]; ++ in = output_tf->tf_pts.red[i >> 4]; ++ value = dc_fixpt_sub(in_plus_one, in); ++ value = dc_fixpt_shr(dc_fixpt_mul_int(value, t), 4); ++ value = dc_fixpt_add(in, value); ++ red_value = value; ++ ++ in_plus_one = output_tf->tf_pts.green[(i >> 4) + 1]; ++ in = output_tf->tf_pts.green[i >> 4]; ++ value = dc_fixpt_sub(in_plus_one, in); ++ value = dc_fixpt_shr(dc_fixpt_mul_int(value, t), 4); ++ value = dc_fixpt_add(in, value); ++ green_value = value; ++ ++ in_plus_one = output_tf->tf_pts.blue[(i >> 4) + 1]; ++ in = output_tf->tf_pts.blue[i >> 4]; ++ value = dc_fixpt_sub(in_plus_one, in); ++ value = dc_fixpt_shr(dc_fixpt_mul_int(value, t), 4); ++ value = dc_fixpt_add(in, value); ++ blue_value = value; ++ ++ rgb_resulted[j].red = red_value; ++ rgb_resulted[j].green = green_value; ++ rgb_resulted[j].blue = blue_value; + j++; + } + } +@@ -482,10 +539,18 @@ bool cm_helper_translate_curve_to_hw_format( + rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); + rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue); + ++ + if (fixpoint == true) { +- rgb->delta_red_reg = dc_fixpt_clamp_u0d10(rgb->delta_red); +- rgb->delta_green_reg = dc_fixpt_clamp_u0d10(rgb->delta_green); +- rgb->delta_blue_reg = dc_fixpt_clamp_u0d10(rgb->delta_blue); ++ uint32_t red_clamp = dc_fixpt_clamp_u0d14(rgb->delta_red); ++ uint32_t green_clamp = dc_fixpt_clamp_u0d14(rgb->delta_green); ++ uint32_t blue_clamp = dc_fixpt_clamp_u0d14(rgb->delta_blue); ++ ++ if (red_clamp >> 10 || green_clamp >> 10 || blue_clamp >> 10) ++ DC_LOG_WARNING("Losing delta precision while programming shaper LUT."); ++ ++ rgb->delta_red_reg = red_clamp & 0x3ff; ++ rgb->delta_green_reg = green_clamp & 0x3ff; ++ rgb->delta_blue_reg = blue_clamp & 0x3ff; + rgb->red_reg = dc_fixpt_clamp_u0d14(rgb->red); + rgb->green_reg = dc_fixpt_clamp_u0d14(rgb->green); + rgb->blue_reg = dc_fixpt_clamp_u0d14(rgb->blue); +diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +index c38be3c6c234..aad9dfcad37b 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +@@ -1766,8 +1766,9 @@ static void dcn20_program_pipe( + hws->funcs.set_hdr_multiplier(pipe_ctx); + + if (pipe_ctx->update_flags.bits.enable || +- pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || +- pipe_ctx->plane_state->update_flags.bits.gamma_change) ++ pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || ++ pipe_ctx->plane_state->update_flags.bits.gamma_change || ++ pipe_ctx->plane_state->update_flags.bits.lut_3d) + hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state); + + /* dcn10_translate_regamma_to_hw_format takes 750us to finish +diff 
--git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +index 32121db2851e..fd2428871c8a 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +@@ -113,7 +113,6 @@ static bool dcn30_set_mpc_shaper_3dlut(struct pipe_ctx *pipe_ctx, + } + + if (stream->lut3d_func && +- stream->lut3d_func->state.bits.initialized == 1 && + stream->lut3d_func->state.bits.rmu_idx_valid == 1) { + if (stream->lut3d_func->state.bits.rmu_mux_num == 0) + mpcc_id_projected = stream->lut3d_func->state.bits.mpc_rmu0_mux; +@@ -131,8 +130,12 @@ static bool dcn30_set_mpc_shaper_3dlut(struct pipe_ctx *pipe_ctx, + if (acquired_rmu != stream->lut3d_func->state.bits.rmu_mux_num) + BREAK_TO_DEBUGGER(); + +- result = mpc->funcs->program_3dlut(mpc, &stream->lut3d_func->lut_3d, +- stream->lut3d_func->state.bits.rmu_mux_num); ++ if (stream->lut3d_func->state.bits.initialized == 1) ++ result = mpc->funcs->program_3dlut(mpc, &stream->lut3d_func->lut_3d, ++ stream->lut3d_func->state.bits.rmu_mux_num); ++ else ++ result = mpc->funcs->program_3dlut(mpc, NULL, ++ stream->lut3d_func->state.bits.rmu_mux_num); + result = mpc->funcs->program_shaper(mpc, shaper_lut, + stream->lut3d_func->state.bits.rmu_mux_num); + } else { +diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +index 5ac2a272c380..a6d6fcaaca1c 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +@@ -1258,6 +1258,30 @@ static struct display_stream_compressor *dcn301_dsc_create( + return &dsc->base; + } + ++static enum dc_status ++dcn301_remove_stream_from_ctx(struct dc *dc, ++ struct dc_state *new_ctx, ++ struct dc_stream_state *dc_stream) ++{ ++ struct dc_3dlut *lut3d_func; ++ struct dc_transfer_func *func_shaper; ++ ++ lut3d_func = (struct dc_3dlut *)dc_stream->lut3d_func; ++ func_shaper = (struct dc_transfer_func *)dc_stream->func_shaper; ++ ++ ASSERT((lut3d_func && func_shaper) || (!lut3d_func && !func_shaper)); ++ if (lut3d_func && func_shaper) ++ { ++ if (!dc_acquire_release_mpc_3dlut_for_ctx(dc, false, new_ctx, dc_stream, ++ &lut3d_func, &func_shaper)) ++ return DC_ERROR_UNEXPECTED; ++ } ++ ++ dc_stream->lut3d_func = lut3d_func; ++ dc_stream->func_shaper = func_shaper; ++ ++ return dcn20_remove_stream_from_ctx(dc, new_ctx, dc_stream); ++} + + static void dcn301_destroy_resource_pool(struct resource_pool **pool) + { +@@ -1406,7 +1430,7 @@ static struct resource_funcs dcn301_res_pool_funcs = { + .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer, + .add_stream_to_ctx = dcn30_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, +- .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, ++ .remove_stream_from_ctx = dcn301_remove_stream_from_ctx, + .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, + .set_mcif_arb_params = dcn30_set_mcif_arb_params, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, +diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h +index ece97ae0e826..f4cc7f97329f 100644 +--- a/drivers/gpu/drm/amd/display/include/fixed31_32.h ++++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h +@@ -69,6 +69,18 @@ static const struct fixed31_32 dc_fixpt_epsilon = { 1LL }; + static const struct fixed31_32 
dc_fixpt_half = { 0x80000000LL }; + static const struct fixed31_32 dc_fixpt_one = { 0x100000000LL }; + ++static inline struct fixed31_32 dc_fixpt_from_s3132(__u64 x) ++{ ++ struct fixed31_32 val; ++ ++ /* If negative, convert to 2's complement. */ ++ if (x & (1ULL << 63)) ++ x = -(x & ~(1ULL << 63)); ++ ++ val.value = x; ++ return val; ++} ++ + /* + * @brief + * Initialization routines +diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c +index dc01c43f6193..d72c22dcf685 100644 +--- a/drivers/gpu/drm/arm/malidp_crtc.c ++++ b/drivers/gpu/drm/arm/malidp_crtc.c +@@ -221,7 +221,7 @@ static int malidp_crtc_atomic_check_ctm(struct drm_crtc *crtc, + + /* + * The size of the ctm is checked in +- * drm_atomic_replace_property_blob_from_id. ++ * drm_property_replace_blob_from_id. + */ + ctm = (struct drm_color_ctm *)state->ctm->data; + for (i = 0; i < ARRAY_SIZE(ctm->matrix); ++i) { +diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c +index 88fcc6bbc8b7..956362f9d57c 100644 +--- a/drivers/gpu/drm/drm_atomic.c ++++ b/drivers/gpu/drm/drm_atomic.c +@@ -733,6 +733,7 @@ static void drm_atomic_plane_print_state(struct drm_printer *p, + drm_get_color_encoding_name(state->color_encoding)); + drm_printf(p, "\tcolor-range=%s\n", + drm_get_color_range_name(state->color_range)); ++ drm_printf(p, "\tcolor_mgmt_changed=%d\n", state->color_mgmt_changed); + + if (plane->funcs->atomic_print_state) + plane->funcs->atomic_print_state(p, state); +diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c +index 784e63d70a42..25bb0859fda7 100644 +--- a/drivers/gpu/drm/drm_atomic_state_helper.c ++++ b/drivers/gpu/drm/drm_atomic_state_helper.c +@@ -338,6 +338,7 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane, + state->fence = NULL; + state->commit = NULL; + state->fb_damage_clips = NULL; ++ state->color_mgmt_changed = false; + } + EXPORT_SYMBOL(__drm_atomic_helper_plane_duplicate_state); + +diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c +index d867e7f9f2cd..a6a9ee5086dd 100644 +--- a/drivers/gpu/drm/drm_atomic_uapi.c ++++ b/drivers/gpu/drm/drm_atomic_uapi.c +@@ -362,39 +362,6 @@ static s32 __user *get_out_fence_for_connector(struct drm_atomic_state *state, + return fence_ptr; + } + +-static int +-drm_atomic_replace_property_blob_from_id(struct drm_device *dev, +- struct drm_property_blob **blob, +- uint64_t blob_id, +- ssize_t expected_size, +- ssize_t expected_elem_size, +- bool *replaced) +-{ +- struct drm_property_blob *new_blob = NULL; +- +- if (blob_id != 0) { +- new_blob = drm_property_lookup_blob(dev, blob_id); +- if (new_blob == NULL) +- return -EINVAL; +- +- if (expected_size > 0 && +- new_blob->length != expected_size) { +- drm_property_blob_put(new_blob); +- return -EINVAL; +- } +- if (expected_elem_size > 0 && +- new_blob->length % expected_elem_size != 0) { +- drm_property_blob_put(new_blob); +- return -EINVAL; +- } +- } +- +- *replaced |= drm_property_replace_blob(blob, new_blob); +- drm_property_blob_put(new_blob); +- +- return 0; +-} +- + static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, + struct drm_crtc_state *state, struct drm_property *property, + uint64_t val) +@@ -415,7 +382,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, + } else if (property == config->prop_vrr_enabled) { + state->vrr_enabled = val; + } else if (property == config->degamma_lut_property) { +- ret = drm_atomic_replace_property_blob_from_id(dev, ++ 
ret = drm_property_replace_blob_from_id(dev,
+ &state->degamma_lut,
+ val,
+ -1, sizeof(struct drm_color_lut),
+@@ -423,7 +390,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
+ state->color_mgmt_changed |= replaced;
+ return ret;
+ } else if (property == config->ctm_property) {
+- ret = drm_atomic_replace_property_blob_from_id(dev,
++ ret = drm_property_replace_blob_from_id(dev,
+ &state->ctm,
+ val,
+ sizeof(struct drm_color_ctm), -1,
+@@ -431,7 +398,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
+ state->color_mgmt_changed |= replaced;
+ return ret;
+ } else if (property == config->gamma_lut_property) {
+- ret = drm_atomic_replace_property_blob_from_id(dev,
++ ret = drm_property_replace_blob_from_id(dev,
+ &state->gamma_lut,
+ val,
+ -1, sizeof(struct drm_color_lut),
+@@ -563,7 +530,7 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane,
+ } else if (property == plane->color_range_property) {
+ state->color_range = val;
+ } else if (property == config->prop_fb_damage_clips) {
+- ret = drm_atomic_replace_property_blob_from_id(dev,
++ ret = drm_property_replace_blob_from_id(dev,
+ &state->fb_damage_clips,
+ val,
+ -1,
+@@ -729,7 +696,7 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector,
+ if (state->link_status != DRM_LINK_STATUS_GOOD)
+ state->link_status = val;
+ } else if (property == config->hdr_output_metadata_property) {
+- ret = drm_atomic_replace_property_blob_from_id(dev,
++ ret = drm_property_replace_blob_from_id(dev,
+ &state->hdr_output_metadata,
+ val,
+ sizeof(struct hdr_output_metadata), -1,
+diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c
+index dfec479830e4..f72ef6493340 100644
+--- a/drivers/gpu/drm/drm_property.c
++++ b/drivers/gpu/drm/drm_property.c
+@@ -751,6 +751,55 @@ bool drm_property_replace_blob(struct drm_property_blob **blob,
+ }
+ EXPORT_SYMBOL(drm_property_replace_blob);
+
++/**
++ * drm_property_replace_blob_from_id - replace a blob property taking a reference
++ * @dev: DRM device
++ * @blob: a pointer to the member blob to be replaced
++ * @blob_id: the id of the new blob to replace with
++ * @expected_size: expected size of the blob property
++ * @expected_elem_size: expected size of an element in the blob property
++ * @replaced: set to true if the blob was replaced
++ *
++ * Look up the new blob from its id, take a reference, check the expected
++ * sizes of the blob and its elements, and replace the old blob with the
++ * new one. @replaced advertises whether the replacement took place.
++ *
++ * Return: 0 on success, or -EINVAL if the new blob was not found or the
++ * sizes don't match.
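++ *
++ * Usage sketch (editorial; this mirrors the drm_atomic_crtc_set_property()
++ * caller converted earlier in this patch):
++ *
++ * ret = drm_property_replace_blob_from_id(dev, &state->degamma_lut,
++ * val, -1,
++ * sizeof(struct drm_color_lut),
++ * &replaced);
++ * state->color_mgmt_changed |= replaced;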
++ */ ++int drm_property_replace_blob_from_id(struct drm_device *dev, ++ struct drm_property_blob **blob, ++ uint64_t blob_id, ++ ssize_t expected_size, ++ ssize_t expected_elem_size, ++ bool *replaced) ++{ ++ struct drm_property_blob *new_blob = NULL; ++ ++ if (blob_id != 0) { ++ new_blob = drm_property_lookup_blob(dev, blob_id); ++ if (new_blob == NULL) ++ return -EINVAL; ++ ++ if (expected_size > 0 && ++ new_blob->length != expected_size) { ++ drm_property_blob_put(new_blob); ++ return -EINVAL; ++ } ++ if (expected_elem_size > 0 && ++ new_blob->length % expected_elem_size != 0) { ++ drm_property_blob_put(new_blob); ++ return -EINVAL; ++ } ++ } ++ ++ *replaced |= drm_property_replace_blob(blob, new_blob); ++ drm_property_blob_put(new_blob); ++ ++ return 0; ++} ++EXPORT_SYMBOL(drm_property_replace_blob_from_id); ++ + int drm_mode_getblob_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) + { +diff --git a/include/drm/drm_mode_object.h b/include/drm/drm_mode_object.h +index 912f1e415685..08d7a7f0188f 100644 +--- a/include/drm/drm_mode_object.h ++++ b/include/drm/drm_mode_object.h +@@ -60,7 +60,7 @@ struct drm_mode_object { + void (*free_cb)(struct kref *kref); + }; + +-#define DRM_OBJECT_MAX_PROPERTY 24 ++#define DRM_OBJECT_MAX_PROPERTY 64 + /** + * struct drm_object_properties - property tracking for &drm_mode_object + */ +diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h +index 51291983ea44..52c3287da0da 100644 +--- a/include/drm/drm_plane.h ++++ b/include/drm/drm_plane.h +@@ -237,6 +237,13 @@ struct drm_plane_state { + + /** @state: backpointer to global drm_atomic_state */ + struct drm_atomic_state *state; ++ ++ /** ++ * @color_mgmt_changed: Color management properties have changed. Used ++ * by the atomic helpers and drivers to steer the atomic commit control ++ * flow. 
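++ *
++ * Editorial example (my_program_plane_luts() is a hypothetical helper):
++ * a driver's commit path may use this bit to skip redundant LUT
++ * reprogramming, e.g.:
++ *
++ * if (new_state->color_mgmt_changed)
++ * my_program_plane_luts(plane, new_state);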
++ */ ++ bool color_mgmt_changed : 1; + }; + + static inline struct drm_rect +diff --git a/include/drm/drm_property.h b/include/drm/drm_property.h +index 65bc9710a470..082f29156b3e 100644 +--- a/include/drm/drm_property.h ++++ b/include/drm/drm_property.h +@@ -279,6 +279,12 @@ struct drm_property_blob *drm_property_create_blob(struct drm_device *dev, + const void *data); + struct drm_property_blob *drm_property_lookup_blob(struct drm_device *dev, + uint32_t id); ++int drm_property_replace_blob_from_id(struct drm_device *dev, ++ struct drm_property_blob **blob, ++ uint64_t blob_id, ++ ssize_t expected_size, ++ ssize_t expected_elem_size, ++ bool *replaced); + int drm_property_replace_global_blob(struct drm_device *dev, + struct drm_property_blob **replace, + size_t length, +-- +2.41.0 + +From 3f03c9100fee5841528bc4cc5cbabe55fd2cd48a Mon Sep 17 00:00:00 2001 +From: Peter Jung +Date: Wed, 19 Jul 2023 18:50:00 +0200 +Subject: [PATCH 5/8] ksm Signed-off-by: Peter Jung --- @@ -11789,10 +13941,10 @@ index 860b2dcf3ac4..96fe36a6d0f5 100644 -- 2.41.0 -From 6118edead4a8f108c5069f9629816f4d52b2a131 Mon Sep 17 00:00:00 2001 +From fa79cac5b93e4f85435d80b748f1a1049e23a938 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Tue, 11 Jul 2023 19:25:38 +0200 -Subject: [PATCH 5/7] kvm-lru +Date: Wed, 19 Jul 2023 18:50:17 +0200 +Subject: [PATCH 6/8] kvm-lru Signed-off-by: Peter Jung --- @@ -11800,8 +13952,8 @@ Signed-off-by: Peter Jung arch/arm64/include/asm/kvm_host.h | 6 + arch/arm64/include/asm/kvm_pgtable.h | 55 +++++++ arch/arm64/kvm/arm.c | 1 + - arch/arm64/kvm/hyp/pgtable.c | 61 +------ - arch/arm64/kvm/mmu.c | 53 +++++- + arch/arm64/kvm/hyp/pgtable.c | 61 +------- + arch/arm64/kvm/mmu.c | 53 ++++++- arch/powerpc/include/asm/kvm_host.h | 8 + arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/book3s.c | 6 + @@ -11811,16 +13963,16 @@ Signed-off-by: Peter Jung arch/x86/include/asm/kvm_host.h | 13 ++ arch/x86/kvm/mmu.h | 6 - arch/x86/kvm/mmu/spte.h | 1 - - arch/x86/kvm/mmu/tdp_mmu.c | 34 ++++ + arch/x86/kvm/mmu/tdp_mmu.c | 34 +++++ include/linux/kvm_host.h | 22 +++ - include/linux/mmu_notifier.h | 79 ++++++--- + include/linux/mmu_notifier.h | 79 ++++++---- include/linux/mmzone.h | 6 +- include/trace/events/kvm.h | 15 -- mm/mmu_notifier.c | 48 ++---- mm/rmap.c | 8 +- - mm/vmscan.c | 152 +++++++++++++++--- - virt/kvm/kvm_main.c | 115 +++++++------ - 24 files changed, 553 insertions(+), 214 deletions(-) + mm/vmscan.c | 139 ++++++++++++++++-- + virt/kvm/kvm_main.c | 115 +++++++++------ + 24 files changed, 546 insertions(+), 208 deletions(-) diff --git a/Documentation/admin-guide/mm/multigen_lru.rst b/Documentation/admin-guide/mm/multigen_lru.rst index 33e068830497..0ae2a6d4d94c 100644 @@ -11961,7 +14113,7 @@ index 14391826241c..ee93271035d9 100644 free_cpumask_var(kvm->arch.supported_cpus); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c -index 95dae02ccc2e..2bd1a2d4df61 100644 +index 37bd64e912ca..0ee3d5b49f27 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -12,49 +12,6 @@ @@ -12014,7 +14166,7 @@ index 95dae02ccc2e..2bd1a2d4df61 100644 struct kvm_pgtable_walk_data { struct kvm_pgtable_walker *walker; -@@ -714,16 +671,6 @@ static bool stage2_pte_is_locked(kvm_pte_t pte) +@@ -722,16 +679,6 @@ static bool stage2_pte_is_locked(kvm_pte_t pte) return !kvm_pte_valid(pte) && (pte & KVM_INVALID_PTE_LOCKED); } @@ -12031,7 +14183,7 @@ index 95dae02ccc2e..2bd1a2d4df61 100644 /** * stage2_try_break_pte() - Invalidates a pte according to the * 'break-before-make' 
requirements of the -@@ -1053,8 +1000,12 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, +@@ -1061,8 +1008,12 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops), kvm_granule_size(ctx->level)); @@ -12731,7 +14883,7 @@ index 19392e090bec..51eae5411fa7 100644 pvmw.pte)) referenced++; diff --git a/mm/vmscan.c b/mm/vmscan.c -index 28f6d5cd362e..d6802821d8f7 100644 +index 6aa2d0a0b1d6..d6802821d8f7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -57,6 +57,7 @@ @@ -12985,65 +15137,7 @@ index 28f6d5cd362e..d6802821d8f7 100644 } /****************************************************************************** -@@ -4736,10 +4846,11 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) - { - int seg; - int old, new; -+ unsigned long flags; - int bin = get_random_u32_below(MEMCG_NR_BINS); - struct pglist_data *pgdat = lruvec_pgdat(lruvec); - -- spin_lock(&pgdat->memcg_lru.lock); -+ spin_lock_irqsave(&pgdat->memcg_lru.lock, flags); - - VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list)); - -@@ -4774,7 +4885,7 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) - if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq)) - WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1); - -- spin_unlock(&pgdat->memcg_lru.lock); -+ spin_unlock_irqrestore(&pgdat->memcg_lru.lock, flags); - } - - void lru_gen_online_memcg(struct mem_cgroup *memcg) -@@ -4787,7 +4898,7 @@ void lru_gen_online_memcg(struct mem_cgroup *memcg) - struct pglist_data *pgdat = NODE_DATA(nid); - struct lruvec *lruvec = get_lruvec(memcg, nid); - -- spin_lock(&pgdat->memcg_lru.lock); -+ spin_lock_irq(&pgdat->memcg_lru.lock); - - VM_WARN_ON_ONCE(!hlist_nulls_unhashed(&lruvec->lrugen.list)); - -@@ -4798,7 +4909,7 @@ void lru_gen_online_memcg(struct mem_cgroup *memcg) - - lruvec->lrugen.gen = gen; - -- spin_unlock(&pgdat->memcg_lru.lock); -+ spin_unlock_irq(&pgdat->memcg_lru.lock); - } - } - -@@ -4822,7 +4933,7 @@ void lru_gen_release_memcg(struct mem_cgroup *memcg) - struct pglist_data *pgdat = NODE_DATA(nid); - struct lruvec *lruvec = get_lruvec(memcg, nid); - -- spin_lock(&pgdat->memcg_lru.lock); -+ spin_lock_irq(&pgdat->memcg_lru.lock); - - VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list)); - -@@ -4834,7 +4945,7 @@ void lru_gen_release_memcg(struct mem_cgroup *memcg) - if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq)) - WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1); - -- spin_unlock(&pgdat->memcg_lru.lock); -+ spin_unlock_irq(&pgdat->memcg_lru.lock); - } - } - -@@ -5726,6 +5837,9 @@ static ssize_t enabled_show(struct kobject *kobj, struct kobj_attribute *attr, c +@@ -5727,6 +5837,9 @@ static ssize_t enabled_show(struct kobject *kobj, struct kobj_attribute *attr, c if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) caps |= BIT(LRU_GEN_NONLEAF_YOUNG); @@ -13237,10 +15331,10 @@ index 65f94f592ff8..9db05880b6b9 100644 -- 2.41.0 -From a87c6bebcb1e942bbc824d451e0a93efb954116c Mon Sep 17 00:00:00 2001 +From f002178b1b3fe685dc55b8772ae140e09ad4d894 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Tue, 11 Jul 2023 19:25:53 +0200 -Subject: [PATCH 6/7] sched +Date: Wed, 19 Jul 2023 18:51:24 +0200 +Subject: [PATCH 7/8] sched Signed-off-by: Peter Jung --- @@ -13796,7 +15890,7 @@ index 0b2340a79b65..aeeba46a096b 100644 SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rq->x));\ } while (0) diff --git 
a/kernel/sched/fair.c b/kernel/sched/fair.c -index 9671df93d1f5..64cbea29b007 100644 +index 6742b1e1a359..a6205f9e6cb5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1082,6 +1082,23 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -13927,7 +16021,7 @@ index 9671df93d1f5..64cbea29b007 100644 list_del_rcu(&cfs_rq->throttled_list); raw_spin_unlock(&cfs_b->lock); -@@ -7045,6 +7076,37 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool +@@ -7063,6 +7094,37 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool return idle_cpu; } @@ -13965,7 +16059,7 @@ index 9671df93d1f5..64cbea29b007 100644 /* * Scan the asym_capacity domain for idle CPUs; pick the first idle one on which * the task fits. If no CPU is big enough, but there are idle ones, try to -@@ -7217,6 +7279,12 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) +@@ -7235,6 +7297,12 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) if ((unsigned)i < nr_cpumask_bits) return i; @@ -13978,7 +16072,7 @@ index 9671df93d1f5..64cbea29b007 100644 return target; } -@@ -8358,6 +8426,11 @@ enum group_type { +@@ -8376,6 +8444,11 @@ enum group_type { * more powerful CPU. */ group_misfit_task, @@ -13990,7 +16084,7 @@ index 9671df93d1f5..64cbea29b007 100644 /* * SD_ASYM_PACKING only: One local CPU with higher capacity is available, * and the task should be migrated to it instead of running on the -@@ -9066,6 +9139,7 @@ struct sg_lb_stats { +@@ -9084,6 +9157,7 @@ struct sg_lb_stats { unsigned int group_weight; enum group_type group_type; unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */ @@ -13998,7 +16092,7 @@ index 9671df93d1f5..64cbea29b007 100644 unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */ #ifdef CONFIG_NUMA_BALANCING unsigned int nr_numa_running; -@@ -9339,6 +9413,9 @@ group_type group_classify(unsigned int imbalance_pct, +@@ -9357,6 +9431,9 @@ group_type group_classify(unsigned int imbalance_pct, if (sgs->group_asym_packing) return group_asym_packing; @@ -14008,7 +16102,7 @@ index 9671df93d1f5..64cbea29b007 100644 if (sgs->group_misfit_task_load) return group_misfit_task; -@@ -9349,98 +9426,128 @@ group_type group_classify(unsigned int imbalance_pct, +@@ -9367,98 +9444,128 @@ group_type group_classify(unsigned int imbalance_pct, } /** @@ -14205,7 +16299,7 @@ index 9671df93d1f5..64cbea29b007 100644 } static inline bool -@@ -9535,6 +9642,10 @@ static inline void update_sg_lb_stats(struct lb_env *env, +@@ -9553,6 +9660,10 @@ static inline void update_sg_lb_stats(struct lb_env *env, sgs->group_asym_packing = 1; } @@ -14216,7 +16310,7 @@ index 9671df93d1f5..64cbea29b007 100644 sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs); /* Computing avg_load makes sense only when group is overloaded */ -@@ -9619,6 +9730,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, +@@ -9637,6 +9748,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, return false; break; @@ -14224,7 +16318,7 @@ index 9671df93d1f5..64cbea29b007 100644 case group_fully_busy: /* * Select the fully busy group with highest avg_load. In -@@ -9628,13 +9740,37 @@ static bool update_sd_pick_busiest(struct lb_env *env, +@@ -9646,13 +9758,37 @@ static bool update_sd_pick_busiest(struct lb_env *env, * contention when accessing shared HW resources. 
* * XXX for now avg_load is not computed and always 0 so we @@ -14264,7 +16358,7 @@ index 9671df93d1f5..64cbea29b007 100644 /* * Select not overloaded group with lowest number of idle cpus * and highest number of running tasks. We could also compare -@@ -9831,6 +9967,7 @@ static bool update_pick_idlest(struct sched_group *idlest, +@@ -9849,6 +9985,7 @@ static bool update_pick_idlest(struct sched_group *idlest, case group_imbalanced: case group_asym_packing: @@ -14272,7 +16366,7 @@ index 9671df93d1f5..64cbea29b007 100644 /* Those types are not used in the slow wakeup path */ return false; -@@ -9962,6 +10099,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) +@@ -9980,6 +10117,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) case group_imbalanced: case group_asym_packing: @@ -14280,7 +16374,7 @@ index 9671df93d1f5..64cbea29b007 100644 /* Those type are not used in the slow wakeup path */ return NULL; -@@ -10106,7 +10244,6 @@ static void update_idle_cpu_scan(struct lb_env *env, +@@ -10124,7 +10262,6 @@ static void update_idle_cpu_scan(struct lb_env *env, static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds) { @@ -14288,7 +16382,7 @@ index 9671df93d1f5..64cbea29b007 100644 struct sched_group *sg = env->sd->groups; struct sg_lb_stats *local = &sds->local_stat; struct sg_lb_stats tmp_sgs; -@@ -10147,8 +10284,13 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd +@@ -10165,8 +10302,13 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd sg = sg->next; } while (sg != env->sd->groups); @@ -14304,7 +16398,7 @@ index 9671df93d1f5..64cbea29b007 100644 if (env->sd->flags & SD_NUMA) -@@ -10212,6 +10354,13 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s +@@ -10230,6 +10372,13 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s return; } @@ -14318,7 +16412,7 @@ index 9671df93d1f5..64cbea29b007 100644 if (busiest->group_type == group_imbalanced) { /* * In the group_imb case we cannot rely on group-wide averages -@@ -10259,14 +10408,12 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s +@@ -10277,14 +10426,12 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s } if (busiest->group_weight == 1 || sds->prefer_sibling) { @@ -14334,7 +16428,7 @@ index 9671df93d1f5..64cbea29b007 100644 } else { /* -@@ -10458,22 +10605,32 @@ static struct sched_group *find_busiest_group(struct lb_env *env) +@@ -10476,22 +10623,32 @@ static struct sched_group *find_busiest_group(struct lb_env *env) goto out_balanced; } @@ -14371,7 +16465,7 @@ index 9671df93d1f5..64cbea29b007 100644 /* * If the busiest group is not overloaded * and there is no imbalance between this and busiest -@@ -10484,12 +10641,14 @@ static struct sched_group *find_busiest_group(struct lb_env *env) +@@ -10502,12 +10659,14 @@ static struct sched_group *find_busiest_group(struct lb_env *env) * there is more than 1 CPU per group. 
*/ goto out_balanced; @@ -14387,7 +16481,7 @@ index 9671df93d1f5..64cbea29b007 100644 } force_balance: -@@ -10560,8 +10719,15 @@ static struct rq *find_busiest_queue(struct lb_env *env, +@@ -10578,8 +10737,15 @@ static struct rq *find_busiest_queue(struct lb_env *env, nr_running == 1) continue; @@ -14404,7 +16498,7 @@ index 9671df93d1f5..64cbea29b007 100644 sched_asym_prefer(i, env->dst_cpu) && nr_running == 1) continue; -@@ -10650,12 +10816,19 @@ static inline bool +@@ -10668,12 +10834,19 @@ static inline bool asym_active_balance(struct lb_env *env) { /* @@ -14428,7 +16522,7 @@ index 9671df93d1f5..64cbea29b007 100644 } static inline bool -@@ -10709,7 +10882,7 @@ static int active_load_balance_cpu_stop(void *data); +@@ -10727,7 +10900,7 @@ static int active_load_balance_cpu_stop(void *data); static int should_we_balance(struct lb_env *env) { struct sched_group *sg = env->sd->groups; @@ -14437,7 +16531,7 @@ index 9671df93d1f5..64cbea29b007 100644 /* * Ensure the balancing environment is consistent; can happen -@@ -10736,10 +10909,24 @@ static int should_we_balance(struct lb_env *env) +@@ -10754,10 +10927,24 @@ static int should_we_balance(struct lb_env *env) if (!idle_cpu(cpu)) continue; @@ -14462,7 +16556,7 @@ index 9671df93d1f5..64cbea29b007 100644 /* Are we the first CPU of this group ? */ return group_balance_cpu(sg) == env->dst_cpu; } -@@ -10762,7 +10949,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, +@@ -10780,7 +10967,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, .sd = sd, .dst_cpu = this_cpu, .dst_rq = this_rq, @@ -14471,7 +16565,7 @@ index 9671df93d1f5..64cbea29b007 100644 .idle = idle, .loop_break = SCHED_NR_MIGRATE_BREAK, .cpus = cpus, -@@ -11389,9 +11576,13 @@ static void nohz_balancer_kick(struct rq *rq) +@@ -11407,9 +11594,13 @@ static void nohz_balancer_kick(struct rq *rq) * When ASYM_PACKING; see if there's a more preferred CPU * currently idle; in which case, kick the ILB to move tasks * around. @@ -14552,7 +16646,7 @@ index e072f6b31bf3..2ccb0b2ebd78 100644 if (group->rtpoll_states == 0) { group->rtpoll_until = 0; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index ec7b3e0a2b20..0605fb53816d 100644 +index 81ac605b9cd5..a6e814eb84cd 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -286,12 +286,6 @@ struct rt_bandwidth { @@ -14577,7 +16671,7 @@ index ec7b3e0a2b20..0605fb53816d 100644 int throttled; int throttle_count; struct list_head throttled_list; -@@ -1772,6 +1768,13 @@ queue_balance_callback(struct rq *rq, +@@ -1794,6 +1790,13 @@ queue_balance_callback(struct rq *rq, for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \ __sd; __sd = __sd->parent) @@ -14591,7 +16685,7 @@ index ec7b3e0a2b20..0605fb53816d 100644 /** * highest_flag_domain - Return highest sched_domain containing flag. * @cpu: The CPU whose highest level of sched domain is to -@@ -1779,16 +1782,25 @@ queue_balance_callback(struct rq *rq, +@@ -1801,16 +1804,25 @@ queue_balance_callback(struct rq *rq, * @flag: The flag to check for the highest sched_domain * for the given CPU. 
* @@ -14620,7 +16714,7 @@ index ec7b3e0a2b20..0605fb53816d 100644 } return hsd; -@@ -1844,6 +1856,7 @@ struct sched_group { +@@ -1866,6 +1878,7 @@ struct sched_group { atomic_t ref; unsigned int group_weight; @@ -14628,7 +16722,7 @@ index ec7b3e0a2b20..0605fb53816d 100644 struct sched_group_capacity *sgc; int asym_prefer_cpu; /* CPU of highest priority in group */ int flags; -@@ -2378,7 +2391,6 @@ extern struct rt_bandwidth def_rt_bandwidth; +@@ -2400,7 +2413,6 @@ extern struct rt_bandwidth def_rt_bandwidth; extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); @@ -14699,10 +16793,10 @@ index 1b725510dd0f..a5758661875c 100644 -- 2.41.0 -From da924e0790a68d55b9e03f6892a1cd82c98b660b Mon Sep 17 00:00:00 2001 +From 691757954b7447086955a7d38e525c4be37c9a67 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Tue, 11 Jul 2023 19:26:20 +0200 -Subject: [PATCH 7/7] zstd 1.5.5 +Date: Wed, 19 Jul 2023 18:52:42 +0200 +Subject: [PATCH 8/8] zstd Signed-off-by: Peter Jung --- diff --git a/patches/0002-eevdf.patch b/patches/0002-eevdf.patch index 63175c9..8fd8ec9 100644 --- a/patches/0002-eevdf.patch +++ b/patches/0002-eevdf.patch @@ -1,6 +1,6 @@ -From d5ebb5aa8f44f2a81002becad5f85b6e70801575 Mon Sep 17 00:00:00 2001 +From 5c15cb285591295dbbe5da9d7d957fa36e49db0b Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Tue, 11 Jul 2023 19:27:06 +0200 +Date: Wed, 19 Jul 2023 18:55:28 +0200 Subject: [PATCH] EEVDF Signed-off-by: Peter Jung @@ -20,7 +20,7 @@ Signed-off-by: Peter Jung 12 files changed, 733 insertions(+), 658 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst -index e592a9364473..c826ab4e2e1a 100644 +index e592a93644739..c826ab4e2e1a1 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1121,6 +1121,16 @@ All time durations are in microseconds. 
@@ -41,7 +41,7 @@ index e592a9364473..c826ab4e2e1a 100644 Memory diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h -index 7ee7ed5de722..6dbc5a1bf6a8 100644 +index 7ee7ed5de7227..6dbc5a1bf6a8c 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -60,6 +60,32 @@ rb_insert_augmented_cached(struct rb_node *node, @@ -78,7 +78,7 @@ index 7ee7ed5de722..6dbc5a1bf6a8 100644 * Template for declaring augmented rbtree callbacks (generic case) * diff --git a/include/linux/sched.h b/include/linux/sched.h -index 8473324705ca..88c3e7ba8992 100644 +index 8473324705caa..88c3e7ba8992e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -550,13 +550,18 @@ struct sched_entity { @@ -110,7 +110,7 @@ index 8473324705ca..88c3e7ba8992 100644 struct sched_entity se; struct sched_rt_entity rt; diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h -index 3bac0a8ceab2..b2e932c25be6 100644 +index 3bac0a8ceab26..b2e932c25be62 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -132,6 +132,7 @@ struct clone_args { @@ -131,7 +131,7 @@ index 3bac0a8ceab2..b2e932c25be6 100644 #endif /* _UAPI_LINUX_SCHED_H */ diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h -index f2c4589d4dbf..db1e8199e8c8 100644 +index f2c4589d4dbfe..db1e8199e8c80 100644 --- a/include/uapi/linux/sched/types.h +++ b/include/uapi/linux/sched/types.h @@ -10,6 +10,7 @@ struct sched_param { @@ -175,7 +175,7 @@ index f2c4589d4dbf..db1e8199e8c8 100644 #endif /* _UAPI_LINUX_SCHED_TYPES_H */ diff --git a/init/init_task.c b/init/init_task.c -index ff6c4b9bfe6b..511cbcf3510d 100644 +index ff6c4b9bfe6b1..511cbcf3510dc 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -78,6 +78,7 @@ struct task_struct init_task @@ -196,7 +196,7 @@ index ff6c4b9bfe6b..511cbcf3510d 100644 .rt = { .run_list = LIST_HEAD_INIT(init_task.rt.run_list), diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 1b971c69d3a2..df2f22a9729c 100644 +index 1b971c69d3a2a..df2f22a9729cb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1305,6 +1305,12 @@ static void set_load_weight(struct task_struct *p, bool update_load) @@ -358,7 +358,7 @@ index 1b971c69d3a2..df2f22a9729c 100644 #ifdef CONFIG_CFS_BANDWIDTH { diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index aeeba46a096b..5c743bcb340d 100644 +index aeeba46a096b9..5c743bcb340d2 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -347,10 +347,7 @@ static __init int sched_init_debug(void) @@ -462,7 +462,7 @@ index aeeba46a096b..5c743bcb340d 100644 P(dl.runtime); P(dl.deadline); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 64cbea29b007..36dcf4770830 100644 +index a6205f9e6cb59..15167f12b9cf3 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -47,6 +47,7 @@ @@ -1663,7 +1663,7 @@ index 64cbea29b007..36dcf4770830 100644 } -@@ -6241,13 +6335,12 @@ static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {} +@@ -6259,13 +6353,12 @@ static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {} static void hrtick_start_fair(struct rq *rq, struct task_struct *p) { struct sched_entity *se = &p->se; @@ -1678,7 +1678,7 @@ index 64cbea29b007..36dcf4770830 100644 s64 delta = slice - ran; if (delta < 0) { -@@ -6271,8 +6364,7 @@ static void hrtick_update(struct rq *rq) +@@ -6289,8 +6382,7 @@ static void hrtick_update(struct rq *rq) if (!hrtick_enabled_fair(rq) || curr->sched_class != &fair_sched_class) return; @@ -1688,7 +1688,7 @@ index 
64cbea29b007..36dcf4770830 100644 } #else /* !CONFIG_SCHED_HRTICK */ static inline void -@@ -6313,17 +6405,6 @@ static int sched_idle_rq(struct rq *rq) +@@ -6331,17 +6423,6 @@ static int sched_idle_rq(struct rq *rq) rq->nr_running); } @@ -1706,7 +1706,7 @@ index 64cbea29b007..36dcf4770830 100644 #ifdef CONFIG_SMP static int sched_idle_cpu(int cpu) { -@@ -7809,18 +7890,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) +@@ -7827,18 +7908,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) { struct sched_entity *se = &p->se; @@ -1725,7 +1725,7 @@ index 64cbea29b007..36dcf4770830 100644 if (!task_on_rq_migrating(p)) { remove_entity_load_avg(se); -@@ -7858,66 +7927,6 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) +@@ -7876,66 +7945,6 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) } #endif /* CONFIG_SMP */ @@ -1792,7 +1792,7 @@ index 64cbea29b007..36dcf4770830 100644 static void set_next_buddy(struct sched_entity *se) { for_each_sched_entity(se) { -@@ -7929,12 +7938,6 @@ static void set_next_buddy(struct sched_entity *se) +@@ -7947,12 +7956,6 @@ static void set_next_buddy(struct sched_entity *se) } } @@ -1805,7 +1805,7 @@ index 64cbea29b007..36dcf4770830 100644 /* * Preempt the current task with a newly woken task if needed: */ -@@ -7943,7 +7946,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ +@@ -7961,7 +7964,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ struct task_struct *curr = rq->curr; struct sched_entity *se = &curr->se, *pse = &p->se; struct cfs_rq *cfs_rq = task_cfs_rq(curr); @@ -1813,7 +1813,7 @@ index 64cbea29b007..36dcf4770830 100644 int next_buddy_marked = 0; int cse_is_idle, pse_is_idle; -@@ -7959,7 +7961,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ +@@ -7977,7 +7979,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ if (unlikely(throttled_hierarchy(cfs_rq_of(pse)))) return; @@ -1822,7 +1822,7 @@ index 64cbea29b007..36dcf4770830 100644 set_next_buddy(pse); next_buddy_marked = 1; } -@@ -8004,35 +8006,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ +@@ -8022,35 +8024,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ if (cse_is_idle != pse_is_idle) return; @@ -1865,7 +1865,7 @@ index 64cbea29b007..36dcf4770830 100644 } #ifdef CONFIG_SMP -@@ -8233,8 +8219,6 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev) +@@ -8251,8 +8237,6 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev) /* * sched_yield() is very simple @@ -1874,7 +1874,7 @@ index 64cbea29b007..36dcf4770830 100644 */ static void yield_task_fair(struct rq *rq) { -@@ -8250,21 +8234,19 @@ static void yield_task_fair(struct rq *rq) +@@ -8268,21 +8252,19 @@ static void yield_task_fair(struct rq *rq) clear_buddies(cfs_rq, se); @@ -1908,7 +1908,7 @@ index 64cbea29b007..36dcf4770830 100644 } static bool yield_to_task_fair(struct rq *rq, struct task_struct *p) -@@ -8512,8 +8494,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env) +@@ -8530,8 +8512,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env) * Buddy candidates are cache hot: */ if (sched_feat(CACHE_HOT_BUDDY) && env->dst_rq->nr_running && @@ -1918,7 +1918,7 @@ index 64cbea29b007..36dcf4770830 100644 return 1; if (sysctl_sched_migration_cost == -1) -@@ -12139,8 +12120,8 @@ 
static void rq_offline_fair(struct rq *rq) +@@ -12157,8 +12138,8 @@ static void rq_offline_fair(struct rq *rq) static inline bool __entity_slice_used(struct sched_entity *se, int min_nr_tasks) { @@ -1928,7 +1928,7 @@ index 64cbea29b007..36dcf4770830 100644 return (rtime * min_nr_tasks > slice); } -@@ -12296,8 +12277,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) +@@ -12314,8 +12295,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) */ static void task_fork_fair(struct task_struct *p) { @@ -1938,7 +1938,7 @@ index 64cbea29b007..36dcf4770830 100644 struct rq *rq = this_rq(); struct rq_flags rf; -@@ -12306,22 +12287,9 @@ static void task_fork_fair(struct task_struct *p) +@@ -12324,22 +12305,9 @@ static void task_fork_fair(struct task_struct *p) cfs_rq = task_cfs_rq(current); curr = cfs_rq->curr; @@ -1963,7 +1963,7 @@ index 64cbea29b007..36dcf4770830 100644 rq_unlock(rq, &rf); } -@@ -12350,34 +12318,6 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) +@@ -12368,34 +12336,6 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) check_preempt_curr(rq, p, 0); } @@ -1998,7 +1998,7 @@ index 64cbea29b007..36dcf4770830 100644 #ifdef CONFIG_FAIR_GROUP_SCHED /* * Propagate the changes of the sched_entity across the tg tree to make it -@@ -12448,16 +12388,6 @@ static void attach_entity_cfs_rq(struct sched_entity *se) +@@ -12466,16 +12406,6 @@ static void attach_entity_cfs_rq(struct sched_entity *se) static void detach_task_cfs_rq(struct task_struct *p) { struct sched_entity *se = &p->se; @@ -2015,7 +2015,7 @@ index 64cbea29b007..36dcf4770830 100644 detach_entity_cfs_rq(se); } -@@ -12465,12 +12395,8 @@ static void detach_task_cfs_rq(struct task_struct *p) +@@ -12483,12 +12413,8 @@ static void detach_task_cfs_rq(struct task_struct *p) static void attach_task_cfs_rq(struct task_struct *p) { struct sched_entity *se = &p->se; @@ -2028,7 +2028,7 @@ index 64cbea29b007..36dcf4770830 100644 } static void switched_from_fair(struct rq *rq, struct task_struct *p) -@@ -12581,6 +12507,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) +@@ -12599,6 +12525,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) goto err; tg->shares = NICE_0_LOAD; @@ -2036,7 +2036,7 @@ index 64cbea29b007..36dcf4770830 100644 init_cfs_bandwidth(tg_cfs_bandwidth(tg)); -@@ -12679,6 +12606,9 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, +@@ -12697,6 +12624,9 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, } se->my_q = cfs_rq; @@ -2046,7 +2046,7 @@ index 64cbea29b007..36dcf4770830 100644 /* guarantee group entities always have weight */ update_load_set(&se->load, NICE_0_LOAD); se->parent = parent; -@@ -12809,6 +12739,29 @@ int sched_group_set_idle(struct task_group *tg, long idle) +@@ -12827,6 +12757,29 @@ int sched_group_set_idle(struct task_group *tg, long idle) return 0; } @@ -2076,7 +2076,7 @@ index 64cbea29b007..36dcf4770830 100644 #else /* CONFIG_FAIR_GROUP_SCHED */ void free_fair_sched_group(struct task_group *tg) { } -@@ -12835,7 +12788,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task +@@ -12853,7 +12806,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task * idle runqueue: */ if (rq->cfs.load.weight) @@ -2086,7 +2086,7 @@ index 64cbea29b007..36dcf4770830 100644 return rr_interval; } diff --git a/kernel/sched/features.h b/kernel/sched/features.h -index 
9e390eb82e38..ca95044a7479 100644 +index 9e390eb82e384..ca95044a74791 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -1,16 +1,12 @@ @@ -2133,7 +2133,7 @@ index 9e390eb82e38..ca95044a7479 100644 -SCHED_FEAT(ALT_PERIOD, true) -SCHED_FEAT(BASE_SLICE, true) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index 0605fb53816d..96b1ae519f20 100644 +index a6e814eb84cd8..abf5a48b509c6 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -372,6 +372,8 @@ struct task_group { @@ -2174,7 +2174,7 @@ index 0605fb53816d..96b1ae519f20 100644 #ifdef CONFIG_SCHED_DEBUG unsigned int nr_spread_over; -@@ -2170,6 +2176,7 @@ extern const u32 sched_prio_to_wmult[40]; +@@ -2192,6 +2198,7 @@ extern const u32 sched_prio_to_wmult[40]; #else #define ENQUEUE_MIGRATED 0x00 #endif @@ -2182,7 +2182,7 @@ index 0605fb53816d..96b1ae519f20 100644 #define RETRY_TASK ((void *)-1UL) -@@ -2474,11 +2481,9 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags); +@@ -2496,11 +2503,9 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags); extern const_debug unsigned int sysctl_sched_nr_migrate; extern const_debug unsigned int sysctl_sched_migration_cost; @@ -2196,7 +2196,7 @@ index 0605fb53816d..96b1ae519f20 100644 extern int sysctl_resched_latency_warn_ms; extern int sysctl_resched_latency_warn_once; -@@ -2491,6 +2496,8 @@ extern unsigned int sysctl_numa_balancing_scan_size; +@@ -2513,6 +2518,8 @@ extern unsigned int sysctl_numa_balancing_scan_size; extern unsigned int sysctl_numa_balancing_hot_threshold; #endif @@ -2205,7 +2205,7 @@ index 0605fb53816d..96b1ae519f20 100644 #ifdef CONFIG_SCHED_HRTICK /* -@@ -3499,4 +3506,7 @@ static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { } +@@ -3521,4 +3528,7 @@ static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { } static inline void init_sched_mm_cid(struct task_struct *t) { } #endif @@ -2214,7 +2214,7 @@ index 0605fb53816d..96b1ae519f20 100644 + #endif /* _KERNEL_SCHED_SCHED_H */ diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h -index 3bac0a8ceab2..b2e932c25be6 100644 +index 3bac0a8ceab26..b2e932c25be62 100644 --- a/tools/include/uapi/linux/sched.h +++ b/tools/include/uapi/linux/sched.h @@ -132,6 +132,7 @@ struct clone_args { diff --git a/patches/0002-eevdfbore.patch b/patches/0002-eevdfbore.patch index e9c6776..cf8bbf6 100644 --- a/patches/0002-eevdfbore.patch +++ b/patches/0002-eevdfbore.patch @@ -1,21 +1,21 @@ -From 4a346951e2b3c7de65511c95f74fdd7197e3d2e5 Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Tue, 11 Jul 2023 19:31:15 +0200 +From 06140f2f7a609e07d9fc7d1c79343772ead98dbd Mon Sep 17 00:00:00 2001 +From: Piotr Gorski +Date: Sun, 23 Jul 2023 09:44:46 +0200 Subject: [PATCH] bore-eevdf -Signed-off-by: Peter Jung +Signed-off-by: Piotr Gorski --- - include/linux/sched.h | 10 +++ - init/Kconfig | 20 +++++ - kernel/sched/core.c | 62 +++++++++++++ + include/linux/sched.h | 10 ++ + init/Kconfig | 20 ++++ + kernel/sched/core.c | 117 +++++++++++++++++++++++ kernel/sched/debug.c | 4 + - kernel/sched/fair.c | 193 ++++++++++++++++++++++++++++++++++++++-- + kernel/sched/fair.c | 203 ++++++++++++++++++++++++++++++++++++++-- kernel/sched/features.h | 4 + kernel/sched/sched.h | 1 + - 7 files changed, 286 insertions(+), 8 deletions(-) + 7 files changed, 351 insertions(+), 8 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h -index 88c3e7ba8992..6b4c553aea75 100644 +index 88c3e7ba8..6b4c553ae 
100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -560,6 +560,12 @@ struct sched_entity { @@ -43,7 +43,7 @@ index 88c3e7ba8992..6b4c553aea75 100644 /* * 'ptraced' is the list of tasks this task is using ptrace() on. diff --git a/init/Kconfig b/init/Kconfig -index b6d38eccca10..e90546df3182 100644 +index b6d38eccc..e90546df3 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1277,6 +1277,26 @@ config CHECKPOINT_RESTORE @@ -74,16 +74,17 @@ index b6d38eccca10..e90546df3182 100644 bool "Automatic process group scheduling" select CGROUPS diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index df2f22a9729c..4995243a2ba4 100644 +index df2f22a97..df8b76e2c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -4490,6 +4490,57 @@ int wake_up_state(struct task_struct *p, unsigned int state) +@@ -4490,6 +4490,112 @@ int wake_up_state(struct task_struct *p, unsigned int state) return try_to_wake_up(p, state, 0); } +#ifdef CONFIG_SCHED_BORE +#define CHILD_BURST_CUTOFF_BITS 9 +extern unsigned int sched_burst_cache_lifetime; ++extern unsigned int sched_burst_fork_atavistic; + +void __init sched_init_bore(void) { + init_task.child_burst_cache = 0; @@ -93,82 +94,136 @@ index df2f22a9729c..4995243a2ba4 100644 + init_task.se.max_burst_time = 0; +} + -+void inline __sched_fork_bore(struct task_struct *p) { ++void inline sched_fork_bore(struct task_struct *p) { + p->child_burst_cache = 0; + p->child_burst_last_cached = 0; + p->se.burst_time = 0; +} + -+static inline void update_task_child_burst_time_cache(struct task_struct *p) { -+ u64 sum = 0, avg_burst_time = 0; -+ u32 cnt = 0; ++static u32 count_child_tasks(struct task_struct *p) { + struct task_struct *child; ++ u32 cnt = 0; ++ list_for_each_entry(child, &p->children, sibling) {cnt++;} ++ return cnt; ++} ++ ++static inline bool child_burst_cache_expired(struct task_struct *p, u64 now) { ++ return (p->child_burst_last_cached + sched_burst_cache_lifetime < now); ++} ++ ++static void __update_child_burst_cache( ++ struct task_struct *p, u32 cnt, u64 sum, u64 now) { ++ u64 avg = 0; ++ if (cnt) avg = div_u64(sum, cnt) << CHILD_BURST_CUTOFF_BITS; ++ p->child_burst_cache = max(avg, p->se.max_burst_time); ++ p->child_burst_last_cached = now; ++} ++ ++static void update_child_burst_cache(struct task_struct *p, u64 now) { ++ struct task_struct *child; ++ u32 cnt = 0; ++ u64 sum = 0; + -+ read_lock(&tasklist_lock); + list_for_each_entry(child, &p->children, sibling) { + cnt++; + sum += child->se.max_burst_time >> CHILD_BURST_CUTOFF_BITS; + } -+ read_unlock(&tasklist_lock); + -+ if (cnt) avg_burst_time = div_u64(sum, cnt) << CHILD_BURST_CUTOFF_BITS; -+ p->child_burst_cache = max(avg_burst_time, p->se.max_burst_time); ++ __update_child_burst_cache(p, cnt, sum, now); +} + -+static void update_task_initial_burst_time(struct task_struct *task) { -+ struct sched_entity *se = &task->se; -+ struct task_struct *par = task->real_parent; -+ u64 now = ktime_get_ns(); ++static void update_child_burst_cache_atavistic( ++ struct task_struct *p, u64 now, u32 depth, u32 *acnt, u64 *asum) { ++ struct task_struct *child, *dec; ++ u32 cnt = 0, dcnt = 0; ++ u64 sum = 0; + -+ if (likely(par)) { -+ if (par->child_burst_last_cached + sched_burst_cache_lifetime < now) { -+ par->child_burst_last_cached = now; -+ update_task_child_burst_time_cache(par); ++ list_for_each_entry(child, &p->children, sibling) { ++ dec = child; ++ while ((dcnt = count_child_tasks(dec)) == 1) ++ dec = list_first_entry(&dec->children, struct task_struct, sibling); ++ ++ if (!dcnt || 
!depth) { ++ cnt++; ++ sum += dec->se.max_burst_time >> CHILD_BURST_CUTOFF_BITS; ++ } else { ++ if (child_burst_cache_expired(dec, now)) ++ update_child_burst_cache_atavistic(dec, now, depth - 1, &cnt, &sum); ++ else { ++ cnt += dcnt; ++ sum += (dec->child_burst_cache >> CHILD_BURST_CUTOFF_BITS) * dcnt; ++ } + } -+ se->prev_burst_time = max(se->prev_burst_time, par->child_burst_cache); + } + -+ se->max_burst_time = se->prev_burst_time; ++ __update_child_burst_cache(p, cnt, sum, now); ++ *acnt += cnt; ++ *asum += sum; ++} ++ ++static void update_task_initial_burst_time(struct task_struct *p) { ++ struct sched_entity *se = &p->se; ++ struct task_struct *anc = p->real_parent; ++ u64 now = ktime_get_ns(); ++ u32 cnt = 0; ++ u64 sum = 0; ++ ++ read_lock(&tasklist_lock); ++ ++ if (sched_burst_fork_atavistic) { ++ while ((anc->real_parent != anc) && (count_child_tasks(anc) == 1)) ++ anc = anc->real_parent; ++ if (child_burst_cache_expired(anc, now)) ++ update_child_burst_cache_atavistic( ++ anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum); ++ } else ++ if (child_burst_cache_expired(anc, now)) ++ update_child_burst_cache(anc, now); ++ ++ read_unlock(&tasklist_lock); ++ ++ se->max_burst_time = se->prev_burst_time = ++ max(se->prev_burst_time, anc->child_burst_cache); +} +#endif // CONFIG_SCHED_BORE + /* * Perform scheduler related setup for a newly forked process p. * p is forked by current. -@@ -4506,6 +4557,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) +@@ -4506,6 +4612,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->se.prev_sum_exec_runtime = 0; p->se.nr_migrations = 0; p->se.vruntime = 0; +#ifdef CONFIG_SCHED_BORE -+ __sched_fork_bore(p); ++ sched_fork_bore(p); +#endif // CONFIG_SCHED_BORE p->se.vlag = 0; INIT_LIST_HEAD(&p->se.group_node); -@@ -4735,6 +4789,9 @@ late_initcall(sched_core_sysctl_init); - int sched_fork(unsigned long clone_flags, struct task_struct *p) +@@ -4827,6 +4936,9 @@ void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) + + void sched_post_fork(struct task_struct *p) { - __sched_fork(clone_flags, p); +#ifdef CONFIG_SCHED_BORE + update_task_initial_burst_time(p); +#endif // CONFIG_SCHED_BORE - /* - * We mark the process as NEW here. 
This guarantees that - * nobody will actually run it, and a signal or other external -@@ -9968,6 +10025,11 @@ void __init sched_init(void) + uclamp_post_fork(p); + } + +@@ -9968,6 +10080,11 @@ void __init sched_init(void) BUG_ON(&dl_sched_class != &stop_sched_class + 1); #endif +#ifdef CONFIG_SCHED_BORE + sched_init_bore(); -+ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 2.4.2 by Masahito Suzuki"); ++ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 2.5.3 by Masahito Suzuki"); +#endif // CONFIG_SCHED_BORE + wait_bit_init(); #ifdef CONFIG_FAIR_GROUP_SCHED diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index 5c743bcb340d..755ef4c8d34b 100644 +index 5c743bcb3..755ef4c8d 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -348,6 +348,7 @@ static __init int sched_init_debug(void) @@ -190,7 +245,7 @@ index 5c743bcb340d..755ef4c8d34b 100644 SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); #endif diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 36dcf4770830..30080b227866 100644 +index 15167f12b..51f1b7a67 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -19,6 +19,9 @@ @@ -226,7 +281,7 @@ index 36dcf4770830..30080b227866 100644 /* * After fork, child runs first. If set to 0 (default) then -@@ -84,8 +87,75 @@ static unsigned int normalized_sysctl_sched_base_slice = 750000ULL; +@@ -84,8 +87,76 @@ static unsigned int normalized_sysctl_sched_base_slice = 750000ULL; */ unsigned int sysctl_sched_child_runs_first __read_mostly; @@ -246,10 +301,11 @@ index 36dcf4770830..30080b227866 100644 +#ifdef CONFIG_SCHED_BORE +unsigned int __read_mostly sched_bore = 1; -+unsigned int __read_mostly sched_burst_cache_lifetime = 15000000; -+unsigned int __read_mostly sched_burst_penalty_offset = 18; ++unsigned int __read_mostly sched_burst_cache_lifetime = 60000000; ++unsigned int __read_mostly sched_burst_penalty_offset = 12; +unsigned int __read_mostly sched_burst_penalty_scale = 1292; -+unsigned int __read_mostly sched_burst_smoothness = 1; ++unsigned int __read_mostly sched_burst_smoothness = 2; ++unsigned int __read_mostly sched_burst_fork_atavistic = 2; +static int three = 3; +static int sixty_four = 64; +static int maxval_12_bits = 4095; @@ -279,7 +335,7 @@ index 36dcf4770830..30080b227866 100644 +} + +static inline u64 __binary_smooth(u64 new, u64 old, unsigned int smoothness) { -+ return (new + old * ((1 << smoothness) - 1)) >> smoothness; ++ return (new <= old)? 
new: (new + old * ((1 << smoothness) - 1)) >> smoothness; +} + +void restart_burst(struct sched_entity *se) { @@ -302,7 +358,7 @@ index 36dcf4770830..30080b227866 100644 int sched_thermal_decay_shift; static int __init setup_sched_thermal_decay_shift(char *str) { -@@ -145,6 +215,51 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536; +@@ -145,6 +216,60 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536; #ifdef CONFIG_SYSCTL static struct ctl_table sched_fair_sysctls[] = { @@ -324,6 +380,15 @@ index 36dcf4770830..30080b227866 100644 + .proc_handler = proc_dointvec, + }, + { ++ .procname = "sched_burst_fork_atavistic", ++ .data = &sched_burst_fork_atavistic, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = &three, ++ }, ++ { + .procname = "sched_burst_penalty_offset", + .data = &sched_burst_penalty_offset, + .maxlen = sizeof(unsigned int), @@ -354,7 +419,7 @@ index 36dcf4770830..30080b227866 100644 { .procname = "sched_child_runs_first", .data = &sysctl_sched_child_runs_first, -@@ -238,6 +353,7 @@ static void update_sysctl(void) +@@ -238,6 +363,7 @@ static void update_sysctl(void) #define SET_SYSCTL(name) \ (sysctl_##name = (factor) * normalized_sysctl_##name) SET_SYSCTL(sched_base_slice); @@ -362,7 +427,7 @@ index 36dcf4770830..30080b227866 100644 #undef SET_SYSCTL } -@@ -308,11 +424,19 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight +@@ -308,11 +434,19 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight /* * delta /= w */ @@ -382,7 +447,7 @@ index 36dcf4770830..30080b227866 100644 return delta; } -@@ -708,7 +832,11 @@ void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) +@@ -708,7 +842,11 @@ void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) SCHED_WARN_ON(!se->on_rq); lag = avg_vruntime(cfs_rq) - se->vruntime; @@ -394,7 +459,7 @@ index 36dcf4770830..30080b227866 100644 se->vlag = clamp(lag, -limit, limit); } -@@ -946,6 +1074,7 @@ int sched_update_scaling(void) +@@ -946,6 +1084,7 @@ int sched_update_scaling(void) #define WRT_SYSCTL(name) \ (normalized_sysctl_##name = sysctl_##name / (factor)) WRT_SYSCTL(sched_base_slice); @@ -402,7 +467,7 @@ index 36dcf4770830..30080b227866 100644 #undef WRT_SYSCTL return 0; -@@ -1123,6 +1252,11 @@ static void update_curr(struct cfs_rq *cfs_rq) +@@ -1123,6 +1262,11 @@ static void update_curr(struct cfs_rq *cfs_rq) curr->sum_exec_runtime += delta_exec; schedstat_add(cfs_rq->exec_clock, delta_exec); @@ -414,7 +479,7 @@ index 36dcf4770830..30080b227866 100644 curr->vruntime += calc_delta_fair(delta_exec, curr); update_deadline(cfs_rq, curr); update_min_vruntime(cfs_rq); -@@ -5237,6 +5371,9 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) +@@ -5237,6 +5381,9 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) se->prev_sum_exec_runtime = se->sum_exec_runtime; } @@ -424,7 +489,7 @@ index 36dcf4770830..30080b227866 100644 /* * Pick the next process, keeping these things in mind, in this order: * 1) keep things fair between processes/task groups -@@ -5247,14 +5384,16 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) +@@ -5247,14 +5394,16 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) static struct sched_entity * pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) { @@ -443,7 +508,7 @@ index 36dcf4770830..30080b227866 100644 } static bool 
check_cfs_rq_runtime(struct cfs_rq *cfs_rq); -@@ -6504,6 +6643,38 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) +@@ -6522,6 +6671,38 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) hrtick_update(rq); } @@ -482,7 +547,7 @@ index 36dcf4770830..30080b227866 100644 static void set_next_buddy(struct sched_entity *se); /* -@@ -6522,6 +6693,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) +@@ -6540,6 +6721,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) util_est_dequeue(&rq->cfs, p); for_each_sched_entity(se) { @@ -492,7 +557,7 @@ index 36dcf4770830..30080b227866 100644 cfs_rq = cfs_rq_of(se); dequeue_entity(cfs_rq, se, flags); -@@ -8012,7 +8186,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ +@@ -8030,7 +8214,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ /* * XXX pick_eevdf(cfs_rq) != se ? */ @@ -501,7 +566,7 @@ index 36dcf4770830..30080b227866 100644 goto preempt; return; -@@ -8225,6 +8399,9 @@ static void yield_task_fair(struct rq *rq) +@@ -8243,6 +8427,9 @@ static void yield_task_fair(struct rq *rq) struct task_struct *curr = rq->curr; struct cfs_rq *cfs_rq = task_cfs_rq(curr); struct sched_entity *se = &curr->se; @@ -512,7 +577,7 @@ index 36dcf4770830..30080b227866 100644 /* * Are we the only task in the tree? diff --git a/kernel/sched/features.h b/kernel/sched/features.h -index ca95044a7479..a7d34d1b28c5 100644 +index ca95044a7..a7d34d1b2 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -13,7 +13,11 @@ SCHED_FEAT(PLACE_DEADLINE_INITIAL, true) @@ -528,10 +593,10 @@ index ca95044a7479..a7d34d1b28c5 100644 /* * Consider buddies to be cache hot, decreases the likeliness of a diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index 96b1ae519f20..cc0a17fb23c2 100644 +index abf5a48b5..a9f9e80a1 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -2482,6 +2482,7 @@ extern const_debug unsigned int sysctl_sched_nr_migrate; +@@ -2504,6 +2504,7 @@ extern const_debug unsigned int sysctl_sched_nr_migrate; extern const_debug unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_base_slice; @@ -540,4 +605,4 @@ index 96b1ae519f20..cc0a17fb23c2 100644 #ifdef CONFIG_SCHED_DEBUG extern int sysctl_resched_latency_warn_ms; -- -2.41.0 +2.41.0.159.g0bfa463d37 diff --git a/patches/0003-bcachefs.patch b/patches/0003-bcachefs.patch index 58bd87c..d7ad7bd 100644 --- a/patches/0003-bcachefs.patch +++ b/patches/0003-bcachefs.patch @@ -1,13 +1,13 @@ -From 53d26f1e843c6117e14bf9d0b41ca7f986f4ff5b Mon Sep 17 00:00:00 2001 +From 5f9d0663e5c9895cfa7238b3456e2a268daf5878 Mon Sep 17 00:00:00 2001 From: Piotr Gorski -Date: Sun, 16 Jul 2023 11:24:25 +0200 +Date: Fri, 21 Jul 2023 08:07:37 +0200 Subject: [PATCH] bcachefs Signed-off-by: Piotr Gorski --- Documentation/admin-guide/sysctl/vm.rst | 16 + Documentation/filesystems/proc.rst | 28 + - MAINTAINERS | 55 + + MAINTAINERS | 56 + arch/arm64/include/asm/spectre.h | 4 +- arch/powerpc/mm/book3s64/radix_pgtable.c | 2 +- arch/x86/kernel/amd_gart_64.c | 2 +- @@ -41,10 +41,10 @@ Signed-off-by: Piotr Gorski fs/bcachefs/alloc_foreground.c | 1536 +++++++ fs/bcachefs/alloc_foreground.h | 224 + fs/bcachefs/alloc_types.h | 126 + - fs/bcachefs/backpointers.c | 889 ++++ + fs/bcachefs/backpointers.c | 873 ++++ fs/bcachefs/backpointers.h | 131 + fs/bcachefs/bbpos.h | 48 + - fs/bcachefs/bcachefs.h | 1185 +++++ + fs/bcachefs/bcachefs.h | 1201 
+++++ fs/bcachefs/bcachefs_format.h | 2319 ++++++++++ fs/bcachefs/bcachefs_ioctl.h | 368 ++ fs/bcachefs/bkey.c | 1063 +++++ @@ -52,16 +52,16 @@ Signed-off-by: Piotr Gorski fs/bcachefs/bkey_buf.h | 61 + fs/bcachefs/bkey_cmp.h | 129 + fs/bcachefs/bkey_methods.c | 519 +++ - fs/bcachefs/bkey_methods.h | 191 + + fs/bcachefs/bkey_methods.h | 193 + fs/bcachefs/bkey_sort.c | 201 + fs/bcachefs/bkey_sort.h | 44 + fs/bcachefs/bset.c | 1587 +++++++ fs/bcachefs/bset.h | 541 +++ fs/bcachefs/btree_cache.c | 1277 ++++++ fs/bcachefs/btree_cache.h | 130 + - fs/bcachefs/btree_gc.c | 2144 +++++++++ - fs/bcachefs/btree_gc.h | 112 + - fs/bcachefs/btree_io.c | 2266 ++++++++++ + fs/bcachefs/btree_gc.c | 2126 +++++++++ + fs/bcachefs/btree_gc.h | 113 + + fs/bcachefs/btree_io.c | 2267 ++++++++++ fs/bcachefs/btree_io.h | 228 + fs/bcachefs/btree_iter.c | 3214 +++++++++++++ fs/bcachefs/btree_iter.h | 924 ++++ @@ -69,16 +69,16 @@ Signed-off-by: Piotr Gorski fs/bcachefs/btree_key_cache.h | 48 + fs/bcachefs/btree_locking.c | 797 ++++ fs/bcachefs/btree_locking.h | 424 ++ - fs/bcachefs/btree_types.h | 742 +++ - fs/bcachefs/btree_update.h | 357 ++ + fs/bcachefs/btree_types.h | 743 +++ + fs/bcachefs/btree_update.h | 352 ++ fs/bcachefs/btree_update_interior.c | 2488 ++++++++++ - fs/bcachefs/btree_update_interior.h | 328 ++ - fs/bcachefs/btree_update_leaf.c | 2065 +++++++++ - fs/bcachefs/btree_write_buffer.c | 346 ++ + fs/bcachefs/btree_update_interior.h | 337 ++ + fs/bcachefs/btree_update_leaf.c | 2097 +++++++++ + fs/bcachefs/btree_write_buffer.c | 372 ++ fs/bcachefs/btree_write_buffer.h | 14 + fs/bcachefs/btree_write_buffer_types.h | 44 + - fs/bcachefs/buckets.c | 2171 +++++++++ - fs/bcachefs/buckets.h | 357 ++ + fs/bcachefs/buckets.c | 2106 +++++++++ + fs/bcachefs/buckets.h | 368 ++ fs/bcachefs/buckets_types.h | 92 + fs/bcachefs/buckets_waiting_for_journal.c | 166 + fs/bcachefs/buckets_waiting_for_journal.h | 15 + @@ -90,7 +90,7 @@ Signed-off-by: Piotr Gorski fs/bcachefs/clock.c | 193 + fs/bcachefs/clock.h | 38 + fs/bcachefs/clock_types.h | 37 + - fs/bcachefs/compress.c | 712 +++ + fs/bcachefs/compress.c | 713 +++ fs/bcachefs/compress.h | 55 + fs/bcachefs/counters.c | 107 + fs/bcachefs/counters.h | 17 + @@ -108,7 +108,7 @@ Signed-off-by: Piotr Gorski fs/bcachefs/ec_types.h | 41 + fs/bcachefs/errcode.c | 63 + fs/bcachefs/errcode.h | 246 + - fs/bcachefs/error.c | 297 ++ + fs/bcachefs/error.c | 294 ++ fs/bcachefs/error.h | 206 + fs/bcachefs/extent_update.c | 173 + fs/bcachefs/extent_update.h | 12 + @@ -125,11 +125,11 @@ Signed-off-by: Piotr Gorski fs/bcachefs/fs-ioctl.h | 81 + fs/bcachefs/fs.c | 1943 ++++++++ fs/bcachefs/fs.h | 208 + - fs/bcachefs/fsck.c | 2452 ++++++++++ + fs/bcachefs/fsck.c | 2471 ++++++++++ fs/bcachefs/fsck.h | 14 + - fs/bcachefs/inode.c | 872 ++++ - fs/bcachefs/inode.h | 196 + - fs/bcachefs/io.c | 3056 +++++++++++++ + fs/bcachefs/inode.c | 925 ++++ + fs/bcachefs/inode.h | 201 + + fs/bcachefs/io.c | 3059 +++++++++++++ fs/bcachefs/io.h | 202 + fs/bcachefs/io_types.h | 165 + fs/bcachefs/journal.c | 1438 ++++++ @@ -146,8 +146,8 @@ Signed-off-by: Piotr Gorski fs/bcachefs/keylist.c | 52 + fs/bcachefs/keylist.h | 74 + fs/bcachefs/keylist_types.h | 16 + - fs/bcachefs/lru.c | 178 + - fs/bcachefs/lru.h | 64 + + fs/bcachefs/lru.c | 162 + + fs/bcachefs/lru.h | 69 + fs/bcachefs/migrate.c | 182 + fs/bcachefs/migrate.h | 7 + fs/bcachefs/move.c | 1168 +++++ @@ -168,7 +168,7 @@ Signed-off-by: Piotr Gorski fs/bcachefs/rebalance.c | 364 ++ fs/bcachefs/rebalance.h | 28 + fs/bcachefs/rebalance_types.h | 26 + - 
fs/bcachefs/recovery.c | 1669 +++++++ + fs/bcachefs/recovery.c | 1670 +++++++ fs/bcachefs/recovery.h | 60 + fs/bcachefs/reflink.c | 399 ++ fs/bcachefs/reflink.h | 81 + @@ -179,12 +179,12 @@ Signed-off-by: Piotr Gorski fs/bcachefs/siphash.c | 173 + fs/bcachefs/siphash.h | 87 + fs/bcachefs/str_hash.h | 370 ++ - fs/bcachefs/subvolume.c | 1734 +++++++ - fs/bcachefs/subvolume.h | 251 ++ + fs/bcachefs/subvolume.c | 1749 ++++++++ + fs/bcachefs/subvolume.h | 258 ++ fs/bcachefs/subvolume_types.h | 31 + - fs/bcachefs/super-io.c | 1711 +++++++ + fs/bcachefs/super-io.c | 1714 +++++++ fs/bcachefs/super-io.h | 142 + - fs/bcachefs/super.c | 2006 +++++++++ + fs/bcachefs/super.c | 2007 +++++++++ fs/bcachefs/super.h | 266 ++ fs/bcachefs/super_types.h | 51 + fs/bcachefs/sysfs.c | 1064 +++++ @@ -313,7 +313,7 @@ Signed-off-by: Piotr Gorski scripts/Makefile.lib | 2 +- scripts/kallsyms.c | 13 + scripts/module.lds.S | 7 + - 308 files changed, 96643 insertions(+), 930 deletions(-) + 308 files changed, 96733 insertions(+), 930 deletions(-) create mode 100644 fs/bcachefs/Kconfig create mode 100644 fs/bcachefs/Makefile create mode 100644 fs/bcachefs/acl.c @@ -583,15 +583,16 @@ index 7897a7daf..810f851e6 100644 ~~~~~~~ diff --git a/MAINTAINERS b/MAINTAINERS -index 35e195946..48763cc35 100644 +index 35e195946..22c57b3bc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -3522,6 +3522,13 @@ W: http://bcache.evilpiepirate.org +@@ -3522,6 +3522,14 @@ W: http://bcache.evilpiepirate.org C: irc://irc.oftc.net/bcache F: drivers/md/bcache/ +BCACHEFS +M: Kent Overstreet ++R: Brian Foster +L: linux-bcachefs@vger.kernel.org +S: Supported +C: irc://irc.oftc.net/bcache @@ -600,7 +601,7 @@ index 35e195946..48763cc35 100644 BDISP ST MEDIA DRIVER M: Fabien Dessenne L: linux-media@vger.kernel.org -@@ -5064,6 +5071,14 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core +@@ -5064,6 +5072,14 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core F: Documentation/devicetree/bindings/timer/ F: drivers/clocksource/ @@ -615,7 +616,7 @@ index 35e195946..48763cc35 100644 CMPC ACPI DRIVER M: Thadeu Lima de Souza Cascardo M: Daniel Oliveira Nascimento -@@ -5114,6 +5129,13 @@ S: Supported +@@ -5114,6 +5130,13 @@ S: Supported F: Documentation/process/code-of-conduct-interpretation.rst F: Documentation/process/code-of-conduct.rst @@ -629,7 +630,7 @@ index 35e195946..48763cc35 100644 COMEDI DRIVERS M: Ian Abbott M: H Hartley Sweeten -@@ -8662,6 +8684,13 @@ F: Documentation/devicetree/bindings/power/power?domain* +@@ -8662,6 +8685,13 @@ F: Documentation/devicetree/bindings/power/power?domain* F: drivers/base/power/domain*.c F: include/linux/pm_domain.h @@ -643,7 +644,7 @@ index 35e195946..48763cc35 100644 GENERIC RESISTIVE TOUCHSCREEN ADC DRIVER M: Eugen Hristev L: linux-input@vger.kernel.org -@@ -12850,6 +12879,15 @@ F: Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt +@@ -12850,6 +12880,15 @@ F: Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt F: drivers/net/ieee802154/mcr20a.c F: drivers/net/ieee802154/mcr20a.h @@ -659,7 +660,7 @@ index 35e195946..48763cc35 100644 MEASUREMENT COMPUTING CIO-DAC IIO DRIVER M: William Breathitt Gray L: linux-iio@vger.kernel.org -@@ -13489,6 +13527,15 @@ F: mm/memblock.c +@@ -13489,6 +13528,15 @@ F: mm/memblock.c F: mm/mm_init.c F: tools/testing/memblock/ @@ -675,7 +676,7 @@ index 35e195946..48763cc35 100644 MEMORY CONTROLLER DRIVERS M: Krzysztof Kozlowski L: linux-kernel@vger.kernel.org -@@ -19376,6 +19423,14 @@ S: Maintained +@@ -19376,6 +19424,14 
@@ S: Maintained W: http://www.winischhofer.at/linuxsisusbvga.shtml F: drivers/usb/misc/sisusbvga/ @@ -706,7 +707,7 @@ index db7b371b3..31823d971 100644 #include #include diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c -index 2297aa764..4f8d43b74 100644 +index e8db8c8ef..1a3bd656f 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -261,7 +261,7 @@ print_mapping(unsigned long start, unsigned long end, unsigned long size, bool e @@ -809,7 +810,7 @@ index 043944fd4..70b5c987b 100644 static inline bool bio_remaining_done(struct bio *bio) { diff --git a/block/blk-core.c b/block/blk-core.c -index 1da77e7d6..b7b0237c3 100644 +index 3fc68b944..1f23abb7d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -205,6 +205,7 @@ const char *blk_status_to_str(blk_status_t status) @@ -936,7 +937,7 @@ index aebb7ef10..c8b4914ad 100644 struct bucket { atomic_t pin; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c -index 7e9d19fd2..35c701d54 100644 +index 077149c40..d43079d45 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2911,7 +2911,6 @@ static int __init bcache_init(void) @@ -969,10 +970,10 @@ index 6f3cb7c92..f61ab1bad 100644 #ifdef CONFIG_BCACHE_DEBUG diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c -index d920c4178..ae9ab7816 100644 +index e46330815..b5dfaf680 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c -@@ -2503,7 +2503,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, +@@ -2509,7 +2509,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, blk_queue_write_cache(md->queue.queue, cache_enabled, fua_enabled); @@ -981,7 +982,7 @@ index d920c4178..ae9ab7816 100644 cap_str, sizeof(cap_str)); pr_info("%s: %s %s %s %s\n", md->disk->disk_name, mmc_card_id(card), mmc_card_name(card), -@@ -2699,7 +2699,7 @@ static int mmc_blk_alloc_rpmb_part(struct mmc_card *card, +@@ -2705,7 +2705,7 @@ static int mmc_blk_alloc_rpmb_part(struct mmc_card *card, list_add(&rpmb->node, &md->rpmbs); @@ -6257,10 +6258,10 @@ index 000000000..c33a29954 +#endif /* _BCACHEFS_ALLOC_TYPES_H */ diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c new file mode 100644 -index 000000000..d412bae55 +index 000000000..8747c5e19 --- /dev/null +++ b/fs/bcachefs/backpointers.c -@@ -0,0 +1,889 @@ +@@ -0,0 +1,873 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" +#include "bbpos.h" @@ -6397,31 +6398,15 @@ index 000000000..d412bae55 +} + +int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, -+ struct bpos bucket, ++ struct bkey_i_backpointer *bp_k, + struct bch_backpointer bp, + struct bkey_s_c orig_k, + bool insert) +{ -+ struct bch_fs *c = trans->c; -+ struct bkey_i_backpointer *bp_k; + struct btree_iter bp_iter; + struct bkey_s_c k; + int ret; + -+ bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer)); -+ ret = PTR_ERR_OR_ZERO(bp_k); -+ if (ret) -+ return ret; -+ -+ bkey_backpointer_init(&bp_k->k_i); -+ bp_k->k.p = bucket_pos_to_bp(c, bucket, bp.bucket_offset); -+ bp_k->v = bp; -+ -+ if (!insert) { -+ bp_k->k.type = KEY_TYPE_deleted; -+ set_bkey_val_u64s(&bp_k->k, 0); -+ } -+ + k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, + bp_k->k.p, + BTREE_ITER_INTENT| @@ -6740,7 +6725,7 @@ index 000000000..d412bae55 + prt_printf(&buf, "\nbp pos "); + bch2_bpos_to_text(&buf, bp_iter.pos); + -+ if (c->sb.version < 
bcachefs_metadata_version_backpointers || ++ if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointers || + c->opts.reconstruct_alloc || + fsck_err(c, "%s", buf.buf)) + ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true); @@ -7152,7 +7137,7 @@ index 000000000..d412bae55 +} diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h new file mode 100644 -index 000000000..87e31aa19 +index 000000000..547e06176 --- /dev/null +++ b/fs/bcachefs/backpointers.h @@ -0,0 +1,131 @@ @@ -7212,7 +7197,7 @@ index 000000000..87e31aa19 + return ret; +} + -+int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bpos, ++int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bkey_i_backpointer *, + struct bch_backpointer, struct bkey_s_c, bool); + +static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans, @@ -7225,9 +7210,6 @@ index 000000000..87e31aa19 + struct bkey_i_backpointer *bp_k; + int ret; + -+ if (unlikely(bch2_backpointers_no_use_write_buffer)) -+ return bch2_bucket_backpointer_mod_nowritebuffer(trans, bucket, bp, orig_k, insert); -+ + bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer)); + ret = PTR_ERR_OR_ZERO(bp_k); + if (ret) @@ -7242,6 +7224,9 @@ index 000000000..87e31aa19 + set_bkey_val_u64s(&bp_k->k, 0); + } + ++ if (unlikely(bch2_backpointers_no_use_write_buffer)) ++ return bch2_bucket_backpointer_mod_nowritebuffer(trans, bp_k, bp, orig_k, insert); ++ + return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k->k_i); +} + @@ -7343,10 +7328,10 @@ index 000000000..1fbed1f83 +#endif /* _BCACHEFS_BBPOS_H */ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h new file mode 100644 -index 000000000..445d010c8 +index 000000000..82b0706a8 --- /dev/null +++ b/fs/bcachefs/bcachefs.h -@@ -0,0 +1,1185 @@ +@@ -0,0 +1,1201 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_H +#define _BCACHEFS_H @@ -7912,7 +7897,6 @@ index 000000000..445d010c8 + BCH_FS_CLEAN_SHUTDOWN, + + /* fsck passes: */ -+ BCH_FS_TOPOLOGY_REPAIR_DONE, + BCH_FS_FSCK_DONE, + BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */ + BCH_FS_NEED_ANOTHER_GC, @@ -8015,6 +7999,7 @@ index 000000000..445d010c8 + x(stripes_read, PASS_ALWAYS) \ + x(initialize_subvolumes, 0) \ + x(snapshots_read, PASS_ALWAYS) \ ++ x(check_topology, 0) \ + x(check_allocations, PASS_FSCK) \ + x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \ + x(journal_replay, PASS_ALWAYS) \ @@ -8026,11 +8011,11 @@ index 000000000..445d010c8 + x(check_alloc_to_lru_refs, PASS_FSCK) \ + x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \ + x(bucket_gens_init, 0) \ -+ x(fs_upgrade_for_subvolumes, 0) \ + x(check_snapshot_trees, PASS_FSCK) \ + x(check_snapshots, PASS_FSCK) \ + x(check_subvols, PASS_FSCK) \ -+ x(delete_dead_snapshots, PASS_FSCK|PASS_UNCLEAN|PASS_SILENT) \ ++ x(delete_dead_snapshots, PASS_FSCK|PASS_UNCLEAN) \ ++ x(fs_upgrade_for_subvolumes, 0) \ + x(check_inodes, PASS_FSCK|PASS_UNCLEAN) \ + x(check_extents, PASS_FSCK) \ + x(check_dirents, PASS_FSCK) \ @@ -8528,6 +8513,22 @@ index 000000000..445d010c8 + return dev < c->sb.nr_devices && c->devs[dev]; +} + ++/* ++ * For when we need to rewind recovery passes and run a pass we skipped: ++ */ ++static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c, ++ enum bch_recovery_pass pass) ++{ ++ c->recovery_passes_explicit |= BIT_ULL(pass); ++ ++ if (c->curr_recovery_pass >= pass) { ++ c->curr_recovery_pass = pass; ++ return -BCH_ERR_restart_recovery; ++ } else { ++ 
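/* pass not reached yet - it will run at its normal point in the sequence */
++		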
return 0; ++ } ++} ++ +#define BKEY_PADDED_ONSTACK(key, pad) \ + struct { struct bkey_i key; __u64 key ## _pad[pad]; } + @@ -13809,10 +13810,10 @@ index 000000000..1381166bf +} diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h new file mode 100644 -index 000000000..0f3dc156a +index 000000000..f4e60d2e6 --- /dev/null +++ b/fs/bcachefs/bkey_methods.h -@@ -0,0 +1,191 @@ +@@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BKEY_METHODS_H +#define _BCACHEFS_BKEY_METHODS_H @@ -13912,6 +13913,7 @@ index 000000000..0f3dc156a +enum btree_update_flags { + __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END, + __BTREE_UPDATE_NOJOURNAL, ++ __BTREE_UPDATE_PREJOURNAL, + __BTREE_UPDATE_KEY_CACHE_RECLAIM, + + __BTREE_TRIGGER_NORUN, /* Don't run triggers at all */ @@ -13926,6 +13928,7 @@ index 000000000..0f3dc156a + +#define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) +#define BTREE_UPDATE_NOJOURNAL (1U << __BTREE_UPDATE_NOJOURNAL) ++#define BTREE_UPDATE_PREJOURNAL (1U << __BTREE_UPDATE_PREJOURNAL) +#define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM) + +#define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN) @@ -17822,10 +17825,10 @@ index 000000000..00c9b9218 +#endif /* _BCACHEFS_BTREE_CACHE_H */ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c new file mode 100644 -index 000000000..be537b237 +index 000000000..49e9822dd --- /dev/null +++ b/fs/bcachefs/btree_gc.c -@@ -0,0 +1,2144 @@ +@@ -0,0 +1,2126 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2010 Kent Overstreet @@ -17868,6 +17871,12 @@ index 000000000..be537b237 +#define DROP_THIS_NODE 10 +#define DROP_PREV_NODE 11 + ++static bool should_restart_for_topology_repair(struct bch_fs *c) ++{ ++ return c->opts.fix_errors != FSCK_FIX_no && ++ !(c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology)); ++} ++ +static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) +{ + preempt_disable(); @@ -17924,9 +17933,9 @@ index 000000000..be537b237 + " cur %s", + bch2_btree_ids[b->c.btree_id], b->c.level, + buf1.buf, buf2.buf) && -+ !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) { ++ should_restart_for_topology_repair(c)) { + bch_info(c, "Halting mark and sweep to start topology repair pass"); -+ ret = -BCH_ERR_need_topology_repair; ++ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); + goto err; + } else { + set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); @@ -17952,9 +17961,9 @@ index 000000000..be537b237 + " expected %s", + bch2_btree_ids[b->c.btree_id], b->c.level, + buf1.buf, buf2.buf) && -+ !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) { ++ should_restart_for_topology_repair(c)) { + bch_info(c, "Halting mark and sweep to start topology repair pass"); -+ ret = -BCH_ERR_need_topology_repair; ++ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); + goto err; + } else { + set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); @@ -18348,7 +18357,7 @@ index 000000000..be537b237 + return ret; +} + -+static int bch2_repair_topology(struct bch_fs *c) ++int bch2_check_topology(struct bch_fs *c) +{ + struct btree_trans trans; + struct btree *b; @@ -18797,9 +18806,9 @@ index 000000000..be537b237 + b->c.level - 1, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur.k)), buf.buf)) && -+ !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) { -+ ret = -BCH_ERR_need_topology_repair; ++ should_restart_for_topology_repair(c)) { + 
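/* rewind recovery to run the check_topology pass rather than failing outright */
+ 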
bch_info(c, "Halting mark and sweep to start topology repair pass"); ++ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); + goto fsck_err; + } else { + /* Continue marking when opted to not @@ -19636,32 +19645,8 @@ index 000000000..be537b237 + + bch2_mark_superblocks(c); + -+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) || -+ (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb) && -+ c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations && -+ c->opts.fix_errors != FSCK_FIX_no)) { -+ bch_info(c, "Starting topology repair pass"); -+ ret = bch2_repair_topology(c); -+ if (ret) -+ goto out; -+ bch_info(c, "Topology repair pass done"); -+ -+ set_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags); -+ } -+ + ret = bch2_gc_btrees(c, initial, metadata_only); + -+ if (ret == -BCH_ERR_need_topology_repair && -+ !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags) && -+ c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations) { -+ set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); -+ SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, true); -+ ret = 0; -+ } -+ -+ if (ret == -BCH_ERR_need_topology_repair) -+ ret = -BCH_ERR_fsck_errors_not_fixed; -+ + if (ret) + goto out; + @@ -19972,16 +19957,17 @@ index 000000000..be537b237 +} diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h new file mode 100644 -index 000000000..95d803b57 +index 000000000..402c69184 --- /dev/null +++ b/fs/bcachefs/btree_gc.h -@@ -0,0 +1,112 @@ +@@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BTREE_GC_H +#define _BCACHEFS_BTREE_GC_H + +#include "btree_types.h" + ++int bch2_check_topology(struct bch_fs *); +int bch2_gc(struct bch_fs *, bool, bool); +int bch2_gc_gens(struct bch_fs *); +void bch2_gc_thread_stop(struct bch_fs *); @@ -20090,10 +20076,10 @@ index 000000000..95d803b57 +#endif /* _BCACHEFS_BTREE_GC_H */ diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c new file mode 100644 -index 000000000..fa1229eb1 +index 000000000..c049876ee --- /dev/null +++ b/fs/bcachefs/btree_io.c -@@ -0,0 +1,2266 @@ +@@ -0,0 +1,2267 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -20708,7 +20694,7 @@ index 000000000..fa1229eb1 + case BTREE_ERR_BAD_NODE: + bch2_print_string_as_lines(KERN_ERR, out.buf); + bch2_topology_error(c); -+ ret = -BCH_ERR_need_topology_repair; ++ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: -EIO; + break; + case BTREE_ERR_INCOMPATIBLE: + bch2_print_string_as_lines(KERN_ERR, out.buf); @@ -21664,7 +21650,8 @@ index 000000000..fa1229eb1 + btree_pos_to_text(&buf, c, b); + bch_err(c, "%s", buf.buf); + -+ if (test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) ++ if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology) && ++ c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) + bch2_fatal_error(c); + + set_btree_node_read_error(b); @@ -29127,10 +29114,10 @@ index 000000000..f3e58aa27 +#endif /* _BCACHEFS_BTREE_LOCKING_H */ diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h new file mode 100644 -index 000000000..4efc69492 +index 000000000..d95360160 --- /dev/null +++ b/fs/bcachefs/btree_types.h -@@ -0,0 +1,742 @@ +@@ -0,0 +1,743 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BTREE_TYPES_H +#define _BCACHEFS_BTREE_TYPES_H @@ -29523,6 +29510,7 @@ index 000000000..4efc69492 + u8 old_btree_u64s; + struct bkey_i *k; + struct btree_path *path; ++ u64 seq; + /* key being overwritten: */ + struct bkey old_k; + const struct bch_val *old_v; @@ -29875,10 +29863,10 @@ index 000000000..4efc69492 
+#endif /* _BCACHEFS_BTREE_TYPES_H */ diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h new file mode 100644 -index 000000000..f794c9d10 +index 000000000..d6aec9341 --- /dev/null +++ b/fs/bcachefs/btree_update.h -@@ -0,0 +1,357 @@ +@@ -0,0 +1,352 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BTREE_UPDATE_H +#define _BCACHEFS_BTREE_UPDATE_H @@ -29953,14 +29941,7 @@ index 000000000..f794c9d10 +int bch2_btree_delete_range(struct bch_fs *, enum btree_id, + struct bpos, struct bpos, unsigned, u64 *); + -+int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *, -+ struct btree *, unsigned); -+void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *); -+int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *, -+ struct btree *, struct bkey_i *, -+ unsigned, bool); -+int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *, -+ struct bkey_i *, unsigned, bool); ++int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool); + +int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id, + struct bpos, struct bpos); @@ -29992,6 +29973,8 @@ index 000000000..f794c9d10 + +int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *, + struct bkey_i *, enum btree_update_flags); ++int __must_check bch2_trans_update_seq(struct btree_trans *, u64, struct btree_iter *, ++ struct bkey_i *, enum btree_update_flags); +int __must_check bch2_trans_update_buffered(struct btree_trans *, + enum btree_id, struct bkey_i *); + @@ -32732,10 +32715,10 @@ index 000000000..3659b2c08 +} diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h new file mode 100644 -index 000000000..221b7ad5d +index 000000000..5e0a467fe --- /dev/null +++ b/fs/bcachefs/btree_update_interior.h -@@ -0,0 +1,328 @@ +@@ -0,0 +1,337 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H +#define _BCACHEFS_BTREE_UPDATE_INTERIOR_H @@ -32892,6 +32875,15 @@ index 000000000..221b7ad5d + btree_next_sib); +} + ++int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *, ++ struct btree *, unsigned); ++void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *); ++int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *, ++ struct btree *, struct bkey_i *, ++ unsigned, bool); ++int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *, ++ struct bkey_i *, unsigned, bool); ++ +void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *); +void bch2_btree_root_alloc(struct bch_fs *, enum btree_id); + @@ -33066,10 +33058,10 @@ index 000000000..221b7ad5d +#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */ diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c new file mode 100644 -index 000000000..3638cef21 +index 000000000..368972a00 --- /dev/null +++ b/fs/bcachefs/btree_update_leaf.c -@@ -0,0 +1,2065 @@ +@@ -0,0 +1,2097 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -33138,7 +33130,8 @@ index 000000000..3638cef21 + +static int __must_check +bch2_trans_update_by_path(struct btree_trans *, struct btree_path *, -+ struct bkey_i *, enum btree_update_flags); ++ struct bkey_i *, enum btree_update_flags, ++ unsigned long ip); + +static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l, + const struct btree_insert_entry *r) @@ -33362,12 +33355,6 @@ index 000000000..3638cef21 + bch2_trans_node_reinit_iter(trans, b); +} + -+static void 
btree_insert_key_leaf(struct btree_trans *trans, -+ struct btree_insert_entry *insert) -+{ -+ bch2_btree_insert_key_leaf(trans, insert->path, insert->k, trans->journal_res.seq); -+} -+ +/* Cached btree updates: */ + +/* Normal update interface: */ @@ -33824,9 +33811,14 @@ index 000000000..3638cef21 + trans_for_each_update(trans, i) { + i->k->k.needs_whiteout = false; + -+ if (!i->cached) -+ btree_insert_key_leaf(trans, i); -+ else if (!i->key_cache_already_flushed) ++ if (!i->cached) { ++ u64 seq = trans->journal_res.seq; ++ ++ if (i->flags & BTREE_UPDATE_PREJOURNAL) ++ seq = i->seq; ++ ++ bch2_btree_insert_key_leaf(trans, i->path, i->k, seq); ++ } else if (!i->key_cache_already_flushed) + bch2_btree_insert_key_cached(trans, flags, i); + else { + bch2_btree_key_cache_drop(trans, i->path); @@ -33924,12 +33916,11 @@ index 000000000..3638cef21 +{ + struct bch_fs *c = trans->c; + struct btree_insert_entry *i; -+ int ret, u64s_delta = 0; ++ int ret = 0, u64s_delta = 0; + +#ifdef CONFIG_BCACHEFS_DEBUG -+ struct printbuf buf = PRINTBUF; -+ + trans_for_each_update(trans, i) { ++ struct printbuf buf = PRINTBUF; + enum bkey_invalid_flags invalid_flags = 0; + + if (!(flags & BTREE_INSERT_JOURNAL_REPLAY)) @@ -33937,10 +33928,13 @@ index 000000000..3638cef21 + + if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), + i->bkey_type, invalid_flags, &buf))) -+ return bch2_trans_commit_bkey_invalid(trans, flags, i, &buf); ++ ret = bch2_trans_commit_bkey_invalid(trans, flags, i, &buf); + btree_insert_entry_checks(trans, i); ++ printbuf_exit(&buf); ++ ++ if (ret) ++ return ret; + } -+ printbuf_exit(&buf); +#endif + + trans_for_each_update(trans, i) { @@ -34399,7 +34393,7 @@ index 000000000..3638cef21 + struct bpos new_pos) +{ + struct bch_fs *c = trans->c; -+ struct btree_iter old_iter, new_iter; ++ struct btree_iter old_iter, new_iter = { NULL }; + struct bkey_s_c old_k, new_k; + snapshot_id_list s; + struct bkey_i *update; @@ -34449,6 +34443,7 @@ index 000000000..3638cef21 + if (ret) + break; + } ++ bch2_trans_iter_exit(trans, &new_iter); + bch2_trans_iter_exit(trans, &old_iter); + darray_exit(&s); + @@ -34567,7 +34562,7 @@ index 000000000..3638cef21 + + ret = bch2_trans_update_by_path(trans, iter.path, update, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| -+ flags); ++ flags, _RET_IP_); + if (ret) + goto err; + goto out; @@ -34605,11 +34600,6 @@ index 000000000..3638cef21 + return ret; +} + -+static int __must_check -+bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path, -+ struct bkey_i *k, enum btree_update_flags flags, -+ unsigned long ip); -+ +static noinline int flush_new_cached_update(struct btree_trans *trans, + struct btree_path *path, + struct btree_insert_entry *i, @@ -34640,25 +34630,34 @@ index 000000000..3638cef21 + i->flags |= BTREE_TRIGGER_NORUN; + + btree_path_set_should_be_locked(btree_path); -+ ret = bch2_trans_update_by_path_trace(trans, btree_path, i->k, flags, ip); ++ ret = bch2_trans_update_by_path(trans, btree_path, i->k, flags, ip); +out: + bch2_path_put(trans, btree_path, true); + return ret; +} + +static int __must_check -+bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path, -+ struct bkey_i *k, enum btree_update_flags flags, -+ unsigned long ip) ++bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path, ++ struct bkey_i *k, enum btree_update_flags flags, ++ unsigned long ip) +{ + struct bch_fs *c = trans->c; + struct btree_insert_entry *i, n; ++ u64 seq = 0; + int cmp; + + 
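/* debug-build assertions: the path must be locked and positioned at the key */
+ 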
EBUG_ON(!path->should_be_locked); + EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX); + EBUG_ON(!bpos_eq(k->k.p, path->pos)); + ++ /* ++ * The transaction journal res hasn't been allocated at this point. ++ * That occurs at commit time. Reuse the seq field to pass in the seq ++ * of a prejournaled key. ++ */ ++ if (flags & BTREE_UPDATE_PREJOURNAL) ++ seq = trans->journal_res.seq; ++ + n = (struct btree_insert_entry) { + .flags = flags, + .bkey_type = __btree_node_type(path->level, path->btree_id), @@ -34667,6 +34666,7 @@ index 000000000..3638cef21 + .cached = path->cached, + .path = path, + .k = k, ++ .seq = seq, + .ip_allocated = ip, + }; + @@ -34694,6 +34694,7 @@ index 000000000..3638cef21 + i->cached = n.cached; + i->k = n.k; + i->path = n.path; ++ i->seq = n.seq; + i->ip_allocated = n.ip_allocated; + } else { + array_insert_item(trans->updates, trans->nr_updates, @@ -34728,13 +34729,6 @@ index 000000000..3638cef21 + return 0; +} + -+static inline int __must_check -+bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path, -+ struct bkey_i *k, enum btree_update_flags flags) -+{ -+ return bch2_trans_update_by_path_trace(trans, path, k, flags, _RET_IP_); -+} -+ +int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_i *k, enum btree_update_flags flags) +{ @@ -34795,7 +34789,19 @@ index 000000000..3638cef21 + path = iter->key_cache_path; + } + -+ return bch2_trans_update_by_path(trans, path, k, flags); ++ return bch2_trans_update_by_path(trans, path, k, flags, _RET_IP_); ++} ++ ++/* ++ * Add a transaction update for a key that has already been journaled. ++ */ ++int __must_check bch2_trans_update_seq(struct btree_trans *trans, u64 seq, ++ struct btree_iter *iter, struct bkey_i *k, ++ enum btree_update_flags flags) ++{ ++ trans->journal_res.seq = seq; ++ return bch2_trans_update(trans, iter, k, flags|BTREE_UPDATE_NOJOURNAL| ++ BTREE_UPDATE_PREJOURNAL); +} + +int __must_check bch2_trans_update_buffered(struct btree_trans *trans, @@ -35057,6 +35063,24 @@ index 000000000..3638cef21 + return ret; +} + ++int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, ++ struct bpos pos, bool set) ++{ ++ struct bkey_i *k; ++ int ret = 0; ++ ++ k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k)); ++ ret = PTR_ERR_OR_ZERO(k); ++ if (unlikely(ret)) ++ return ret; ++ ++ bkey_init(&k->k); ++ k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted; ++ k->k.p = pos; ++ ++ return bch2_trans_update_buffered(trans, btree, k); ++} ++ +static int __bch2_trans_log_msg(darray_u64 *entries, const char *fmt, va_list args) +{ + struct printbuf buf = PRINTBUF; @@ -35137,10 +35161,10 @@ index 000000000..3638cef21 +} diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c new file mode 100644 -index 000000000..6c30a72e6 +index 000000000..5f96db539 --- /dev/null +++ b/fs/bcachefs/btree_write_buffer.c -@@ -0,0 +1,346 @@ +@@ -0,0 +1,372 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -35218,7 +35242,7 @@ index 000000000..6c30a72e6 + } + return 0; +trans_commit: -+ return bch2_trans_update(trans, iter, &wb->k, 0) ?: ++ return bch2_trans_update_seq(trans, wb->journal_seq, iter, &wb->k, 0) ?: + bch2_trans_commit(trans, NULL, NULL, + commit_flags| + BTREE_INSERT_NOCHECK_RW| @@ -35246,6 +35270,32 @@ index 000000000..6c30a72e6 + return old; +} + ++/* ++ * Update a btree with a write buffered key using the journal seq of the ++ * original write buffer insert. 
++ * ++ * It is not safe to rejournal the key once it has been inserted into the write ++ * buffer because that may break recovery ordering. For example, the key may ++ * have already been modified in the active write buffer in a seq that comes ++ * before the current transaction. If we were to journal this key again and ++ * crash, recovery would process updates in the wrong order. ++ */ ++static int ++btree_write_buffered_insert(struct btree_trans *trans, ++ struct btree_write_buffered_key *wb) ++{ ++ struct btree_iter iter; ++ int ret; ++ ++ bch2_trans_iter_init(trans, &iter, wb->btree, bkey_start_pos(&wb->k.k), ++ BTREE_ITER_CACHED|BTREE_ITER_INTENT); ++ ++ ret = bch2_btree_iter_traverse(&iter) ?: ++ bch2_trans_update_seq(trans, wb->journal_seq, &iter, &wb->k, 0); ++ bch2_trans_iter_exit(trans, &iter); ++ return ret; ++} ++ +int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_flags, + bool locked) +{ @@ -35381,7 +35431,7 @@ index 000000000..6c30a72e6 + commit_flags| + BTREE_INSERT_NOFAIL| + BTREE_INSERT_JOURNAL_RECLAIM, -+ __bch2_btree_insert(trans, i->btree, &i->k, 0)); ++ btree_write_buffered_insert(trans, i)); + if (bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret))) + break; + } @@ -35559,10 +35609,10 @@ index 000000000..99993ba77 +#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H */ diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c new file mode 100644 -index 000000000..797ef5ece +index 000000000..7bb7f0cae --- /dev/null +++ b/fs/bcachefs/buckets.c -@@ -0,0 +1,2171 @@ +@@ -0,0 +1,2106 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Code for manipulating bucket marks for garbage collection. @@ -35667,18 +35717,6 @@ index 000000000..797ef5ece + } while (read_seqcount_retry(&c->usage_lock, seq)); +} + -+static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c, -+ unsigned journal_seq, -+ bool gc) -+{ -+ percpu_rwsem_assert_held(&c->mark_lock); -+ BUG_ON(!gc && !journal_seq); -+ -+ return this_cpu_ptr(gc -+ ? 
c->usage_gc -+ : c->usage[journal_seq & JOURNAL_BUF_MASK]); -+} -+ +u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v) +{ + ssize_t offset = v - (u64 *) c->usage_base; @@ -36025,7 +36063,7 @@ index 000000000..797ef5ece + return 0; +} + -+static int replicas_deltas_realloc(struct btree_trans *trans, unsigned more) ++int bch2_replicas_deltas_realloc(struct btree_trans *trans, unsigned more) +{ + return allocate_dropping_locks_errcode(trans, + __replicas_deltas_realloc(trans, more, _gfp)); @@ -36044,7 +36082,7 @@ index 000000000..797ef5ece + return 0; + + b = replicas_entry_bytes(r) + 8; -+ ret = replicas_deltas_realloc(trans, b); ++ ret = bch2_replicas_deltas_realloc(trans, b); + if (ret) + return ret; + @@ -36702,38 +36740,6 @@ index 000000000..797ef5ece + return 0; +} + -+int bch2_mark_inode(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c old, struct bkey_s_c new, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_fs_usage *fs_usage; -+ u64 journal_seq = trans->journal_res.seq; -+ -+ if (flags & BTREE_TRIGGER_INSERT) { -+ struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v; -+ -+ BUG_ON(!journal_seq); -+ BUG_ON(new.k->type != KEY_TYPE_inode_v3); -+ -+ v->bi_journal_seq = cpu_to_le64(journal_seq); -+ } -+ -+ if (flags & BTREE_TRIGGER_GC) { -+ percpu_down_read(&c->mark_lock); -+ preempt_disable(); -+ -+ fs_usage = fs_usage_ptr(c, journal_seq, flags & BTREE_TRIGGER_GC); -+ fs_usage->nr_inodes += bkey_is_inode(new.k); -+ fs_usage->nr_inodes -= bkey_is_inode(old.k); -+ -+ preempt_enable(); -+ percpu_up_read(&c->mark_lock); -+ } -+ return 0; -+} -+ +int bch2_mark_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s_c new, @@ -36837,7 +36843,7 @@ index 000000000..797ef5ece + + BUG_ON(!(flags & BTREE_TRIGGER_GC)); + -+ if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) { ++ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_reflink_p_fix) { + idx -= le32_to_cpu(p.v->front_pad); + end += le32_to_cpu(p.v->back_pad); + } @@ -37280,27 +37286,6 @@ index 000000000..797ef5ece + return ret; +} + -+int bch2_trans_mark_inode(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c old, -+ struct bkey_i *new, -+ unsigned flags) -+{ -+ int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k); -+ -+ if (nr) { -+ int ret = replicas_deltas_realloc(trans, 0); -+ struct replicas_delta_list *d = trans->fs_usage_deltas; -+ -+ if (ret) -+ return ret; -+ -+ d->nr_inodes += nr; -+ } -+ -+ return 0; -+} -+ +int bch2_trans_mark_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, @@ -37319,7 +37304,7 @@ index 000000000..797ef5ece + sectors = -sectors; + sectors *= replicas; + -+ ret = replicas_deltas_realloc(trans, 0); ++ ret = bch2_replicas_deltas_realloc(trans, 0); + if (ret) + return ret; + @@ -37736,10 +37721,10 @@ index 000000000..797ef5ece +} diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h new file mode 100644 -index 000000000..f9d7dda07 +index 000000000..a418f6648 --- /dev/null +++ b/fs/bcachefs/buckets.h -@@ -0,0 +1,357 @@ +@@ -0,0 +1,368 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Code for manipulating bucket marks for garbage collection. 
@@ -37991,6 +37976,20 @@ index 000000000..f9d7dda07 + +/* key/bucket marking: */ + ++static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c, ++ unsigned journal_seq, ++ bool gc) ++{ ++ percpu_rwsem_assert_held(&c->mark_lock); ++ BUG_ON(!gc && !journal_seq); ++ ++ return this_cpu_ptr(gc ++ ? c->usage_gc ++ : c->usage[journal_seq & JOURNAL_BUF_MASK]); ++} ++ ++int bch2_replicas_deltas_realloc(struct btree_trans *, unsigned); ++ +void bch2_fs_usage_initialize(struct bch_fs *); + +int bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, @@ -38003,8 +38002,6 @@ index 000000000..f9d7dda07 + struct bkey_s_c, struct bkey_s_c, unsigned); +int bch2_mark_stripe(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s_c, unsigned); -+int bch2_mark_inode(struct btree_trans *, enum btree_id, unsigned, -+ struct bkey_s_c, struct bkey_s_c, unsigned); +int bch2_mark_reservation(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s_c, unsigned); +int bch2_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, @@ -38012,7 +38009,6 @@ index 000000000..f9d7dda07 + +int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); +int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); -+int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); +int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); +int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); + @@ -40447,10 +40443,10 @@ index 000000000..5fae0012d +#endif /* _BCACHEFS_CLOCK_TYPES_H */ diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c new file mode 100644 -index 000000000..560214c15 +index 000000000..c9ca7cce5 --- /dev/null +++ b/fs/bcachefs/compress.c -@@ -0,0 +1,712 @@ +@@ -0,0 +1,713 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" +#include "checksum.h" @@ -40693,7 +40689,8 @@ index 000000000..560214c15 + data = __bounce_alloc(c, dst_len, WRITE); + + if (__bio_uncompress(c, bio, data.b, *crc)) { -+ bch_err(c, "error rewriting existing data: decompression error"); ++ if (!c->opts.no_data_io) ++ bch_err(c, "error rewriting existing data: decompression error"); + bio_unmap_or_unbounce(c, data); + return -EIO; + } @@ -46744,7 +46741,7 @@ index 000000000..dc906fc91 +} diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h new file mode 100644 -index 000000000..d5277ec73 +index 000000000..735eb2416 --- /dev/null +++ b/fs/bcachefs/errcode.h @@ -0,0 +1,246 @@ @@ -46907,8 +46904,7 @@ index 000000000..d5277ec73 + x(BCH_ERR_fsck, fsck_errors_not_fixed) \ + x(BCH_ERR_fsck, fsck_repair_unimplemented) \ + x(BCH_ERR_fsck, fsck_repair_impossible) \ -+ x(0, need_snapshot_cleanup) \ -+ x(0, need_topology_repair) \ ++ x(0, restart_recovery) \ + x(0, unwritten_extent_update) \ + x(EINVAL, device_state_not_allowed) \ + x(EINVAL, member_info_missing) \ @@ -46921,6 +46917,7 @@ index 000000000..d5277ec73 + x(EINVAL, device_already_online) \ + x(EINVAL, insufficient_devices_to_start) \ + x(EINVAL, invalid) \ ++ x(EINVAL, internal_fsck_err) \ + x(EROFS, erofs_trans_commit) \ + x(EROFS, erofs_no_writes) \ + x(EROFS, erofs_journal_err) \ @@ -46996,10 +46993,10 @@ index 000000000..d5277ec73 +#endif /* _BCACHFES_ERRCODE_H */ diff --git a/fs/bcachefs/error.c 
b/fs/bcachefs/error.c new file mode 100644 -index 000000000..685464b8c +index 000000000..39009cf0c --- /dev/null +++ b/fs/bcachefs/error.c -@@ -0,0 +1,297 @@ +@@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" +#include "error.h" @@ -47029,9 +47026,6 @@ index 000000000..685464b8c + +void bch2_topology_error(struct bch_fs *c) +{ -+ if (!test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) -+ return; -+ + set_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags); + if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) + bch2_inconsistent_error(c); @@ -57753,10 +57747,10 @@ index 000000000..6170d214d +#endif /* _BCACHEFS_FS_H */ diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c new file mode 100644 -index 000000000..7edd4632d +index 000000000..d3eb3dc1c --- /dev/null +++ b/fs/bcachefs/fsck.c -@@ -0,0 +1,2452 @@ +@@ -0,0 +1,2471 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -58230,28 +58224,6 @@ index 000000000..7edd4632d + memset(s, 0, sizeof(*s)); +} + -+static int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id) -+{ -+ struct snapshots_seen_entry *i, n = { id, id }; -+ int ret; -+ -+ darray_for_each(s->ids, i) { -+ if (n.equiv < i->equiv) -+ break; -+ -+ if (i->equiv == n.equiv) { -+ bch_err(c, "%s(): adding duplicate snapshot", __func__); -+ return -EINVAL; -+ } -+ } -+ -+ ret = darray_insert_item(&s->ids, i - s->ids.data, n); -+ if (ret) -+ bch_err(c, "error reallocating snapshots_seen table (size %zu)", -+ s->ids.size); -+ return ret; -+} -+ +static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, + enum btree_id btree_id, struct bpos pos) +{ @@ -58264,27 +58236,31 @@ index 000000000..7edd4632d + if (!bkey_eq(s->pos, pos)) + s->ids.nr = 0; + -+ pos.snapshot = n.equiv; + s->pos = pos; ++ s->pos.snapshot = n.equiv; + -+ darray_for_each(s->ids, i) -+ if (i->equiv == n.equiv) { -+ if (fsck_err_on(i->id != n.id, c, -+ "snapshot deletion did not run correctly:\n" -+ " duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n", -+ bch2_btree_ids[btree_id], -+ pos.inode, pos.offset, -+ i->id, n.id, n.equiv)) -+ return -BCH_ERR_need_snapshot_cleanup; -+ ++ darray_for_each(s->ids, i) { ++ if (i->id == n.id) + return 0; ++ ++ /* ++ * We currently don't rigorously track for snapshot cleanup ++ * needing to be run, so it shouldn't be a fsck error yet: ++ */ ++ if (i->equiv == n.equiv) { ++ bch_err(c, "snapshot deletion did not finish:\n" ++ " duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n", ++ bch2_btree_ids[btree_id], ++ pos.inode, pos.offset, ++ i->id, n.id, n.equiv); ++ return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_delete_dead_snapshots); + } ++ } + + ret = darray_push(&s->ids, n); + if (ret) + bch_err(c, "error reallocating snapshots_seen table (size %zu)", + s->ids.size); -+fsck_err: + return ret; +} + @@ -58298,15 +58274,14 @@ index 000000000..7edd4632d + u32 id, u32 ancestor) +{ + ssize_t i; -+ u32 top = seen->ids.nr ? 
seen->ids.data[seen->ids.nr - 1].equiv : 0; + -+ BUG_ON(id > ancestor); -+ BUG_ON(!bch2_snapshot_is_equiv(c, id)); -+ BUG_ON(!bch2_snapshot_is_equiv(c, ancestor)); ++ EBUG_ON(id > ancestor); ++ EBUG_ON(!bch2_snapshot_is_equiv(c, id)); ++ EBUG_ON(!bch2_snapshot_is_equiv(c, ancestor)); + + /* @ancestor should be the snapshot most recently added to @seen */ -+ BUG_ON(ancestor != seen->pos.snapshot); -+ BUG_ON(ancestor != top); ++ EBUG_ON(ancestor != seen->pos.snapshot); ++ EBUG_ON(ancestor != seen->ids.data[seen->ids.nr - 1].equiv); + + if (id == ancestor) + return true; @@ -58314,11 +58289,20 @@ index 000000000..7edd4632d + if (!bch2_snapshot_is_ancestor(c, id, ancestor)) + return false; + ++ /* ++ * We know that @id is a descendant of @ancestor, we're checking if ++ * we've seen a key that overwrote @ancestor - i.e. also a descendent of ++ * @ascestor and with @id as a descendent. ++ * ++ * But we already know that we're scanning IDs between @id and @ancestor ++ * numerically, since snapshot ID lists are kept sorted, so if we find ++ * an id that's an ancestor of @id we're done: ++ */ ++ + for (i = seen->ids.nr - 2; + i >= 0 && seen->ids.data[i].equiv >= id; + --i) -+ if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i].equiv) && -+ bch2_snapshot_is_ancestor(c, seen->ids.data[i].equiv, ancestor)) ++ if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i].equiv)) + return false; + + return true; @@ -58365,12 +58349,14 @@ index 000000000..7edd4632d +struct inode_walker_entry { + struct bch_inode_unpacked inode; + u32 snapshot; ++ bool seen_this_pos; + u64 count; +}; + +struct inode_walker { + bool first_this_inode; -+ u64 cur_inum; ++ bool recalculate_sums; ++ struct bpos last_pos; + + DARRAY(struct inode_walker_entry) inodes; +}; @@ -58407,9 +58393,7 @@ index 000000000..7edd4632d + u32 restart_count = trans->restart_count; + int ret; + -+ if (w->cur_inum == inum) -+ return 0; -+ ++ w->recalculate_sums = false; + w->inodes.nr = 0; + + for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum), @@ -58425,8 +58409,7 @@ index 000000000..7edd4632d + if (ret) + return ret; + -+ w->cur_inum = inum; -+ w->first_this_inode = true; ++ w->first_this_inode = true; + + if (trans_was_restarted(trans, restart_count)) + return -BCH_ERR_transaction_restart_nested; @@ -58435,8 +58418,8 @@ index 000000000..7edd4632d +} + +static struct inode_walker_entry * -+lookup_inode_for_snapshot(struct bch_fs *c, -+ struct inode_walker *w, u32 snapshot) ++lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, ++ u32 snapshot, bool is_whiteout) +{ + struct inode_walker_entry *i; + @@ -58450,35 +58433,50 @@ index 000000000..7edd4632d +found: + BUG_ON(snapshot > i->snapshot); + -+ if (snapshot != i->snapshot) { ++ if (snapshot != i->snapshot && !is_whiteout) { + struct inode_walker_entry new = *i; ++ size_t pos; + int ret; + + new.snapshot = snapshot; + new.count = 0; + + bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u", -+ w->cur_inum, snapshot, i->snapshot); ++ w->last_pos.inode, snapshot, i->snapshot); + + while (i > w->inodes.data && i[-1].snapshot > snapshot) + --i; + -+ ret = darray_insert_item(&w->inodes, i - w->inodes.data, new); ++ pos = i - w->inodes.data; ++ ret = darray_insert_item(&w->inodes, pos, new); + if (ret) + return ERR_PTR(ret); ++ ++ i = w->inodes.data + pos; + } + + return i; +} + +static struct inode_walker_entry *walk_inode(struct btree_trans *trans, -+ struct inode_walker *w, struct bpos pos) ++ struct inode_walker *w, struct bpos pos, ++ bool 
is_whiteout) +{ -+ int ret = get_inodes_all_snapshots(trans, w, pos.inode); -+ if (ret) -+ return ERR_PTR(ret); ++ if (w->last_pos.inode != pos.inode) { ++ int ret = get_inodes_all_snapshots(trans, w, pos.inode); ++ if (ret) ++ return ERR_PTR(ret); ++ } else if (bkey_cmp(w->last_pos, pos)) { ++ struct inode_walker_entry *i; + -+ return lookup_inode_for_snapshot(trans->c, w, pos.snapshot); ++ darray_for_each(w->inodes, i) ++ i->seen_this_pos = false; ++ ++ } ++ ++ w->last_pos = pos; ++ ++ return lookup_inode_for_snapshot(trans->c, w, pos.snapshot, is_whiteout); +} + +static int __get_visible_inodes(struct btree_trans *trans, @@ -58793,47 +58791,6 @@ index 000000000..7edd4632d + return ret; +} + -+/* -+ * Checking for overlapping extents needs to be reimplemented -+ */ -+#if 0 -+static int fix_overlapping_extent(struct btree_trans *trans, -+ struct bkey_s_c k, struct bpos cut_at) -+{ -+ struct btree_iter iter; -+ struct bkey_i *u; -+ int ret; -+ -+ u = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ ret = PTR_ERR_OR_ZERO(u); -+ if (ret) -+ return ret; -+ -+ bkey_reassemble(u, k); -+ bch2_cut_front(cut_at, u); -+ -+ -+ /* -+ * We don't want to go through the extent_handle_overwrites path: -+ * -+ * XXX: this is going to screw up disk accounting, extent triggers -+ * assume things about extent overwrites - we should be running the -+ * triggers manually here -+ */ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, u->k.p, -+ BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS); -+ -+ BUG_ON(iter.flags & BTREE_ITER_IS_EXTENTS); -+ ret = bch2_btree_iter_traverse(&iter) ?: -+ bch2_trans_update(trans, &iter, u, BTREE_TRIGGER_NORUN) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+#endif -+ +static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, + struct btree_iter *iter, + struct bpos pos) @@ -58887,19 +58844,20 @@ index 000000000..7edd4632d + if (i->inode.bi_sectors == i->count) + continue; + -+ count2 = bch2_count_inode_sectors(trans, w->cur_inum, i->snapshot); ++ count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot); ++ ++ if (w->recalculate_sums) ++ i->count = count2; + + if (i->count != count2) { -+ bch_err(c, "fsck counted i_sectors wrong: got %llu should be %llu", -+ i->count, count2); -+ i->count = count2; -+ if (i->inode.bi_sectors == i->count) -+ continue; ++ bch_err(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", ++ w->last_pos.inode, i->snapshot, i->count, count2); ++ return -BCH_ERR_internal_fsck_err; + } + + if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY), c, + "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", -+ w->cur_inum, i->snapshot, ++ w->last_pos.inode, i->snapshot, + i->inode.bi_sectors, i->count)) { + i->inode.bi_sectors = i->count; + ret = write_inode(trans, &i->inode, i->snapshot); @@ -58921,85 +58879,40 @@ index 000000000..7edd4632d + struct snapshots_seen seen; +}; + -+typedef DARRAY(struct extent_end) extent_ends; ++struct extent_ends { ++ struct bpos last_pos; ++ DARRAY(struct extent_end) e; ++}; + -+static int get_print_extent(struct btree_trans *trans, struct bpos pos, struct printbuf *out) ++static void extent_ends_reset(struct extent_ends *extent_ends) +{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_extents, pos, -+ BTREE_ITER_SLOTS| -+ BTREE_ITER_ALL_SNAPSHOTS| -+ BTREE_ITER_NOT_EXTENTS); -+ ret = 
bkey_err(k); -+ if (ret) -+ return ret; -+ -+ bch2_bkey_val_to_text(out, trans->c, k); -+ bch2_trans_iter_exit(trans, &iter); -+ return 0; -+} -+ -+static int check_overlapping_extents(struct btree_trans *trans, -+ struct snapshots_seen *seen, -+ extent_ends *extent_ends, -+ struct bkey_s_c k, -+ struct btree_iter *iter) -+{ -+ struct bch_fs *c = trans->c; + struct extent_end *i; -+ struct printbuf buf = PRINTBUF; -+ int ret = 0; + -+ darray_for_each(*extent_ends, i) { -+ /* duplicate, due to transaction restart: */ -+ if (i->offset == k.k->p.offset && -+ i->snapshot == k.k->p.snapshot) -+ continue; ++ darray_for_each(extent_ends->e, i) ++ snapshots_seen_exit(&i->seen); + -+ if (!ref_visible2(c, -+ k.k->p.snapshot, seen, -+ i->snapshot, &i->seen)) -+ continue; -+ -+ if (i->offset <= bkey_start_offset(k.k)) -+ continue; -+ -+ printbuf_reset(&buf); -+ prt_str(&buf, "overlapping extents:\n "); -+ bch2_bkey_val_to_text(&buf, c, k); -+ prt_str(&buf, "\n "); -+ -+ ret = get_print_extent(trans, SPOS(k.k->p.inode, i->offset, i->snapshot), &buf); -+ if (ret) -+ break; -+ -+ if (fsck_err(c, "%s", buf.buf)) { -+ struct bkey_i *update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ if ((ret = PTR_ERR_OR_ZERO(update))) -+ goto err; -+ bkey_reassemble(update, k); -+ ret = bch2_trans_update_extent(trans, iter, update, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+ if (ret) -+ goto err; -+ } -+ } -+err: -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; ++ extent_ends->e.nr = 0; +} + -+static int extent_ends_at(extent_ends *extent_ends, ++static void extent_ends_exit(struct extent_ends *extent_ends) ++{ ++ extent_ends_reset(extent_ends); ++ darray_exit(&extent_ends->e); ++} ++ ++static void extent_ends_init(struct extent_ends *extent_ends) ++{ ++ memset(extent_ends, 0, sizeof(*extent_ends)); ++} ++ ++static int extent_ends_at(struct bch_fs *c, ++ struct extent_ends *extent_ends, + struct snapshots_seen *seen, + struct bkey_s_c k) +{ + struct extent_end *i, n = (struct extent_end) { -+ .snapshot = k.k->p.snapshot, + .offset = k.k->p.offset, ++ .snapshot = k.k->p.snapshot, + .seen = *seen, + }; + @@ -59009,7 +58922,7 @@ index 000000000..7edd4632d + if (!n.seen.ids.data) + return -BCH_ERR_ENOMEM_fsck_extent_ends_at; + -+ darray_for_each(*extent_ends, i) { ++ darray_for_each(extent_ends->e, i) { + if (i->snapshot == k.k->p.snapshot) { + snapshots_seen_exit(&i->seen); + *i = n; @@ -59020,136 +58933,232 @@ index 000000000..7edd4632d + break; + } + -+ return darray_insert_item(extent_ends, i - extent_ends->data, n); ++ return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n); +} + -+static void extent_ends_reset(extent_ends *extent_ends) ++static int overlapping_extents_found(struct btree_trans *trans, ++ enum btree_id btree, ++ struct bpos pos1, struct bkey pos2, ++ bool *fixed) +{ ++ struct bch_fs *c = trans->c; ++ struct printbuf buf = PRINTBUF; ++ struct btree_iter iter; ++ struct bkey_s_c k; ++ u32 snapshot = min(pos1.snapshot, pos2.p.snapshot); ++ int ret; ++ ++ BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2))); ++ ++ bch2_trans_iter_init(trans, &iter, btree, SPOS(pos1.inode, pos1.offset - 1, snapshot), 0); ++ k = bch2_btree_iter_peek_upto(&iter, POS(pos1.inode, U64_MAX)); ++ ret = bkey_err(k); ++ if (ret) ++ goto err; ++ ++ prt_str(&buf, "\n "); ++ bch2_bkey_val_to_text(&buf, c, k); ++ ++ if (!bpos_eq(pos1, k.k->p)) { ++ bch_err(c, "%s: error finding first overlapping extent when repairing%s", ++ __func__, buf.buf); ++ ret = -BCH_ERR_internal_fsck_err; ++ goto err; ++ } ++ ++ while (1) { ++ 
bch2_btree_iter_advance(&iter); ++ ++ k = bch2_btree_iter_peek_upto(&iter, POS(pos1.inode, U64_MAX)); ++ ret = bkey_err(k); ++ if (ret) ++ goto err; ++ ++ if (bkey_ge(k.k->p, pos2.p)) ++ break; ++ ++ } ++ ++ prt_str(&buf, "\n "); ++ bch2_bkey_val_to_text(&buf, c, k); ++ ++ if (bkey_gt(k.k->p, pos2.p) || ++ pos2.size != k.k->size) { ++ bch_err(c, "%s: error finding seconding overlapping extent when repairing%s", ++ __func__, buf.buf); ++ ret = -BCH_ERR_internal_fsck_err; ++ goto err; ++ } ++ ++ if (fsck_err(c, "overlapping extents%s", buf.buf)) { ++ struct bpos update_pos = pos1.snapshot < pos2.p.snapshot ? pos1 : pos2.p; ++ struct btree_iter update_iter; ++ ++ struct bkey_i *update = bch2_bkey_get_mut(trans, &update_iter, ++ btree, update_pos, ++ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); ++ bch2_trans_iter_exit(trans, &update_iter); ++ if ((ret = PTR_ERR_OR_ZERO(update))) ++ goto err; ++ ++ *fixed = true; ++ } ++fsck_err: ++err: ++ bch2_trans_iter_exit(trans, &iter); ++ printbuf_exit(&buf); ++ return ret; ++} ++ ++static int check_overlapping_extents(struct btree_trans *trans, ++ struct snapshots_seen *seen, ++ struct extent_ends *extent_ends, ++ struct bkey_s_c k, ++ u32 equiv, ++ struct btree_iter *iter) ++{ ++ struct bch_fs *c = trans->c; + struct extent_end *i; ++ bool fixed = false; ++ int ret = 0; + -+ darray_for_each(*extent_ends, i) -+ snapshots_seen_exit(&i->seen); ++ /* transaction restart, running again */ ++ if (bpos_eq(extent_ends->last_pos, k.k->p)) ++ return 0; + -+ extent_ends->nr = 0; ++ if (extent_ends->last_pos.inode != k.k->p.inode) ++ extent_ends_reset(extent_ends); ++ ++ darray_for_each(extent_ends->e, i) { ++ if (i->offset <= bkey_start_offset(k.k)) ++ continue; ++ ++ if (!ref_visible2(c, ++ k.k->p.snapshot, seen, ++ i->snapshot, &i->seen)) ++ continue; ++ ++ ret = overlapping_extents_found(trans, iter->btree_id, ++ SPOS(iter->pos.inode, ++ i->offset, ++ i->snapshot), ++ *k.k, &fixed); ++ if (ret) ++ goto err; ++ } ++ ++ ret = extent_ends_at(c, extent_ends, seen, k); ++ if (ret) ++ goto err; ++ ++ extent_ends->last_pos = k.k->p; ++err: ++ return ret ?: fixed; +} + +static int check_extent(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c k, + struct inode_walker *inode, + struct snapshots_seen *s, -+ extent_ends *extent_ends) ++ struct extent_ends *extent_ends) +{ + struct bch_fs *c = trans->c; + struct inode_walker_entry *i; + struct printbuf buf = PRINTBUF; -+ struct bpos equiv; ++ struct bpos equiv = k.k->p; + int ret = 0; + ++ equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot); ++ + ret = check_key_has_snapshot(trans, iter, k); + if (ret) { + ret = ret < 0 ? 
ret : 0; + goto out; + } + -+ equiv = k.k->p; -+ equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot); ++ if (inode->last_pos.inode != k.k->p.inode) { ++ ret = check_i_sectors(trans, inode); ++ if (ret) ++ goto err; ++ } ++ ++ i = walk_inode(trans, inode, equiv, k.k->type == KEY_TYPE_whiteout); ++ ret = PTR_ERR_OR_ZERO(i); ++ if (ret) ++ goto err; + + ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); + if (ret) + goto err; + -+ if (k.k->type == KEY_TYPE_whiteout) -+ goto out; ++ if (k.k->type != KEY_TYPE_whiteout) { ++ if (fsck_err_on(!i, c, ++ "extent in missing inode:\n %s", ++ (printbuf_reset(&buf), ++ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) ++ goto delete; + -+ if (inode->cur_inum != k.k->p.inode) { -+ ret = check_i_sectors(trans, inode); -+ if (ret) ++ if (fsck_err_on(i && ++ !S_ISREG(i->inode.bi_mode) && ++ !S_ISLNK(i->inode.bi_mode), c, ++ "extent in non regular inode mode %o:\n %s", ++ i->inode.bi_mode, ++ (printbuf_reset(&buf), ++ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) ++ goto delete; ++ ++ ret = check_overlapping_extents(trans, s, extent_ends, k, ++ equiv.snapshot, iter); ++ if (ret < 0) + goto err; + -+ extent_ends_reset(extent_ends); -+ } -+ -+ BUG_ON(!iter->path->should_be_locked); -+ -+ ret = check_overlapping_extents(trans, s, extent_ends, k, iter); -+ if (ret) -+ goto err; -+ -+ ret = extent_ends_at(extent_ends, s, k); -+ if (ret) -+ goto err; -+ -+ i = walk_inode(trans, inode, equiv); -+ ret = PTR_ERR_OR_ZERO(i); -+ if (ret) -+ goto err; -+ -+ if (fsck_err_on(!i, c, -+ "extent in missing inode:\n %s", -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { -+ ret = bch2_btree_delete_at(trans, iter, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+ goto out; -+ } -+ -+ if (!i) -+ goto out; -+ -+ if (fsck_err_on(!S_ISREG(i->inode.bi_mode) && -+ !S_ISLNK(i->inode.bi_mode), c, -+ "extent in non regular inode mode %o:\n %s", -+ i->inode.bi_mode, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { -+ ret = bch2_btree_delete_at(trans, iter, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+ goto out; ++ if (ret) ++ inode->recalculate_sums = true; ++ ret = 0; + } + + /* -+ * Check inodes in reverse order, from oldest snapshots to newest, so -+ * that we emit the fewest number of whiteouts necessary: ++ * Check inodes in reverse order, from oldest snapshots to newest, ++ * starting from the inode that matches this extent's snapshot. 
If we ++ * didn't have one, iterate over all inodes: + */ -+ for (i = inode->inodes.data + inode->inodes.nr - 1; -+ i >= inode->inodes.data; ++ if (!i) ++ i = inode->inodes.data + inode->inodes.nr - 1; ++ ++ for (; ++ inode->inodes.data && i >= inode->inodes.data; + --i) { + if (i->snapshot > equiv.snapshot || + !key_visible_in_snapshot(c, s, i->snapshot, equiv.snapshot)) + continue; + -+ if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && -+ k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && -+ !bkey_extent_is_reservation(k), c, -+ "extent type past end of inode %llu:%u, i_size %llu\n %s", -+ i->inode.bi_inum, i->snapshot, i->inode.bi_size, -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { -+ struct btree_iter iter2; ++ if (k.k->type != KEY_TYPE_whiteout) { ++ if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && ++ k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && ++ !bkey_extent_is_reservation(k), c, ++ "extent type past end of inode %llu:%u, i_size %llu\n %s", ++ i->inode.bi_inum, i->snapshot, i->inode.bi_size, ++ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ++ struct btree_iter iter2; + -+ bch2_trans_copy_iter(&iter2, iter); -+ bch2_btree_iter_set_snapshot(&iter2, i->snapshot); -+ ret = bch2_btree_iter_traverse(&iter2) ?: -+ bch2_btree_delete_at(trans, &iter2, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+ bch2_trans_iter_exit(trans, &iter2); -+ if (ret) -+ goto err; -+ -+ if (i->snapshot != equiv.snapshot) { -+ ret = snapshots_seen_add(c, s, i->snapshot); ++ bch2_trans_copy_iter(&iter2, iter); ++ bch2_btree_iter_set_snapshot(&iter2, i->snapshot); ++ ret = bch2_btree_iter_traverse(&iter2) ?: ++ bch2_btree_delete_at(trans, &iter2, ++ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); ++ bch2_trans_iter_exit(trans, &iter2); + if (ret) + goto err; ++ ++ iter->k.type = KEY_TYPE_whiteout; + } ++ ++ if (bkey_extent_is_allocation(k.k)) ++ i->count += k.k->size; + } ++ ++ i->seen_this_pos = true; + } -+ -+ if (bkey_extent_is_allocation(k.k)) -+ for_each_visible_inode(c, s, inode, equiv.snapshot, i) -+ i->count += k.k->size; -+#if 0 -+ bch2_bkey_buf_reassemble(&prev, c, k); -+#endif -+ +out: +err: +fsck_err: @@ -59158,6 +59167,9 @@ index 000000000..7edd4632d + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err_fn(c, ret); + return ret; ++delete: ++ ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); ++ goto out; +} + +/* @@ -59171,11 +59183,12 @@ index 000000000..7edd4632d + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; -+ extent_ends extent_ends = { 0 }; ++ struct extent_ends extent_ends; + struct disk_reservation res = { 0 }; + int ret = 0; + + snapshots_seen_init(&s); ++ extent_ends_init(&extent_ends); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); + + ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_extents, @@ -59185,11 +59198,11 @@ index 000000000..7edd4632d + BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({ + bch2_disk_reservation_put(c, &res); + check_extent(&trans, &iter, k, &w, &s, &extent_ends); -+ })); ++ })) ?: ++ check_i_sectors(&trans, &w); + + bch2_disk_reservation_put(c, &res); -+ extent_ends_reset(&extent_ends); -+ darray_exit(&extent_ends); ++ extent_ends_exit(&extent_ends); + inode_walker_exit(&w); + bch2_trans_exit(&trans); + snapshots_seen_exit(&s); @@ -59211,7 +59224,7 @@ index 000000000..7edd4632d + if (i->inode.bi_nlink == i->count) + continue; + -+ count2 = bch2_count_subdirs(trans, w->cur_inum, i->snapshot); ++ count2 = 
bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot); + if (count2 < 0) + return count2; + @@ -59225,7 +59238,7 @@ index 000000000..7edd4632d + + if (fsck_err_on(i->inode.bi_nlink != i->count, c, + "directory %llu:%u with wrong i_nlink: got %u, should be %llu", -+ w->cur_inum, i->snapshot, i->inode.bi_nlink, i->count)) { ++ w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { + i->inode.bi_nlink = i->count; + ret = write_inode(trans, &i->inode, i->snapshot); + if (ret) @@ -59389,7 +59402,7 @@ index 000000000..7edd4632d + if (k.k->type == KEY_TYPE_whiteout) + goto out; + -+ if (dir->cur_inum != k.k->p.inode) { ++ if (dir->last_pos.inode != k.k->p.inode) { + ret = check_subdir_count(trans, dir); + if (ret) + goto err; @@ -59397,12 +59410,12 @@ index 000000000..7edd4632d + + BUG_ON(!iter->path->should_be_locked); + -+ i = walk_inode(trans, dir, equiv); ++ i = walk_inode(trans, dir, equiv, k.k->type == KEY_TYPE_whiteout); + ret = PTR_ERR_OR_ZERO(i); + if (ret < 0) + goto err; + -+ if (dir->first_this_inode) ++ if (dir->first_this_inode && dir->inodes.nr) + *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode); + dir->first_this_inode = false; + @@ -59574,12 +59587,12 @@ index 000000000..7edd4632d + if (ret) + return ret; + -+ i = walk_inode(trans, inode, k.k->p); ++ i = walk_inode(trans, inode, k.k->p, k.k->type == KEY_TYPE_whiteout); + ret = PTR_ERR_OR_ZERO(i); + if (ret) + return ret; + -+ if (inode->first_this_inode) ++ if (inode->first_this_inode && inode->inodes.nr) + *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode); + inode->first_this_inode = false; + @@ -60231,10 +60244,10 @@ index 000000000..90c87b508 +#endif /* _BCACHEFS_FSCK_H */ diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c new file mode 100644 -index 000000000..fa435d865 +index 000000000..8834809d4 --- /dev/null +++ b/fs/bcachefs/inode.c -@@ -0,0 +1,872 @@ +@@ -0,0 +1,925 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -60756,6 +60769,59 @@ index 000000000..fa435d865 + __bch2_inode_unpacked_to_text(out, &inode); +} + ++int bch2_trans_mark_inode(struct btree_trans *trans, ++ enum btree_id btree_id, unsigned level, ++ struct bkey_s_c old, ++ struct bkey_i *new, ++ unsigned flags) ++{ ++ int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k); ++ ++ if (nr) { ++ int ret = bch2_replicas_deltas_realloc(trans, 0); ++ struct replicas_delta_list *d = trans->fs_usage_deltas; ++ ++ if (ret) ++ return ret; ++ ++ d->nr_inodes += nr; ++ } ++ ++ return 0; ++} ++ ++int bch2_mark_inode(struct btree_trans *trans, ++ enum btree_id btree_id, unsigned level, ++ struct bkey_s_c old, struct bkey_s_c new, ++ unsigned flags) ++{ ++ struct bch_fs *c = trans->c; ++ struct bch_fs_usage *fs_usage; ++ u64 journal_seq = trans->journal_res.seq; ++ ++ if (flags & BTREE_TRIGGER_INSERT) { ++ struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v; ++ ++ BUG_ON(!journal_seq); ++ BUG_ON(new.k->type != KEY_TYPE_inode_v3); ++ ++ v->bi_journal_seq = cpu_to_le64(journal_seq); ++ } ++ ++ if (flags & BTREE_TRIGGER_GC) { ++ percpu_down_read(&c->mark_lock); ++ preempt_disable(); ++ ++ fs_usage = fs_usage_ptr(c, journal_seq, flags & BTREE_TRIGGER_GC); ++ fs_usage->nr_inodes += bkey_is_inode(new.k); ++ fs_usage->nr_inodes -= bkey_is_inode(old.k); ++ ++ preempt_enable(); ++ percpu_up_read(&c->mark_lock); ++ } ++ return 0; ++} ++ +int bch2_inode_generation_invalid(const struct bch_fs *c, struct bkey_s_c k, + enum bkey_invalid_flags flags, + struct printbuf *err) @@ -61109,10 +61175,10 @@ index 
000000000..fa435d865 +} diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h new file mode 100644 -index 000000000..8f9be5e58 +index 000000000..7809d1b6d --- /dev/null +++ b/fs/bcachefs/inode.h -@@ -0,0 +1,196 @@ +@@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_INODE_H +#define _BCACHEFS_INODE_H @@ -61131,6 +61197,11 @@ index 000000000..8f9be5e58 + enum bkey_invalid_flags, struct printbuf *); +void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + ++int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned, ++ struct bkey_s_c, struct bkey_i *, unsigned); ++int bch2_mark_inode(struct btree_trans *, enum btree_id, unsigned, ++ struct bkey_s_c, struct bkey_s_c, unsigned); ++ +#define bch2_bkey_ops_inode ((struct bkey_ops) { \ + .key_invalid = bch2_inode_invalid, \ + .val_to_text = bch2_inode_to_text, \ @@ -61311,10 +61382,10 @@ index 000000000..8f9be5e58 +#endif /* _BCACHEFS_INODE_H */ diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c new file mode 100644 -index 000000000..8604df80a +index 000000000..5bacc6a9d --- /dev/null +++ b/fs/bcachefs/io.c -@@ -0,0 +1,3056 @@ +@@ -0,0 +1,3059 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Some low level IO code, and hacks for various block layer limitations @@ -62399,7 +62470,8 @@ index 000000000..8604df80a + op->incompressible)) { + if (!crc_is_compressed(op->crc) && + op->csum_type != op->crc.csum_type && -+ bch2_write_rechecksum(c, op, op->csum_type)) ++ bch2_write_rechecksum(c, op, op->csum_type) && ++ !c->opts.no_data_io) + return PREP_ENCODED_CHECKSUM_ERR; + + return PREP_ENCODED_DO_WRITE; @@ -62419,7 +62491,7 @@ index 000000000..8604df80a + csum = bch2_checksum_bio(c, op->crc.csum_type, + extent_nonce(op->version, op->crc), + bio); -+ if (bch2_crc_cmp(op->crc.csum, csum)) ++ if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io) + return PREP_ENCODED_CHECKSUM_ERR; + + if (bch2_bio_uncompress_inplace(c, bio, &op->crc)) @@ -62437,7 +62509,8 @@ index 000000000..8604df80a + */ + if ((op->crc.live_size != op->crc.uncompressed_size || + op->crc.csum_type != op->csum_type) && -+ bch2_write_rechecksum(c, op, op->csum_type)) ++ bch2_write_rechecksum(c, op, op->csum_type) && ++ !c->opts.no_data_io) + return PREP_ENCODED_CHECKSUM_ERR; + + /* @@ -63733,7 +63806,8 @@ index 000000000..8604df80a + if (ret) + goto decrypt_err; + -+ if (bch2_bio_uncompress(c, src, dst, dst_iter, crc)) ++ if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) && ++ !c->opts.no_data_io) + goto decompression_err; + } else { + /* don't need to decrypt the entire bio: */ @@ -70760,10 +70834,10 @@ index 000000000..4b3ff7d8a +#endif /* _BCACHEFS_KEYLIST_TYPES_H */ diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c new file mode 100644 -index 000000000..07d192953 +index 000000000..3e8b8f2f3 --- /dev/null +++ b/fs/bcachefs/lru.c -@@ -0,0 +1,178 @@ +@@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -70807,28 +70881,12 @@ index 000000000..07d192953 +} + +static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id, -+ u64 dev_bucket, u64 time, unsigned key_type) ++ u64 dev_bucket, u64 time, bool set) +{ -+ struct bkey_i *k; -+ int ret = 0; -+ -+ if (!time) -+ return 0; -+ -+ k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k)); -+ ret = PTR_ERR_OR_ZERO(k); -+ if (unlikely(ret)) -+ return ret; -+ -+ bkey_init(&k->k); -+ k->k.type = key_type; -+ k->k.p = lru_pos(lru_id, dev_bucket, time); -+ -+ EBUG_ON(lru_pos_id(k->k.p) != lru_id); -+ EBUG_ON(lru_pos_time(k->k.p) != time); -+ 
EBUG_ON(k->k.p.offset != dev_bucket); -+ -+ return bch2_trans_update_buffered(trans, BTREE_ID_lru, k); ++ return time ++ ? bch2_btree_bit_mod(trans, BTREE_ID_lru, ++ lru_pos(lru_id, dev_bucket, time), set) ++ : 0; +} + +int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time) @@ -70944,10 +71002,10 @@ index 000000000..07d192953 +} diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h new file mode 100644 -index 000000000..7a3be20a8 +index 000000000..be66bf9ad --- /dev/null +++ b/fs/bcachefs/lru.h -@@ -0,0 +1,64 @@ +@@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_LRU_H +#define _BCACHEFS_LRU_H @@ -70955,13 +71013,6 @@ index 000000000..7a3be20a8 +#define LRU_TIME_BITS 48 +#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1) + -+static inline struct bpos lru_pos(u16 lru_id, u64 dev_bucket, u64 time) -+{ -+ EBUG_ON(time > LRU_TIME_MAX); -+ -+ return POS(((u64) lru_id << LRU_TIME_BITS)|time, dev_bucket); -+} -+ +static inline u64 lru_pos_id(struct bpos pos) +{ + return pos.inode >> LRU_TIME_BITS; @@ -70972,6 +71023,18 @@ index 000000000..7a3be20a8 + return pos.inode & ~(~0ULL << LRU_TIME_BITS); +} + ++static inline struct bpos lru_pos(u16 lru_id, u64 dev_bucket, u64 time) ++{ ++ struct bpos pos = POS(((u64) lru_id << LRU_TIME_BITS)|time, dev_bucket); ++ ++ EBUG_ON(time > LRU_TIME_MAX); ++ EBUG_ON(lru_pos_id(pos) != lru_id); ++ EBUG_ON(lru_pos_time(pos) != time); ++ EBUG_ON(pos.offset != dev_bucket); ++ ++ return pos; ++} ++ +#define BCH_LRU_TYPES() \ + x(read) \ + x(fragmentation) @@ -76618,10 +76681,10 @@ index 000000000..7462a92e9 +#endif /* _BCACHEFS_REBALANCE_TYPES_H */ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c new file mode 100644 -index 000000000..63b385d88 +index 000000000..dcd4f9f41 --- /dev/null +++ b/fs/bcachefs/recovery.c -@@ -0,0 +1,1669 @@ +@@ -0,0 +1,1670 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -77886,20 +77949,16 @@ index 000000000..63b385d88 +static int bch2_run_recovery_passes(struct bch_fs *c) +{ + int ret = 0; -+again: ++ + while (c->curr_recovery_pass < ARRAY_SIZE(recovery_passes)) { + ret = bch2_run_recovery_pass(c, c->curr_recovery_pass); ++ if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) ++ continue; + if (ret) + break; + c->curr_recovery_pass++; + } + -+ if (bch2_err_matches(ret, BCH_ERR_need_snapshot_cleanup)) { -+ set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); -+ c->curr_recovery_pass = BCH_RECOVERY_PASS_delete_dead_snapshots; -+ goto again; -+ } -+ + return ret; +} + @@ -78077,6 +78136,11 @@ index 000000000..63b385d88 + if (ret) + goto err; + ++ if (c->opts.fsck && ++ (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) || ++ BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb))) ++ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); ++ + ret = bch2_run_recovery_passes(c); + if (ret) + goto err; @@ -80748,10 +80812,10 @@ index 000000000..ae21a8cca +#endif /* _BCACHEFS_STR_HASH_H */ diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c new file mode 100644 -index 000000000..7e6b416d3 +index 000000000..811a6f428 --- /dev/null +++ b/fs/bcachefs/subvolume.c -@@ -0,0 +1,1734 @@ +@@ -0,0 +1,1749 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -80779,7 +80843,7 @@ index 000000000..7e6b416d3 + return s->parent; +} + -+bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) ++bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) +{ + struct snapshot_table *t; + bool ret; @@ -80948,6 +81012,13 @@ index 
000000000..7e6b416d3 + le32_to_cpu(s.v->children[1]), + le32_to_cpu(s.v->subvol), + le32_to_cpu(s.v->tree)); ++ ++ if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, depth)) ++ prt_printf(out, " depth %u skiplist %u %u %u", ++ le32_to_cpu(s.v->depth), ++ le32_to_cpu(s.v->skip[0]), ++ le32_to_cpu(s.v->skip[1]), ++ le32_to_cpu(s.v->skip[2])); +} + +int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k, @@ -81038,21 +81109,31 @@ index 000000000..7e6b416d3 + u32 parent = id; + + t->parent = le32_to_cpu(s.v->parent); -+ t->skip[0] = le32_to_cpu(s.v->skip[0]); -+ t->skip[1] = le32_to_cpu(s.v->skip[1]); -+ t->skip[2] = le32_to_cpu(s.v->skip[2]); -+ t->depth = le32_to_cpu(s.v->depth); + t->children[0] = le32_to_cpu(s.v->children[0]); + t->children[1] = le32_to_cpu(s.v->children[1]); + t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0; + t->tree = le32_to_cpu(s.v->tree); + ++ if (bkey_val_bytes(s.k) > offsetof(struct bch_snapshot, depth)) { ++ t->depth = le32_to_cpu(s.v->depth); ++ t->skip[0] = le32_to_cpu(s.v->skip[0]); ++ t->skip[1] = le32_to_cpu(s.v->skip[1]); ++ t->skip[2] = le32_to_cpu(s.v->skip[2]); ++ } else { ++ t->depth = 0; ++ t->skip[0] = 0; ++ t->skip[1] = 0; ++ t->skip[2] = 0; ++ } ++ + while ((parent = bch2_snapshot_parent_early(c, parent)) && + parent - id - 1 < IS_ANCESTOR_BITMAP) + __set_bit(parent - id - 1, t->is_ancestor); + -+ if (BCH_SNAPSHOT_DELETED(s.v)) ++ if (BCH_SNAPSHOT_DELETED(s.v)) { + set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); ++ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_delete_dead_snapshots); ++ } + } else { + memset(t, 0, sizeof(*t)); + } @@ -81538,10 +81619,10 @@ index 000000000..7e6b416d3 + + real_depth = bch2_snapshot_depth(c, parent_id); + -+ if (fsck_err_on(le32_to_cpu(s.depth) != real_depth, c, -+ "snapshot with incorrect depth fields, should be %u:\n %s", -+ real_depth, -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ++ if (le32_to_cpu(s.depth) != real_depth && ++ (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || ++ fsck_err(c, "snapshot with incorrect depth field, should be %u:\n %s", ++ real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { + u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); + ret = PTR_ERR_OR_ZERO(u); + if (ret) @@ -82017,9 +82098,6 @@ index 000000000..7e6b416d3 + u32 i, id; + int ret = 0; + -+ if (!test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags)) -+ return 0; -+ + if (!test_bit(BCH_FS_STARTED, &c->flags)) { + ret = bch2_fs_read_write_early(c); + if (ret) { @@ -82114,7 +82192,8 @@ index 000000000..7e6b416d3 +{ + struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work); + -+ bch2_delete_dead_snapshots(c); ++ if (test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags)) ++ bch2_delete_dead_snapshots(c); + bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); +} + @@ -82488,10 +82567,10 @@ index 000000000..7e6b416d3 +} diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h new file mode 100644 -index 000000000..12a08a34e +index 000000000..6905e91a9 --- /dev/null +++ b/fs/bcachefs/subvolume.h -@@ -0,0 +1,251 @@ +@@ -0,0 +1,258 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SUBVOLUME_H +#define _BCACHEFS_SUBVOLUME_H @@ -82660,7 +82739,14 @@ index 000000000..12a08a34e + return 0; +} + -+bool bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32); ++bool __bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32); ++ ++static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 
ancestor) ++{ ++ return id == ancestor ++ ? true ++ : __bch2_snapshot_is_ancestor(c, id, ancestor); ++} + +static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id) +{ @@ -82782,10 +82868,10 @@ index 000000000..86833445a +#endif /* _BCACHEFS_SUBVOLUME_TYPES_H */ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c new file mode 100644 -index 000000000..e9ce3f332 +index 000000000..d2d3eba4d --- /dev/null +++ b/fs/bcachefs/super-io.c -@@ -0,0 +1,1711 @@ +@@ -0,0 +1,1714 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -83206,6 +83292,9 @@ index 000000000..e9ce3f332 + SET_BCH_SB_JOURNAL_FLUSH_DELAY(sb, 1000); + if (!BCH_SB_JOURNAL_RECLAIM_DELAY(sb)) + SET_BCH_SB_JOURNAL_RECLAIM_DELAY(sb, 1000); ++ ++ if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb)) ++ SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version)); + } + + for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) { @@ -83280,7 +83369,7 @@ index 000000000..e9ce3f332 + c->sb.user_uuid = src->user_uuid; + c->sb.version = le16_to_cpu(src->version); + c->sb.version_min = le16_to_cpu(src->version_min); -+ c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(src) ?: c->sb.version; ++ c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(src); + c->sb.nr_devices = src->nr_devices; + c->sb.clean = BCH_SB_CLEAN(src); + c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src); @@ -84647,10 +84736,10 @@ index 000000000..904adea6a +#endif /* _BCACHEFS_SUPER_IO_H */ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c new file mode 100644 -index 000000000..9f1047a76 +index 000000000..eee56969c --- /dev/null +++ b/fs/bcachefs/super.c -@@ -0,0 +1,2006 @@ +@@ -0,0 +1,2007 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * bcachefs setup/teardown code, and some metadata io - read a superblock and @@ -85538,7 +85627,7 @@ index 000000000..9f1047a76 + struct printbuf p = PRINTBUF; + bool first = true; + -+ prt_str(&p, "mounted version "); ++ prt_str(&p, "mounting version "); + bch2_version_to_text(&p, c->sb.version); + + if (c->opts.read_only) { @@ -85574,6 +85663,8 @@ index 000000000..9f1047a76 + unsigned i; + int ret; + ++ print_mount_opts(c); ++ + down_write(&c->state_lock); + + BUG_ON(test_bit(BCH_FS_STARTED, &c->flags)); @@ -85627,7 +85718,6 @@ index 000000000..9f1047a76 + goto err; + } + -+ print_mount_opts(c); + ret = 0; +out: + up_write(&c->state_lock); @@ -93383,7 +93473,7 @@ index 52e6d5fda..dbdafa261 100644 } EXPORT_SYMBOL(d_tmpfile); diff --git a/fs/inode.c b/fs/inode.c -index 577799b78..7a32d6aa4 100644 +index b9d498032..6bb7646cb 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -57,8 +57,23 @@ @@ -93569,7 +93659,7 @@ index 577799b78..7a32d6aa4 100644 goto repeat; } if (unlikely(inode->i_state & I_CREATING)) { -@@ -1155,25 +1207,25 @@ EXPORT_SYMBOL(unlock_two_nondirectories); +@@ -1197,25 +1249,25 @@ EXPORT_SYMBOL(unlock_two_nondirectories); * return it locked, hashed, and with the I_NEW flag set. The file system gets * to fill it in before unlocking it via unlock_new_inode(). 
* @@ -93601,7 +93691,7 @@ index 577799b78..7a32d6aa4 100644 if (IS_ERR(old)) return NULL; wait_on_inode(old); -@@ -1195,7 +1247,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval, +@@ -1237,7 +1289,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval, */ spin_lock(&inode->i_lock); inode->i_state |= I_NEW; @@ -93610,7 +93700,7 @@ index 577799b78..7a32d6aa4 100644 spin_unlock(&inode->i_lock); /* -@@ -1205,7 +1257,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval, +@@ -1247,7 +1299,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval, if (list_empty(&inode->i_sb_list)) inode_sb_list_add(inode); unlock: @@ -93619,7 +93709,7 @@ index 577799b78..7a32d6aa4 100644 return inode; } -@@ -1266,12 +1318,12 @@ EXPORT_SYMBOL(iget5_locked); +@@ -1308,12 +1360,12 @@ EXPORT_SYMBOL(iget5_locked); */ struct inode *iget_locked(struct super_block *sb, unsigned long ino) { @@ -93636,7 +93726,7 @@ index 577799b78..7a32d6aa4 100644 if (inode) { if (IS_ERR(inode)) return NULL; -@@ -1287,17 +1339,17 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) +@@ -1329,17 +1381,17 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) if (inode) { struct inode *old; @@ -93658,7 +93748,7 @@ index 577799b78..7a32d6aa4 100644 /* Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents -@@ -1310,7 +1362,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) +@@ -1352,7 +1404,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) * us. Use the old inode instead of the one we just * allocated. */ @@ -93667,7 +93757,7 @@ index 577799b78..7a32d6aa4 100644 destroy_inode(inode); if (IS_ERR(old)) return NULL; -@@ -1334,10 +1386,11 @@ EXPORT_SYMBOL(iget_locked); +@@ -1376,10 +1428,11 @@ EXPORT_SYMBOL(iget_locked); */ static int test_inode_iunique(struct super_block *sb, unsigned long ino) { @@ -93681,7 +93771,7 @@ index 577799b78..7a32d6aa4 100644 if (inode->i_ino == ino && inode->i_sb == sb) return 0; } -@@ -1421,12 +1474,12 @@ EXPORT_SYMBOL(igrab); +@@ -1463,12 +1516,12 @@ EXPORT_SYMBOL(igrab); struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { @@ -93698,7 +93788,7 @@ index 577799b78..7a32d6aa4 100644 return IS_ERR(inode) ? 
NULL : inode; } -@@ -1476,12 +1529,12 @@ EXPORT_SYMBOL(ilookup5); +@@ -1518,12 +1571,12 @@ EXPORT_SYMBOL(ilookup5); */ struct inode *ilookup(struct super_block *sb, unsigned long ino) { @@ -93715,7 +93805,7 @@ index 577799b78..7a32d6aa4 100644 if (inode) { if (IS_ERR(inode)) -@@ -1525,12 +1578,13 @@ struct inode *find_inode_nowait(struct super_block *sb, +@@ -1567,12 +1620,13 @@ struct inode *find_inode_nowait(struct super_block *sb, void *), void *data) { @@ -93732,7 +93822,7 @@ index 577799b78..7a32d6aa4 100644 if (inode->i_sb != sb) continue; mval = match(inode, hashval, data); -@@ -1541,7 +1595,7 @@ struct inode *find_inode_nowait(struct super_block *sb, +@@ -1583,7 +1637,7 @@ struct inode *find_inode_nowait(struct super_block *sb, goto out; } out: @@ -93741,7 +93831,7 @@ index 577799b78..7a32d6aa4 100644 return ret_inode; } EXPORT_SYMBOL(find_inode_nowait); -@@ -1570,13 +1624,14 @@ EXPORT_SYMBOL(find_inode_nowait); +@@ -1612,13 +1666,14 @@ EXPORT_SYMBOL(find_inode_nowait); struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { @@ -93758,7 +93848,7 @@ index 577799b78..7a32d6aa4 100644 if (inode->i_sb == sb && !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) && test(inode, data)) -@@ -1608,13 +1663,14 @@ EXPORT_SYMBOL(find_inode_rcu); +@@ -1650,13 +1705,14 @@ EXPORT_SYMBOL(find_inode_rcu); struct inode *find_inode_by_ino_rcu(struct super_block *sb, unsigned long ino) { @@ -93775,7 +93865,7 @@ index 577799b78..7a32d6aa4 100644 if (inode->i_ino == ino && inode->i_sb == sb && !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE))) -@@ -1628,39 +1684,42 @@ int insert_inode_locked(struct inode *inode) +@@ -1670,39 +1726,42 @@ int insert_inode_locked(struct inode *inode) { struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; @@ -93831,7 +93921,7 @@ index 577799b78..7a32d6aa4 100644 wait_on_inode(old); if (unlikely(!inode_unhashed(old))) { iput(old); -@@ -2185,17 +2244,18 @@ EXPORT_SYMBOL(inode_needs_sync); +@@ -2227,17 +2286,18 @@ EXPORT_SYMBOL(inode_needs_sync); * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list * will DTRT. 
*/ @@ -93853,7 +93943,7 @@ index 577799b78..7a32d6aa4 100644 } static __initdata unsigned long ihash_entries; -@@ -2221,7 +2281,7 @@ void __init inode_init_early(void) +@@ -2263,7 +2323,7 @@ void __init inode_init_early(void) inode_hashtable = alloc_large_system_hash("Inode-cache", @@ -93862,7 +93952,7 @@ index 577799b78..7a32d6aa4 100644 ihash_entries, 14, HASH_EARLY | HASH_ZERO, -@@ -2247,7 +2307,7 @@ void __init inode_init(void) +@@ -2289,7 +2349,7 @@ void __init inode_init(void) inode_hashtable = alloc_large_system_hash("Inode-cache", @@ -94338,10 +94428,10 @@ index b3e7529ff..f2620f8d1 100644 static inline void bio_release_pages(struct bio *bio, bool mark_dirty) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index c0ffe203a..7a32dc98e 100644 +index 67e942d77..10d30c0bc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h -@@ -854,6 +854,7 @@ extern const char *blk_op_str(enum req_op op); +@@ -855,6 +855,7 @@ extern const char *blk_op_str(enum req_op op); int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); @@ -95854,7 +95944,7 @@ index 3c6c4c836..88b45fb4f 100644 init_page_count(page); __free_page(page); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h -index 306a3d1a0..e79303e1e 100644 +index de10fc797..888b87b3c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -194,7 +194,7 @@ struct page { @@ -100543,7 +100633,7 @@ index c8bcdea15..09dd56a94 100644 * This is a migrate-callback that "frees" freepages back to the isolated * freelist. All pages on the freelist are from the same zone, so there is no diff --git a/mm/filemap.c b/mm/filemap.c -index 83dda76d1..e5c81c0cf 100644 +index 8abce63b2..e38eec523 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -958,7 +958,7 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio, diff --git a/scripts/build.sh b/scripts/build.sh index 9a08f96..c4c2314 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -2,4 +2,4 @@ echo "Pika Kernel - Building" -make -j`nproc` bindeb-pkg LOCALVERSION=-pikaos KDEB_PKGVERSION=$(make kernelversion)-24 +make -j`nproc` bindeb-pkg LOCALVERSION=-pikaos KDEB_PKGVERSION=$(make kernelversion)-1 diff --git a/scripts/source.sh b/scripts/source.sh index 1547b7d..18017e8 100755 --- a/scripts/source.sh +++ b/scripts/source.sh @@ -2,7 +2,7 @@ echo "Pika Kernel - Getting source" -wget -nv https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.4.3.tar.gz -tar -xf ./linux-6.4.3.tar.gz +wget -nv https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.4.6.tar.gz +tar -xf ./linux-6.4.6.tar.gz -cd linux-6.4.3 +cd linux-6.4.6