diff --git a/patches/0001-cachyos-base-all.patch b/patches/0001-cachyos-base-all.patch index 8abb8ad..39926bf 100644 --- a/patches/0001-cachyos-base-all.patch +++ b/patches/0001-cachyos-base-all.patch @@ -1,6 +1,6 @@ -From 9829288846e128cf9d409facdfb6df3f17bf7693 Mon Sep 17 00:00:00 2001 +From 887efc68ad8a5ea19f8df4bc544c6d09c38e1ec5 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 18 Nov 2024 13:21:37 +0100 +Date: Thu, 21 Nov 2024 21:59:13 +0100 Subject: [PATCH 01/12] amd-cache-optimizer Signed-off-by: Peter Jung @@ -272,9 +272,9 @@ index 000000000000..0f6d3c54d879 -- 2.47.0 -From 64f207fa646bd3a493a4e8930b5a52ed40288a54 Mon Sep 17 00:00:00 2001 +From a04984088594eef7e446ab1b93b116f638e9bc37 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 18 Nov 2024 13:21:50 +0100 +Date: Thu, 21 Nov 2024 21:59:23 +0100 Subject: [PATCH 02/12] amd-pstate Signed-off-by: Peter Jung @@ -288,12 +288,12 @@ Signed-off-by: Peter Jung arch/x86/kernel/cpu/scattered.c | 3 +- arch/x86/kernel/cpu/topology_amd.c | 3 + arch/x86/kernel/cpu/topology_common.c | 34 ++++ - arch/x86/kernel/smpboot.c | 5 +- + arch/x86/kernel/smpboot.c | 14 +- arch/x86/mm/init.c | 23 ++- drivers/cpufreq/amd-pstate-ut.c | 6 +- drivers/cpufreq/amd-pstate.c | 231 ++++++++++------------- tools/arch/x86/include/asm/cpufeatures.h | 2 +- - 14 files changed, 214 insertions(+), 153 deletions(-) + 14 files changed, 224 insertions(+), 152 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 913fd3a7bac6..a7c93191b7c6 100644 @@ -536,18 +536,34 @@ index 9a6069e7133c..8277c64f88db 100644 case X86_VENDOR_HYGON: if (IS_ENABLED(CONFIG_CPU_SUP_HYGON)) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c -index 766f092dab80..b5a8f0891135 100644 +index 766f092dab80..419e7ae09639 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c -@@ -497,8 +497,9 @@ static int x86_cluster_flags(void) +@@ -62,6 +62,8 @@ + #include + #include ++#include ++ + #include + #include + #include +@@ -498,7 +500,17 @@ static int x86_cluster_flags(void) static int x86_die_flags(void) { -- if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) - return x86_sched_itmt_flags(); -+ if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU) || -+ cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) + return x86_sched_itmt_flags(); ++ ++ switch (boot_cpu_data.x86_vendor) { ++ case X86_VENDOR_AMD: ++ case X86_VENDOR_HYGON: ++ bool prefcore = false; ++ ++ amd_detect_prefcore(&prefcore); ++ if (prefcore || cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) ++ return x86_sched_itmt_flags(); ++ }; return 0; } @@ -1118,9 +1134,9 @@ index dd4682857c12..23698d0f4bb4 100644 -- 2.47.0 -From 2a829d44283c52f7d5ae3026bb693b9496b99b54 Mon Sep 17 00:00:00 2001 +From 39068aed1309d7e263f7f3a04334dd367c2885df Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 18 Nov 2024 13:22:13 +0100 +Date: Thu, 21 Nov 2024 21:59:35 +0100 Subject: [PATCH 03/12] autofdo Signed-off-by: Peter Jung @@ -1864,9 +1880,9 @@ index 3d27983dc908..6f64d611faea 100644 -- 2.47.0 -From 26f905b0fe8e4c5aa80548f4b568fbc578a583d1 Mon Sep 17 00:00:00 2001 +From e068cbe064ee528baae88081fade4839076b1681 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 18 Nov 2024 13:22:27 +0100 +Date: Thu, 21 Nov 2024 21:59:48 +0100 Subject: [PATCH 04/12] bbr3 Signed-off-by: Peter Jung @@ -5250,14 +5266,15 @@ index 79064580c8c0..697270ce1ea6 100644 -- 2.47.0 -From f4ec451a5b3565cd1d9a64cf7775d47c207b2200 Mon Sep 17 00:00:00 2001 +From f3a6d6d71bfb323d55631f442c8ef5a33448bf92 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 18 Nov 2024 13:22:41 +0100 +Date: Thu, 21 Nov 2024 21:59:58 +0100 Subject: [PATCH 05/12] cachy Signed-off-by: Peter Jung --- .../admin-guide/kernel-parameters.txt | 12 + + Documentation/gpu/amdgpu/thermal.rst | 12 + Makefile | 8 + arch/x86/Kconfig.cpu | 359 +- arch/x86/Makefile | 87 +- @@ -5275,8 +5292,13 @@ Signed-off-by: Peter Jung .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 +- .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 6 +- .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 6 +- - drivers/gpu/drm/amd/pm/amdgpu_pm.c | 3 + - drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 14 +- + .../gpu/drm/amd/include/kgd_pp_interface.h | 4 + + drivers/gpu/drm/amd/pm/amdgpu_pm.c | 130 + + drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 4 + + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 18 +- + drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 2 + + .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 108 +- + .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 108 +- drivers/input/evdev.c | 19 +- drivers/md/dm-crypt.c | 5 + drivers/media/v4l2-core/Kconfig | 5 + @@ -5287,6 +5309,11 @@ Signed-off-by: Peter Jung drivers/pci/controller/Makefile | 6 + drivers/pci/controller/intel-nvme-remap.c | 462 +++ drivers/pci/quirks.c | 101 + + drivers/scsi/Kconfig | 2 + + drivers/scsi/Makefile | 1 + + drivers/scsi/vhba/Kconfig | 9 + + drivers/scsi/vhba/Makefile | 4 + + drivers/scsi/vhba/vhba.c | 1130 ++++++ include/linux/pagemap.h | 2 +- include/linux/user_namespace.h | 4 + include/linux/wait.h | 2 + @@ -5309,11 +5336,14 @@ Signed-off-by: Peter Jung mm/vmpressure.c | 4 + mm/vmscan.c | 8 + net/ipv4/inet_connection_sock.c | 2 +- - 52 files changed, 5078 insertions(+), 65 deletions(-) + 63 files changed, 6591 insertions(+), 67 deletions(-) create mode 100644 drivers/media/v4l2-core/v4l2loopback.c create mode 100644 drivers/media/v4l2-core/v4l2loopback.h create mode 100644 drivers/media/v4l2-core/v4l2loopback_formats.h create mode 100644 drivers/pci/controller/intel-nvme-remap.c + create mode 100644 drivers/scsi/vhba/Kconfig + create mode 100644 drivers/scsi/vhba/Makefile + create mode 100644 drivers/scsi/vhba/vhba.c diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d401577b5a6a..e6ec15a89924 100644 @@ -5345,6 +5375,29 @@ index d401577b5a6a..e6ec15a89924 100644 noioapicquirk [APIC] Disable all boot interrupt quirks. Safety option to keep boot IRQs enabled. This should never be necessary. +diff --git a/Documentation/gpu/amdgpu/thermal.rst b/Documentation/gpu/amdgpu/thermal.rst +index 6d942b5c58f0..1768a106aab1 100644 +--- a/Documentation/gpu/amdgpu/thermal.rst ++++ b/Documentation/gpu/amdgpu/thermal.rst +@@ -100,6 +100,18 @@ fan_minimum_pwm + .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c + :doc: fan_minimum_pwm + ++fan_zero_rpm_enable ++---------------------- ++ ++.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c ++ :doc: fan_zero_rpm_enable ++ ++fan_zero_rpm_stop_temperature ++----------------------------- ++ ++.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c ++ :doc: fan_zero_rpm_stop_temperature ++ + GFXOFF + ====== + diff --git a/Makefile b/Makefile index 5ccec99bf086..5c6151566fd3 100644 --- a/Makefile @@ -6310,8 +6363,30 @@ index 495e3cd70426..704a48209657 100644 dm_atomic_plane_attach_color_mgmt_properties(dm, plane); #endif /* Create (reset) the plane state */ +diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h +index 19a48d98830a..2fca138419d4 100644 +--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h ++++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h +@@ -119,6 +119,8 @@ enum pp_clock_type { + OD_ACOUSTIC_TARGET, + OD_FAN_TARGET_TEMPERATURE, + OD_FAN_MINIMUM_PWM, ++ OD_FAN_ZERO_RPM_ENABLE, ++ OD_FAN_ZERO_RPM_STOP_TEMP, + }; + + enum amd_pp_sensors { +@@ -199,6 +201,8 @@ enum PP_OD_DPM_TABLE_COMMAND { + PP_OD_EDIT_ACOUSTIC_TARGET, + PP_OD_EDIT_FAN_TARGET_TEMPERATURE, + PP_OD_EDIT_FAN_MINIMUM_PWM, ++ PP_OD_EDIT_FAN_ZERO_RPM_ENABLE, ++ PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP, + }; + + struct pp_states_info { diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c -index d5d6ab484e5a..dccba7bcdf97 100644 +index d5d6ab484e5a..d4a6cf6e98e8 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -3272,6 +3272,9 @@ static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev, @@ -6324,8 +6399,164 @@ index d5d6ab484e5a..dccba7bcdf97 100644 return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_MIN); } +@@ -4213,6 +4216,117 @@ static umode_t fan_minimum_pwm_visible(struct amdgpu_device *adev) + return umode; + } + ++/** ++ * DOC: fan_zero_rpm_enable ++ * ++ * The amdgpu driver provides a sysfs API for checking and adjusting the ++ * zero RPM feature. ++ * ++ * Reading back the file shows you the current setting and the permitted ++ * ranges if changable. ++ * ++ * Writing an integer to the file, change the setting accordingly. ++ * ++ * When you have finished the editing, write "c" (commit) to the file to commit ++ * your changes. ++ * ++ * If you want to reset to the default value, write "r" (reset) to the file to ++ * reset them. ++ */ ++static ssize_t fan_zero_rpm_enable_show(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ char *buf) ++{ ++ struct od_kobj *container = container_of(kobj, struct od_kobj, kobj); ++ struct amdgpu_device *adev = (struct amdgpu_device *)container->priv; ++ ++ return (ssize_t)amdgpu_retrieve_od_settings(adev, OD_FAN_ZERO_RPM_ENABLE, buf); ++} ++ ++static ssize_t fan_zero_rpm_enable_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, ++ size_t count) ++{ ++ struct od_kobj *container = container_of(kobj, struct od_kobj, kobj); ++ struct amdgpu_device *adev = (struct amdgpu_device *)container->priv; ++ ++ return (ssize_t)amdgpu_distribute_custom_od_settings(adev, ++ PP_OD_EDIT_FAN_ZERO_RPM_ENABLE, ++ buf, ++ count); ++} ++ ++static umode_t fan_zero_rpm_enable_visible(struct amdgpu_device *adev) ++{ ++ umode_t umode = 0000; ++ ++ if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE) ++ umode |= S_IRUSR | S_IRGRP | S_IROTH; ++ ++ if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET) ++ umode |= S_IWUSR; ++ ++ return umode; ++} ++ ++/** ++ * DOC: fan_zero_rpm_stop_temperature ++ * ++ * The amdgpu driver provides a sysfs API for checking and adjusting the ++ * zero RPM stop temperature feature. ++ * ++ * Reading back the file shows you the current setting and the permitted ++ * ranges if changable. ++ * ++ * Writing an integer to the file, change the setting accordingly. ++ * ++ * When you have finished the editing, write "c" (commit) to the file to commit ++ * your changes. ++ * ++ * If you want to reset to the default value, write "r" (reset) to the file to ++ * reset them. ++ * ++ * This setting works only if the Zero RPM setting is enabled. It adjusts the ++ * temperature below which the fan can stop. ++ */ ++static ssize_t fan_zero_rpm_stop_temp_show(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ char *buf) ++{ ++ struct od_kobj *container = container_of(kobj, struct od_kobj, kobj); ++ struct amdgpu_device *adev = (struct amdgpu_device *)container->priv; ++ ++ return (ssize_t)amdgpu_retrieve_od_settings(adev, OD_FAN_ZERO_RPM_STOP_TEMP, buf); ++} ++ ++static ssize_t fan_zero_rpm_stop_temp_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, ++ size_t count) ++{ ++ struct od_kobj *container = container_of(kobj, struct od_kobj, kobj); ++ struct amdgpu_device *adev = (struct amdgpu_device *)container->priv; ++ ++ return (ssize_t)amdgpu_distribute_custom_od_settings(adev, ++ PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP, ++ buf, ++ count); ++} ++ ++static umode_t fan_zero_rpm_stop_temp_visible(struct amdgpu_device *adev) ++{ ++ umode_t umode = 0000; ++ ++ if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_RETRIEVE) ++ umode |= S_IRUSR | S_IRGRP | S_IROTH; ++ ++ if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_SET) ++ umode |= S_IWUSR; ++ ++ return umode; ++} ++ + static struct od_feature_set amdgpu_od_set = { + .containers = { + [0] = { +@@ -4258,6 +4372,22 @@ static struct od_feature_set amdgpu_od_set = { + .store = fan_minimum_pwm_store, + }, + }, ++ [5] = { ++ .name = "fan_zero_rpm_enable", ++ .ops = { ++ .is_visible = fan_zero_rpm_enable_visible, ++ .show = fan_zero_rpm_enable_show, ++ .store = fan_zero_rpm_enable_store, ++ }, ++ }, ++ [6] = { ++ .name = "fan_zero_rpm_stop_temperature", ++ .ops = { ++ .is_visible = fan_zero_rpm_stop_temp_visible, ++ .show = fan_zero_rpm_stop_temp_show, ++ .store = fan_zero_rpm_stop_temp_store, ++ }, ++ }, + }, + }, + }, +diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +index f5bf41f21c41..363af8990aa2 100644 +--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h ++++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +@@ -328,6 +328,10 @@ struct config_table_setting + #define OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET BIT(7) + #define OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE BIT(8) + #define OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET BIT(9) ++#define OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE BIT(10) ++#define OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET BIT(11) ++#define OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_RETRIEVE BIT(12) ++#define OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_SET BIT(13) + + struct amdgpu_pm { + struct mutex mutex; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c -index 80e60ea2d11e..51dea35848f6 100644 +index 80e60ea2d11e..44f0b159d232 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2775,7 +2775,10 @@ int smu_get_power_limit(void *handle, @@ -6356,6 +6587,352 @@ index 80e60ea2d11e..51dea35848f6 100644 dev_err(smu->adev->dev, "New power limit (%d) is out of range [%d,%d]\n", limit, smu->min_power_limit, smu->max_power_limit); +@@ -2878,6 +2888,10 @@ static enum smu_clk_type smu_convert_to_smuclk(enum pp_clock_type type) + clk_type = SMU_OD_FAN_TARGET_TEMPERATURE; break; + case OD_FAN_MINIMUM_PWM: + clk_type = SMU_OD_FAN_MINIMUM_PWM; break; ++ case OD_FAN_ZERO_RPM_ENABLE: ++ clk_type = SMU_OD_FAN_ZERO_RPM_ENABLE; break; ++ case OD_FAN_ZERO_RPM_STOP_TEMP: ++ clk_type = SMU_OD_FAN_ZERO_RPM_STOP_TEMP; break; + default: + clk_type = SMU_CLK_COUNT; break; + } +diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +index e71a721c12b9..a299dc4a8071 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h ++++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +@@ -313,6 +313,8 @@ enum smu_clk_type { + SMU_OD_ACOUSTIC_TARGET, + SMU_OD_FAN_TARGET_TEMPERATURE, + SMU_OD_FAN_MINIMUM_PWM, ++ SMU_OD_FAN_ZERO_RPM_ENABLE, ++ SMU_OD_FAN_ZERO_RPM_STOP_TEMP, + SMU_CLK_COUNT, + }; + +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +index d53e162dcd8d..24675a1d98db 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +@@ -107,6 +107,8 @@ + #define PP_OD_FEATURE_FAN_ACOUSTIC_TARGET 8 + #define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 + #define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 ++#define PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE 11 ++#define PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP 12 + + #define LINK_SPEED_MAX 3 + +@@ -1143,6 +1145,14 @@ static void smu_v13_0_0_get_od_setting_limits(struct smu_context *smu, + od_min_setting = overdrive_lowerlimits->FanMinimumPwm; + od_max_setting = overdrive_upperlimits->FanMinimumPwm; + break; ++ case PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE: ++ od_min_setting = overdrive_lowerlimits->FanZeroRpmEnable; ++ od_max_setting = overdrive_upperlimits->FanZeroRpmEnable; ++ break; ++ case PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP: ++ od_min_setting = overdrive_lowerlimits->FanZeroRpmStopTemp; ++ od_max_setting = overdrive_upperlimits->FanZeroRpmStopTemp; ++ break; + default: + od_min_setting = od_max_setting = INT_MAX; + break; +@@ -1463,6 +1473,42 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, + min_value, max_value); + break; + ++ case SMU_OD_FAN_ZERO_RPM_ENABLE: ++ if (!smu_v13_0_0_is_od_feature_supported(smu, ++ PP_OD_FEATURE_ZERO_FAN_BIT)) ++ break; ++ ++ size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_ENABLE:\n"); ++ size += sysfs_emit_at(buf, size, "%d\n", ++ (int)od_table->OverDriveTable.FanZeroRpmEnable); ++ ++ size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); ++ smu_v13_0_0_get_od_setting_limits(smu, ++ PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, ++ &min_value, ++ &max_value); ++ size += sysfs_emit_at(buf, size, "ZERO_RPM_ENABLE: %u %u\n", ++ min_value, max_value); ++ break; ++ ++ case SMU_OD_FAN_ZERO_RPM_STOP_TEMP: ++ if (!smu_v13_0_0_is_od_feature_supported(smu, ++ PP_OD_FEATURE_ZERO_FAN_BIT)) ++ break; ++ ++ size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_STOP_TEMPERATURE:\n"); ++ size += sysfs_emit_at(buf, size, "%d\n", ++ (int)od_table->OverDriveTable.FanZeroRpmStopTemp); ++ ++ size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); ++ smu_v13_0_0_get_od_setting_limits(smu, ++ PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP, ++ &min_value, ++ &max_value); ++ size += sysfs_emit_at(buf, size, "ZERO_RPM_STOP_TEMPERATURE: %u %u\n", ++ min_value, max_value); ++ break; ++ + case SMU_OD_RANGE: + if (!smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && + !smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && +@@ -1560,6 +1606,16 @@ static int smu_v13_0_0_od_restore_table_single(struct smu_context *smu, long inp + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; ++ case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: ++ od_table->OverDriveTable.FanZeroRpmEnable = ++ boot_overdrive_table->OverDriveTable.FanZeroRpmEnable; ++ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); ++ break; ++ case PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP: ++ od_table->OverDriveTable.FanZeroRpmStopTemp = ++ boot_overdrive_table->OverDriveTable.FanZeroRpmStopTemp; ++ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); ++ break; + default: + dev_info(adev->dev, "Invalid table index: %ld\n", input); + return -EINVAL; +@@ -1853,6 +1909,48 @@ static int smu_v13_0_0_od_edit_dpm_table(struct smu_context *smu, + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + ++ case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: ++ if (!smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { ++ dev_warn(adev->dev, "Zero RPM setting not supported!\n"); ++ return -ENOTSUPP; ++ } ++ ++ smu_v13_0_0_get_od_setting_limits(smu, ++ PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, ++ &minimum, ++ &maximum); ++ if (input[0] < minimum || ++ input[0] > maximum) { ++ dev_info(adev->dev, "zero RPM enable setting(%ld) must be within [%d, %d]!\n", ++ input[0], minimum, maximum); ++ return -EINVAL; ++ } ++ ++ od_table->OverDriveTable.FanZeroRpmEnable = input[0]; ++ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); ++ break; ++ ++ case PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP: ++ if (!smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { ++ dev_warn(adev->dev, "Zero RPM setting not supported!\n"); ++ return -ENOTSUPP; ++ } ++ ++ smu_v13_0_0_get_od_setting_limits(smu, ++ PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP, ++ &minimum, ++ &maximum); ++ if (input[0] < minimum || ++ input[0] > maximum) { ++ dev_info(adev->dev, "zero RPM stop temperature setting(%ld) must be within [%d, %d]!\n", ++ input[0], minimum, maximum); ++ return -EINVAL; ++ } ++ ++ od_table->OverDriveTable.FanZeroRpmStopTemp = input[0]; ++ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); ++ break; ++ + case PP_OD_RESTORE_DEFAULT_TABLE: + if (size == 1) { + ret = smu_v13_0_0_od_restore_table_single(smu, input[0]); +@@ -2122,7 +2220,11 @@ static void smu_v13_0_0_set_supported_od_feature_mask(struct smu_context *smu) + OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_RETRIEVE | + OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET | + OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | +- OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET; ++ OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET | ++ OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE | ++ OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET | ++ OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_RETRIEVE | ++ OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_SET; + } + + static int smu_v13_0_0_set_default_od_settings(struct smu_context *smu) +@@ -2188,6 +2290,10 @@ static int smu_v13_0_0_set_default_od_settings(struct smu_context *smu) + user_od_table_bak.OverDriveTable.FanTargetTemperature; + user_od_table->OverDriveTable.FanMinimumPwm = + user_od_table_bak.OverDriveTable.FanMinimumPwm; ++ user_od_table->OverDriveTable.FanZeroRpmEnable = ++ user_od_table_bak.OverDriveTable.FanZeroRpmEnable; ++ user_od_table->OverDriveTable.FanZeroRpmStopTemp = ++ user_od_table_bak.OverDriveTable.FanZeroRpmStopTemp; + } + + smu_v13_0_0_set_supported_od_feature_mask(smu); +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +index b891a5e0a396..50d16301f3eb 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +@@ -83,6 +83,8 @@ + #define PP_OD_FEATURE_FAN_ACOUSTIC_TARGET 8 + #define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 + #define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 ++#define PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE 11 ++#define PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP 12 + + #define LINK_SPEED_MAX 3 + +@@ -1132,6 +1134,14 @@ static void smu_v13_0_7_get_od_setting_limits(struct smu_context *smu, + od_min_setting = overdrive_lowerlimits->FanMinimumPwm; + od_max_setting = overdrive_upperlimits->FanMinimumPwm; + break; ++ case PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE: ++ od_min_setting = overdrive_lowerlimits->FanZeroRpmEnable; ++ od_max_setting = overdrive_upperlimits->FanZeroRpmEnable; ++ break; ++ case PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP: ++ od_min_setting = overdrive_lowerlimits->FanZeroRpmStopTemp; ++ od_max_setting = overdrive_upperlimits->FanZeroRpmStopTemp; ++ break; + default: + od_min_setting = od_max_setting = INT_MAX; + break; +@@ -1452,6 +1462,42 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu, + min_value, max_value); + break; + ++ case SMU_OD_FAN_ZERO_RPM_ENABLE: ++ if (!smu_v13_0_7_is_od_feature_supported(smu, ++ PP_OD_FEATURE_ZERO_FAN_BIT)) ++ break; ++ ++ size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_ENABLE:\n"); ++ size += sysfs_emit_at(buf, size, "%d\n", ++ (int)od_table->OverDriveTable.FanZeroRpmEnable); ++ ++ size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); ++ smu_v13_0_7_get_od_setting_limits(smu, ++ PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, ++ &min_value, ++ &max_value); ++ size += sysfs_emit_at(buf, size, "ZERO_RPM_ENABLE: %u %u\n", ++ min_value, max_value); ++ break; ++ ++ case SMU_OD_FAN_ZERO_RPM_STOP_TEMP: ++ if (!smu_v13_0_7_is_od_feature_supported(smu, ++ PP_OD_FEATURE_ZERO_FAN_BIT)) ++ break; ++ ++ size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_STOP_TEMPERATURE:\n"); ++ size += sysfs_emit_at(buf, size, "%d\n", ++ (int)od_table->OverDriveTable.FanZeroRpmStopTemp); ++ ++ size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); ++ smu_v13_0_7_get_od_setting_limits(smu, ++ PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP, ++ &min_value, ++ &max_value); ++ size += sysfs_emit_at(buf, size, "ZERO_RPM_STOP_TEMPERATURE: %u %u\n", ++ min_value, max_value); ++ break; ++ + case SMU_OD_RANGE: + if (!smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && + !smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && +@@ -1548,6 +1594,16 @@ static int smu_v13_0_7_od_restore_table_single(struct smu_context *smu, long inp + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; ++ case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: ++ od_table->OverDriveTable.FanZeroRpmEnable = ++ boot_overdrive_table->OverDriveTable.FanZeroRpmEnable; ++ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); ++ break; ++ case PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP: ++ od_table->OverDriveTable.FanZeroRpmStopTemp = ++ boot_overdrive_table->OverDriveTable.FanZeroRpmStopTemp; ++ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); ++ break; + default: + dev_info(adev->dev, "Invalid table index: %ld\n", input); + return -EINVAL; +@@ -1841,6 +1897,48 @@ static int smu_v13_0_7_od_edit_dpm_table(struct smu_context *smu, + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + ++ case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: ++ if (!smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { ++ dev_warn(adev->dev, "Zero RPM setting not supported!\n"); ++ return -ENOTSUPP; ++ } ++ ++ smu_v13_0_7_get_od_setting_limits(smu, ++ PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, ++ &minimum, ++ &maximum); ++ if (input[0] < minimum || ++ input[0] > maximum) { ++ dev_info(adev->dev, "zero RPM enable setting(%ld) must be within [%d, %d]!\n", ++ input[0], minimum, maximum); ++ return -EINVAL; ++ } ++ ++ od_table->OverDriveTable.FanZeroRpmEnable = input[0]; ++ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); ++ break; ++ ++ case PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP: ++ if (!smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { ++ dev_warn(adev->dev, "Zero RPM setting not supported!\n"); ++ return -ENOTSUPP; ++ } ++ ++ smu_v13_0_7_get_od_setting_limits(smu, ++ PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP, ++ &minimum, ++ &maximum); ++ if (input[0] < minimum || ++ input[0] > maximum) { ++ dev_info(adev->dev, "zero RPM stop temperature setting(%ld) must be within [%d, %d]!\n", ++ input[0], minimum, maximum); ++ return -EINVAL; ++ } ++ ++ od_table->OverDriveTable.FanZeroRpmStopTemp = input[0]; ++ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); ++ break; ++ + case PP_OD_RESTORE_DEFAULT_TABLE: + if (size == 1) { + ret = smu_v13_0_7_od_restore_table_single(smu, input[0]); +@@ -2106,7 +2204,11 @@ static void smu_v13_0_7_set_supported_od_feature_mask(struct smu_context *smu) + OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_RETRIEVE | + OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET | + OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | +- OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET; ++ OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET | ++ OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE | ++ OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET | ++ OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_RETRIEVE | ++ OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_SET; + } + + static int smu_v13_0_7_set_default_od_settings(struct smu_context *smu) +@@ -2172,6 +2274,10 @@ static int smu_v13_0_7_set_default_od_settings(struct smu_context *smu) + user_od_table_bak.OverDriveTable.FanTargetTemperature; + user_od_table->OverDriveTable.FanMinimumPwm = + user_od_table_bak.OverDriveTable.FanMinimumPwm; ++ user_od_table->OverDriveTable.FanZeroRpmEnable = ++ user_od_table_bak.OverDriveTable.FanZeroRpmEnable; ++ user_od_table->OverDriveTable.FanZeroRpmStopTemp = ++ user_od_table_bak.OverDriveTable.FanZeroRpmStopTemp; + } + + smu_v13_0_7_set_supported_od_feature_mask(smu); diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index b5cbb57ee5f6..a0f7fa1518c6 100644 --- a/drivers/input/evdev.c @@ -10811,6 +11388,1190 @@ index dccb60c1d9cc..d9a8af789de8 100644 { 0 } }; +diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig +index 37c24ffea65c..bd52d1e081b7 100644 +--- a/drivers/scsi/Kconfig ++++ b/drivers/scsi/Kconfig +@@ -1522,4 +1522,6 @@ endif # SCSI_LOWLEVEL + + source "drivers/scsi/device_handler/Kconfig" + ++source "drivers/scsi/vhba/Kconfig" ++ + endmenu +diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile +index 1313ddf2fd1a..5942e8f79159 100644 +--- a/drivers/scsi/Makefile ++++ b/drivers/scsi/Makefile +@@ -153,6 +153,7 @@ obj-$(CONFIG_CHR_DEV_SCH) += ch.o + obj-$(CONFIG_SCSI_ENCLOSURE) += ses.o + + obj-$(CONFIG_SCSI_HISI_SAS) += hisi_sas/ ++obj-$(CONFIG_VHBA) += vhba/ + + # This goes last, so that "real" scsi devices probe earlier + obj-$(CONFIG_SCSI_DEBUG) += scsi_debug.o +diff --git a/drivers/scsi/vhba/Kconfig b/drivers/scsi/vhba/Kconfig +new file mode 100644 +index 000000000000..e70a381fe3df +--- /dev/null ++++ b/drivers/scsi/vhba/Kconfig +@@ -0,0 +1,9 @@ ++config VHBA ++ tristate "Virtual (SCSI) Host Bus Adapter" ++ depends on SCSI ++ help ++ This is the in-kernel part of CDEmu, a CD/DVD-ROM device ++ emulator. ++ ++ This driver can also be built as a module. If so, the module ++ will be called vhba. +diff --git a/drivers/scsi/vhba/Makefile b/drivers/scsi/vhba/Makefile +new file mode 100644 +index 000000000000..2d7524b66199 +--- /dev/null ++++ b/drivers/scsi/vhba/Makefile +@@ -0,0 +1,4 @@ ++VHBA_VERSION := 20240917 ++ ++obj-$(CONFIG_VHBA) += vhba.o ++ccflags-y := -DVHBA_VERSION=\"$(VHBA_VERSION)\" -Werror +diff --git a/drivers/scsi/vhba/vhba.c b/drivers/scsi/vhba/vhba.c +new file mode 100644 +index 000000000000..7531223355e5 +--- /dev/null ++++ b/drivers/scsi/vhba/vhba.c +@@ -0,0 +1,1130 @@ ++/* ++ * vhba.c ++ * ++ * Copyright (C) 2007-2012 Chia-I Wu ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ++ */ ++ ++#define pr_fmt(fmt) "vhba: " fmt ++ ++#include ++ ++#include ++#include ++#include ++#include ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) ++#include ++#else ++#include ++#endif ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_COMPAT ++#include ++#endif ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++MODULE_AUTHOR("Chia-I Wu"); ++MODULE_VERSION(VHBA_VERSION); ++MODULE_DESCRIPTION("Virtual SCSI HBA"); ++MODULE_LICENSE("GPL"); ++ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) ++#define sdev_dbg(sdev, fmt, a...) \ ++ dev_dbg(&(sdev)->sdev_gendev, fmt, ##a) ++#define scmd_dbg(scmd, fmt, a...) \ ++ dev_dbg(&(scmd)->device->sdev_gendev, fmt, ##a) ++#endif ++ ++#define VHBA_MAX_SECTORS_PER_IO 256 ++#define VHBA_MAX_BUS 16 ++#define VHBA_MAX_ID 16 ++#define VHBA_MAX_DEVICES (VHBA_MAX_BUS * (VHBA_MAX_ID-1)) ++#define VHBA_KBUF_SIZE PAGE_SIZE ++ ++#define DATA_TO_DEVICE(dir) ((dir) == DMA_TO_DEVICE || (dir) == DMA_BIDIRECTIONAL) ++#define DATA_FROM_DEVICE(dir) ((dir) == DMA_FROM_DEVICE || (dir) == DMA_BIDIRECTIONAL) ++ ++ ++static int vhba_can_queue = 32; ++module_param_named(can_queue, vhba_can_queue, int, 0); ++ ++ ++enum vhba_req_state { ++ VHBA_REQ_FREE, ++ VHBA_REQ_PENDING, ++ VHBA_REQ_READING, ++ VHBA_REQ_SENT, ++ VHBA_REQ_WRITING, ++}; ++ ++struct vhba_command { ++ struct scsi_cmnd *cmd; ++ /* metatags are per-host. not to be confused with ++ queue tags that are usually per-lun */ ++ unsigned long metatag; ++ int status; ++ struct list_head entry; ++}; ++ ++struct vhba_device { ++ unsigned int num; ++ spinlock_t cmd_lock; ++ struct list_head cmd_list; ++ wait_queue_head_t cmd_wq; ++ atomic_t refcnt; ++ ++ unsigned char *kbuf; ++ size_t kbuf_size; ++}; ++ ++struct vhba_host { ++ struct Scsi_Host *shost; ++ spinlock_t cmd_lock; ++ int cmd_next; ++ struct vhba_command *commands; ++ spinlock_t dev_lock; ++ struct vhba_device *devices[VHBA_MAX_DEVICES]; ++ int num_devices; ++ DECLARE_BITMAP(chgmap, VHBA_MAX_DEVICES); ++ int chgtype[VHBA_MAX_DEVICES]; ++ struct work_struct scan_devices; ++}; ++ ++#define MAX_COMMAND_SIZE 16 ++ ++struct vhba_request { ++ __u32 metatag; ++ __u32 lun; ++ __u8 cdb[MAX_COMMAND_SIZE]; ++ __u8 cdb_len; ++ __u32 data_len; ++}; ++ ++struct vhba_response { ++ __u32 metatag; ++ __u32 status; ++ __u32 data_len; ++}; ++ ++ ++ ++static struct vhba_command *vhba_alloc_command (void); ++static void vhba_free_command (struct vhba_command *vcmd); ++ ++static struct platform_device vhba_platform_device; ++ ++ ++ ++/* These functions define a symmetric 1:1 mapping between device numbers and ++ the bus and id. We have reserved the last id per bus for the host itself. */ ++static void devnum_to_bus_and_id(unsigned int devnum, unsigned int *bus, unsigned int *id) ++{ ++ *bus = devnum / (VHBA_MAX_ID-1); ++ *id = devnum % (VHBA_MAX_ID-1); ++} ++ ++static unsigned int bus_and_id_to_devnum(unsigned int bus, unsigned int id) ++{ ++ return (bus * (VHBA_MAX_ID-1)) + id; ++} ++ ++static struct vhba_device *vhba_device_alloc (void) ++{ ++ struct vhba_device *vdev; ++ ++ vdev = kzalloc(sizeof(struct vhba_device), GFP_KERNEL); ++ if (!vdev) { ++ return NULL; ++ } ++ ++ spin_lock_init(&vdev->cmd_lock); ++ INIT_LIST_HEAD(&vdev->cmd_list); ++ init_waitqueue_head(&vdev->cmd_wq); ++ atomic_set(&vdev->refcnt, 1); ++ ++ vdev->kbuf = NULL; ++ vdev->kbuf_size = 0; ++ ++ return vdev; ++} ++ ++static void vhba_device_put (struct vhba_device *vdev) ++{ ++ if (atomic_dec_and_test(&vdev->refcnt)) { ++ kfree(vdev); ++ } ++} ++ ++static struct vhba_device *vhba_device_get (struct vhba_device *vdev) ++{ ++ atomic_inc(&vdev->refcnt); ++ ++ return vdev; ++} ++ ++static int vhba_device_queue (struct vhba_device *vdev, struct scsi_cmnd *cmd) ++{ ++ struct vhba_host *vhost; ++ struct vhba_command *vcmd; ++ unsigned long flags; ++ ++ vhost = platform_get_drvdata(&vhba_platform_device); ++ ++ vcmd = vhba_alloc_command(); ++ if (!vcmd) { ++ return SCSI_MLQUEUE_HOST_BUSY; ++ } ++ ++ vcmd->cmd = cmd; ++ ++ spin_lock_irqsave(&vdev->cmd_lock, flags); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 0) ++ vcmd->metatag = scsi_cmd_to_rq(vcmd->cmd)->tag; ++#else ++ vcmd->metatag = vcmd->cmd->request->tag; ++#endif ++ list_add_tail(&vcmd->entry, &vdev->cmd_list); ++ spin_unlock_irqrestore(&vdev->cmd_lock, flags); ++ ++ wake_up_interruptible(&vdev->cmd_wq); ++ ++ return 0; ++} ++ ++static int vhba_device_dequeue (struct vhba_device *vdev, struct scsi_cmnd *cmd) ++{ ++ struct vhba_command *vcmd; ++ int retval; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&vdev->cmd_lock, flags); ++ list_for_each_entry(vcmd, &vdev->cmd_list, entry) { ++ if (vcmd->cmd == cmd) { ++ list_del_init(&vcmd->entry); ++ break; ++ } ++ } ++ ++ /* command not found */ ++ if (&vcmd->entry == &vdev->cmd_list) { ++ spin_unlock_irqrestore(&vdev->cmd_lock, flags); ++ return SUCCESS; ++ } ++ ++ while (vcmd->status == VHBA_REQ_READING || vcmd->status == VHBA_REQ_WRITING) { ++ spin_unlock_irqrestore(&vdev->cmd_lock, flags); ++ scmd_dbg(cmd, "wait for I/O before aborting\n"); ++ schedule_timeout(1); ++ spin_lock_irqsave(&vdev->cmd_lock, flags); ++ } ++ ++ retval = (vcmd->status == VHBA_REQ_SENT) ? FAILED : SUCCESS; ++ ++ vhba_free_command(vcmd); ++ ++ spin_unlock_irqrestore(&vdev->cmd_lock, flags); ++ ++ return retval; ++} ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) ++static int vhba_slave_alloc(struct scsi_device *sdev) ++{ ++ struct Scsi_Host *shost = sdev->host; ++ ++ sdev_dbg(sdev, "enabling tagging (queue depth: %i).\n", sdev->queue_depth); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) ++ if (!shost_use_blk_mq(shost) && shost->bqt) { ++#else ++ if (shost->bqt) { ++#endif ++ blk_queue_init_tags(sdev->request_queue, sdev->queue_depth, shost->bqt); ++ } ++ scsi_adjust_queue_depth(sdev, 0, sdev->queue_depth); ++ ++ return 0; ++} ++#endif ++ ++static void vhba_scan_devices_add (struct vhba_host *vhost, int bus, int id) ++{ ++ struct scsi_device *sdev; ++ ++ sdev = scsi_device_lookup(vhost->shost, bus, id, 0); ++ if (!sdev) { ++ scsi_add_device(vhost->shost, bus, id, 0); ++ } else { ++ dev_warn(&vhost->shost->shost_gendev, "tried to add an already-existing device %d:%d:0!\n", bus, id); ++ scsi_device_put(sdev); ++ } ++} ++ ++static void vhba_scan_devices_remove (struct vhba_host *vhost, int bus, int id) ++{ ++ struct scsi_device *sdev; ++ ++ sdev = scsi_device_lookup(vhost->shost, bus, id, 0); ++ if (sdev) { ++ scsi_remove_device(sdev); ++ scsi_device_put(sdev); ++ } else { ++ dev_warn(&vhost->shost->shost_gendev, "tried to remove non-existing device %d:%d:0!\n", bus, id); ++ } ++} ++ ++static void vhba_scan_devices (struct work_struct *work) ++{ ++ struct vhba_host *vhost = container_of(work, struct vhba_host, scan_devices); ++ unsigned long flags; ++ int change, exists; ++ unsigned int devnum; ++ unsigned int bus, id; ++ ++ for (;;) { ++ spin_lock_irqsave(&vhost->dev_lock, flags); ++ ++ devnum = find_first_bit(vhost->chgmap, VHBA_MAX_DEVICES); ++ if (devnum >= VHBA_MAX_DEVICES) { ++ spin_unlock_irqrestore(&vhost->dev_lock, flags); ++ break; ++ } ++ change = vhost->chgtype[devnum]; ++ exists = vhost->devices[devnum] != NULL; ++ ++ vhost->chgtype[devnum] = 0; ++ clear_bit(devnum, vhost->chgmap); ++ ++ spin_unlock_irqrestore(&vhost->dev_lock, flags); ++ ++ devnum_to_bus_and_id(devnum, &bus, &id); ++ ++ if (change < 0) { ++ dev_dbg(&vhost->shost->shost_gendev, "trying to remove target %d:%d:0\n", bus, id); ++ vhba_scan_devices_remove(vhost, bus, id); ++ } else if (change > 0) { ++ dev_dbg(&vhost->shost->shost_gendev, "trying to add target %d:%d:0\n", bus, id); ++ vhba_scan_devices_add(vhost, bus, id); ++ } else { ++ /* quick sequence of add/remove or remove/add; we determine ++ which one it was by checking if device structure exists */ ++ if (exists) { ++ /* remove followed by add: remove and (re)add */ ++ dev_dbg(&vhost->shost->shost_gendev, "trying to (re)add target %d:%d:0\n", bus, id); ++ vhba_scan_devices_remove(vhost, bus, id); ++ vhba_scan_devices_add(vhost, bus, id); ++ } else { ++ /* add followed by remove: no-op */ ++ dev_dbg(&vhost->shost->shost_gendev, "no-op for target %d:%d:0\n", bus, id); ++ } ++ } ++ } ++} ++ ++static int vhba_add_device (struct vhba_device *vdev) ++{ ++ struct vhba_host *vhost; ++ unsigned int devnum; ++ unsigned long flags; ++ ++ vhost = platform_get_drvdata(&vhba_platform_device); ++ ++ vhba_device_get(vdev); ++ ++ spin_lock_irqsave(&vhost->dev_lock, flags); ++ if (vhost->num_devices >= VHBA_MAX_DEVICES) { ++ spin_unlock_irqrestore(&vhost->dev_lock, flags); ++ vhba_device_put(vdev); ++ return -EBUSY; ++ } ++ ++ for (devnum = 0; devnum < VHBA_MAX_DEVICES; devnum++) { ++ if (vhost->devices[devnum] == NULL) { ++ vdev->num = devnum; ++ vhost->devices[devnum] = vdev; ++ vhost->num_devices++; ++ set_bit(devnum, vhost->chgmap); ++ vhost->chgtype[devnum]++; ++ break; ++ } ++ } ++ spin_unlock_irqrestore(&vhost->dev_lock, flags); ++ ++ schedule_work(&vhost->scan_devices); ++ ++ return 0; ++} ++ ++static int vhba_remove_device (struct vhba_device *vdev) ++{ ++ struct vhba_host *vhost; ++ unsigned long flags; ++ ++ vhost = platform_get_drvdata(&vhba_platform_device); ++ ++ spin_lock_irqsave(&vhost->dev_lock, flags); ++ set_bit(vdev->num, vhost->chgmap); ++ vhost->chgtype[vdev->num]--; ++ vhost->devices[vdev->num] = NULL; ++ vhost->num_devices--; ++ spin_unlock_irqrestore(&vhost->dev_lock, flags); ++ ++ vhba_device_put(vdev); ++ ++ schedule_work(&vhost->scan_devices); ++ ++ return 0; ++} ++ ++static struct vhba_device *vhba_lookup_device (int devnum) ++{ ++ struct vhba_host *vhost; ++ struct vhba_device *vdev = NULL; ++ unsigned long flags; ++ ++ vhost = platform_get_drvdata(&vhba_platform_device); ++ ++ if (likely(devnum < VHBA_MAX_DEVICES)) { ++ spin_lock_irqsave(&vhost->dev_lock, flags); ++ vdev = vhost->devices[devnum]; ++ if (vdev) { ++ vdev = vhba_device_get(vdev); ++ } ++ ++ spin_unlock_irqrestore(&vhost->dev_lock, flags); ++ } ++ ++ return vdev; ++} ++ ++static struct vhba_command *vhba_alloc_command (void) ++{ ++ struct vhba_host *vhost; ++ struct vhba_command *vcmd; ++ unsigned long flags; ++ int i; ++ ++ vhost = platform_get_drvdata(&vhba_platform_device); ++ ++ spin_lock_irqsave(&vhost->cmd_lock, flags); ++ ++ vcmd = vhost->commands + vhost->cmd_next++; ++ if (vcmd->status != VHBA_REQ_FREE) { ++ for (i = 0; i < vhba_can_queue; i++) { ++ vcmd = vhost->commands + i; ++ ++ if (vcmd->status == VHBA_REQ_FREE) { ++ vhost->cmd_next = i + 1; ++ break; ++ } ++ } ++ ++ if (i == vhba_can_queue) { ++ vcmd = NULL; ++ } ++ } ++ ++ if (vcmd) { ++ vcmd->status = VHBA_REQ_PENDING; ++ } ++ ++ vhost->cmd_next %= vhba_can_queue; ++ ++ spin_unlock_irqrestore(&vhost->cmd_lock, flags); ++ ++ return vcmd; ++} ++ ++static void vhba_free_command (struct vhba_command *vcmd) ++{ ++ struct vhba_host *vhost; ++ unsigned long flags; ++ ++ vhost = platform_get_drvdata(&vhba_platform_device); ++ ++ spin_lock_irqsave(&vhost->cmd_lock, flags); ++ vcmd->status = VHBA_REQ_FREE; ++ spin_unlock_irqrestore(&vhost->cmd_lock, flags); ++} ++ ++static int vhba_queuecommand (struct Scsi_Host *shost, struct scsi_cmnd *cmd) ++{ ++ struct vhba_device *vdev; ++ int retval; ++ unsigned int devnum; ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 0) ++ scmd_dbg(cmd, "queue %p tag %i\n", cmd, scsi_cmd_to_rq(cmd)->tag); ++#else ++ scmd_dbg(cmd, "queue %p tag %i\n", cmd, cmd->request->tag); ++#endif ++ ++ devnum = bus_and_id_to_devnum(cmd->device->channel, cmd->device->id); ++ vdev = vhba_lookup_device(devnum); ++ if (!vdev) { ++ scmd_dbg(cmd, "no such device\n"); ++ ++ cmd->result = DID_NO_CONNECT << 16; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) ++ scsi_done(cmd); ++#else ++ cmd->scsi_done(cmd); ++#endif ++ ++ return 0; ++ } ++ ++ retval = vhba_device_queue(vdev, cmd); ++ ++ vhba_device_put(vdev); ++ ++ return retval; ++} ++ ++static int vhba_abort (struct scsi_cmnd *cmd) ++{ ++ struct vhba_device *vdev; ++ int retval = SUCCESS; ++ unsigned int devnum; ++ ++ scmd_dbg(cmd, "abort %p\n", cmd); ++ ++ devnum = bus_and_id_to_devnum(cmd->device->channel, cmd->device->id); ++ vdev = vhba_lookup_device(devnum); ++ if (vdev) { ++ retval = vhba_device_dequeue(vdev, cmd); ++ vhba_device_put(vdev); ++ } else { ++ cmd->result = DID_NO_CONNECT << 16; ++ } ++ ++ return retval; ++} ++ ++static struct scsi_host_template vhba_template = { ++ .module = THIS_MODULE, ++ .name = "vhba", ++ .proc_name = "vhba", ++ .queuecommand = vhba_queuecommand, ++ .eh_abort_handler = vhba_abort, ++ .this_id = -1, ++ .max_sectors = VHBA_MAX_SECTORS_PER_IO, ++ .sg_tablesize = 256, ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) ++ .slave_alloc = vhba_slave_alloc, ++#endif ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) ++ .tag_alloc_policy = BLK_TAG_ALLOC_RR, ++#endif ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) ++ .use_blk_tags = 1, ++#endif ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) ++ .max_segment_size = VHBA_KBUF_SIZE, ++#endif ++}; ++ ++static ssize_t do_request (struct vhba_device *vdev, unsigned long metatag, struct scsi_cmnd *cmd, char __user *buf, size_t buf_len) ++{ ++ struct vhba_request vreq; ++ ssize_t ret; ++ ++ scmd_dbg(cmd, "request %lu (%p), cdb 0x%x, bufflen %d, sg count %d\n", ++ metatag, cmd, cmd->cmnd[0], scsi_bufflen(cmd), scsi_sg_count(cmd)); ++ ++ ret = sizeof(vreq); ++ if (DATA_TO_DEVICE(cmd->sc_data_direction)) { ++ ret += scsi_bufflen(cmd); ++ } ++ ++ if (ret > buf_len) { ++ scmd_dbg(cmd, "buffer too small (%zd < %zd) for a request\n", buf_len, ret); ++ return -EIO; ++ } ++ ++ vreq.metatag = metatag; ++ vreq.lun = cmd->device->lun; ++ memcpy(vreq.cdb, cmd->cmnd, MAX_COMMAND_SIZE); ++ vreq.cdb_len = cmd->cmd_len; ++ vreq.data_len = scsi_bufflen(cmd); ++ ++ if (copy_to_user(buf, &vreq, sizeof(vreq))) { ++ return -EFAULT; ++ } ++ ++ if (DATA_TO_DEVICE(cmd->sc_data_direction) && vreq.data_len) { ++ buf += sizeof(vreq); ++ ++ if (scsi_sg_count(cmd)) { ++ unsigned char *kaddr, *uaddr; ++ struct scatterlist *sglist = scsi_sglist(cmd); ++ struct scatterlist *sg; ++ int i; ++ ++ uaddr = (unsigned char *) buf; ++ ++ for_each_sg(sglist, sg, scsi_sg_count(cmd), i) { ++ size_t len = sg->length; ++ ++ if (len > vdev->kbuf_size) { ++ scmd_dbg(cmd, "segment size (%zu) exceeds kbuf size (%zu)!", len, vdev->kbuf_size); ++ len = vdev->kbuf_size; ++ } ++ ++ kaddr = kmap_atomic(sg_page(sg)); ++ memcpy(vdev->kbuf, kaddr + sg->offset, len); ++ kunmap_atomic(kaddr); ++ ++ if (copy_to_user(uaddr, vdev->kbuf, len)) { ++ return -EFAULT; ++ } ++ uaddr += len; ++ } ++ } else { ++ if (copy_to_user(buf, scsi_sglist(cmd), vreq.data_len)) { ++ return -EFAULT; ++ } ++ } ++ } ++ ++ return ret; ++} ++ ++static ssize_t do_response (struct vhba_device *vdev, unsigned long metatag, struct scsi_cmnd *cmd, const char __user *buf, size_t buf_len, struct vhba_response *res) ++{ ++ ssize_t ret = 0; ++ ++ scmd_dbg(cmd, "response %lu (%p), status %x, data len %d, sg count %d\n", ++ metatag, cmd, res->status, res->data_len, scsi_sg_count(cmd)); ++ ++ if (res->status) { ++ if (res->data_len > SCSI_SENSE_BUFFERSIZE) { ++ scmd_dbg(cmd, "truncate sense (%d < %d)", SCSI_SENSE_BUFFERSIZE, res->data_len); ++ res->data_len = SCSI_SENSE_BUFFERSIZE; ++ } ++ ++ if (copy_from_user(cmd->sense_buffer, buf, res->data_len)) { ++ return -EFAULT; ++ } ++ ++ cmd->result = res->status; ++ ++ ret += res->data_len; ++ } else if (DATA_FROM_DEVICE(cmd->sc_data_direction) && scsi_bufflen(cmd)) { ++ size_t to_read; ++ ++ if (res->data_len > scsi_bufflen(cmd)) { ++ scmd_dbg(cmd, "truncate data (%d < %d)\n", scsi_bufflen(cmd), res->data_len); ++ res->data_len = scsi_bufflen(cmd); ++ } ++ ++ to_read = res->data_len; ++ ++ if (scsi_sg_count(cmd)) { ++ unsigned char *kaddr, *uaddr; ++ struct scatterlist *sglist = scsi_sglist(cmd); ++ struct scatterlist *sg; ++ int i; ++ ++ uaddr = (unsigned char *)buf; ++ ++ for_each_sg(sglist, sg, scsi_sg_count(cmd), i) { ++ size_t len = (sg->length < to_read) ? sg->length : to_read; ++ ++ if (len > vdev->kbuf_size) { ++ scmd_dbg(cmd, "segment size (%zu) exceeds kbuf size (%zu)!", len, vdev->kbuf_size); ++ len = vdev->kbuf_size; ++ } ++ ++ if (copy_from_user(vdev->kbuf, uaddr, len)) { ++ return -EFAULT; ++ } ++ uaddr += len; ++ ++ kaddr = kmap_atomic(sg_page(sg)); ++ memcpy(kaddr + sg->offset, vdev->kbuf, len); ++ kunmap_atomic(kaddr); ++ ++ to_read -= len; ++ if (to_read == 0) { ++ break; ++ } ++ } ++ } else { ++ if (copy_from_user(scsi_sglist(cmd), buf, res->data_len)) { ++ return -EFAULT; ++ } ++ ++ to_read -= res->data_len; ++ } ++ ++ scsi_set_resid(cmd, to_read); ++ ++ ret += res->data_len - to_read; ++ } ++ ++ return ret; ++} ++ ++static struct vhba_command *next_command (struct vhba_device *vdev) ++{ ++ struct vhba_command *vcmd; ++ ++ list_for_each_entry(vcmd, &vdev->cmd_list, entry) { ++ if (vcmd->status == VHBA_REQ_PENDING) { ++ break; ++ } ++ } ++ ++ if (&vcmd->entry == &vdev->cmd_list) { ++ vcmd = NULL; ++ } ++ ++ return vcmd; ++} ++ ++static struct vhba_command *match_command (struct vhba_device *vdev, __u32 metatag) ++{ ++ struct vhba_command *vcmd; ++ ++ list_for_each_entry(vcmd, &vdev->cmd_list, entry) { ++ if (vcmd->metatag == metatag) { ++ break; ++ } ++ } ++ ++ if (&vcmd->entry == &vdev->cmd_list) { ++ vcmd = NULL; ++ } ++ ++ return vcmd; ++} ++ ++static struct vhba_command *wait_command (struct vhba_device *vdev, unsigned long flags) ++{ ++ struct vhba_command *vcmd; ++ DEFINE_WAIT(wait); ++ ++ while (!(vcmd = next_command(vdev))) { ++ if (signal_pending(current)) { ++ break; ++ } ++ ++ prepare_to_wait(&vdev->cmd_wq, &wait, TASK_INTERRUPTIBLE); ++ ++ spin_unlock_irqrestore(&vdev->cmd_lock, flags); ++ ++ schedule(); ++ ++ spin_lock_irqsave(&vdev->cmd_lock, flags); ++ } ++ ++ finish_wait(&vdev->cmd_wq, &wait); ++ if (vcmd) { ++ vcmd->status = VHBA_REQ_READING; ++ } ++ ++ return vcmd; ++} ++ ++static ssize_t vhba_ctl_read (struct file *file, char __user *buf, size_t buf_len, loff_t *offset) ++{ ++ struct vhba_device *vdev; ++ struct vhba_command *vcmd; ++ ssize_t ret; ++ unsigned long flags; ++ ++ vdev = file->private_data; ++ ++ /* Get next command */ ++ if (file->f_flags & O_NONBLOCK) { ++ /* Non-blocking variant */ ++ spin_lock_irqsave(&vdev->cmd_lock, flags); ++ vcmd = next_command(vdev); ++ spin_unlock_irqrestore(&vdev->cmd_lock, flags); ++ ++ if (!vcmd) { ++ return -EWOULDBLOCK; ++ } ++ } else { ++ /* Blocking variant */ ++ spin_lock_irqsave(&vdev->cmd_lock, flags); ++ vcmd = wait_command(vdev, flags); ++ spin_unlock_irqrestore(&vdev->cmd_lock, flags); ++ ++ if (!vcmd) { ++ return -ERESTARTSYS; ++ } ++ } ++ ++ ret = do_request(vdev, vcmd->metatag, vcmd->cmd, buf, buf_len); ++ ++ spin_lock_irqsave(&vdev->cmd_lock, flags); ++ if (ret >= 0) { ++ vcmd->status = VHBA_REQ_SENT; ++ *offset += ret; ++ } else { ++ vcmd->status = VHBA_REQ_PENDING; ++ } ++ ++ spin_unlock_irqrestore(&vdev->cmd_lock, flags); ++ ++ return ret; ++} ++ ++static ssize_t vhba_ctl_write (struct file *file, const char __user *buf, size_t buf_len, loff_t *offset) ++{ ++ struct vhba_device *vdev; ++ struct vhba_command *vcmd; ++ struct vhba_response res; ++ ssize_t ret; ++ unsigned long flags; ++ ++ if (buf_len < sizeof(res)) { ++ return -EIO; ++ } ++ ++ if (copy_from_user(&res, buf, sizeof(res))) { ++ return -EFAULT; ++ } ++ ++ vdev = file->private_data; ++ ++ spin_lock_irqsave(&vdev->cmd_lock, flags); ++ vcmd = match_command(vdev, res.metatag); ++ if (!vcmd || vcmd->status != VHBA_REQ_SENT) { ++ spin_unlock_irqrestore(&vdev->cmd_lock, flags); ++ pr_debug("ctl dev #%u not expecting response\n", vdev->num); ++ return -EIO; ++ } ++ vcmd->status = VHBA_REQ_WRITING; ++ spin_unlock_irqrestore(&vdev->cmd_lock, flags); ++ ++ ret = do_response(vdev, vcmd->metatag, vcmd->cmd, buf + sizeof(res), buf_len - sizeof(res), &res); ++ ++ spin_lock_irqsave(&vdev->cmd_lock, flags); ++ if (ret >= 0) { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) ++ scsi_done(vcmd->cmd); ++#else ++ vcmd->cmd->scsi_done(vcmd->cmd); ++#endif ++ ret += sizeof(res); ++ ++ /* don't compete with vhba_device_dequeue */ ++ if (!list_empty(&vcmd->entry)) { ++ list_del_init(&vcmd->entry); ++ vhba_free_command(vcmd); ++ } ++ } else { ++ vcmd->status = VHBA_REQ_SENT; ++ } ++ ++ spin_unlock_irqrestore(&vdev->cmd_lock, flags); ++ ++ return ret; ++} ++ ++static long vhba_ctl_ioctl (struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ struct vhba_device *vdev = file->private_data; ++ struct vhba_host *vhost = platform_get_drvdata(&vhba_platform_device); ++ ++ switch (cmd) { ++ case 0xBEEF001: { ++ unsigned int ident[4]; /* host, channel, id, lun */ ++ ++ ident[0] = vhost->shost->host_no; ++ devnum_to_bus_and_id(vdev->num, &ident[1], &ident[2]); ++ ident[3] = 0; /* lun */ ++ ++ if (copy_to_user((void *) arg, ident, sizeof(ident))) { ++ return -EFAULT; ++ } ++ ++ return 0; ++ } ++ case 0xBEEF002: { ++ unsigned int devnum = vdev->num; ++ ++ if (copy_to_user((void *) arg, &devnum, sizeof(devnum))) { ++ return -EFAULT; ++ } ++ ++ return 0; ++ } ++ } ++ ++ return -ENOTTY; ++} ++ ++#ifdef CONFIG_COMPAT ++static long vhba_ctl_compat_ioctl (struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ unsigned long compat_arg = (unsigned long)compat_ptr(arg); ++ return vhba_ctl_ioctl(file, cmd, compat_arg); ++} ++#endif ++ ++static unsigned int vhba_ctl_poll (struct file *file, poll_table *wait) ++{ ++ struct vhba_device *vdev = file->private_data; ++ unsigned int mask = 0; ++ unsigned long flags; ++ ++ poll_wait(file, &vdev->cmd_wq, wait); ++ ++ spin_lock_irqsave(&vdev->cmd_lock, flags); ++ if (next_command(vdev)) { ++ mask |= POLLIN | POLLRDNORM; ++ } ++ spin_unlock_irqrestore(&vdev->cmd_lock, flags); ++ ++ return mask; ++} ++ ++static int vhba_ctl_open (struct inode *inode, struct file *file) ++{ ++ struct vhba_device *vdev; ++ int retval; ++ ++ pr_debug("ctl dev open\n"); ++ ++ /* check if vhba is probed */ ++ if (!platform_get_drvdata(&vhba_platform_device)) { ++ return -ENODEV; ++ } ++ ++ vdev = vhba_device_alloc(); ++ if (!vdev) { ++ return -ENOMEM; ++ } ++ ++ vdev->kbuf_size = VHBA_KBUF_SIZE; ++ vdev->kbuf = kzalloc(vdev->kbuf_size, GFP_KERNEL); ++ if (!vdev->kbuf) { ++ return -ENOMEM; ++ } ++ ++ if (!(retval = vhba_add_device(vdev))) { ++ file->private_data = vdev; ++ } ++ ++ vhba_device_put(vdev); ++ ++ return retval; ++} ++ ++static int vhba_ctl_release (struct inode *inode, struct file *file) ++{ ++ struct vhba_device *vdev; ++ struct vhba_command *vcmd; ++ unsigned long flags; ++ ++ vdev = file->private_data; ++ ++ pr_debug("ctl dev release\n"); ++ ++ vhba_device_get(vdev); ++ vhba_remove_device(vdev); ++ ++ spin_lock_irqsave(&vdev->cmd_lock, flags); ++ list_for_each_entry(vcmd, &vdev->cmd_list, entry) { ++ WARN_ON(vcmd->status == VHBA_REQ_READING || vcmd->status == VHBA_REQ_WRITING); ++ ++ scmd_dbg(vcmd->cmd, "device released with command %lu (%p)\n", vcmd->metatag, vcmd->cmd); ++ vcmd->cmd->result = DID_NO_CONNECT << 16; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) ++ scsi_done(vcmd->cmd); ++#else ++ vcmd->cmd->scsi_done(vcmd->cmd); ++#endif ++ vhba_free_command(vcmd); ++ } ++ INIT_LIST_HEAD(&vdev->cmd_list); ++ spin_unlock_irqrestore(&vdev->cmd_lock, flags); ++ ++ kfree(vdev->kbuf); ++ vdev->kbuf = NULL; ++ ++ vhba_device_put(vdev); ++ ++ return 0; ++} ++ ++static struct file_operations vhba_ctl_fops = { ++ .owner = THIS_MODULE, ++ .open = vhba_ctl_open, ++ .release = vhba_ctl_release, ++ .read = vhba_ctl_read, ++ .write = vhba_ctl_write, ++ .poll = vhba_ctl_poll, ++ .unlocked_ioctl = vhba_ctl_ioctl, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = vhba_ctl_compat_ioctl, ++#endif ++}; ++ ++static struct miscdevice vhba_miscdev = { ++ .minor = MISC_DYNAMIC_MINOR, ++ .name = "vhba_ctl", ++ .fops = &vhba_ctl_fops, ++}; ++ ++static int vhba_probe (struct platform_device *pdev) ++{ ++ struct Scsi_Host *shost; ++ struct vhba_host *vhost; ++ int i; ++ ++ vhba_can_queue = clamp(vhba_can_queue, 1, 256); ++ ++ shost = scsi_host_alloc(&vhba_template, sizeof(struct vhba_host)); ++ if (!shost) { ++ return -ENOMEM; ++ } ++ ++ shost->max_channel = VHBA_MAX_BUS-1; ++ shost->max_id = VHBA_MAX_ID; ++ /* we don't support lun > 0 */ ++ shost->max_lun = 1; ++ shost->max_cmd_len = MAX_COMMAND_SIZE; ++ shost->can_queue = vhba_can_queue; ++ shost->cmd_per_lun = vhba_can_queue; ++ ++ vhost = (struct vhba_host *)shost->hostdata; ++ memset(vhost, 0, sizeof(struct vhba_host)); ++ ++ vhost->shost = shost; ++ vhost->num_devices = 0; ++ spin_lock_init(&vhost->dev_lock); ++ spin_lock_init(&vhost->cmd_lock); ++ INIT_WORK(&vhost->scan_devices, vhba_scan_devices); ++ vhost->cmd_next = 0; ++ vhost->commands = kzalloc(vhba_can_queue * sizeof(struct vhba_command), GFP_KERNEL); ++ if (!vhost->commands) { ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < vhba_can_queue; i++) { ++ vhost->commands[i].status = VHBA_REQ_FREE; ++ } ++ ++ platform_set_drvdata(pdev, vhost); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) ++ i = scsi_init_shared_tag_map(shost, vhba_can_queue); ++ if (i) return i; ++#endif ++ ++ if (scsi_add_host(shost, &pdev->dev)) { ++ scsi_host_put(shost); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 11, 0) ++static int vhba_remove (struct platform_device *pdev) ++#else ++static void vhba_remove (struct platform_device *pdev) ++#endif ++{ ++ struct vhba_host *vhost; ++ struct Scsi_Host *shost; ++ ++ vhost = platform_get_drvdata(pdev); ++ shost = vhost->shost; ++ ++ scsi_remove_host(shost); ++ scsi_host_put(shost); ++ ++ kfree(vhost->commands); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 11, 0) ++ return 0; ++#endif ++} ++ ++static void vhba_release (struct device * dev) ++{ ++ return; ++} ++ ++static struct platform_device vhba_platform_device = { ++ .name = "vhba", ++ .id = -1, ++ .dev = { ++ .release = vhba_release, ++ }, ++}; ++ ++static struct platform_driver vhba_platform_driver = { ++ .driver = { ++ .owner = THIS_MODULE, ++ .name = "vhba", ++ }, ++ .probe = vhba_probe, ++ .remove = vhba_remove, ++}; ++ ++static int __init vhba_init (void) ++{ ++ int ret; ++ ++ ret = platform_device_register(&vhba_platform_device); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ ret = platform_driver_register(&vhba_platform_driver); ++ if (ret < 0) { ++ platform_device_unregister(&vhba_platform_device); ++ return ret; ++ } ++ ++ ret = misc_register(&vhba_miscdev); ++ if (ret < 0) { ++ platform_driver_unregister(&vhba_platform_driver); ++ platform_device_unregister(&vhba_platform_device); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static void __exit vhba_exit(void) ++{ ++ misc_deregister(&vhba_miscdev); ++ platform_driver_unregister(&vhba_platform_driver); ++ platform_device_unregister(&vhba_platform_device); ++} ++ ++module_init(vhba_init); ++module_exit(vhba_exit); ++ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 68a5f1ff3301..291873a34079 100644 --- a/include/linux/pagemap.h @@ -11343,9 +13104,9 @@ index 2b698f8419fe..fd039c41d1c8 100644 -- 2.47.0 -From 51fb944d4a6a9614b40bfd6faf5c9874cf8c714c Mon Sep 17 00:00:00 2001 +From 938a1998dd5f44d955fb93e9764abda4ca66a057 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 18 Nov 2024 13:23:23 +0100 +Date: Thu, 21 Nov 2024 22:00:08 +0100 Subject: [PATCH 06/12] crypto Signed-off-by: Peter Jung @@ -12948,16 +14709,16 @@ index bbcff1fb78cb..752812bc4991 100644 -- 2.47.0 -From e6de96946ecba842fbe80d4bd92801f00bb7bf76 Mon Sep 17 00:00:00 2001 +From 8c7a03d045e3dcbbbc6c21a4ed733cc4dcbf91ad Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 18 Nov 2024 13:24:27 +0100 +Date: Thu, 21 Nov 2024 22:00:19 +0100 Subject: [PATCH 07/12] fixes Signed-off-by: Peter Jung --- arch/Kconfig | 4 +- arch/x86/kernel/alternative.c | 10 +- - arch/x86/mm/tlb.c | 20 +-- + arch/x86/mm/tlb.c | 22 +-- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 30 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 5 +- @@ -12989,8 +14750,9 @@ Signed-off-by: Peter Jung init/Kconfig | 8 + kernel/workqueue.c | 22 ++- lib/overflow_kunit.c | 2 +- + mm/mmap.c | 1 + scripts/package/PKGBUILD | 5 + - 35 files changed, 865 insertions(+), 726 deletions(-) + 36 files changed, 867 insertions(+), 727 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 00551f340dbe..833b2344ce79 100644 @@ -13048,9 +14810,18 @@ index d17518ca19b8..8b66a555d2f0 100644 { memcpy(dst, src, len); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index 86593d1b787d..9d0d34576928 100644 +index 86593d1b787d..1aac4fa90d3d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c +@@ -568,7 +568,7 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, + * mm_cpumask. The TLB shootdown code can figure out from + * cpu_tlbstate_shared.is_lazy whether or not to send an IPI. + */ +- if (WARN_ON_ONCE(prev != &init_mm && ++ if (IS_ENABLED(CONFIG_DEBUG_VM) && WARN_ON_ONCE(prev != &init_mm && + !cpumask_test_cpu(cpu, mm_cpumask(next)))) + cpumask_set_cpu(cpu, mm_cpumask(next)); + @@ -606,18 +606,15 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, cond_mitigation(tsk); @@ -13195,7 +14966,7 @@ index f1ffab5a1eae..15614e43be5a 100644 } } else { diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c -index 51dea35848f6..95f61b48373e 100644 +index 44f0b159d232..953fcd37749b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -72,6 +72,10 @@ static int smu_set_power_limit(void *handle, uint32_t limit); @@ -13400,7 +15171,7 @@ index 51dea35848f6..95f61b48373e 100644 return 0; } -@@ -3059,12 +3063,48 @@ static int smu_set_power_profile_mode(void *handle, +@@ -3063,12 +3067,48 @@ static int smu_set_power_profile_mode(void *handle, uint32_t param_size) { struct smu_context *smu = handle; @@ -14135,10 +15906,10 @@ index cc0504b063fa..70dd631c46dc 100644 static int renoir_set_peak_clock_by_device(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c -index d53e162dcd8d..dc08f8fd0f31 100644 +index 24675a1d98db..9dfa01db0ec0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c -@@ -2477,82 +2477,76 @@ static int smu_v13_0_0_get_power_profile_mode(struct smu_context *smu, +@@ -2583,82 +2583,76 @@ static int smu_v13_0_0_get_power_profile_mode(struct smu_context *smu, return size; } @@ -14278,7 +16049,7 @@ index d53e162dcd8d..dc08f8fd0f31 100644 /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */ if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && -@@ -2564,15 +2558,26 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, +@@ -2670,15 +2664,26 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, CMN2ASIC_MAPPING_WORKLOAD, PP_SMC_POWER_PROFILE_POWERSAVING); if (workload_type >= 0) @@ -14312,10 +16083,10 @@ index d53e162dcd8d..dc08f8fd0f31 100644 return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c -index b891a5e0a396..a10e66a691ec 100644 +index 50d16301f3eb..3ae328348d6f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c -@@ -2434,78 +2434,87 @@ do { \ +@@ -2540,78 +2540,87 @@ do { \ return result; } @@ -15595,6 +17366,18 @@ index 2abc78367dd1..5222c6393f11 100644 int expected_raw_size = sizeof(struct foo); #else int expected_raw_size = sizeof(struct foo) + 2 * sizeof(s16); +diff --git a/mm/mmap.c b/mm/mmap.c +index 79d541f1502b..2f01f1a8e304 100644 +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -901,6 +901,7 @@ __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, + if (get_area) { + addr = get_area(file, addr, len, pgoff, flags); + } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ++ && !addr /* no hint */ + && IS_ALIGNED(len, PMD_SIZE)) { + /* Ensures that larger anonymous mappings are THP aligned. */ + addr = thp_get_unmapped_area_vmflags(file, addr, len, diff --git a/scripts/package/PKGBUILD b/scripts/package/PKGBUILD index f83493838cf9..4010899652b8 100644 --- a/scripts/package/PKGBUILD @@ -15614,9 +17397,47 @@ index f83493838cf9..4010899652b8 100644 -- 2.47.0 -From b1cd5f38df8ffaf4555bbb9bd1f5f0da04f0f181 Mon Sep 17 00:00:00 2001 + +From 13bbddce3f91aced56362376c07e7c1a6ecf89b0 Mon Sep 17 00:00:00 2001 +From: Hao Qin +Date: Thu, 22 Aug 2024 13:23:10 +0800 +Subject: [PATCH] Bluetooth: btmtk: Remove resetting mt7921 before downloading + the fw + +Remove resetting mt7921 before downloading the fw, as it may cause +command timeout when performing the reset. + +Signed-off-by: Hao Qin +--- + drivers/bluetooth/btmtk.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c +index 9bbf205021634f..67219ac262f131 100644 +--- a/drivers/bluetooth/btmtk.c ++++ b/drivers/bluetooth/btmtk.c +@@ -1326,7 +1326,6 @@ int btmtk_usb_setup(struct hci_dev *hdev) + fwname = FIRMWARE_MT7668; + break; + case 0x7922: +- case 0x7961: + case 0x7925: + /* Reset the device to ensure it's in the initial state before + * downloading the firmware to ensure. +@@ -1334,7 +1333,8 @@ int btmtk_usb_setup(struct hci_dev *hdev) + + if (!test_bit(BTMTK_FIRMWARE_LOADED, &btmtk_data->flags)) + btmtk_usb_subsys_reset(hdev, dev_id); +- ++ fallthrough; ++ case 0x7961: + btmtk_fw_get_filename(fw_bin_name, sizeof(fw_bin_name), dev_id, + fw_version, fw_flavor); + + +From 5aa831042dc384bd259eb368b4a02ae807ce9d39 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 18 Nov 2024 13:24:49 +0100 +Date: Thu, 21 Nov 2024 22:00:34 +0100 Subject: [PATCH 08/12] ntsync Signed-off-by: Peter Jung @@ -18703,20 +20524,21 @@ index 000000000000..5fa2c9a0768c -- 2.47.0 -From ebc7783bf7cb1cd5026930a23092d7fbacdd22e5 Mon Sep 17 00:00:00 2001 +From d16820bc5addd0ce3fed3e018234022553529bcb Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 18 Nov 2024 13:25:32 +0100 +Date: Thu, 21 Nov 2024 22:00:57 +0100 Subject: [PATCH 09/12] perf-per-core Signed-off-by: Peter Jung --- Documentation/arch/x86/topology.rst | 4 + - arch/x86/events/rapl.c | 408 ++++++++++++++++++-------- + arch/x86/events/rapl.c | 507 ++++++++++++++------------ arch/x86/include/asm/processor.h | 1 + arch/x86/include/asm/topology.h | 1 + arch/x86/kernel/cpu/debugfs.c | 1 + arch/x86/kernel/cpu/topology_common.c | 1 + - 6 files changed, 288 insertions(+), 128 deletions(-) + include/linux/cpuhotplug.h | 1 - + 7 files changed, 288 insertions(+), 228 deletions(-) diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst index 7352ab89a55a..c12837e61bda 100644 @@ -18734,21 +20556,21 @@ index 7352ab89a55a..c12837e61bda 100644 System topology examples diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c -index a481a939862e..6b405bf46781 100644 +index a481a939862e..d3bb3865c1b1 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -39,6 +39,10 @@ * event: rapl_energy_psys * perf code: 0x5 * -+ * per_core counter: consumption of a single physical core -+ * event: rapl_energy_per_core (power_per_core PMU) ++ * core counter: consumption of a single physical core ++ * event: rapl_energy_core (power_core PMU) + * perf code: 0x1 + * * We manage those counters as free running (read-only). They may be * use simultaneously by other tools, such as turbostat. * -@@ -70,18 +74,25 @@ MODULE_LICENSE("GPL"); +@@ -70,18 +74,22 @@ MODULE_LICENSE("GPL"); /* * RAPL energy status counters */ @@ -18764,43 +20586,46 @@ index a481a939862e..6b405bf46781 100644 - NR_RAPL_DOMAINS = PERF_RAPL_MAX, + PERF_RAPL_PKG_EVENTS_MAX, + NR_RAPL_PKG_DOMAINS = PERF_RAPL_PKG_EVENTS_MAX, -+}; -+ -+enum perf_rapl_core_events { -+ PERF_RAPL_PER_CORE = 0, /* per-core */ -+ -+ PERF_RAPL_CORE_EVENTS_MAX, -+ NR_RAPL_CORE_DOMAINS = PERF_RAPL_CORE_EVENTS_MAX, }; -static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { ++#define PERF_RAPL_CORE 0 /* single core */ ++#define PERF_RAPL_CORE_EVENTS_MAX 1 ++#define NR_RAPL_CORE_DOMAINS PERF_RAPL_CORE_EVENTS_MAX ++ +static const char *const rapl_pkg_domain_names[NR_RAPL_PKG_DOMAINS] __initconst = { "pp0-core", "package", "dram", -@@ -89,6 +100,10 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { +@@ -89,6 +97,8 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { "psys", }; -+static const char *const rapl_core_domain_names[NR_RAPL_CORE_DOMAINS] __initconst = { -+ "per-core", -+}; ++static const char *const rapl_core_domain_name __initconst = "core"; + /* * event code: LSB 8 bits, passed in attr->config * any other bit is reserved -@@ -128,8 +143,9 @@ struct rapl_pmu { +@@ -112,7 +122,7 @@ static struct perf_pmu_events_attr event_attr_##v = { \ + * considered as either pkg-scope or die-scope, and we are considering + * them as die-scope. + */ +-#define rapl_pmu_is_pkg_scope() \ ++#define rapl_pkg_pmu_is_pkg_scope() \ + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \ + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) +@@ -129,7 +139,8 @@ struct rapl_pmu { struct rapl_pmus { struct pmu pmu; -+ cpumask_t cpumask; unsigned int nr_rapl_pmu; - struct rapl_pmu *pmus[] __counted_by(nr_rapl_pmu); ++ unsigned int cntr_mask; + struct rapl_pmu *rapl_pmu[] __counted_by(nr_rapl_pmu); }; enum rapl_unit_quirk { -@@ -139,19 +155,22 @@ enum rapl_unit_quirk { +@@ -139,45 +150,43 @@ enum rapl_unit_quirk { }; struct rapl_model { @@ -18819,37 +20644,94 @@ index a481a939862e..6b405bf46781 100644 -static struct rapl_pmus *rapl_pmus; -static cpumask_t rapl_cpu_mask; -static unsigned int rapl_cntr_mask; -+static int rapl_hw_unit[NR_RAPL_PKG_DOMAINS] __read_mostly; ++static int rapl_pkg_hw_unit[NR_RAPL_PKG_DOMAINS] __read_mostly; ++static int rapl_core_hw_unit __read_mostly; +static struct rapl_pmus *rapl_pmus_pkg; +static struct rapl_pmus *rapl_pmus_core; -+static unsigned int rapl_pkg_cntr_mask; -+static unsigned int rapl_core_cntr_mask; static u64 rapl_timer_ms; -static struct perf_msr *rapl_msrs; +static struct rapl_model *rapl_model; /* - * Helper functions to get the correct topology macros according to the -@@ -177,7 +196,8 @@ static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) - * The unsigned check also catches the '-1' return value for non - * existent mappings in the topology map. +- * Helper functions to get the correct topology macros according to the ++ * Helper function to get the correct topology id according to the + * RAPL PMU scope. + */ +-static inline unsigned int get_rapl_pmu_idx(int cpu) +-{ +- return rapl_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) : +- topology_logical_die_id(cpu); +-} +- +-static inline const struct cpumask *get_rapl_pmu_cpumask(int cpu) ++static inline unsigned int get_rapl_pmu_idx(int cpu, int scope) + { +- return rapl_pmu_is_pkg_scope() ? topology_core_cpumask(cpu) : +- topology_die_cpumask(cpu); +-} +- +-static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) +-{ +- unsigned int rapl_pmu_idx = get_rapl_pmu_idx(cpu); +- + /* +- * The unsigned check also catches the '-1' return value for non +- * existent mappings in the topology map. ++ * Returns unsigned int, which converts the '-1' return value ++ * (for non-existent mappings in topology map) to UINT_MAX, so ++ * the error check in the caller is simplified. */ - return rapl_pmu_idx < rapl_pmus->nr_rapl_pmu ? rapl_pmus->pmus[rapl_pmu_idx] : NULL; -+ return rapl_pmu_idx < rapl_pmus_pkg->nr_rapl_pmu ? -+ rapl_pmus_pkg->rapl_pmu[rapl_pmu_idx] : NULL; ++ switch (scope) { ++ case PERF_PMU_SCOPE_PKG: ++ return topology_logical_package_id(cpu); ++ case PERF_PMU_SCOPE_DIE: ++ return topology_logical_die_id(cpu); ++ case PERF_PMU_SCOPE_CORE: ++ return topology_logical_core_id(cpu); ++ default: ++ return -EINVAL; ++ } } static inline u64 rapl_read_counter(struct perf_event *event) -@@ -189,7 +209,7 @@ static inline u64 rapl_read_counter(struct perf_event *event) +@@ -187,19 +196,20 @@ static inline u64 rapl_read_counter(struct perf_event *event) + return raw; + } - static inline u64 rapl_scale(u64 v, int cfg) +-static inline u64 rapl_scale(u64 v, int cfg) ++static inline u64 rapl_scale(u64 v, struct perf_event *event) { - if (cfg > NR_RAPL_DOMAINS) { -+ if (cfg > NR_RAPL_PKG_DOMAINS) { - pr_warn("Invalid domain %d, failed to scale data\n", cfg); - return v; - } -@@ -241,34 +261,34 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu) +- pr_warn("Invalid domain %d, failed to scale data\n", cfg); +- return v; +- } ++ int hw_unit = rapl_pkg_hw_unit[event->hw.config - 1]; ++ ++ if (event->pmu->scope == PERF_PMU_SCOPE_CORE) ++ hw_unit = rapl_core_hw_unit; ++ + /* + * scale delta to smallest unit (1/2^32) + * users must then scale back: count * 1/(1e9*2^32) to get Joules + * or use ldexp(count, -32). + * Watts = Joules/Time delta + */ +- return v << (32 - rapl_hw_unit[cfg - 1]); ++ return v << (32 - hw_unit); + } + + static u64 rapl_event_update(struct perf_event *event) +@@ -226,7 +236,7 @@ static u64 rapl_event_update(struct perf_event *event) + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + +- sdelta = rapl_scale(delta, event->hw.config); ++ sdelta = rapl_scale(delta, event); + + local64_add(sdelta, &event->count); + +@@ -241,34 +251,34 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu) static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) { @@ -18893,7 +20775,7 @@ index a481a939862e..6b405bf46781 100644 struct perf_event *event) { if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) -@@ -276,39 +296,39 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, +@@ -276,39 +286,39 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, event->hw.state = 0; @@ -18947,7 +20829,7 @@ index a481a939862e..6b405bf46781 100644 list_del(&event->active_entry); -@@ -326,23 +346,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) +@@ -326,23 +336,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) hwc->state |= PERF_HES_UPTODATE; } @@ -18976,160 +20858,170 @@ index a481a939862e..6b405bf46781 100644 return 0; } -@@ -356,10 +376,14 @@ static int rapl_pmu_event_init(struct perf_event *event) +@@ -355,12 +365,14 @@ static void rapl_pmu_event_del(struct perf_event *event, int flags) + static int rapl_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config & RAPL_EVENT_MASK; - int bit, ret = 0; +- int bit, ret = 0; - struct rapl_pmu *pmu; ++ int bit, rapl_pmus_scope, ret = 0; + struct rapl_pmu *rapl_pmu; -+ struct rapl_pmus *curr_rapl_pmus; ++ unsigned int rapl_pmu_idx; ++ struct rapl_pmus *rapl_pmus; - /* only look at RAPL events */ +- /* only look at RAPL events */ - if (event->attr.type != rapl_pmus->pmu.type) -+ if (event->attr.type == rapl_pmus_pkg->pmu.type || -+ (rapl_pmus_core && event->attr.type == rapl_pmus_core->pmu.type)) -+ curr_rapl_pmus = container_of(event->pmu, struct rapl_pmus, pmu); -+ else - return -ENOENT; +- return -ENOENT; ++ /* unsupported modes and filters */ ++ if (event->attr.sample_period) /* no sampling */ ++ return -EINVAL; /* check only supported bits are set */ -@@ -369,16 +393,18 @@ static int rapl_pmu_event_init(struct perf_event *event) + if (event->attr.config & ~RAPL_EVENT_MASK) +@@ -369,29 +381,49 @@ static int rapl_pmu_event_init(struct perf_event *event) if (event->cpu < 0) return -EINVAL; - event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; -+ if (curr_rapl_pmus == rapl_pmus_pkg) -+ event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; - +- - if (!cfg || cfg >= NR_RAPL_DOMAINS + 1) -+ if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1) ++ rapl_pmus = container_of(event->pmu, struct rapl_pmus, pmu); ++ if (!rapl_pmus) ++ return -EINVAL; ++ rapl_pmus_scope = rapl_pmus->pmu.scope; ++ ++ if (rapl_pmus_scope == PERF_PMU_SCOPE_PKG || rapl_pmus_scope == PERF_PMU_SCOPE_DIE) { ++ /* only look at RAPL package events */ ++ if (event->attr.type != rapl_pmus_pkg->pmu.type) ++ return -ENOENT; ++ ++ cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1); ++ if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1) ++ return -EINVAL; ++ ++ bit = cfg - 1; ++ event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr; ++ } else if (rapl_pmus_scope == PERF_PMU_SCOPE_CORE) { ++ /* only look at RAPL core events */ ++ if (event->attr.type != rapl_pmus_core->pmu.type) ++ return -ENOENT; ++ ++ cfg = array_index_nospec((long)cfg, NR_RAPL_CORE_DOMAINS + 1); ++ if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1) ++ return -EINVAL; ++ ++ bit = cfg - 1; ++ event->hw.event_base = rapl_model->rapl_core_msrs[bit].msr; ++ } else return -EINVAL; - +- - cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1); -+ cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1); - bit = cfg - 1; +- bit = cfg - 1; /* check event supported */ - if (!(rapl_cntr_mask & (1 << bit))) -+ if (!(rapl_pkg_cntr_mask & (1 << bit)) && -+ !(rapl_core_cntr_mask & (1 << bit))) ++ if (!(rapl_pmus->cntr_mask & (1 << bit))) return -EINVAL; - /* unsupported modes and filters */ -@@ -386,12 +412,18 @@ static int rapl_pmu_event_init(struct perf_event *event) +- /* unsupported modes and filters */ +- if (event->attr.sample_period) /* no sampling */ ++ rapl_pmu_idx = get_rapl_pmu_idx(event->cpu, rapl_pmus_scope); ++ if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu) return -EINVAL; - +- /* must be done before validate_group */ - pmu = cpu_to_rapl_pmu(event->cpu); - if (!pmu) -+ if (curr_rapl_pmus == rapl_pmus_core) { -+ rapl_pmu = curr_rapl_pmus->rapl_pmu[topology_logical_core_id(event->cpu)]; -+ event->hw.event_base = rapl_model->rapl_core_msrs[bit].msr; -+ } else { -+ rapl_pmu = curr_rapl_pmus->rapl_pmu[get_rapl_pmu_idx(event->cpu)]; -+ event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr; -+ } -+ ++ rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx]; + if (!rapl_pmu) return -EINVAL; - event->cpu = pmu->cpu; - event->pmu_private = pmu; - event->hw.event_base = rapl_msrs[bit].msr; -+ event->cpu = rapl_pmu->cpu; ++ + event->pmu_private = rapl_pmu; event->hw.config = cfg; event->hw.idx = bit; -@@ -406,7 +438,7 @@ static void rapl_pmu_event_read(struct perf_event *event) - static ssize_t rapl_get_attr_cpumask(struct device *dev, - struct device_attribute *attr, char *buf) - { -- return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask); -+ return cpumap_print_to_pagebuf(true, buf, &rapl_pmus_pkg->cpumask); +@@ -403,34 +435,19 @@ static void rapl_pmu_event_read(struct perf_event *event) + rapl_event_update(event); } - static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL); -@@ -420,17 +452,38 @@ static struct attribute_group rapl_pmu_attr_group = { - .attrs = rapl_pmu_attrs, - }; - -+static ssize_t rapl_get_attr_per_core_cpumask(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return cpumap_print_to_pagebuf(true, buf, &rapl_pmus_core->cpumask); -+} -+ -+static struct device_attribute dev_attr_per_core_cpumask = __ATTR(cpumask, 0444, -+ rapl_get_attr_per_core_cpumask, -+ NULL); -+ -+static struct attribute *rapl_pmu_per_core_attrs[] = { -+ &dev_attr_per_core_cpumask.attr, -+ NULL, -+}; -+ -+static struct attribute_group rapl_pmu_per_core_attr_group = { -+ .attrs = rapl_pmu_per_core_attrs, -+}; -+ +-static ssize_t rapl_get_attr_cpumask(struct device *dev, +- struct device_attribute *attr, char *buf) +-{ +- return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask); +-} +- +-static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL); +- +-static struct attribute *rapl_pmu_attrs[] = { +- &dev_attr_cpumask.attr, +- NULL, +-}; +- +-static struct attribute_group rapl_pmu_attr_group = { +- .attrs = rapl_pmu_attrs, +-}; +- RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04"); RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05"); -+RAPL_EVENT_ATTR_STR(energy-per-core, rapl_per_core, "event=0x01"); ++RAPL_EVENT_ATTR_STR(energy-core, rapl_core, "event=0x01"); RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules"); RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules"); RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules"); RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules"); RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_psys_unit, "Joules"); -+RAPL_EVENT_ATTR_STR(energy-per-core.unit, rapl_per_core_unit, "Joules"); ++RAPL_EVENT_ATTR_STR(energy-core.unit, rapl_core_unit, "Joules"); /* * we compute in 0.23 nJ increments regardless of MSR -@@ -440,6 +493,7 @@ RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890 +@@ -440,6 +457,7 @@ RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890 RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10"); RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10"); -+RAPL_EVENT_ATTR_STR(energy-per-core.scale, rapl_per_core_scale, "2.3283064365386962890625e-10"); ++RAPL_EVENT_ATTR_STR(energy-core.scale, rapl_core_scale, "2.3283064365386962890625e-10"); /* * There are no default events, but we need to create -@@ -473,6 +527,13 @@ static const struct attribute_group *rapl_attr_groups[] = { - NULL, +@@ -467,7 +485,12 @@ static struct attribute_group rapl_pmu_format_group = { }; -+static const struct attribute_group *rapl_per_core_attr_groups[] = { -+ &rapl_pmu_per_core_attr_group, + static const struct attribute_group *rapl_attr_groups[] = { +- &rapl_pmu_attr_group, + &rapl_pmu_format_group, + &rapl_pmu_events_group, + NULL, +}; + - static struct attribute *rapl_events_cores[] = { - EVENT_PTR(rapl_cores), - EVENT_PTR(rapl_cores_unit), -@@ -533,6 +594,18 @@ static struct attribute_group rapl_events_psys_group = { ++static const struct attribute_group *rapl_core_attr_groups[] = { + &rapl_pmu_format_group, + &rapl_pmu_events_group, + NULL, +@@ -533,6 +556,18 @@ static struct attribute_group rapl_events_psys_group = { .attrs = rapl_events_psys, }; -+static struct attribute *rapl_events_per_core[] = { -+ EVENT_PTR(rapl_per_core), -+ EVENT_PTR(rapl_per_core_unit), -+ EVENT_PTR(rapl_per_core_scale), ++static struct attribute *rapl_events_core[] = { ++ EVENT_PTR(rapl_core), ++ EVENT_PTR(rapl_core_unit), ++ EVENT_PTR(rapl_core_scale), + NULL, +}; + -+static struct attribute_group rapl_events_per_core_group = { ++static struct attribute_group rapl_events_core_group = { + .name = "events", -+ .attrs = rapl_events_per_core, ++ .attrs = rapl_events_core, +}; + static bool test_msr(int idx, void *data) { return test_bit(idx, (unsigned long *) data); -@@ -558,11 +631,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = { +@@ -558,11 +593,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = { }; /* @@ -19144,124 +21036,74 @@ index a481a939862e..6b405bf46781 100644 [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, NULL, false, 0 }, [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, NULL, false, 0 }, -@@ -570,77 +643,104 @@ static struct perf_msr amd_rapl_msrs[] = { +@@ -570,77 +605,25 @@ static struct perf_msr amd_rapl_msrs[] = { [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 }, }; -static int rapl_cpu_offline(unsigned int cpu) -+static struct perf_msr amd_rapl_core_msrs[] = { -+ [PERF_RAPL_PER_CORE] = { MSR_AMD_CORE_ENERGY_STATUS, &rapl_events_per_core_group, -+ test_msr, false, RAPL_MSR_MASK }, -+}; -+ -+static int __rapl_cpu_offline(struct rapl_pmus *rapl_pmus, unsigned int rapl_pmu_idx, -+ const struct cpumask *event_cpumask, unsigned int cpu) - { +-{ - struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); -+ struct rapl_pmu *rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx]; - int target; - - /* Check if exiting cpu is used for collecting rapl events */ +- int target; +- +- /* Check if exiting cpu is used for collecting rapl events */ - if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask)) -+ if (!cpumask_test_and_clear_cpu(cpu, &rapl_pmus->cpumask)) - return 0; - +- return 0; +- - pmu->cpu = -1; -+ rapl_pmu->cpu = -1; - /* Find a new cpu to collect rapl events */ +- /* Find a new cpu to collect rapl events */ - target = cpumask_any_but(get_rapl_pmu_cpumask(cpu), cpu); -+ target = cpumask_any_but(event_cpumask, cpu); - - /* Migrate rapl events to the new target */ - if (target < nr_cpu_ids) { +- +- /* Migrate rapl events to the new target */ +- if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &rapl_cpu_mask); - pmu->cpu = target; - perf_pmu_migrate_context(pmu->pmu, cpu, target); -+ cpumask_set_cpu(target, &rapl_pmus->cpumask); -+ rapl_pmu->cpu = target; -+ perf_pmu_migrate_context(rapl_pmu->pmu, cpu, target); - } - return 0; - } - +- } +- return 0; +-} +- -static int rapl_cpu_online(unsigned int cpu) -+static int rapl_cpu_offline(unsigned int cpu) - { +-{ - s32 rapl_pmu_idx = get_rapl_pmu_idx(cpu); - if (rapl_pmu_idx < 0) { - pr_err("topology_logical_(package/die)_id() returned a negative value"); - return -EINVAL; - } - struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); -+ int ret = __rapl_cpu_offline(rapl_pmus_pkg, get_rapl_pmu_idx(cpu), -+ get_rapl_pmu_cpumask(cpu), cpu); -+ -+ if (ret == 0 && rapl_model->core_events) -+ ret = __rapl_cpu_offline(rapl_pmus_core, topology_logical_core_id(cpu), -+ topology_sibling_cpumask(cpu), cpu); -+ -+ return ret; -+} -+ -+static int __rapl_cpu_online(struct rapl_pmus *rapl_pmus, unsigned int rapl_pmu_idx, -+ const struct cpumask *event_cpumask, unsigned int cpu) -+{ -+ struct rapl_pmu *rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx]; - int target; - +- int target; +- - if (!pmu) { - pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); - if (!pmu) -+ if (!rapl_pmu) { -+ rapl_pmu = kzalloc_node(sizeof(*rapl_pmu), GFP_KERNEL, cpu_to_node(cpu)); -+ if (!rapl_pmu) - return -ENOMEM; - +- return -ENOMEM; +- - raw_spin_lock_init(&pmu->lock); - INIT_LIST_HEAD(&pmu->active_list); - pmu->pmu = &rapl_pmus->pmu; - pmu->timer_interval = ms_to_ktime(rapl_timer_ms); - rapl_hrtimer_init(pmu); -+ raw_spin_lock_init(&rapl_pmu->lock); -+ INIT_LIST_HEAD(&rapl_pmu->active_list); -+ rapl_pmu->pmu = &rapl_pmus->pmu; -+ rapl_pmu->timer_interval = ms_to_ktime(rapl_timer_ms); -+ rapl_hrtimer_init(rapl_pmu); - +- - rapl_pmus->pmus[rapl_pmu_idx] = pmu; -+ rapl_pmus->rapl_pmu[rapl_pmu_idx] = rapl_pmu; - } - - /* - * Check if there is an online cpu in the package which collects rapl - * events already. - */ +- } +- +- /* +- * Check if there is an online cpu in the package which collects rapl +- * events already. +- */ - target = cpumask_any_and(&rapl_cpu_mask, get_rapl_pmu_cpumask(cpu)); -+ target = cpumask_any_and(&rapl_pmus->cpumask, event_cpumask); - if (target < nr_cpu_ids) - return 0; - +- if (target < nr_cpu_ids) +- return 0; +- - cpumask_set_cpu(cpu, &rapl_cpu_mask); - pmu->cpu = cpu; -+ cpumask_set_cpu(cpu, &rapl_pmus->cpumask); -+ rapl_pmu->cpu = cpu; - return 0; - } +- return 0; +-} ++static struct perf_msr amd_rapl_core_msrs[] = { ++ [PERF_RAPL_CORE] = { MSR_AMD_CORE_ENERGY_STATUS, &rapl_events_core_group, ++ test_msr, false, RAPL_MSR_MASK }, ++}; -static int rapl_check_hw_unit(struct rapl_model *rm) -+static int rapl_cpu_online(unsigned int cpu) -+{ -+ int ret = __rapl_cpu_online(rapl_pmus_pkg, get_rapl_pmu_idx(cpu), -+ get_rapl_pmu_cpumask(cpu), cpu); -+ -+ if (ret == 0 && rapl_model->core_events) -+ ret = __rapl_cpu_online(rapl_pmus_core, topology_logical_core_id(cpu), -+ topology_sibling_cpumask(cpu), cpu); -+ -+ return ret; -+} -+ -+ +static int rapl_check_hw_unit(void) { u64 msr_rapl_power_unit_bits; @@ -19272,37 +21114,75 @@ index a481a939862e..6b405bf46781 100644 + if (rdmsrl_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits)) return -1; - for (i = 0; i < NR_RAPL_DOMAINS; i++) +- rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; + for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) - rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; ++ rapl_pkg_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; ++ ++ rapl_core_hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; - switch (rm->unit_quirk) { + switch (rapl_model->unit_quirk) { /* * DRAM domain on HSW server and KNL has fixed energy unit which can be * different than the unit from power unit MSR. See -@@ -679,22 +779,29 @@ static void __init rapl_advertise(void) +@@ -648,17 +631,16 @@ static int rapl_check_hw_unit(struct rapl_model *rm) + * of 2. Datasheet, September 2014, Reference Number: 330784-001 " + */ + case RAPL_UNIT_QUIRK_INTEL_HSW: +- rapl_hw_unit[PERF_RAPL_RAM] = 16; ++ rapl_pkg_hw_unit[PERF_RAPL_RAM] = 16; + break; + /* SPR uses a fixed energy unit for Psys domain. */ + case RAPL_UNIT_QUIRK_INTEL_SPR: +- rapl_hw_unit[PERF_RAPL_PSYS] = 0; ++ rapl_pkg_hw_unit[PERF_RAPL_PSYS] = 0; + break; + default: + break; + } + +- + /* + * Calculate the timer rate: + * Use reference of 200W for scaling the timeout to avoid counter +@@ -667,9 +649,9 @@ static int rapl_check_hw_unit(struct rapl_model *rm) + * if hw unit is 32, then we use 2 ms 1/200/2 + */ + rapl_timer_ms = 2; +- if (rapl_hw_unit[0] < 32) { ++ if (rapl_pkg_hw_unit[0] < 32) { + rapl_timer_ms = (1000 / (2 * 100)); +- rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1)); ++ rapl_timer_ms *= (1ULL << (32 - rapl_pkg_hw_unit[0] - 1)); + } + return 0; + } +@@ -677,24 +659,32 @@ static int rapl_check_hw_unit(struct rapl_model *rm) + static void __init rapl_advertise(void) + { int i; ++ int num_counters = hweight32(rapl_pmus_pkg->cntr_mask); ++ ++ if (rapl_pmus_core) ++ num_counters += hweight32(rapl_pmus_core->cntr_mask); pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n", - hweight32(rapl_cntr_mask), rapl_timer_ms); -+ hweight32(rapl_pkg_cntr_mask) + hweight32(rapl_core_cntr_mask), rapl_timer_ms); -+ -+ for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) { -+ if (rapl_pkg_cntr_mask & (1 << i)) { -+ pr_info("hw unit of domain %s 2^-%d Joules\n", -+ rapl_pkg_domain_names[i], rapl_hw_unit[i]); -+ } -+ } ++ num_counters, rapl_timer_ms); - for (i = 0; i < NR_RAPL_DOMAINS; i++) { - if (rapl_cntr_mask & (1 << i)) { -+ for (i = 0; i < NR_RAPL_CORE_DOMAINS; i++) { -+ if (rapl_core_cntr_mask & (1 << i)) { ++ for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) { ++ if (rapl_pmus_pkg->cntr_mask & (1 << i)) { pr_info("hw unit of domain %s 2^-%d Joules\n", - rapl_domain_names[i], rapl_hw_unit[i]); -+ rapl_core_domain_names[i], rapl_hw_unit[i]); ++ rapl_pkg_domain_names[i], rapl_pkg_hw_unit[i]); } } ++ ++ if (rapl_pmus_core && (rapl_pmus_core->cntr_mask & (1 << PERF_RAPL_CORE))) ++ pr_info("hw unit of domain %s 2^-%d Joules\n", ++ rapl_core_domain_name, rapl_core_hw_unit); } -static void cleanup_rapl_pmus(void) @@ -19316,38 +21196,81 @@ index a481a939862e..6b405bf46781 100644 kfree(rapl_pmus); } -@@ -707,14 +814,17 @@ static const struct attribute_group *rapl_attr_update[] = { +@@ -707,17 +697,60 @@ static const struct attribute_group *rapl_attr_update[] = { NULL, }; -static int __init init_rapl_pmus(void) --{ -- int nr_rapl_pmu = topology_max_packages(); -+static const struct attribute_group *rapl_per_core_attr_update[] = { -+ &rapl_events_per_core_group, ++static const struct attribute_group *rapl_core_attr_update[] = { ++ &rapl_events_core_group, ++ NULL, +}; ++ ++static int __init init_rapl_pmu(struct rapl_pmus *rapl_pmus) ++{ ++ struct rapl_pmu *rapl_pmu; ++ int idx; ++ ++ for (idx = 0; idx < rapl_pmus->nr_rapl_pmu; idx++) { ++ rapl_pmu = kzalloc(sizeof(*rapl_pmu), GFP_KERNEL); ++ if (!rapl_pmu) ++ goto free; ++ ++ raw_spin_lock_init(&rapl_pmu->lock); ++ INIT_LIST_HEAD(&rapl_pmu->active_list); ++ rapl_pmu->pmu = &rapl_pmus->pmu; ++ rapl_pmu->timer_interval = ms_to_ktime(rapl_timer_ms); ++ rapl_hrtimer_init(rapl_pmu); ++ ++ rapl_pmus->rapl_pmu[idx] = rapl_pmu; ++ } ++ ++ return 0; ++free: ++ for (; idx > 0; idx--) ++ kfree(rapl_pmus->rapl_pmu[idx - 1]); ++ return -ENOMEM; ++} ++ ++static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_scope, ++ const struct attribute_group **rapl_attr_groups, ++ const struct attribute_group **rapl_attr_update) + { + int nr_rapl_pmu = topology_max_packages(); ++ struct rapl_pmus *rapl_pmus; - if (!rapl_pmu_is_pkg_scope()) - nr_rapl_pmu *= topology_max_dies_per_package(); -+static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int nr_rapl_pmu, -+ const struct attribute_group **rapl_attr_groups, -+ const struct attribute_group **rapl_attr_update) -+{ -+ struct rapl_pmus *rapl_pmus; ++ /* ++ * rapl_pmu_scope must be either PKG, DIE or CORE ++ */ ++ if (rapl_pmu_scope == PERF_PMU_SCOPE_DIE) ++ nr_rapl_pmu *= topology_max_dies_per_package(); ++ else if (rapl_pmu_scope == PERF_PMU_SCOPE_CORE) ++ nr_rapl_pmu *= topology_num_cores_per_package(); ++ else if (rapl_pmu_scope != PERF_PMU_SCOPE_PKG) ++ return -EINVAL; - rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL); + rapl_pmus = kzalloc(struct_size(rapl_pmus, rapl_pmu, nr_rapl_pmu), GFP_KERNEL); if (!rapl_pmus) return -ENOMEM; -@@ -730,75 +840,80 @@ static int __init init_rapl_pmus(void) - rapl_pmus->pmu.read = rapl_pmu_event_read; - rapl_pmus->pmu.module = THIS_MODULE; - rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; -+ + *rapl_pmus_ptr = rapl_pmus; + - return 0; + rapl_pmus->nr_rapl_pmu = nr_rapl_pmu; + rapl_pmus->pmu.attr_groups = rapl_attr_groups; + rapl_pmus->pmu.attr_update = rapl_attr_update; +@@ -728,77 +761,81 @@ static int __init init_rapl_pmus(void) + rapl_pmus->pmu.start = rapl_pmu_event_start; + rapl_pmus->pmu.stop = rapl_pmu_event_stop; + rapl_pmus->pmu.read = rapl_pmu_event_read; ++ rapl_pmus->pmu.scope = rapl_pmu_scope; + rapl_pmus->pmu.module = THIS_MODULE; + rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; +- return 0; ++ ++ return init_rapl_pmu(rapl_pmus); } static struct rapl_model model_snb = { @@ -19411,7 +21334,7 @@ index a481a939862e..6b405bf46781 100644 BIT(PERF_RAPL_PSYS), .msr_power_unit = MSR_RAPL_POWER_UNIT, - .rapl_msrs = intel_rapl_msrs, -+ .rapl_pkg_msrs = intel_rapl_msrs, ++ .rapl_pkg_msrs = intel_rapl_msrs, }; static struct rapl_model model_spr = { @@ -19429,7 +21352,7 @@ index a481a939862e..6b405bf46781 100644 static struct rapl_model model_amd_hygon = { - .events = BIT(PERF_RAPL_PKG), + .pkg_events = BIT(PERF_RAPL_PKG), -+ .core_events = BIT(PERF_RAPL_PER_CORE), ++ .core_events = BIT(PERF_RAPL_CORE), .msr_power_unit = MSR_AMD_RAPL_POWER_UNIT, - .rapl_msrs = amd_rapl_msrs, + .rapl_pkg_msrs = amd_rapl_pkg_msrs, @@ -19437,18 +21360,17 @@ index a481a939862e..6b405bf46781 100644 }; static const struct x86_cpu_id rapl_model_match[] __initconst = { -@@ -854,28 +969,47 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); +@@ -854,57 +891,73 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); static int __init rapl_pmu_init(void) { const struct x86_cpu_id *id; - struct rapl_model *rm; ++ int rapl_pkg_pmu_scope = PERF_PMU_SCOPE_DIE; int ret; -+ int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package(); -+ int nr_cores = topology_max_packages() * topology_num_cores_per_package(); -+ -+ if (rapl_pmu_is_pkg_scope()) -+ nr_rapl_pmu = topology_max_packages(); ++ if (rapl_pkg_pmu_is_pkg_scope()) ++ rapl_pkg_pmu_scope = PERF_PMU_SCOPE_PKG; ++ id = x86_match_cpu(rapl_model_match); if (!id) return -ENODEV; @@ -19457,9 +21379,7 @@ index a481a939862e..6b405bf46781 100644 + rapl_model = (struct rapl_model *) id->driver_data; - rapl_msrs = rm->rapl_msrs; -+ rapl_pkg_cntr_mask = perf_msr_probe(rapl_model->rapl_pkg_msrs, PERF_RAPL_PKG_EVENTS_MAX, -+ false, (void *) &rapl_model->pkg_events); - +- - rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX, - false, (void *) &rm->events); - @@ -19469,58 +21389,54 @@ index a481a939862e..6b405bf46781 100644 return ret; - ret = init_rapl_pmus(); -+ ret = init_rapl_pmus(&rapl_pmus_pkg, nr_rapl_pmu, rapl_attr_groups, rapl_attr_update); ++ ret = init_rapl_pmus(&rapl_pmus_pkg, rapl_pkg_pmu_scope, rapl_attr_groups, ++ rapl_attr_update); if (ret) return ret; -+ if (rapl_model->core_events) { -+ rapl_core_cntr_mask = perf_msr_probe(rapl_model->rapl_core_msrs, -+ PERF_RAPL_CORE_EVENTS_MAX, false, -+ (void *) &rapl_model->core_events); +- /* +- * Install callbacks. Core will call them for each online cpu. +- */ +- ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE, +- "perf/x86/rapl:online", +- rapl_cpu_online, rapl_cpu_offline); ++ rapl_pmus_pkg->cntr_mask = perf_msr_probe(rapl_model->rapl_pkg_msrs, ++ PERF_RAPL_PKG_EVENTS_MAX, false, ++ (void *) &rapl_model->pkg_events); + -+ ret = init_rapl_pmus(&rapl_pmus_core, nr_cores, -+ rapl_per_core_attr_groups, rapl_per_core_attr_update); -+ if (ret) { -+ /* -+ * If initialization of per_core PMU fails, reset per_core -+ * flag, and continue with power PMU initialization. -+ */ -+ pr_warn("Per-core PMU initialization failed (%d)\n", ret); -+ rapl_model->core_events = 0UL; -+ } -+ } -+ - /* - * Install callbacks. Core will call them for each online cpu. - */ -@@ -885,10 +1019,24 @@ static int __init rapl_pmu_init(void) ++ ret = perf_pmu_register(&rapl_pmus_pkg->pmu, "power", -1); if (ret) goto out; - ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); -+ ret = perf_pmu_register(&rapl_pmus_pkg->pmu, "power", -1); - if (ret) - goto out1; - +- if (ret) +- goto out1; + if (rapl_model->core_events) { -+ ret = perf_pmu_register(&rapl_pmus_core->pmu, "power_per_core", -1); ++ ret = init_rapl_pmus(&rapl_pmus_core, PERF_PMU_SCOPE_CORE, ++ rapl_core_attr_groups, ++ rapl_core_attr_update); + if (ret) { -+ /* -+ * If registration of per_core PMU fails, cleanup per_core PMU -+ * variables, reset the per_core flag and keep the -+ * power PMU untouched. -+ */ -+ pr_warn("Per-core PMU registration failed (%d)\n", ret); ++ pr_warn("power-core PMU initialization failed (%d)\n", ret); ++ goto core_init_failed; ++ } ++ ++ rapl_pmus_core->cntr_mask = perf_msr_probe(rapl_model->rapl_core_msrs, ++ PERF_RAPL_CORE_EVENTS_MAX, false, ++ (void *) &rapl_model->core_events); ++ ++ ret = perf_pmu_register(&rapl_pmus_core->pmu, "power_core", -1); ++ if (ret) { ++ pr_warn("power-core PMU registration failed (%d)\n", ret); + cleanup_rapl_pmus(rapl_pmus_core); -+ rapl_model->core_events = 0UL; + } + } -+ + ++core_init_failed: rapl_advertise(); return 0; -@@ -896,7 +1044,7 @@ static int __init rapl_pmu_init(void) - cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); +-out1: +- cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); out: pr_warn("Initialization failed (%d), disabled\n", ret); - cleanup_rapl_pmus(); @@ -19528,18 +21444,18 @@ index a481a939862e..6b405bf46781 100644 return ret; } module_init(rapl_pmu_init); -@@ -904,7 +1052,11 @@ module_init(rapl_pmu_init); + static void __exit intel_rapl_exit(void) { - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); +- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); - perf_pmu_unregister(&rapl_pmus->pmu); - cleanup_rapl_pmus(); -+ perf_pmu_unregister(&rapl_pmus_pkg->pmu); -+ cleanup_rapl_pmus(rapl_pmus_pkg); -+ if (rapl_model->core_events) { ++ if (rapl_pmus_core) { + perf_pmu_unregister(&rapl_pmus_core->pmu); + cleanup_rapl_pmus(rapl_pmus_core); + } ++ perf_pmu_unregister(&rapl_pmus_pkg->pmu); ++ cleanup_rapl_pmus(rapl_pmus_pkg); } module_exit(intel_rapl_exit); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h @@ -19590,12 +21506,24 @@ index 8277c64f88db..b5a5e1411469 100644 } /* Package relative core ID */ +diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h +index 2361ed4d2b15..37a9afffb59e 100644 +--- a/include/linux/cpuhotplug.h ++++ b/include/linux/cpuhotplug.h +@@ -208,7 +208,6 @@ enum cpuhp_state { + CPUHP_AP_PERF_X86_UNCORE_ONLINE, + CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, + CPUHP_AP_PERF_X86_AMD_POWER_ONLINE, +- CPUHP_AP_PERF_X86_RAPL_ONLINE, + CPUHP_AP_PERF_S390_CF_ONLINE, + CPUHP_AP_PERF_S390_SF_ONLINE, + CPUHP_AP_PERF_ARM_CCI_ONLINE, -- 2.47.0 -From fe6edd05ea01c2a1b677489dfe6a1b5a4d6bd4c6 Mon Sep 17 00:00:00 2001 +From 56c63974139208510b818be3671cd81f53477e9b Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 18 Nov 2024 13:26:10 +0100 +Date: Thu, 21 Nov 2024 22:01:17 +0100 Subject: [PATCH 10/12] pksm Signed-off-by: Peter Jung @@ -20026,9 +21954,9 @@ index 01071182763e..7394bad8178e 100644 -- 2.47.0 -From 770d2e8fd2693929d69488c27b284466c2cda390 Mon Sep 17 00:00:00 2001 +From 0302b0fc0ae9349f48c609d54572001d7ab44fad Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 18 Nov 2024 13:26:31 +0100 +Date: Thu, 21 Nov 2024 22:01:43 +0100 Subject: [PATCH 11/12] t2 Signed-off-by: Peter Jung @@ -30275,9 +32203,9 @@ index 4427572b2477..b60c99d61882 100755 -- 2.47.0 -From e2b61e68148654e850dc0dc004907522f3f6eea1 Mon Sep 17 00:00:00 2001 +From 418eb317bb2568c00f1c2bb620ded17db45f9383 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Mon, 18 Nov 2024 13:26:48 +0100 +Date: Thu, 21 Nov 2024 22:01:56 +0100 Subject: [PATCH 12/12] zstd Signed-off-by: Peter Jung @@ -48926,3 +50854,4 @@ index 469fc3059be0..0ae819f0c927 100644 -- 2.47.0 +