Re: [PATCH v2 1/6] selftests/resctrl: Extend signal handler coverage to unmount on receiving signal

From: Reinette Chatre
Date: Wed Sep 27 2023 - 00:23:53 EST


Hi Ilpo,

On 9/15/2023 8:44 AM, Ilpo Järvinen wrote:
> Unmounting resctrl FS has been moved into the per test functions in
> resctrl_tests.c by commit caddc0fbe495 ("selftests/resctrl: Move
> resctrl FS mount/umount to higher level"). In case a signal (SIGINT,
> SIGTERM, or SIGHUP) is received, the running selftest is aborted by
> ctrlc_handler() which then unmounts resctrl fs before exiting. The
> current section between signal_handler_register() and
> signal_handler_unregister(), however, does not cover the entire
> duration when resctrl FS is mounted.
>
> Move signal_handler_register() and signal_handler_unregister() calls
> from per test files into resctrl_tests.c to properly unmount resctrl
> fs. In order to not add signal_handler_register()/unregister() n times,
> create helpers test_prepare() and test_cleanup().
>
> Adjust child process kill() call in ctrlc_handler() to only be invoked
> if the child was already forked.
>
> Fixes: caddc0fbe495 ("selftests/resctrl: Move resctrl FS mount/umount to higher level")
> Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@xxxxxxxxxxxxxxx>
> Cc: <stable@xxxxxxxxxxxxxxx>
> ---
> tools/testing/selftests/resctrl/cat_test.c | 8 ---
> .../testing/selftests/resctrl/resctrl_tests.c | 65 +++++++++++--------
> tools/testing/selftests/resctrl/resctrl_val.c | 22 +++----
> 3 files changed, 48 insertions(+), 47 deletions(-)
>
> diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
> index 97b87285ab2a..224ba8544d8a 100644
> --- a/tools/testing/selftests/resctrl/cat_test.c
> +++ b/tools/testing/selftests/resctrl/cat_test.c
> @@ -167,12 +167,6 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
> strcpy(param.filename, RESULT_FILE_NAME1);
> param.num_of_runs = 0;
> param.cpu_no = sibling_cpu_no;
> - } else {
> - ret = signal_handler_register();
> - if (ret) {
> - kill(bm_pid, SIGKILL);
> - goto out;
> - }
> }
>
> remove(param.filename);
> @@ -209,10 +203,8 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
> }
> close(pipefd[0]);
> kill(bm_pid, SIGKILL);
> - signal_handler_unregister();
> }
>
> -out:
> cat_test_cleanup();
>
> return ret;
> diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
> index 823672a20a43..524ba83d7568 100644
> --- a/tools/testing/selftests/resctrl/resctrl_tests.c
> +++ b/tools/testing/selftests/resctrl/resctrl_tests.c
> @@ -67,21 +67,41 @@ void tests_cleanup(void)
> cat_test_cleanup();
> }
>
> -static void run_mbm_test(const char * const *benchmark_cmd, int cpu_no)
> +static int test_prepare()
> {
> int res;
>
> - ksft_print_msg("Starting MBM BW change ...\n");
> + res = signal_handler_register();
> + if (res)
> + return res;
>
> res = mount_resctrlfs();
> if (res) {
> + signal_handler_unregister();
> ksft_exit_fail_msg("Failed to mount resctrl FS\n");
> - return;
> + return res;
> }
> + return 0;
> +}
> +
> +static void test_cleanup()
> +{
> + umount_resctrlfs();
> + signal_handler_unregister();
> +}

Thank you for adding these.

> +
> +static void run_mbm_test(const char * const *benchmark_cmd, int cpu_no)
> +{
> + int res;
> +
> + ksft_print_msg("Starting MBM BW change ...\n");
> +
> + if (test_prepare())
> + return;
>

I am not sure about this. With this early return the kselftest machinery is
never told whether the test passed or failed. I wonder if there should
rather be a "goto" here that triggers ksft_test_result()? This needs some more
thought though. First, with this change test_prepare() officially gains
responsibility to determine if a failure is transient (just a single test
fails) or permanent (no use trying any other tests if this fails). For
the former it would then be up to the caller to call ksft_test_result()
and for the latter test_prepare() will call ksft_exit_fail_msg().
Second, the SNC warning may be an inconvenience with a new goto. Would
it be ok to print that message before the test failure is reported?

> if (!validate_resctrl_feature_request(MBM_STR) || (get_vendor() != ARCH_INTEL)) {
> ksft_test_result_skip("Hardware does not support MBM or MBM is disabled\n");
> - goto umount;
> + goto cleanup;
> }
>
> res = mbm_bw_change(cpu_no, benchmark_cmd);
> @@ -89,8 +109,8 @@ static void run_mbm_test(const char * const *benchmark_cmd, int cpu_no)
> if ((get_vendor() == ARCH_INTEL) && res)
> ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
>
> -umount:
> - umount_resctrlfs();
> +cleanup:
> + test_cleanup();
> }
>
> static void run_mba_test(const char * const *benchmark_cmd, int cpu_no)
> @@ -99,22 +119,19 @@ static void run_mba_test(const char * const *benchmark_cmd, int cpu_no)
>
> ksft_print_msg("Starting MBA Schemata change ...\n");
>
> - res = mount_resctrlfs();
> - if (res) {
> - ksft_exit_fail_msg("Failed to mount resctrl FS\n");
> + if (test_prepare())
> return;
> - }
>
> if (!validate_resctrl_feature_request(MBA_STR) || (get_vendor() != ARCH_INTEL)) {
> ksft_test_result_skip("Hardware does not support MBA or MBA is disabled\n");
> - goto umount;
> + goto cleanup;
> }
>
> res = mba_schemata_change(cpu_no, benchmark_cmd);
> ksft_test_result(!res, "MBA: schemata change\n");
>
> -umount:
> - umount_resctrlfs();
> +cleanup:
> + test_cleanup();
> }
>
> static void run_cmt_test(const char * const *benchmark_cmd, int cpu_no)
> @@ -123,15 +140,12 @@ static void run_cmt_test(const char * const *benchmark_cmd, int cpu_no)
>
> ksft_print_msg("Starting CMT test ...\n");
>
> - res = mount_resctrlfs();
> - if (res) {
> - ksft_exit_fail_msg("Failed to mount resctrl FS\n");
> + if (test_prepare())
> return;
> - }
>
> if (!validate_resctrl_feature_request(CMT_STR)) {
> ksft_test_result_skip("Hardware does not support CMT or CMT is disabled\n");
> - goto umount;
> + goto cleanup;
> }
>
> res = cmt_resctrl_val(cpu_no, 5, benchmark_cmd);
> @@ -139,8 +153,8 @@ static void run_cmt_test(const char * const *benchmark_cmd, int cpu_no)
> if ((get_vendor() == ARCH_INTEL) && res)
> ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
>
> -umount:
> - umount_resctrlfs();
> +cleanup:
> + test_cleanup();
> }
>
> static void run_cat_test(int cpu_no, int no_of_bits)
> @@ -149,22 +163,19 @@ static void run_cat_test(int cpu_no, int no_of_bits)
>
> ksft_print_msg("Starting CAT test ...\n");
>
> - res = mount_resctrlfs();
> - if (res) {
> - ksft_exit_fail_msg("Failed to mount resctrl FS\n");
> + if (test_prepare())
> return;
> - }
>
> if (!validate_resctrl_feature_request(CAT_STR)) {
> ksft_test_result_skip("Hardware does not support CAT or CAT is disabled\n");
> - goto umount;
> + goto cleanup;
> }
>
> res = cat_perf_miss_val(cpu_no, no_of_bits, "L3");
> ksft_test_result(!res, "CAT: test\n");
>
> -umount:
> - umount_resctrlfs();
> +cleanup:
> + test_cleanup();
> }
>
> int main(int argc, char **argv)
> diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
> index 51963a6f2186..a9fe61133119 100644
> --- a/tools/testing/selftests/resctrl/resctrl_val.c
> +++ b/tools/testing/selftests/resctrl/resctrl_val.c
> @@ -468,7 +468,9 @@ pid_t bm_pid, ppid;
>
> void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
> {
> - kill(bm_pid, SIGKILL);
> + /* Only kill child after bm_pid is set after fork() */
> + if (bm_pid)
> + kill(bm_pid, SIGKILL);
> umount_resctrlfs();
> tests_cleanup();
> ksft_print_msg("Ending\n\n");
> @@ -485,6 +487,8 @@ int signal_handler_register(void)
> struct sigaction sigact;
> int ret = 0;
>
> + bm_pid = 0;
> +

Since this is an initialization fix in this area ... what
do you think of also initializing sigact? It could just be
a change to
struct sigaction sigact = {};

This will prevent registering a signal handler with
uninitialized sa_flags.

Reinette