Re: tracer_init_tracefs really slow

From: Lucas Stach
Date: Wed Dec 09 2020 - 09:39:42 EST


Hi Steven,

Am Montag, den 07.12.2020, 14:47 -0500 schrieb Steven Rostedt:
On Mon, 07 Dec 2020 17:24:58 +0100
Lucas Stach <l.stach@xxxxxxxxxxxxxx> wrote:

> I would be happy to test a patch on our whimpy machines, though. :)

Let me know if this helps:

-- Steve

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 06134189e9a7..48aa5b6a7d15 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9062,7 +9062,10 @@ int tracing_init_dentry(void)
 extern struct trace_eval_map *__start_ftrace_eval_maps[];
 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
 

-static void __init trace_eval_init(void)
+static struct workqueue_struct *eval_map_wq __initdata;
+struct work_struct eval_map_work __initdata;
+
+static void __init eval_map_work_func(struct work_struct *work)
 {
  int len;
 

@@ -9070,6 +9073,34 @@ static void __init trace_eval_init(void)
  trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
 }
 

+static int __init trace_eval_init(void)
+{
+ INIT_WORK(&eval_map_work, eval_map_work_func);
+
+ eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
+ if (!eval_map_wq) {
+ pr_err("Unable to allocate eval_map_wq\n");
+ /* Do work here */
+ eval_map_work_func(&eval_map_work);
+ return -ENOMEM;
+ }
+
+ queue_work(eval_map_wq, &eval_map_work);
+ return 0;
+}
+
+static int __init trace_eval_sync(void)
+{
+ /* Make sure the eval map updates are finished */
+ if (eval_map_wq)
+ destroy_workqueue(eval_map_wq);
+ return 0;
+}
+
+subsys_initcall(trace_eval_init);

You don't want to do this...

+late_initcall_sync(trace_eval_sync);
+
+
 #ifdef CONFIG_MODULES
 static void trace_module_add_evals(struct module *mod)
 {
@@ -9171,8 +9202,6 @@ static __init int tracer_init_tracefs(void)
  trace_create_file("saved_tgids", 0444, NULL,
  NULL, &tracing_saved_tgids_fops);
 

- trace_eval_init();
-
... and this. Moving the trace_eval_init into its own initcall means it
gets started before tracer_init_tracefs. As it holds the
trace_event_sem while updating the eval maps, event_trace_init() then
blocks further initcall execution when it tries to grab this semaphore
a bit down the line, killing the parallelism we are trying to unlock
here.

With those 2 lines dropped, the change seems to work as intended and
shaves ~830ms from the kernel boot time on this system.

Regards,
Lucas

  trace_create_eval_file(NULL);
 

 #ifdef CONFIG_MODULES