[for-next][PATCH 1/7] tracing: Expand all ring buffers individually

From: Steven Rostedt
Date: Wed Oct 04 2023 - 21:52:47 EST


From: Zheng Yejian <zhengyejian1@xxxxxxxxxx>

The ring buffer of global_trace is set to the minimum size in
order to save memory on boot up and then it will be expand when
some trace feature enabled.

However currently operations under an instance can also cause
global_trace ring buffer being expanded, and the expanded memory
would be wasted if global_trace then not being used.

See following case, we enable 'sched_switch' event in instance 'A', then
ring buffer of global_trace is unexpectedly expanded to be 1410KB, also
the '(expanded: 1408)' from 'buffer_size_kb' of instance is confusing.

# cd /sys/kernel/tracing
# mkdir instances/A
# cat buffer_size_kb
7 (expanded: 1408)
# cat instances/A/buffer_size_kb
1410 (expanded: 1408)
# echo sched:sched_switch > instances/A/set_event
# cat buffer_size_kb
1410
# cat instances/A/buffer_size_kb
1410

To fix it, we can:
- Make 'ring_buffer_expanded' as a member of 'struct trace_array';
- Make 'ring_buffer_expanded' of instance is defaultly true,
global_trace is defaultly false;
- In order not to expose 'global_trace' outside of file
'kernel/trace/trace.c', introduce trace_set_ring_buffer_expanded()
to set 'ring_buffer_expanded' as 'true';
- Pass the expected trace_array to tracing_update_buffers().

Link: https://lore.kernel.org/linux-trace-kernel/20230906091837.3998020-1-zhengyejian1@xxxxxxxxxx

Signed-off-by: Zheng Yejian <zhengyejian1@xxxxxxxxxx>
Signed-off-by: Steven Rostedt (Google) <rostedt@xxxxxxxxxxx>
---
kernel/trace/trace.c | 47 ++++++++++++++++++++-----------------
kernel/trace/trace.h | 9 +++++--
kernel/trace/trace_events.c | 22 +++++++++--------
3 files changed, 45 insertions(+), 33 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index abaaf516fcae..dd6395692ff9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -54,12 +54,6 @@
#include "trace.h"
#include "trace_output.h"

-/*
- * On boot up, the ring buffer is set to the minimum size, so that
- * we do not waste memory on systems that are not using tracing.
- */
-bool ring_buffer_expanded;
-
#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
* We need to change this state when a selftest is running.
@@ -202,7 +196,7 @@ static int __init set_cmdline_ftrace(char *str)
strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
default_bootup_tracer = bootup_tracer_buf;
/* We are using ftrace early, expand it */
- ring_buffer_expanded = true;
+ trace_set_ring_buffer_expanded(NULL);
return 1;
}
__setup("ftrace=", set_cmdline_ftrace);
@@ -247,7 +241,7 @@ static int __init boot_alloc_snapshot(char *str)
} else {
allocate_snapshot = true;
/* We also need the main ring buffer expanded */
- ring_buffer_expanded = true;
+ trace_set_ring_buffer_expanded(NULL);
}
return 1;
}
@@ -490,6 +484,13 @@ static struct trace_array global_trace = {
.trace_flags = TRACE_DEFAULT_FLAGS,
};

+void trace_set_ring_buffer_expanded(struct trace_array *tr)
+{
+ if (!tr)
+ tr = &global_trace;
+ tr->ring_buffer_expanded = true;
+}
+
LIST_HEAD(ftrace_trace_arrays);

int trace_array_get(struct trace_array *this_tr)
@@ -2012,7 +2013,7 @@ static int run_tracer_selftest(struct tracer *type)
#ifdef CONFIG_TRACER_MAX_TRACE
if (type->use_max_tr) {
/* If we expanded the buffers, make sure the max is expanded too */
- if (ring_buffer_expanded)
+ if (tr->ring_buffer_expanded)
ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
RING_BUFFER_ALL_CPUS);
tr->allocated_snapshot = true;
@@ -2038,7 +2039,7 @@ static int run_tracer_selftest(struct tracer *type)
tr->allocated_snapshot = false;

/* Shrink the max buffer again */
- if (ring_buffer_expanded)
+ if (tr->ring_buffer_expanded)
ring_buffer_resize(tr->max_buffer.buffer, 1,
RING_BUFFER_ALL_CPUS);
}
@@ -3403,7 +3404,7 @@ void trace_printk_init_buffers(void)
pr_warn("**********************************************************\n");

/* Expand the buffers to set size */
- tracing_update_buffers();
+ tracing_update_buffers(&global_trace);

buffers_allocated = 1;

@@ -6374,7 +6375,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
* we use the size that was given, and we can forget about
* expanding it later.
*/
- ring_buffer_expanded = true;
+ trace_set_ring_buffer_expanded(tr);

/* May be called before buffers are initialized */
if (!tr->array_buffer.buffer)
@@ -6452,6 +6453,7 @@ ssize_t tracing_resize_ring_buffer(struct trace_array *tr,

/**
* tracing_update_buffers - used by tracing facility to expand ring buffers
+ * @tr: The tracing instance
*
* To save on memory when the tracing is never used on a system with it
* configured in. The ring buffers are set to a minimum size. But once
@@ -6460,13 +6462,13 @@ ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
*
* This function is to be called when a tracer is about to be used.
*/
-int tracing_update_buffers(void)
+int tracing_update_buffers(struct trace_array *tr)
{
int ret = 0;

mutex_lock(&trace_types_lock);
- if (!ring_buffer_expanded)
- ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
+ if (!tr->ring_buffer_expanded)
+ ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
RING_BUFFER_ALL_CPUS);
mutex_unlock(&trace_types_lock);

@@ -6520,7 +6522,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)

mutex_lock(&trace_types_lock);

- if (!ring_buffer_expanded) {
+ if (!tr->ring_buffer_expanded) {
ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
RING_BUFFER_ALL_CPUS);
if (ret < 0)
@@ -7192,7 +7194,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf,
}

if (buf_size_same) {
- if (!ring_buffer_expanded)
+ if (!tr->ring_buffer_expanded)
r = sprintf(buf, "%lu (expanded: %lu)\n",
size >> 10,
trace_buf_size >> 10);
@@ -7249,10 +7251,10 @@ tracing_total_entries_read(struct file *filp, char __user *ubuf,
mutex_lock(&trace_types_lock);
for_each_tracing_cpu(cpu) {
size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
- if (!ring_buffer_expanded)
+ if (!tr->ring_buffer_expanded)
expanded_size += trace_buf_size >> 10;
}
- if (ring_buffer_expanded)
+ if (tr->ring_buffer_expanded)
r = sprintf(buf, "%lu\n", size);
else
r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
@@ -7646,7 +7648,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
unsigned long val;
int ret;

- ret = tracing_update_buffers();
+ ret = tracing_update_buffers(tr);
if (ret < 0)
return ret;

@@ -9550,6 +9552,9 @@ static struct trace_array *trace_array_create(const char *name)
if (allocate_trace_buffers(tr, trace_buf_size) < 0)
goto out_free_tr;

+ /* The ring buffer is defaultly expanded */
+ trace_set_ring_buffer_expanded(tr);
+
if (ftrace_allocate_ftrace_ops(tr) < 0)
goto out_free_tr;

@@ -10444,7 +10449,7 @@ __init static int tracer_alloc_buffers(void)
trace_printk_init_buffers();

/* To save memory, keep the ring buffer size to its minimum */
- if (ring_buffer_expanded)
+ if (global_trace.ring_buffer_expanded)
ring_buf_size = trace_buf_size;
else
ring_buf_size = 1;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 77debe53f07c..e92cb9c1292f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -410,6 +410,11 @@ struct trace_array {
struct cond_snapshot *cond_snapshot;
#endif
struct trace_func_repeats __percpu *last_func_repeats;
+ /*
+ * On boot up, the ring buffer is set to the minimum size, so that
+ * we do not waste memory on systems that are not using tracing.
+ */
+ bool ring_buffer_expanded;
};

enum {
@@ -761,7 +766,7 @@ extern int DYN_FTRACE_TEST_NAME(void);
#define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2
extern int DYN_FTRACE_TEST_NAME2(void);

-extern bool ring_buffer_expanded;
+extern void trace_set_ring_buffer_expanded(struct trace_array *tr);
extern bool tracing_selftest_disabled;

#ifdef CONFIG_FTRACE_STARTUP_TEST
@@ -1305,7 +1310,7 @@ static inline void trace_branch_disable(void)
#endif /* CONFIG_BRANCH_TRACER */

/* set ring buffers to default size if not already done so */
-int tracing_update_buffers(void);
+int tracing_update_buffers(struct trace_array *tr);

union trace_synth_field {
u8 as_u8;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f49d6ddb6342..099b9b6bbdc4 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1166,7 +1166,7 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
if (!cnt)
return 0;

- ret = tracing_update_buffers();
+ ret = tracing_update_buffers(tr);
if (ret < 0)
return ret;

@@ -1397,18 +1397,20 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
if (ret)
return ret;

- ret = tracing_update_buffers();
- if (ret < 0)
- return ret;
-
switch (val) {
case 0:
case 1:
ret = -ENODEV;
mutex_lock(&event_mutex);
file = event_file_data(filp);
- if (likely(file))
+ if (likely(file)) {
+ ret = tracing_update_buffers(file->tr);
+ if (ret < 0) {
+ mutex_unlock(&event_mutex);
+ return ret;
+ }
ret = ftrace_event_enable_disable(file, val);
+ }
mutex_unlock(&event_mutex);
break;

@@ -1482,7 +1484,7 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
if (ret)
return ret;

- ret = tracing_update_buffers();
+ ret = tracing_update_buffers(dir->tr);
if (ret < 0)
return ret;

@@ -1956,7 +1958,7 @@ event_pid_write(struct file *filp, const char __user *ubuf,
if (!cnt)
return 0;

- ret = tracing_update_buffers();
+ ret = tracing_update_buffers(tr);
if (ret < 0)
return ret;

@@ -2824,7 +2826,7 @@ static __init int setup_trace_triggers(char *str)
int i;

strscpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE);
- ring_buffer_expanded = true;
+ trace_set_ring_buffer_expanded(NULL);
disable_tracing_selftest("running event triggers");

buf = bootup_trigger_buf;
@@ -3614,7 +3616,7 @@ static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
static __init int setup_trace_event(char *str)
{
strscpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
- ring_buffer_expanded = true;
+ trace_set_ring_buffer_expanded(NULL);
disable_tracing_selftest("running event tracing");

return 1;
--
2.40.1