Re: [PATCH] Enable '-Werror' by default for all kernel builds

From: Linus Torvalds
Date: Tue Sep 07 2021 - 13:34:05 EST


On Tue, Sep 7, 2021 at 10:10 AM Linus Torvalds
<torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
>
> Do I know why? No. I do note that that code is disgusting.
>
> It's passing one of those structs around by value, for example. That's
> a 72-byte structure that is copied on the stack due to stupid calling
> conventions. Maybe clang generates a few extra temporaries for it as
> part of the function call stack setup? Who knows..

Ooh, yes.

This attached patch is crap - it converts the helper functions to use
const pointers instead of passing the whole structure, but it then
only converts that one file that *uses* them.

So the end result will not compile in general, but you can do

make drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_rq_dlg_calc_30.o

and it compiles for me.

And while gcc doesn't care that much - it will apparently either
generate the argument stack every call - clang cares deeply.

The nasty 720-byte stack frame that clang generates turns into just a
320-byte one, and code generation in general looks a *lot* better.

Now, as mentioned, this patch is broken and incomplete. But I really
think the AMD GPU people need to do this. It makes those functions go
from practically unusable to not horribly disgusting.

So Harry/Leo/Alex/Christian and amd-gfx list - can you look into
making this ugly "make one file compile better" patch actually work
properly?

It *looks* lto me ike that code was perhaps written for a C++ compiler
and the helpers have been written as a "pass by reference", and the
arguments used to be

const display_data_rq_misc_params_st& rq_misc_param

and then the compiler will pass the argument as a pointer. And then it
was converted to C, and the "pass by reference" in the function
declaration was turned into "pass by value", to avoid changing "." to
"->" in the use.

But a '&arg' thing in C++ really is a '*arg' pointer in C, and should
have been done as that.

Of course, it's also possible that that code was simply written by
somebody who didn't understand just *how* horrible it is to pass
structures bigger than a word or two by value.

Do we have a compiler warning for passing big structures by value?

Linus
.../display/dc/dml/dcn30/display_rq_dlg_calc_30.c | 142 ++++++++++-----------
.../display/dc/dml/dcn30/display_rq_dlg_calc_30.h | 2 +-
.../amd/display/dc/dml/display_rq_dlg_helpers.h | 20 +--
3 files changed, 82 insertions(+), 82 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
index 0d934fae1c3a..9b5ee53d2de3 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
@@ -92,7 +92,7 @@ static void extract_rq_sizing_regs(struct display_mode_lib *mode_lib,
const display_data_rq_sizing_params_st rq_sizing)
{
dml_print("DML_DLG: %s: rq_sizing param\n", __func__);
- print__data_rq_sizing_params_st(mode_lib, rq_sizing);
+ print__data_rq_sizing_params_st(mode_lib, &rq_sizing);

rq_regs->chunk_size = dml_log2(rq_sizing.chunk_bytes) - 10;

@@ -113,28 +113,28 @@ static void extract_rq_sizing_regs(struct display_mode_lib *mode_lib,

static void extract_rq_regs(struct display_mode_lib *mode_lib,
display_rq_regs_st *rq_regs,
- const display_rq_params_st rq_param)
+ const display_rq_params_st *rq_param)
{
unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
unsigned int detile_buf_plane1_addr = 0;

- extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), rq_param.sizing.rq_l);
+ extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), rq_param->sizing.rq_l);

- rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(rq_param.dlg.rq_l.dpte_row_height),
+ rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(rq_param->dlg.rq_l.dpte_row_height),
1) - 3;

- if (rq_param.yuv420) {
- extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), rq_param.sizing.rq_c);
- rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(rq_param.dlg.rq_c.dpte_row_height),
+ if (rq_param->yuv420) {
+ extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), rq_param->sizing.rq_c);
+ rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(rq_param->dlg.rq_c.dpte_row_height),
1) - 3;
}

- rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height);
- rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height);
+ rq_regs->rq_regs_l.swath_height = dml_log2(rq_param->dlg.rq_l.swath_height);
+ rq_regs->rq_regs_c.swath_height = dml_log2(rq_param->dlg.rq_c.swath_height);

// FIXME: take the max between luma, chroma chunk size?
// okay for now, as we are setting chunk_bytes to 8kb anyways
- if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024 || (rq_param.yuv420 && rq_param.sizing.rq_c.chunk_bytes >= 32 * 1024)) { //32kb
+ if (rq_param->sizing.rq_l.chunk_bytes >= 32 * 1024 || (rq_param->yuv420 && rq_param->sizing.rq_c.chunk_bytes >= 32 * 1024)) { //32kb
rq_regs->drq_expansion_mode = 0;
} else {
rq_regs->drq_expansion_mode = 2;
@@ -143,9 +143,9 @@ static void extract_rq_regs(struct display_mode_lib *mode_lib,
rq_regs->mrq_expansion_mode = 1;
rq_regs->crq_expansion_mode = 1;

- if (rq_param.yuv420) {
- if ((double)rq_param.misc.rq_l.stored_swath_bytes
- / (double)rq_param.misc.rq_c.stored_swath_bytes <= 1.5) {
+ if (rq_param->yuv420) {
+ if ((double)rq_param->misc.rq_l.stored_swath_bytes
+ / (double)rq_param->misc.rq_c.stored_swath_bytes <= 1.5) {
detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 64.0); // half to chroma
} else {
detile_buf_plane1_addr = dml_round_to_multiple((unsigned int)((2.0 * detile_buf_size_in_bytes) / 3.0),
@@ -747,7 +747,7 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib,
display_data_rq_sizing_params_st *rq_sizing_param,
display_data_rq_dlg_params_st *rq_dlg_param,
display_data_rq_misc_params_st *rq_misc_param,
- const display_pipe_params_st pipe_param,
+ const display_pipe_params_st *pipe_param,
bool is_chroma,
bool is_alpha)
{
@@ -761,32 +761,32 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib,

// FIXME check if ppe apply for both luma and chroma in 422 case
if (is_chroma | is_alpha) {
- vp_width = pipe_param.src.viewport_width_c / ppe;
- vp_height = pipe_param.src.viewport_height_c;
- data_pitch = pipe_param.src.data_pitch_c;
- meta_pitch = pipe_param.src.meta_pitch_c;
- surface_height = pipe_param.src.surface_height_y / 2.0;
+ vp_width = pipe_param->src.viewport_width_c / ppe;
+ vp_height = pipe_param->src.viewport_height_c;
+ data_pitch = pipe_param->src.data_pitch_c;
+ meta_pitch = pipe_param->src.meta_pitch_c;
+ surface_height = pipe_param->src.surface_height_y / 2.0;
} else {
- vp_width = pipe_param.src.viewport_width / ppe;
- vp_height = pipe_param.src.viewport_height;
- data_pitch = pipe_param.src.data_pitch;
- meta_pitch = pipe_param.src.meta_pitch;
- surface_height = pipe_param.src.surface_height_y;
+ vp_width = pipe_param->src.viewport_width / ppe;
+ vp_height = pipe_param->src.viewport_height;
+ data_pitch = pipe_param->src.data_pitch;
+ meta_pitch = pipe_param->src.meta_pitch;
+ surface_height = pipe_param->src.surface_height_y;
}

- if (pipe_param.dest.odm_combine) {
+ if (pipe_param->dest.odm_combine) {
unsigned int access_dir = 0;
unsigned int full_src_vp_width = 0;
unsigned int hactive_odm = 0;
unsigned int src_hactive_odm = 0;
- access_dir = (pipe_param.src.source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
- hactive_odm = pipe_param.dest.hactive / ((unsigned int)pipe_param.dest.odm_combine*2);
+ access_dir = (pipe_param->src.source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
+ hactive_odm = pipe_param->dest.hactive / ((unsigned int)pipe_param->dest.odm_combine*2);
if (is_chroma) {
- full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio_c * pipe_param.dest.full_recout_width;
- src_hactive_odm = pipe_param.scale_ratio_depth.hscl_ratio_c * hactive_odm;
+ full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio_c * pipe_param->dest.full_recout_width;
+ src_hactive_odm = pipe_param->scale_ratio_depth.hscl_ratio_c * hactive_odm;
} else {
- full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio * pipe_param.dest.full_recout_width;
- src_hactive_odm = pipe_param.scale_ratio_depth.hscl_ratio * hactive_odm;
+ full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio * pipe_param->dest.full_recout_width;
+ src_hactive_odm = pipe_param->scale_ratio_depth.hscl_ratio * hactive_odm;
}

if (access_dir == 0) {
@@ -815,7 +815,7 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib,
rq_sizing_param->meta_chunk_bytes = 2048;
rq_sizing_param->min_meta_chunk_bytes = 256;

- if (pipe_param.src.hostvm)
+ if (pipe_param->src.hostvm)
rq_sizing_param->mpte_group_bytes = 512;
else
rq_sizing_param->mpte_group_bytes = 2048;
@@ -828,28 +828,28 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib,
vp_height,
data_pitch,
meta_pitch,
- pipe_param.src.source_format,
- pipe_param.src.sw_mode,
- pipe_param.src.macro_tile_size,
- pipe_param.src.source_scan,
- pipe_param.src.hostvm,
+ pipe_param->src.source_format,
+ pipe_param->src.sw_mode,
+ pipe_param->src.macro_tile_size,
+ pipe_param->src.source_scan,
+ pipe_param->src.hostvm,
is_chroma,
surface_height);
}

static void dml_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib,
display_rq_params_st *rq_param,
- const display_pipe_params_st pipe_param)
+ const display_pipe_params_st *pipe_param)
{
// get param for luma surface
- rq_param->yuv420 = pipe_param.src.source_format == dm_420_8
- || pipe_param.src.source_format == dm_420_10
- || pipe_param.src.source_format == dm_rgbe_alpha
- || pipe_param.src.source_format == dm_420_12;
+ rq_param->yuv420 = pipe_param->src.source_format == dm_420_8
+ || pipe_param->src.source_format == dm_420_10
+ || pipe_param->src.source_format == dm_rgbe_alpha
+ || pipe_param->src.source_format == dm_420_12;

- rq_param->yuv420_10bpc = pipe_param.src.source_format == dm_420_10;
+ rq_param->yuv420_10bpc = pipe_param->src.source_format == dm_420_10;

- rq_param->rgbe_alpha = (pipe_param.src.source_format == dm_rgbe_alpha)?1:0;
+ rq_param->rgbe_alpha = (pipe_param->src.source_format == dm_rgbe_alpha)?1:0;

get_surf_rq_param(mode_lib,
&(rq_param->sizing.rq_l),
@@ -859,7 +859,7 @@ static void dml_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib,
0,
0);

- if (is_dual_plane((enum source_format_class)(pipe_param.src.source_format))) {
+ if (is_dual_plane((enum source_format_class)(pipe_param->src.source_format))) {
// get param for chroma surface
get_surf_rq_param(mode_lib,
&(rq_param->sizing.rq_c),
@@ -871,21 +871,21 @@ static void dml_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib,
}

// calculate how to split the det buffer space between luma and chroma
- handle_det_buf_split(mode_lib, rq_param, pipe_param.src);
- print__rq_params_st(mode_lib, *rq_param);
+ handle_det_buf_split(mode_lib, rq_param, pipe_param->src);
+ print__rq_params_st(mode_lib, rq_param);
}

void dml30_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib,
display_rq_regs_st *rq_regs,
- const display_pipe_params_st pipe_param)
+ const display_pipe_params_st *pipe_param)
{
display_rq_params_st rq_param = { 0 };

memset(rq_regs, 0, sizeof(*rq_regs));
dml_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_param);
- extract_rq_regs(mode_lib, rq_regs, rq_param);
+ extract_rq_regs(mode_lib, rq_regs, &rq_param);

- print__rq_regs_st(mode_lib, *rq_regs);
+ print__rq_regs_st(mode_lib, rq_regs);
}

static void calculate_ttu_cursor(struct display_mode_lib *mode_lib,
@@ -984,8 +984,8 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
const unsigned int pipe_idx,
display_dlg_regs_st *disp_dlg_regs,
display_ttu_regs_st *disp_ttu_regs,
- const display_rq_dlg_params_st rq_dlg_param,
- const display_dlg_sys_params_st dlg_sys_param,
+ const display_rq_dlg_params_st *rq_dlg_param,
+ const display_dlg_sys_params_st *dlg_sys_param,
const bool cstate_en,
const bool pstate_en,
const bool vm_en,
@@ -1137,7 +1137,7 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
* dml_pow(2, 8));
disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits

- min_dcfclk_mhz = dlg_sys_param.deepsleep_dcfclk_mhz;
+ min_dcfclk_mhz = dlg_sys_param->deepsleep_dcfclk_mhz;
t_calc_us = get_tcalc(mode_lib, e2e_pipe_param, num_pipes);
min_ttu_vblank = get_min_ttu_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);

@@ -1191,13 +1191,13 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
scl_enable = scl->scl_enable;

line_time_in_us = (htotal / pclk_freq_in_mhz);
- swath_width_ub_l = rq_dlg_param.rq_l.swath_width_ub;
- dpte_groups_per_row_ub_l = rq_dlg_param.rq_l.dpte_groups_per_row_ub;
- swath_width_ub_c = rq_dlg_param.rq_c.swath_width_ub;
- dpte_groups_per_row_ub_c = rq_dlg_param.rq_c.dpte_groups_per_row_ub;
+ swath_width_ub_l = rq_dlg_param->rq_l.swath_width_ub;
+ dpte_groups_per_row_ub_l = rq_dlg_param->rq_l.dpte_groups_per_row_ub;
+ swath_width_ub_c = rq_dlg_param->rq_c.swath_width_ub;
+ dpte_groups_per_row_ub_c = rq_dlg_param->rq_c.dpte_groups_per_row_ub;

- meta_chunks_per_row_ub_l = rq_dlg_param.rq_l.meta_chunks_per_row_ub;
- meta_chunks_per_row_ub_c = rq_dlg_param.rq_c.meta_chunks_per_row_ub;
+ meta_chunks_per_row_ub_l = rq_dlg_param->rq_l.meta_chunks_per_row_ub;
+ meta_chunks_per_row_ub_c = rq_dlg_param->rq_c.meta_chunks_per_row_ub;
vupdate_offset = dst->vupdate_offset;
vupdate_width = dst->vupdate_width;
vready_offset = dst->vready_offset;
@@ -1379,16 +1379,16 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
dml_print("DML_DLG: %s: vratio_pre_c=%3.2f\n", __func__, vratio_pre_c);

// Active
- req_per_swath_ub_l = rq_dlg_param.rq_l.req_per_swath_ub;
- req_per_swath_ub_c = rq_dlg_param.rq_c.req_per_swath_ub;
- meta_row_height_l = rq_dlg_param.rq_l.meta_row_height;
- meta_row_height_c = rq_dlg_param.rq_c.meta_row_height;
+ req_per_swath_ub_l = rq_dlg_param->rq_l.req_per_swath_ub;
+ req_per_swath_ub_c = rq_dlg_param->rq_c.req_per_swath_ub;
+ meta_row_height_l = rq_dlg_param->rq_l.meta_row_height;
+ meta_row_height_c = rq_dlg_param->rq_c.meta_row_height;
swath_width_pixels_ub_l = 0;
swath_width_pixels_ub_c = 0;
scaler_rec_in_width_l = 0;
scaler_rec_in_width_c = 0;
- dpte_row_height_l = rq_dlg_param.rq_l.dpte_row_height;
- dpte_row_height_c = rq_dlg_param.rq_c.dpte_row_height;
+ dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height;
+ dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height;

if (mode_422) {
swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
@@ -1824,8 +1824,8 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz;
ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24));

- print__ttu_regs_st(mode_lib, *disp_ttu_regs);
- print__dlg_regs_st(mode_lib, *disp_dlg_regs);
+ print__ttu_regs_st(mode_lib, disp_ttu_regs);
+ print__dlg_regs_st(mode_lib, disp_dlg_regs);
}

void dml30_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
@@ -1861,20 +1861,20 @@ void dml30_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
/ dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated

- print__dlg_sys_params_st(mode_lib, dlg_sys_param);
+ print__dlg_sys_params_st(mode_lib, &dlg_sys_param);

// system parameter calculation done

dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx);
- dml_rq_dlg_get_rq_params(mode_lib, &rq_param, e2e_pipe_param[pipe_idx].pipe);
+ dml_rq_dlg_get_rq_params(mode_lib, &rq_param, &e2e_pipe_param[pipe_idx].pipe);
dml_rq_dlg_get_dlg_params(mode_lib,
e2e_pipe_param,
num_pipes,
pipe_idx,
dlg_regs,
ttu_regs,
- rq_param.dlg,
- dlg_sys_param,
+ &rq_param.dlg,
+ &dlg_sys_param,
cstate_en,
pstate_en,
vm_en,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.h
index c04965cceff3..b40abc41828a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.h
@@ -41,7 +41,7 @@ struct display_mode_lib;
// See also: <display_rq_regs_st>
void dml30_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib,
display_rq_regs_st *rq_regs,
- const display_pipe_params_st pipe_param);
+ const display_pipe_params_st *pipe_param);

// Function: dml_rq_dlg_get_dlg_reg
// Calculate and return DLG and TTU register struct given the system setting
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h
index 2555ef0358c2..fb61a0b1470f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h
@@ -31,16 +31,16 @@
/* Function: Printer functions
* Print various struct
*/
-void print__rq_params_st(struct display_mode_lib *mode_lib, display_rq_params_st rq_param);
-void print__data_rq_sizing_params_st(struct display_mode_lib *mode_lib, display_data_rq_sizing_params_st rq_sizing);
-void print__data_rq_dlg_params_st(struct display_mode_lib *mode_lib, display_data_rq_dlg_params_st rq_dlg_param);
-void print__data_rq_misc_params_st(struct display_mode_lib *mode_lib, display_data_rq_misc_params_st rq_misc_param);
-void print__rq_dlg_params_st(struct display_mode_lib *mode_lib, display_rq_dlg_params_st rq_dlg_param);
-void print__dlg_sys_params_st(struct display_mode_lib *mode_lib, display_dlg_sys_params_st dlg_sys_param);
+void print__rq_params_st(struct display_mode_lib *mode_lib, const display_rq_params_st *rq_param);
+void print__data_rq_sizing_params_st(struct display_mode_lib *mode_lib, const display_data_rq_sizing_params_st *rq_sizing);
+void print__data_rq_dlg_params_st(struct display_mode_lib *mode_lib, const display_data_rq_dlg_params_st *rq_dlg_param);
+void print__data_rq_misc_params_st(struct display_mode_lib *mode_lib, const display_data_rq_misc_params_st *rq_misc_param);
+void print__rq_dlg_params_st(struct display_mode_lib *mode_lib, const display_rq_dlg_params_st *rq_dlg_param);
+void print__dlg_sys_params_st(struct display_mode_lib *mode_lib, const display_dlg_sys_params_st *dlg_sys_param);

-void print__data_rq_regs_st(struct display_mode_lib *mode_lib, display_data_rq_regs_st data_rq_regs);
-void print__rq_regs_st(struct display_mode_lib *mode_lib, display_rq_regs_st rq_regs);
-void print__dlg_regs_st(struct display_mode_lib *mode_lib, display_dlg_regs_st dlg_regs);
-void print__ttu_regs_st(struct display_mode_lib *mode_lib, display_ttu_regs_st ttu_regs);
+void print__data_rq_regs_st(struct display_mode_lib *mode_lib, const display_data_rq_regs_st *data_rq_regs);
+void print__rq_regs_st(struct display_mode_lib *mode_lib, const display_rq_regs_st *rq_regs);
+void print__dlg_regs_st(struct display_mode_lib *mode_lib, const display_dlg_regs_st *dlg_regs);
+void print__ttu_regs_st(struct display_mode_lib *mode_lib, const display_ttu_regs_st *ttu_regs);

#endif