Re: [PATCH 2/2] drm/vc4: Add exec flags to allow forcing a specific X/Y tile walk order.

From: Boris Brezillon
Date: Fri Aug 04 2017 - 05:59:18 EST


On Tue, 25 Jul 2017 09:27:33 -0700
Eric Anholt <eric@xxxxxxxxxx> wrote:

> This is useful to allow GL to provide defined results for overlapping
> glBlitFramebuffer, which X11 in turn uses to accelerate uncomposited
> window movement without first blitting to a temporary. x11perf
> -copywinwin100 goes from 1850/sec to 4850/sec.
>
> Signed-off-by: Eric Anholt <eric@xxxxxxxxxx>
> ---
>
> The work-in-progress userspace is at:
>
> https://github.com/anholt/xserver/commits/glamor-draw-bounds-overlap
> https://github.com/anholt/mesa/commits/vc4-overlapping-blit
>
> and the next step is to build the GL extension spec and piglit tests
> for it.
>
> drivers/gpu/drm/vc4/vc4_drv.c | 1 +
> drivers/gpu/drm/vc4/vc4_gem.c | 5 ++++-
> drivers/gpu/drm/vc4/vc4_render_cl.c | 21 ++++++++++++++++-----
> include/uapi/drm/vc4_drm.h | 11 +++++++++++
> 4 files changed, 32 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
> index c6b487c3d2b7..b5c2c28289ed 100644
> --- a/drivers/gpu/drm/vc4/vc4_drv.c
> +++ b/drivers/gpu/drm/vc4/vc4_drv.c
> @@ -99,6 +99,7 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
> case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
> case DRM_VC4_PARAM_SUPPORTS_ETC1:
> case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
> + case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER:
> args->value = true;
> break;
> default:
> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
> index a3e45e67f417..ba0782ebda34 100644
> --- a/drivers/gpu/drm/vc4/vc4_gem.c
> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
> @@ -1008,7 +1008,10 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
> struct ww_acquire_ctx acquire_ctx;
> int ret = 0;
>
> - if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
> + if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
> + VC4_SUBMIT_CL_FIXED_RCL_ORDER |
> + VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
> + VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) {
> DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
> return -EINVAL;
> }
> diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c
> index da3bfd53f0bd..c3b064052147 100644
> --- a/drivers/gpu/drm/vc4/vc4_render_cl.c
> +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
> @@ -261,8 +261,17 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
> uint8_t max_y_tile = args->max_y_tile;
> uint8_t xtiles = max_x_tile - min_x_tile + 1;
> uint8_t ytiles = max_y_tile - min_y_tile + 1;
> - uint8_t x, y;
> + uint8_t xi, yi;
> uint32_t size, loop_body_size;
> + bool positive_x = false;
> + bool positive_y = false;
> +
> + if (args->flags & VC4_SUBMIT_CL_FIXED_RCL_ORDER) {
> + if (args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X)
> + positive_x = true;
> + if (args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)
> + positive_y = true;
> + }

Are you sure you want the default value of positive_x/y to be false? It
seems to me that before this patch you were always iterating in
ascending order, but now, when VC4_SUBMIT_CL_FIXED_RCL_ORDER is not
set you do the opposite. Maybe you really want to change the default
behavior, just wanted to point this out.

Otherwise,

Reviewed-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxxxxxxx>

>
> size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
> loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;
> @@ -354,10 +363,12 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
> rcl_u16(setup, args->height);
> rcl_u16(setup, args->color_write.bits);
>
> - for (y = min_y_tile; y <= max_y_tile; y++) {
> - for (x = min_x_tile; x <= max_x_tile; x++) {
> - bool first = (x == min_x_tile && y == min_y_tile);
> - bool last = (x == max_x_tile && y == max_y_tile);
> + for (yi = 0; yi < ytiles; yi++) {
> + int y = positive_y ? min_y_tile + yi : max_y_tile - yi;
> + for (xi = 0; xi < xtiles; xi++) {
> + int x = positive_x ? min_x_tile + xi : max_x_tile - xi;
> + bool first = (xi == 0 && yi == 0);
> + bool last = (xi == xtiles - 1 && yi == ytiles - 1);
>
> emit_tile(exec, setup, x, y, first, last);
> }
> diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
> index 6ac4c5c014cb..a8bf9a76d5b6 100644
> --- a/include/uapi/drm/vc4_drm.h
> +++ b/include/uapi/drm/vc4_drm.h
> @@ -153,6 +153,16 @@ struct drm_vc4_submit_cl {
> __u32 pad:24;
>
> #define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0)
> +/* By default, the kernel gets to choose the order that the tiles are
> + * rendered in. If this is set, then the tiles will be rendered in a
> + * raster order, with the right-to-left vs left-to-right and
> + * top-to-bottom vs bottom-to-top dictated by
> + * VC4_SUBMIT_CL_RCL_ORDER_INCREASING_*. This allows overlapping
> + * blits to be implemented using the 3D engine.
> + */
> +#define VC4_SUBMIT_CL_FIXED_RCL_ORDER (1 << 1)
> +#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X (1 << 2)
> +#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y (1 << 3)
> __u32 flags;
>
> /* Returned value of the seqno of this render job (for the
> @@ -292,6 +302,7 @@ struct drm_vc4_get_hang_state {
> #define DRM_VC4_PARAM_SUPPORTS_BRANCHES 3
> #define DRM_VC4_PARAM_SUPPORTS_ETC1 4
> #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS 5
> +#define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER 6
>
> struct drm_vc4_get_param {
> __u32 param;