[PATCH 6/6] drm: vkms: Refactor the plane composer to accept new formats

From: Igor Matheus Andrade Torrente
Date: Tue Oct 05 2021 - 16:17:55 EST


Currently the blend function only accepts XRGB_8888 and ARGB_8888
as a color input.

This patch refactors all the functions related to the plane composition
to overcome this limitation.

Now the blend function receives a format handler for each plane and a
blend function pointer. It will take two ARGB_16161616 pixels, one
from each handler, and will use the blend function to calculate and
store the final color in the output buffer.

These format handlers will receive the `vkms_composer` and a pair of
coordinates. And they should return the respective pixel in the
ARGB_16161616 format.

The blend function will receive two ARGB_16161616 pixels, x, y, and
the vkms_composer of the output buffer. The method should perform the
blend operation and store the output in the aforementioned
ARGB_16161616 format.

Signed-off-by: Igor Matheus Andrade Torrente <igormtorrente@xxxxxxxxx>
---
drivers/gpu/drm/vkms/vkms_composer.c | 275 ++++++++++++++-------------
drivers/gpu/drm/vkms/vkms_formats.h | 125 ++++++++++++
2 files changed, 271 insertions(+), 129 deletions(-)
create mode 100644 drivers/gpu/drm/vkms/vkms_formats.h

diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c
index 82f79e508f81..1e7c10c02a52 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -9,18 +9,28 @@
#include <drm/drm_vblank.h>

#include "vkms_drv.h"
-
-static u32 get_pixel_from_buffer(int x, int y, const u8 *buffer,
- const struct vkms_composer *composer)
-{
- u32 pixel;
- int src_offset = composer->offset + (y * composer->pitch)
- + (x * composer->cpp);
-
- pixel = *(u32 *)&buffer[src_offset];
-
- return pixel;
-}
+#include "vkms_formats.h"
+
+/*
+ * get_output_vkms_composer - describe the intermediate output buffer
+ *
+ * Expands to a struct vkms_composer compound literal whose framebuffer
+ * format is DRM_FORMAT_ARGB16161616, whose first mapping points at
+ * @buffer_pointer and whose src/dst rectangles are copied from @composer.
+ * cpp is sizeof(u64) and the pitch is the width of @composer's dst rectangle
+ * times sizeof(u64). Note that @composer is evaluated more than once.
+ */
+#define get_output_vkms_composer(buffer_pointer, composer) \
+ ((struct vkms_composer) { \
+ .fb = (struct drm_framebuffer) { \
+ .format = &(struct drm_format_info) { \
+ .format = DRM_FORMAT_ARGB16161616, \
+ }, \
+ }, \
+ .map[0].vaddr = (buffer_pointer), \
+ .src = (composer)->src, \
+ .dst = (composer)->dst, \
+ .cpp = sizeof(u64), \
+ .pitch = drm_rect_width(&(composer)->dst) * sizeof(u64) \
+ })
+
+/*
+ * Callbacks used by blend() for one composition pass: get_src_pixel and
+ * get_dst_pixel read the pixel at (x, y) from a composer and return it as a
+ * u64, and pixel_blend receives both pixels, the destination coordinates
+ * and the destination composer.
+ */
+struct vkms_pixel_composition_functions {
+ u64 (*get_src_pixel)(struct vkms_composer *composer, int x, int y);
+ u64 (*get_dst_pixel)(struct vkms_composer *composer, int x, int y);
+ void (*pixel_blend)(u64 argb_src1, u64 argb_src2, int x, int y,
+ struct vkms_composer *dst_composer);
+};

/**
* compute_crc - Compute CRC value on output frame
@@ -31,42 +41,33 @@ static u32 get_pixel_from_buffer(int x, int y, const u8 *buffer,
* returns CRC value computed using crc32 on the visible portion of
* the final framebuffer at vaddr_out
*/
-static uint32_t compute_crc(const u8 *vaddr,
+static uint32_t compute_crc(const __le64 *vaddr,
const struct vkms_composer *composer)
{
- int x, y;
- u32 crc = 0, pixel = 0;
- int x_src = composer->src.x1 >> 16;
- int y_src = composer->src.y1 >> 16;
- int h_src = drm_rect_height(&composer->src) >> 16;
- int w_src = drm_rect_width(&composer->src) >> 16;
-
- for (y = y_src; y < y_src + h_src; ++y) {
- for (x = x_src; x < x_src + w_src; ++x) {
- pixel = get_pixel_from_buffer(x, y, vaddr, composer);
- crc = crc32_le(crc, (void *)&pixel, sizeof(u32));
- }
- }
+ int h = drm_rect_height(&composer->dst);
+ int w = drm_rect_width(&composer->dst);

- return crc;
+ /*
+ * Hash the buffer in a single call: w * h pixels of sizeof(u64) bytes
+ * each, with w and h taken from the composer's dst rectangle.
+ */
+ return crc32_le(0, (void *)vaddr, w * h * sizeof(u64));
}

-static u8 blend_channel(u8 src, u8 dst, u8 alpha)
+/*
+ * blend_channel - blend one 16-bit channel with the premultiplied formula
+ * @src: channel value of the source pixel
+ * @dst: channel value of the destination pixel
+ * @alpha: alpha value of the source pixel
+ *
+ * Computes (src * 0xffff + dst * (0xffff - alpha)) / 0xffff, rounded up, and
+ * returns the result as a little-endian 16-bit value.
+ */
+static __le16 blend_channel(u16 src, u16 dst, u16 alpha)
{
- u32 pre_blend;
- u8 new_color;
+ u64 pre_blend;
+ u16 new_color;

- pre_blend = (src * 255 + dst * (255 - alpha));
+ /*
+ * Widen to u64 before multiplying: the u16 operands are promoted to
+ * int, and 0xffff * 0xffff does not fit in a signed 32-bit int. The
+ * wrapped negative result would be sign-extended into pre_blend.
+ */
+ pre_blend = (u64)src * 0xffff + (u64)dst * (0xffff - alpha);

- /* Faster div by 255 */
- new_color = ((pre_blend + ((pre_blend + 257) >> 8)) >> 8);
+ /* 64-bit dividend: use the _ULL helper so 32-bit builds link too. */
+ new_color = DIV_ROUND_UP_ULL(pre_blend, 0xffff);

- return new_color;
+ return cpu_to_le16(new_color);
}

/**
* alpha_blend - alpha blending equation
- * @argb_src: src pixel on premultiplied alpha mode
+ * @argb_src1: pixel of the source plane in premultiplied alpha mode
+ * @argb_src2: pixel of the destination buffer in premultiplied alpha mode
+ * @x: The x coordinate (width) of the pixel
+ * @y: The y coordinate (height) of the pixel
- * @argb_dst: dst pixel completely opaque
+ * @dst_composer: metadata of the buffer that receives the blended pixel
*
* blend pixels using premultiplied blend formula. The current DRM assumption
@@ -74,50 +75,52 @@ static u8 blend_channel(u8 src, u8 dst, u8 alpha)
* channel values. See more drm_plane_create_blend_mode_property(). Also, this
* formula assumes a completely opaque background.
*/
-static void alpha_blend(const u8 *argb_src, u8 *argb_dst)
+static void alpha_blend(u64 argb_src1, u64 argb_src2, int x, int y,
+ struct vkms_composer *dst_composer)
{
- u8 alpha;
+ __le16 *output_pixel = packed_pixels_addr(dst_composer, x, y);

- alpha = argb_src[3];
- argb_dst[0] = blend_channel(argb_src[0], argb_dst[0], alpha);
- argb_dst[1] = blend_channel(argb_src[1], argb_dst[1], alpha);
- argb_dst[2] = blend_channel(argb_src[2], argb_dst[2], alpha);
-}
+ u16 src1_a = (argb_src1 & (0xffffllu << 48)) >> 48;
+ u16 src1_r = (argb_src1 & (0xffffllu << 32)) >> 32;
+ u16 src1_g = (argb_src1 & (0xffffllu << 16)) >> 16;
+ u16 src1_b = argb_src1 & 0xffffllu;

-/**
- * x_blend - blending equation that ignores the pixel alpha
- *
- * overwrites RGB color value from src pixel to dst pixel.
- */
-static void x_blend(const u8 *xrgb_src, u8 *xrgb_dst)
-{
- memcpy(xrgb_dst, xrgb_src, sizeof(u8) * 3);
+ u16 src2_r = (argb_src2 & (0xffffllu << 32)) >> 32;
+ u16 src2_g = (argb_src2 & (0xffffllu << 16)) >> 16;
+ u16 src2_b = argb_src2 & 0xffffllu;
+
+ output_pixel[0] = blend_channel(src1_b, src2_b, src1_a);
+ output_pixel[1] = blend_channel(src1_g, src2_g, src1_a);
+ output_pixel[2] = blend_channel(src1_r, src2_r, src1_a);
+ /* The blended pixel is always stored fully opaque. */
+ output_pixel[3] = cpu_to_le16(0xffff);
}

/**
- * blend - blend value at vaddr_src with value at vaddr_dst
- * @vaddr_dst: destination address
- * @vaddr_src: source address
- * @dst_composer: destination framebuffer's metadata
* @src_composer: source framebuffer's metadata
- * @pixel_blend: blending equation based on plane format
+ * @dst_composer: destination framebuffer's metadata
+ * @funcs: A struct containing all the composition functions(get_src_pixel,
+ * get_dst_pixel, and pixel_blend)
*
- * Blend the vaddr_src value with the vaddr_dst value using a pixel blend
- * equation according to the supported plane formats DRM_FORMAT_(A/XRGB8888)
- * and clearing alpha channel to an completely opaque background. This function
- * uses buffer's metadata to locate the new composite values at vaddr_dst.
+ * Using the pixel_blend function passed as parameter, this function blends
+ * all pixels from the source plane into an output buffer.
+ * Information about the output buffer is in the dst_composer parameter
+ * and about the source plane in the src_composer.
+ * The get_src_pixel will use the src_composer to get the respective pixel,
+ * convert, and return it as ARGB_16161616.
+ * The same is true for the dst_composer and get_dst_pixel respectively.
+ * And finally, the blend function will receive the dst_composer, src,
+ * and dst pixels. It blends them and stores the result in the output using
+ * the dst_composer buffer information.
*
* TODO: completely clear the primary plane (a = 0xff) before starting to blend
* pixel color values
*/
-static void blend(void *vaddr_dst, void *vaddr_src,
+static void blend(struct vkms_composer *src_composer,
struct vkms_composer *dst_composer,
- struct vkms_composer *src_composer,
- void (*pixel_blend)(const u8 *, u8 *))
+ struct vkms_pixel_composition_functions *funcs)
{
int i, j, j_dst, i_dst;
- int offset_src, offset_dst;
- u8 *pixel_dst, *pixel_src;
+ u64 pixel_dst, pixel_src;

int x_src = src_composer->src.x1 >> 16;
int y_src = src_composer->src.y1 >> 16;
@@ -130,80 +133,101 @@ static void blend(void *vaddr_dst, void *vaddr_src,
int y_limit = y_src + h_dst;
int x_limit = x_src + w_dst;

- for (i = y_src, i_dst = y_dst; i < y_limit; ++i) {
- for (j = x_src, j_dst = x_dst; j < x_limit; ++j) {
- offset_dst = dst_composer->offset
- + (i_dst * dst_composer->pitch)
- + (j_dst++ * dst_composer->cpp);
- offset_src = src_composer->offset
- + (i * src_composer->pitch)
- + (j * src_composer->cpp);
-
- pixel_src = (u8 *)(vaddr_src + offset_src);
- pixel_dst = (u8 *)(vaddr_dst + offset_dst);
- pixel_blend(pixel_src, pixel_dst);
- /* clearing alpha channel (0xff)*/
- pixel_dst[3] = 0xff;
+ for (i = y_src, i_dst = y_dst; i < y_limit; ++i, i_dst++) {
+ for (j = x_src, j_dst = x_dst; j < x_limit; ++j, j_dst++) {
+ pixel_src = funcs->get_src_pixel(src_composer, j, i);
+ pixel_dst = funcs->get_dst_pixel(dst_composer, j_dst, i_dst);
+
+ funcs->pixel_blend(pixel_src, pixel_dst, j_dst, i_dst,
+ dst_composer);
}
- i_dst++;
}
}

-static void compose_plane(struct vkms_composer *primary_composer,
- struct vkms_composer *plane_composer,
- void *vaddr_out)
+/*
+ * Returns the function that reads a pixel from a buffer of the given DRM
+ * fourcc @format and returns it as ARGB16161616. Any format other than
+ * ARGB8888 and ARGB16161616 gets the XRGB8888 reader.
+ */
+static u64 ((*get_pixel_fmt_transform_function(u32 format))
+ (struct vkms_composer *, int, int))
{
- struct drm_framebuffer *fb = &plane_composer->fb;
- void *vaddr;
- void (*pixel_blend)(const u8 *p_src, u8 *p_dst);
+ if (format == DRM_FORMAT_ARGB8888)
+ return &ARGB8888_to_ARGB16161616;
+ else if (format == DRM_FORMAT_ARGB16161616)
+ return &get_ARGB16161616;
+ else
+ return &XRGB8888_to_ARGB16161616;
+}

- if (WARN_ON(dma_buf_map_is_null(&primary_composer->map[0])))
- return;
+/*
+ * Returns the function that stores an ARGB16161616 pixel into a buffer of
+ * the given DRM fourcc @format. Any format other than ARGB8888 and
+ * ARGB16161616 gets the XRGB8888 writer.
+ */
+static void ((*get_pixel_blend_function(u32 format))
+ (u64, u64, int, int, struct vkms_composer *))
+{
+ if (format == DRM_FORMAT_ARGB8888)
+ return &convert_to_ARGB8888;
+ else if (format == DRM_FORMAT_ARGB16161616)
+ return &convert_to_ARGB16161616;
+ else
+ return &convert_to_XRGB8888;
+}

- vaddr = plane_composer->map[0].vaddr;
+/*
+ * Selects the pixel readers matching the formats of @src_composer and
+ * @dst_composer, stores them in @funcs and runs blend(). This function does
+ * not set funcs->pixel_blend; the caller has to fill it in beforehand.
+ */
+static void compose_plane(struct vkms_composer *src_composer,
+ struct vkms_composer *dst_composer,
+ struct vkms_pixel_composition_functions *funcs)
+{
+ u32 src_format = src_composer->fb.format->format;
+ u32 dst_format = dst_composer->fb.format->format;

- if (fb->format->format == DRM_FORMAT_ARGB8888)
- pixel_blend = &alpha_blend;
- else
- pixel_blend = &x_blend;
+ funcs->get_src_pixel = get_pixel_fmt_transform_function(src_format);
+ funcs->get_dst_pixel = get_pixel_fmt_transform_function(dst_format);

- blend(vaddr_out, vaddr, primary_composer, plane_composer, pixel_blend);
+ blend(src_composer, dst_composer, funcs);
}

-static int compose_active_planes(void **vaddr_out,
- struct vkms_composer *primary_composer,
- struct vkms_crtc_state *crtc_state)
+/*
+ * Allocates a zeroed buffer of w * h ARGB16161616 pixels (w and h come from
+ * the primary composer's dst rectangle), converts active_planes[0] into it
+ * and then alpha-blends the remaining active planes on top, in array order.
+ *
+ * Returns the buffer, or NULL when the primary plane has no mapping or the
+ * allocation fails. vkms_composer_worker() releases it with kvfree().
+ */
+static __le64 *compose_active_planes(struct vkms_composer *primary_composer,
+ struct vkms_crtc_state *crtc_state)
{
- struct drm_framebuffer *fb = &primary_composer->fb;
- struct drm_gem_object *gem_obj = drm_gem_fb_get_obj(fb, 0);
- const void *vaddr;
+ struct vkms_plane_state **active_planes = crtc_state->active_planes;
+ int h = drm_rect_height(&primary_composer->dst);
+ int w = drm_rect_width(&primary_composer->dst);
+ struct vkms_pixel_composition_functions funcs;
+ struct vkms_composer dst_composer;
+ __le64 *vaddr_out;
int i;

- if (!*vaddr_out) {
- *vaddr_out = kvzalloc(gem_obj->size, GFP_KERNEL);
- if (!*vaddr_out) {
- DRM_ERROR("Cannot allocate memory for output frame.");
- return -ENOMEM;
- }
- }
-
if (WARN_ON(dma_buf_map_is_null(&primary_composer->map[0])))
- return -EINVAL;
+ return NULL;

- vaddr = primary_composer->map[0].vaddr;
+ vaddr_out = kvzalloc(w * h * sizeof(__le64), GFP_KERNEL);
+ if (!vaddr_out) {
+ DRM_ERROR("Cannot allocate memory for output frame.");
+ return NULL;
+ }

- memcpy(*vaddr_out, vaddr, gem_obj->size);
+ /* Plane 0 is only converted into the output buffer, not blended. */
+ dst_composer = get_output_vkms_composer(vaddr_out, primary_composer);
+ funcs.pixel_blend = get_pixel_blend_function(DRM_FORMAT_ARGB16161616);
+ compose_plane(active_planes[0]->composer, &dst_composer, &funcs);

/* If there are other planes besides primary, we consider the active
* planes should be in z-order and compose them associatively:
* ((primary <- overlay) <- cursor)
*/
+ funcs.pixel_blend = alpha_blend;
for (i = 1; i < crtc_state->num_active_planes; i++)
- compose_plane(primary_composer,
- crtc_state->active_planes[i]->composer,
- *vaddr_out);
+ compose_plane(active_planes[i]->composer, &dst_composer, &funcs);

- return 0;
+ return vaddr_out;
+}
+
+/*
+ * Copies the composed ARGB16161616 buffer at @vaddr_out into the writeback
+ * job's buffer, converting each pixel to the writeback framebuffer format.
+ * The writeback composer's src and dst rectangles are overwritten with the
+ * ones from @primary_composer before the copy.
+ */
+static void write_wb_buffer(struct vkms_writeback_job *active_wb,
+ struct vkms_composer *primary_composer,
+ __le64 *vaddr_out)
+{
+ u32 dst_fb_format = active_wb->composer.fb.format->format;
+ struct vkms_pixel_composition_functions funcs;
+ struct vkms_composer src_composer;
+
+ src_composer = get_output_vkms_composer(vaddr_out, primary_composer);
+ funcs.pixel_blend = get_pixel_blend_function(dst_fb_format);
+ active_wb->composer.src = primary_composer->src;
+ active_wb->composer.dst = primary_composer->dst;
+
+ compose_plane(&src_composer, &active_wb->composer, &funcs);
}

/**
@@ -221,14 +245,14 @@ void vkms_composer_worker(struct work_struct *work)
struct vkms_crtc_state,
composer_work);
struct drm_crtc *crtc = crtc_state->base.crtc;
+ struct vkms_writeback_job *active_wb = crtc_state->active_writeback;
struct vkms_output *out = drm_crtc_to_vkms_output(crtc);
struct vkms_composer *primary_composer = NULL;
struct vkms_plane_state *act_plane = NULL;
bool crc_pending, wb_pending;
- void *vaddr_out = NULL;
+ __le64 *vaddr_out = NULL;
u32 crc32 = 0;
u64 frame_start, frame_end;
- int ret;

spin_lock_irq(&out->composer_lock);
frame_start = crtc_state->frame_start;
@@ -256,28 +280,21 @@ void vkms_composer_worker(struct work_struct *work)
if (!primary_composer)
return;

- if (wb_pending)
- vaddr_out = crtc_state->active_writeback->data[0].vaddr;
-
- ret = compose_active_planes(&vaddr_out, primary_composer,
- crtc_state);
- if (ret) {
- if (ret == -EINVAL && !wb_pending)
- kvfree(vaddr_out);
+ vaddr_out = compose_active_planes(primary_composer, crtc_state);
+ if (!vaddr_out)
return;
- }
-
- crc32 = compute_crc(vaddr_out, primary_composer);

if (wb_pending) {
+ write_wb_buffer(active_wb, primary_composer, vaddr_out);
drm_writeback_signal_completion(&out->wb_connector, 0);
spin_lock_irq(&out->composer_lock);
crtc_state->wb_pending = false;
spin_unlock_irq(&out->composer_lock);
- } else {
- kvfree(vaddr_out);
}

+ crc32 = compute_crc(vaddr_out, primary_composer);
+ kvfree(vaddr_out);
+
/*
* The worker can fall behind the vblank hrtimer, make sure we catch up.
*/
diff --git a/drivers/gpu/drm/vkms/vkms_formats.h b/drivers/gpu/drm/vkms/vkms_formats.h
new file mode 100644
index 000000000000..60e21adbf68d
--- /dev/null
+++ b/drivers/gpu/drm/vkms/vkms_formats.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _VKMS_FORMATS_H_
+#define _VKMS_FORMATS_H_
+
+#include <drm/drm_rect.h>
+
+/*
+ * Byte offset of the pixel at (x, y): the composer's base offset plus
+ * y rows of pitch bytes plus x pixels of cpp bytes.
+ */
+#define pixel_offset(composer, x, y) \
+ ((composer)->offset + ((y) * (composer)->pitch) + ((x) * (composer)->cpp))
+
+/*
+ * packed_pixels_addr - Get the pointer to the pixel at a pair of coordinates
+ *
+ * @composer: Buffer metadata
+ * @x: The x (width) coordinate of the 2D buffer
+ * @y: The y (height) coordinate of the 2D buffer
+ *
+ * Takes the information stored in the composer, a pair of coordinates, and
+ * returns the address of the first color channel.
+ * This function assumes the channels are packed together, i.e. a color channel
+ * comes immediately after another. And therefore, this function doesn't work
+ * for YUV with chroma subsampling (e.g. YUV420 and NV21).
+ *
+ * Defined static inline because it lives in a header: an external definition
+ * here would be duplicated in every file that includes it.
+ */
+static inline void *packed_pixels_addr(struct vkms_composer *composer,
+ int x, int y)
+{
+ int offset = pixel_offset(composer, x, y);
+
+ return (u8 *)composer->map[0].vaddr + offset;
+}
+
+/*
+ * Reads the ARGB8888 pixel at (x, y) of @composer and returns it widened to
+ * ARGB16161616 in CPU byte order (alpha in bits 63:48, blue in bits 15:0).
+ * static inline: the definition lives in a header.
+ */
+static inline u64 ARGB8888_to_ARGB16161616(struct vkms_composer *composer,
+ int x, int y)
+{
+ u8 *pixel_addr = packed_pixels_addr(composer, x, y);
+
+ /*
+ * Organizes the channels in their respective positions and converts
+ * each 8-bit channel to 16 bits.
+ * The 257 is the "conversion ratio". This number is obtained by the
+ * (2^16 - 1) / (2^8 - 1) division, so that 0xff maps exactly to
+ * 0xffff. A similar idea applies to the other RGB color conversions.
+ */
+ return ((u64)pixel_addr[3] * 257) << 48 |
+ ((u64)pixel_addr[2] * 257) << 32 |
+ ((u64)pixel_addr[1] * 257) << 16 |
+ ((u64)pixel_addr[0] * 257);
+}
+
+/*
+ * Reads the XRGB8888 pixel at (x, y) of @composer and returns it widened to
+ * ARGB16161616 in CPU byte order; the X byte is ignored.
+ * static inline: the definition lives in a header.
+ */
+static inline u64 XRGB8888_to_ARGB16161616(struct vkms_composer *composer,
+ int x, int y)
+{
+ u8 *pixel_addr = packed_pixels_addr(composer, x, y);
+
+ /*
+ * The same as the ARGB8888 conversion, but with the alpha channel
+ * forced to its maximum value.
+ */
+ return 0xffffllu << 48 |
+ ((u64)pixel_addr[2] * 257) << 32 |
+ ((u64)pixel_addr[1] * 257) << 16 |
+ ((u64)pixel_addr[0] * 257);
+}
+
+/*
+ * Reads the ARGB16161616 pixel at (x, y) of @composer and returns it in CPU
+ * byte order. static inline: the definition lives in a header.
+ */
+static inline u64 get_ARGB16161616(struct vkms_composer *composer,
+ int x, int y)
+{
+ __le64 *pixel_addr = packed_pixels_addr(composer, x, y);
+
+ /*
+ * Because the format byte order is little-endian and this code
+ * needs to run on big-endian machines too, we need to convert
+ * the value from little-endian to the CPU native byte order.
+ */
+ return le64_to_cpu(*pixel_addr);
+}
+
+/*
+ * The following functions are used as blend operations. But unlike the
+ * `alpha_blend`, these functions take an ARGB16161616 pixel from the
+ * source, convert it to a specific format, and store it in the destination.
+ *
+ * They are used in the `compose_active_planes` and `write_wb_buffer` to
+ * copy and convert one pixel from/to the output buffer to/from
+ * another buffer (e.g. writeback buffer, primary plane buffer).
+ */
+
+/*
+ * Stores @argb_src1 (ARGB16161616, CPU byte order) as an ARGB8888 pixel at
+ * (x, y) of @dst_composer. @argb_src2 is unused; it is only present so the
+ * function matches the pixel_blend callback signature.
+ * static inline: the definition lives in a header.
+ */
+static inline void convert_to_ARGB8888(u64 argb_src1, u64 argb_src2,
+ int x, int y,
+ struct vkms_composer *dst_composer)
+{
+ u8 *pixel_addr = packed_pixels_addr(dst_composer, x, y);
+
+ /*
+ * This sequence below is important because the format's byte order is
+ * in little-endian. In the case of the ARGB8888 the memory is
+ * organized this way:
+ *
+ * | Addr | = blue channel
+ * | Addr + 1 | = green channel
+ * | Addr + 2 | = Red channel
+ * | Addr + 3 | = Alpha channel
+ *
+ * NOTE(review): DIV_ROUND_UP rounds every value that is not a multiple
+ * of 257 upward; confirm whether round-to-nearest is intended.
+ */
+ pixel_addr[0] = DIV_ROUND_UP(argb_src1 & 0xffffllu, 257);
+ pixel_addr[1] = DIV_ROUND_UP((argb_src1 & (0xffffllu << 16)) >> 16, 257);
+ pixel_addr[2] = DIV_ROUND_UP((argb_src1 & (0xffffllu << 32)) >> 32, 257);
+ pixel_addr[3] = DIV_ROUND_UP((argb_src1 & (0xffffllu << 48)) >> 48, 257);
+}
+
+/*
+ * Stores the color channels of @argb_src1 (ARGB16161616, CPU byte order) as
+ * an XRGB8888 pixel at (x, y) of @dst_composer; the X byte is set to 0xff.
+ * @argb_src2 is unused; it is only present so the function matches the
+ * pixel_blend callback signature.
+ * static inline: the definition lives in a header.
+ */
+static inline void convert_to_XRGB8888(u64 argb_src1, u64 argb_src2,
+ int x, int y,
+ struct vkms_composer *dst_composer)
+{
+ u8 *pixel_addr = packed_pixels_addr(dst_composer, x, y);
+
+ pixel_addr[0] = DIV_ROUND_UP(argb_src1 & 0xffffllu, 257);
+ pixel_addr[1] = DIV_ROUND_UP((argb_src1 & (0xffffllu << 16)) >> 16, 257);
+ pixel_addr[2] = DIV_ROUND_UP((argb_src1 & (0xffffllu << 32)) >> 32, 257);
+ pixel_addr[3] = 0xff;
+}
+
+/*
+ * Stores @argb_src1 as a little-endian ARGB16161616 pixel at (x, y) of
+ * @dst_composer. @argb_src2 is unused; it is only present so the function
+ * matches the pixel_blend callback signature.
+ * static inline: the definition lives in a header.
+ */
+static inline void convert_to_ARGB16161616(u64 argb_src1, u64 argb_src2,
+ int x, int y,
+ struct vkms_composer *dst_composer)
+{
+ __le64 *pixel_addr = packed_pixels_addr(dst_composer, x, y);
+
+ *pixel_addr = cpu_to_le64(argb_src1);
+}
+
+#endif /* _VKMS_FORMATS_H_ */
--
2.30.2