Re: [PATCH 10/10] gpu: host1x: Optionally block when acquiring channel

From: Dmitry Osipenko
Date: Tue Nov 07 2017 - 10:29:42 EST


On 07.11.2017 16:11, Mikko Perttunen wrote:
> On 05.11.2017 19:14, Dmitry Osipenko wrote:
>> On 05.11.2017 14:01, Mikko Perttunen wrote:
>>> Add an option to host1x_channel_request to interruptibly wait for a
>>> free channel. This allows IOCTLs that acquire a channel to block
>>> the userspace.
>>>
>>
>> Wouldn't it be more optimal to request the channel and block after the job's
>> pinning, when all patching and checks are completed? Note that right now we
>> have locking around submission in DRM, which I suppose should go away by
>> making the locking fine-grained.
>
> That would be possible, but I don't think it should matter much since contention
> here should not be the common case.
>
>>
>> Or maybe it would be more optimal to just iterate over channels, like I
>> suggested before [0]?
>
> Somehow I hadn't noticed this before, but this would break the invariant of
> having one client/class per channel.
>

Yes, currently there is a weak relation between a channel and the client's
device, but it seems the channel's device is only used for printing dev_*
messages, and the device could be borrowed from the channel's job. I don't see
any real point in hardwiring a channel to a specific device or client.

> In general since we haven't seen any issues downstream with the model
> implemented here, I'd like to try to go with this and if we have problems with
> channel allocation then we could revisit.
>

I'd prefer to collect some real numbers first; I will test it with our grate /
mesa stuff. Also, we should have a host1x_test, maybe something similar to the
submission perf test but using multiple contexts.

>
>>
>> [0]
>> https://github.com/cyndis/linux/commit/9e6d87f40afb01fbe13ba65c73cb617bdfcd80b2#commitcomment-25012960
>>
>>
>>> Signed-off-by: Mikko Perttunen <mperttunen@xxxxxxxxxx>
>>> ---
>>>  drivers/gpu/drm/tegra/drm.c | 9 +++++----
>>>  drivers/gpu/drm/tegra/gr2d.c |  6 +++---
>>>  drivers/gpu/drm/tegra/gr3d.c |  6 +++---
>>>  drivers/gpu/host1x/channel.c | 40 ++++++++++++++++++++++++++++++----------
>>>  drivers/gpu/host1x/channel.h |  1 +
>>>  include/linux/host1x.h       |  2 +-
>>>  6 files changed, 43 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
>>> index 658bc8814f38..19f77c1a76c0 100644
>>> --- a/drivers/gpu/drm/tegra/drm.c
>>> +++ b/drivers/gpu/drm/tegra/drm.c
>>> @@ -389,7 +389,8 @@ static int host1x_waitchk_copy_from_user(struct
>>> host1x_waitchk *dest,
>>> Â * Request a free hardware host1x channel for this user context, or if the
>>> Â * context already has one, bump its refcount.
>>> Â *
>>> - * Returns 0 on success, or -EBUSY if there were no free hardware channels.
>>> + * Returns 0 on success, -EINTR if wait for a free channel was interrupted,
>>> + * or other error.
>>> Â */
>>> Âint tegra_drm_context_get_channel(struct tegra_drm_context *context)
>>> Â{
>>> @@ -398,10 +399,10 @@ int tegra_drm_context_get_channel(struct
>>> tegra_drm_context *context)
>>> ÂÂÂÂ mutex_lock(&context->lock);
>>>
>>> ÂÂÂÂ if (context->pending_jobs == 0) {
>>> -ÂÂÂÂÂÂÂ context->channel = host1x_channel_request(client->dev);
>>> -ÂÂÂÂÂÂÂ if (!context->channel) {
>>> +ÂÂÂÂÂÂÂ context->channel = host1x_channel_request(client->dev, true);
>>> +ÂÂÂÂÂÂÂ if (IS_ERR(context->channel)) {
>>> ÂÂÂÂÂÂÂÂÂÂÂÂ mutex_unlock(&context->lock);
>>> -ÂÂÂÂÂÂÂÂÂÂÂ return -EBUSY;
>>> +ÂÂÂÂÂÂÂÂÂÂÂ return PTR_ERR(context->channel);
>>> ÂÂÂÂÂÂÂÂ }
>>> ÂÂÂÂ }
>>>
>>> diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
>>> index 3db3bcac48b9..c1853402f69b 100644
>>> --- a/drivers/gpu/drm/tegra/gr2d.c
>>> +++ b/drivers/gpu/drm/tegra/gr2d.c
>>> @@ -32,9 +32,9 @@ static int gr2d_init(struct host1x_client *client)
>>> ÂÂÂÂ unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
>>> ÂÂÂÂ struct gr2d *gr2d = to_gr2d(drm);
>>>
>>> -ÂÂÂ gr2d->channel = host1x_channel_request(client->dev);
>>> -ÂÂÂ if (!gr2d->channel)
>>> -ÂÂÂÂÂÂÂ return -ENOMEM;
>>> +ÂÂÂ gr2d->channel = host1x_channel_request(client->dev, false);
>>> +ÂÂÂ if (IS_ERR(gr2d->channel))
>>> +ÂÂÂÂÂÂÂ return PTR_ERR(gr2d->channel);
>>>
>>> ÂÂÂÂ client->syncpts[0] = host1x_syncpt_request(client->dev, flags);
>>> ÂÂÂÂ if (!client->syncpts[0]) {
>>> diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
>>> index 279438342c8c..793a91d577cb 100644
>>> --- a/drivers/gpu/drm/tegra/gr3d.c
>>> +++ b/drivers/gpu/drm/tegra/gr3d.c
>>> @@ -42,9 +42,9 @@ static int gr3d_init(struct host1x_client *client)
>>> ÂÂÂÂ unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
>>> ÂÂÂÂ struct gr3d *gr3d = to_gr3d(drm);
>>>
>>> -ÂÂÂ gr3d->channel = host1x_channel_request(client->dev);
>>> -ÂÂÂ if (!gr3d->channel)
>>> -ÂÂÂÂÂÂÂ return -ENOMEM;
>>> +ÂÂÂ gr3d->channel = host1x_channel_request(client->dev, false);
>>> +ÂÂÂ if (IS_ERR(gr3d->channel))
>>> +ÂÂÂÂÂÂÂ return PTR_ERR(gr3d->channel);
>>>
>>> ÂÂÂÂ client->syncpts[0] = host1x_syncpt_request(client->dev, flags);
>>> ÂÂÂÂ if (!client->syncpts[0]) {
>>> diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
>>> index 9d8cad12f9d8..eebcd51261df 100644
>>> --- a/drivers/gpu/host1x/channel.c
>>> +++ b/drivers/gpu/host1x/channel.c
>>> @@ -43,6 +43,7 @@ int host1x_channel_list_init(struct host1x_channel_list
>>> *chlist,
>>> ÂÂÂÂ bitmap_zero(chlist->allocated_channels, num_channels);
>>>
>>> ÂÂÂÂ mutex_init(&chlist->lock);
>>> +ÂÂÂ sema_init(&chlist->sema, num_channels);
>>>
>>> ÂÂÂÂ return 0;
>>> Â}
>>> @@ -99,6 +100,8 @@ static void release_channel(struct kref *kref)
>>> ÂÂÂÂ host1x_cdma_deinit(&channel->cdma);
>>>
>>> ÂÂÂÂ clear_bit(channel->id, chlist->allocated_channels);
>>> +
>>> +ÂÂÂ up(&chlist->sema);
>>> Â}
>>>
>>> Âvoid host1x_channel_put(struct host1x_channel *channel)
>>> @@ -107,19 +110,30 @@ void host1x_channel_put(struct host1x_channel *channel)
>>> Â}
>>> ÂEXPORT_SYMBOL(host1x_channel_put);
>>>
>>> -static struct host1x_channel *acquire_unused_channel(struct host1x *host)
>>> +static struct host1x_channel *acquire_unused_channel(struct host1x *host,
>>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ bool wait)
>>> Â{
>>> ÂÂÂÂ struct host1x_channel_list *chlist = &host->channel_list;
>>> ÂÂÂÂ unsigned int max_channels = host->info->nb_channels;
>>> ÂÂÂÂ unsigned int index;
>>> +ÂÂÂ int err;
>>> +
>>> +ÂÂÂ if (wait) {
>>> +ÂÂÂÂÂÂÂ err = down_interruptible(&chlist->sema);
>>> +ÂÂÂÂÂÂÂ if (err)
>>> +ÂÂÂÂÂÂÂÂÂÂÂ return ERR_PTR(err);
>>> +ÂÂÂ } else {
>>> +ÂÂÂÂÂÂÂ if (down_trylock(&chlist->sema))
>>> +ÂÂÂÂÂÂÂÂÂÂÂ return ERR_PTR(-EBUSY);
>>> +ÂÂÂ }
>>>
>>> ÂÂÂÂ mutex_lock(&chlist->lock);
>>>
>>> ÂÂÂÂ index = find_first_zero_bit(chlist->allocated_channels, max_channels);
>>> -ÂÂÂ if (index >= max_channels) {
>>> +ÂÂÂ if (WARN(index >= max_channels, "failed to find free channel")) {
>>> ÂÂÂÂÂÂÂÂ mutex_unlock(&chlist->lock);
>>> ÂÂÂÂÂÂÂÂ dev_err(host->dev, "failed to find free channel\n");
>>> -ÂÂÂÂÂÂÂ return NULL;
>>> +ÂÂÂÂÂÂÂ return ERR_PTR(-EBUSY);
>>> ÂÂÂÂ }
>>>
>>> ÂÂÂÂ chlist->channels[index].id = index;
>>> @@ -134,20 +148,26 @@ static struct host1x_channel
>>> *acquire_unused_channel(struct host1x *host)
>>> Â/**
>>> Â * host1x_channel_request() - Allocate a channel
>>> Â * @device: Host1x unit this channel will be used to send commands to
>>> + * @wait: Whether to wait for a free channel if all are reserved
>>> + *
>>> + * Allocates a new host1x channel for @device. If all channels are in use,
>>> + * and @wait is true, does an interruptible wait until one is available.
>>> Â *
>>> - * Allocates a new host1x channel for @device. May return NULL if CDMA
>>> - * initialization fails.
>>> + * If a channel was acquired, returns a pointer to it. Otherwise returns
>>> + * an error pointer with -EINTR if the wait was interrupted, -EBUSY
>>> + * if a channel could not be acquired or another error code if channel
>>> + * initialization failed.
>>> Â */
>>> -struct host1x_channel *host1x_channel_request(struct device *dev)
>>> +struct host1x_channel *host1x_channel_request(struct device *dev, bool wait)
>>> Â{
>>> ÂÂÂÂ struct host1x *host = dev_get_drvdata(dev->parent);
>>> ÂÂÂÂ struct host1x_channel_list *chlist = &host->channel_list;
>>> ÂÂÂÂ struct host1x_channel *channel;
>>> ÂÂÂÂ int err;
>>>
>>> -ÂÂÂ channel = acquire_unused_channel(host);
>>> -ÂÂÂ if (!channel)
>>> -ÂÂÂÂÂÂÂ return NULL;
>>> +ÂÂÂ channel = acquire_unused_channel(host, wait);
>>> +ÂÂÂ if (IS_ERR(channel))
>>> +ÂÂÂÂÂÂÂ return channel;
>>>
>>> ÂÂÂÂ kref_init(&channel->refcount);
>>> ÂÂÂÂ mutex_init(&channel->submitlock);
>>> @@ -168,6 +188,6 @@ struct host1x_channel *host1x_channel_request(struct
>>> device *dev)
>>>
>>> ÂÂÂÂ dev_err(dev, "failed to initialize channel\n");
>>>
>>> -ÂÂÂ return NULL;
>>> +ÂÂÂ return ERR_PTR(err);
>>> Â}
>>> ÂEXPORT_SYMBOL(host1x_channel_request);
>>> diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h
>>> index e68a8ae9a670..1f5cf8029b62 100644
>>> --- a/drivers/gpu/host1x/channel.h
>>> +++ b/drivers/gpu/host1x/channel.h
>>> @@ -31,6 +31,7 @@ struct host1x_channel_list {
>>> ÂÂÂÂ struct host1x_channel *channels;
>>>
>>> ÂÂÂÂ struct mutex lock;
>>> +ÂÂÂ struct semaphore sema;
>>> ÂÂÂÂ unsigned long *allocated_channels;
>>> Â};
>>>
>>> diff --git a/include/linux/host1x.h b/include/linux/host1x.h
>>> index f931d28a68ff..2a34905d4408 100644
>>> --- a/include/linux/host1x.h
>>> +++ b/include/linux/host1x.h
>>> @@ -171,7 +171,7 @@ u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base);
>>> Âstruct host1x_channel;
>>> Âstruct host1x_job;
>>>
>>> -struct host1x_channel *host1x_channel_request(struct device *dev);
>>> +struct host1x_channel *host1x_channel_request(struct device *dev, bool wait);
>>> Âstruct host1x_channel *host1x_channel_get(struct host1x_channel *channel);
>>> Âvoid host1x_channel_put(struct host1x_channel *channel);
>>> Âint host1x_job_submit(struct host1x_job *job);
>>>
>>