[PATCH 5.9 005/133] drm/i915/gem: Always test execution status on closing the context

From: Greg Kroah-Hartman
Date: Mon Nov 09 2020 - 08:26:12 EST


From: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>

commit 651dabe27f9638f569f6a794f9d3cc1889cd315e upstream.

Verify that if a context is active at the time it is closed, that it is
either persistent and preemptible (with hangcheck running) or it shall
be removed from execution.

Fixes: 9a40bddd47ca ("drm/i915/gt: Expose heartbeat interval via sysfs")
Testcase: igt/gem_ctx_persistence/heartbeat-close
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Cc: <stable@xxxxxxxxxxxxxxx> # v5.7+
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Acked-by: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Link: https://patchwork.freedesktop.org/patch/msgid/20200928221510.26044-3-chris@xxxxxxxxxxxxxxxxxx
(cherry picked from commit d3bb2f9b5ee66d5e000293edd6b6575e59d11db9)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
drivers/gpu/drm/i915/gem/i915_gem_context.c | 48 +++++-----------------------
1 file changed, 10 insertions(+), 38 deletions(-)

--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -390,24 +390,6 @@ __context_engines_static(const struct i9
return rcu_dereference_protected(ctx->engines, true);
}

-static bool __reset_engine(struct intel_engine_cs *engine)
-{
- struct intel_gt *gt = engine->gt;
- bool success = false;
-
- if (!intel_has_reset_engine(gt))
- return false;
-
- if (!test_and_set_bit(I915_RESET_ENGINE + engine->id,
- &gt->reset.flags)) {
- success = intel_engine_reset(engine, NULL) == 0;
- clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
- &gt->reset.flags);
- }
-
- return success;
-}
-
static void __reset_context(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
@@ -431,12 +413,7 @@ static bool __cancel_engine(struct intel
* kill the banned context, we fallback to doing a local reset
* instead.
*/
- if (IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT) &&
- !intel_engine_pulse(engine))
- return true;
-
- /* If we are unable to send a pulse, try resetting this engine. */
- return __reset_engine(engine);
+ return intel_engine_pulse(engine) == 0;
}

static bool
@@ -493,7 +470,7 @@ static struct intel_engine_cs *active_en
return engine;
}

-static void kill_engines(struct i915_gem_engines *engines)
+static void kill_engines(struct i915_gem_engines *engines, bool ban)
{
struct i915_gem_engines_iter it;
struct intel_context *ce;
@@ -508,7 +485,7 @@ static void kill_engines(struct i915_gem
for_each_gem_engine(ce, engines, it) {
struct intel_engine_cs *engine;

- if (intel_context_set_banned(ce))
+ if (ban && intel_context_set_banned(ce))
continue;

/*
@@ -521,7 +498,7 @@ static void kill_engines(struct i915_gem
engine = active_engine(ce);

/* First attempt to gracefully cancel the context */
- if (engine && !__cancel_engine(engine))
+ if (engine && !__cancel_engine(engine) && ban)
/*
* If we are unable to send a preemptive pulse to bump
* the context from the GPU, we have to resort to a full
@@ -531,8 +508,10 @@ static void kill_engines(struct i915_gem
}
}

-static void kill_stale_engines(struct i915_gem_context *ctx)
+static void kill_context(struct i915_gem_context *ctx)
{
+ bool ban = (!i915_gem_context_is_persistent(ctx) ||
+ !ctx->i915->params.enable_hangcheck);
struct i915_gem_engines *pos, *next;

spin_lock_irq(&ctx->stale.lock);
@@ -545,7 +524,7 @@ static void kill_stale_engines(struct i9

spin_unlock_irq(&ctx->stale.lock);

- kill_engines(pos);
+ kill_engines(pos, ban);

spin_lock_irq(&ctx->stale.lock);
GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
@@ -557,11 +536,6 @@ static void kill_stale_engines(struct i9
spin_unlock_irq(&ctx->stale.lock);
}

-static void kill_context(struct i915_gem_context *ctx)
-{
- kill_stale_engines(ctx);
-}
-
static void engines_idle_release(struct i915_gem_context *ctx,
struct i915_gem_engines *engines)
{
@@ -596,7 +570,7 @@ static void engines_idle_release(struct

kill:
if (list_empty(&engines->link)) /* raced, already closed */
- kill_engines(engines);
+ kill_engines(engines, true);

i915_sw_fence_commit(&engines->fence);
}
@@ -654,9 +628,7 @@ static void context_close(struct i915_ge
* case we opt to forcibly kill off all remaining requests on
* context close.
*/
- if (!i915_gem_context_is_persistent(ctx) ||
- !ctx->i915->params.enable_hangcheck)
- kill_context(ctx);
+ kill_context(ctx);

i915_gem_context_put(ctx);
}