Creating cyclecounter and lock member in timecounter structure [ Was Re: [RFC 1/4] drm/i915/perf: Add support to correlate GPU timestamp with system time]

From: Sagar Arun Kamble
Date: Thu Nov 23 2017 - 02:34:22 EST


Hi,

We needed inputs on possible optimization that can be done to timecounter/cyclecounter structures/usage.
This mail is in response to review of patch https://patchwork.freedesktop.org/patch/188448/.

As Chris's observation below, about dozen of timecounter users in the kernel have below structures
defined individually:

spinlock_t lock;
struct cyclecounter cc;
struct timecounter tc;

Can we move lock and cc to tc? That way it will be convenient.
Also it will allow unifying the locking/overflow watchdog handling across all drivers.

Please suggest.

Thanks
Sagar

On 11/15/2017 5:55 PM, Chris Wilson wrote:
Quoting Sagar Arun Kamble (2017-11-15 12:13:51)
#include <drm/drmP.h>
#include <drm/intel-gtt.h>
@@ -2149,6 +2150,14 @@ struct i915_perf_stream {
* @oa_config: The OA configuration used by the stream.
*/
struct i915_oa_config *oa_config;
+
+ /**
+ * System time correlation variables.
+ */
+ struct cyclecounter cc;
+ spinlock_t systime_lock;
+ struct timespec64 start_systime;
+ struct timecounter tc;
This pattern is repeated a lot by struct timecounter users. (I'm still
trying to understand why the common case is not catered for by a
convenience timecounter api.)

};
/**
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 00be015..72ddc34 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -192,6 +192,7 @@
*/
#include <linux/anon_inodes.h>
+#include <linux/clocksource.h>
#include <linux/sizes.h>
#include <linux/uuid.h>
@@ -2391,6 +2392,56 @@ static unsigned int i915_perf_poll(struct file *file, poll_table *wait)
}
/**
+ * i915_cyclecounter_read - read raw cycle/timestamp counter
+ * @cc: cyclecounter structure
+ */
+static u64 i915_cyclecounter_read(const struct cyclecounter *cc)
+{
+ struct i915_perf_stream *stream = container_of(cc, typeof(*stream), cc);
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+ u64 ts_count;
+
+ intel_runtime_pm_get(dev_priv);
+ ts_count = I915_READ64_2x32(GEN4_TIMESTAMP,
+ GEN7_TIMESTAMP_UDW);
+ intel_runtime_pm_put(dev_priv);
+
+ return ts_count;
+}
+
+static void i915_perf_init_cyclecounter(struct i915_perf_stream *stream)
+{
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+ int cs_ts_freq = dev_priv->perf.oa.timestamp_frequency;
+ struct cyclecounter *cc = &stream->cc;
+ u32 maxsec;
+
+ cc->read = i915_cyclecounter_read;
+ cc->mask = CYCLECOUNTER_MASK(CS_TIMESTAMP_WIDTH(dev_priv));
+ maxsec = cc->mask / cs_ts_freq;
+
+ clocks_calc_mult_shift(&cc->mult, &cc->shift, cs_ts_freq,
+ NSEC_PER_SEC, maxsec);
+}
+
+static void i915_perf_init_timecounter(struct i915_perf_stream *stream)
+{
+#define SYSTIME_START_OFFSET 350000 /* Counter read takes about 350us */
+ unsigned long flags;
+ u64 ns;
+
+ i915_perf_init_cyclecounter(stream);
+ spin_lock_init(&stream->systime_lock);
+
+ getnstimeofday64(&stream->start_systime);
+ ns = timespec64_to_ns(&stream->start_systime) + SYSTIME_START_OFFSET;
Use ktime directly. Or else Arnd will be back with a patch to fix it.
(All non-ktime interfaces are effectively deprecated; obsolete for
drivers.)

+ spin_lock_irqsave(&stream->systime_lock, flags);
+ timecounter_init(&stream->tc, &stream->cc, ns);
+ spin_unlock_irqrestore(&stream->systime_lock, flags);
+}
+
+/**
* i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
* @stream: A disabled i915 perf stream
*
@@ -2408,6 +2459,8 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream)
/* Allow stream->ops->enable() to refer to this */
stream->enabled = true;
+ i915_perf_init_timecounter(stream);
+
if (stream->ops->enable)
stream->ops->enable(stream);
}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index cfdf4f8..e7e6966 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -8882,6 +8882,12 @@ enum skl_power_gate {
/* Gen4+ Timestamp and Pipe Frame time stamp registers */
#define GEN4_TIMESTAMP _MMIO(0x2358)
+#define GEN7_TIMESTAMP_UDW _MMIO(0x235C)
+#define PRE_GEN7_TIMESTAMP_WIDTH 32
+#define GEN7_TIMESTAMP_WIDTH 36
+#define CS_TIMESTAMP_WIDTH(dev_priv) \
+ (INTEL_GEN(dev_priv) < 7 ? PRE_GEN7_TIMESTAMP_WIDTH : \
+ GEN7_TIMESTAMP_WIDTH)
s/PRE_GEN7/GEN4/ would be consistent.
If you really want to add support for earlier, I9XX_.

Ok. I can accept the justification, and we are not the only ones who do
the cyclecounter -> timecounter correction like this.
-Chris