[PATCH 03/32] perf/x86/intel/cqm: remove all code for rotation of RMIDs

From: David Carrillo-Cisneros
Date: Fri Apr 29 2016 - 00:46:28 EST


Remove all code for the rotation of RMIDs, in preparation for future
patches that will introduce a per-package rotation of RMIDs.

The new rotation logic follows the same ideas as the present rotation
logic being removed, but takes advantage of the per-package RMID design
and more detailed bookkeeping to guarantee that user SLOs are met.
It also avoids IPIs, and does not keep an unused rotation RMID in some
cases (as the present version does).

Reviewed-by: Stephane Eranian <eranian@xxxxxxxxxx>
Signed-off-by: David Carrillo-Cisneros <davidcc@xxxxxxxxxx>
---
arch/x86/events/intel/cqm.c | 371 --------------------------------------------
1 file changed, 371 deletions(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index a3fde49..3c1e247 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -312,42 +312,6 @@ struct rmid_read {
static void __intel_cqm_event_count(void *info);

/*
- * Exchange the RMID of a group of events.
- */
-static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
-{
- struct perf_event *event;
- struct list_head *head = &group->hw.cqm_group_entry;
- u32 old_rmid = group->hw.cqm_rmid;
-
- lockdep_assert_held(&cache_mutex);
-
- /*
- * If our RMID is being deallocated, perform a read now.
- */
- if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
- struct rmid_read rr = {
- .value = ATOMIC64_INIT(0),
- .rmid = old_rmid,
- };
-
- on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count,
- &rr, 1);
- local64_set(&group->count, atomic64_read(&rr.value));
- }
-
- raw_spin_lock_irq(&cache_lock);
-
- group->hw.cqm_rmid = rmid;
- list_for_each_entry(event, head, hw.cqm_group_entry)
- event->hw.cqm_rmid = rmid;
-
- raw_spin_unlock_irq(&cache_lock);
-
- return old_rmid;
-}
-
-/*
* If we fail to assign a new RMID for intel_cqm_rotation_rmid because
* cachelines are still tagged with RMIDs in limbo, we progressively
* increment the threshold until we find an RMID in limbo with <=
@@ -364,44 +328,6 @@ static unsigned int __intel_cqm_threshold;
static unsigned int __intel_cqm_max_threshold;

/*
- * Test whether an RMID has a zero occupancy value on this cpu.
- */
-static void intel_cqm_stable(void *arg)
-{
- struct cqm_rmid_entry *entry;
-
- list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
- if (entry->state != RMID_AVAILABLE)
- break;
-
- if (__rmid_read(entry->rmid) > __intel_cqm_threshold)
- entry->state = RMID_DIRTY;
- }
-}
-
-static bool intel_cqm_sched_in_event(u32 rmid)
-{
- struct perf_event *leader, *event;
-
- lockdep_assert_held(&cache_mutex);
-
- leader = list_first_entry(&cache_groups, struct perf_event,
- hw.cqm_groups_entry);
- event = leader;
-
- list_for_each_entry_continue(event, &cache_groups,
- hw.cqm_groups_entry) {
- if (__rmid_valid(event->hw.cqm_rmid))
- continue;
-
- intel_cqm_xchg_rmid(event, rmid);
- return true;
- }
-
- return false;
-}
-
-/*
* Initially use this constant for both the limbo queue time and the
* rotation timer interval, pmu::hrtimer_interval_ms.
*
@@ -411,291 +337,8 @@ static bool intel_cqm_sched_in_event(u32 rmid)
*/
#define RMID_DEFAULT_QUEUE_TIME 250 /* ms */

-static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME;
-
-/*
- * intel_cqm_rmid_stabilize - move RMIDs from limbo to free list
- * @nr_available: number of freeable RMIDs on the limbo list
- *
- * Quiescent state; wait for all 'freed' RMIDs to become unused, i.e. no
- * cachelines are tagged with those RMIDs. After this we can reuse them
- * and know that the current set of active RMIDs is stable.
- *
- * Return %true or %false depending on whether stabilization needs to be
- * reattempted.
- *
- * If we return %true then @nr_available is updated to indicate the
- * number of RMIDs on the limbo list that have been queued for the
- * minimum queue time (RMID_AVAILABLE), but whose data occupancy values
- * are above __intel_cqm_threshold.
- */
-static bool intel_cqm_rmid_stabilize(unsigned int *available)
-{
- struct cqm_rmid_entry *entry, *tmp;
-
- lockdep_assert_held(&cache_mutex);
-
- *available = 0;
- list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
- unsigned long min_queue_time;
- unsigned long now = jiffies;
-
- /*
- * We hold RMIDs placed into limbo for a minimum queue
- * time. Before the minimum queue time has elapsed we do
- * not recycle RMIDs.
- *
- * The reasoning is that until a sufficient time has
- * passed since we stopped using an RMID, any RMID
- * placed onto the limbo list will likely still have
- * data tagged in the cache, which means we'll probably
- * fail to recycle it anyway.
- *
- * We can save ourselves an expensive IPI by skipping
- * any RMIDs that have not been queued for the minimum
- * time.
- */
- min_queue_time = entry->queue_time +
- msecs_to_jiffies(__rmid_queue_time_ms);
-
- if (time_after(min_queue_time, now))
- break;
-
- entry->state = RMID_AVAILABLE;
- (*available)++;
- }
-
- /*
- * Fast return if none of the RMIDs on the limbo list have been
- * sitting on the queue for the minimum queue time.
- */
- if (!*available)
- return false;
-
- /*
- * Test whether an RMID is free for each package.
- */
- on_each_cpu_mask(&cqm_cpumask, intel_cqm_stable, NULL, true);
-
- list_for_each_entry_safe(entry, tmp, &cqm_rmid_limbo_lru, list) {
- /*
- * Exhausted all RMIDs that have waited min queue time.
- */
- if (entry->state == RMID_YOUNG)
- break;
-
- if (entry->state == RMID_DIRTY)
- continue;
-
- list_del(&entry->list); /* remove from limbo */
-
- /*
- * The rotation RMID gets priority if it's
- * currently invalid. In which case, skip adding
- * the RMID to the the free lru.
- */
- if (!__rmid_valid(intel_cqm_rotation_rmid)) {
- intel_cqm_rotation_rmid = entry->rmid;
- continue;
- }
-
- if (intel_cqm_sched_in_event(entry->rmid))
- continue;
-
- /*
- * Otherwise place it onto the free list.
- */
- list_add_tail(&entry->list, &cqm_rmid_free_lru);
- }
-
-
- return __rmid_valid(intel_cqm_rotation_rmid);
-}
-
-/*
- * Pick a victim group and move it to the tail of the group list.
- * @next: The first group without an RMID
- */
-static void __intel_cqm_pick_and_rotate(struct perf_event *next)
-{
- struct perf_event *rotor;
- u32 rmid;
-
- lockdep_assert_held(&cache_mutex);
-
- rotor = list_first_entry(&cache_groups, struct perf_event,
- hw.cqm_groups_entry);
-
- /*
- * The group at the front of the list should always have a valid
- * RMID. If it doesn't then no groups have RMIDs assigned and we
- * don't need to rotate the list.
- */
- if (next == rotor)
- return;
-
- rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID);
- __put_rmid(rmid);
-
- list_rotate_left(&cache_groups);
-}
-
-/*
- * Attempt to rotate the groups and assign new RMIDs.
- *
- * Rotating RMIDs is complicated because the hardware doesn't give us
- * any clues.
- *
- * There's problems with the hardware interface; when you change the
- * task:RMID map cachelines retain their 'old' tags, giving a skewed
- * picture. In order to work around this, we must always keep one free
- * RMID - intel_cqm_rotation_rmid.
- *
- * Rotation works by taking away an RMID from a group (the old RMID),
- * and assigning the free RMID to another group (the new RMID). We must
- * then wait for the old RMID to not be used (no cachelines tagged).
- * This ensure that all cachelines are tagged with 'active' RMIDs. At
- * this point we can start reading values for the new RMID and treat the
- * old RMID as the free RMID for the next rotation.
- *
- * Return %true or %false depending on whether we did any rotating.
- */
-static bool __intel_cqm_rmid_rotate(void)
-{
- struct perf_event *group, *start = NULL;
- unsigned int threshold_limit;
- unsigned int nr_needed = 0;
- unsigned int nr_available;
- bool rotated = false;
-
- mutex_lock(&cache_mutex);
-
-again:
- /*
- * Fast path through this function if there are no groups and no
- * RMIDs that need cleaning.
- */
- if (list_empty(&cache_groups) && list_empty(&cqm_rmid_limbo_lru))
- goto out;
-
- list_for_each_entry(group, &cache_groups, hw.cqm_groups_entry) {
- if (!__rmid_valid(group->hw.cqm_rmid)) {
- if (!start)
- start = group;
- nr_needed++;
- }
- }
-
- /*
- * We have some event groups, but they all have RMIDs assigned
- * and no RMIDs need cleaning.
- */
- if (!nr_needed && list_empty(&cqm_rmid_limbo_lru))
- goto out;
-
- if (!nr_needed)
- goto stabilize;
-
- /*
- * We force deallocate the rmid of the group at the head of
- * cache_groups. The first event group without an RMID then gets
- * assigned intel_cqm_rotation_rmid. This ensures we always make
- * forward progress.
- *
- * Rotate the cache_groups list so the previous head is now the
- * tail.
- */
- __intel_cqm_pick_and_rotate(start);
-
- /*
- * If the rotation is going to succeed, reduce the threshold so
- * that we don't needlessly reuse dirty RMIDs.
- */
- if (__rmid_valid(intel_cqm_rotation_rmid)) {
- intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid);
- intel_cqm_rotation_rmid = __get_rmid();
-
- if (__intel_cqm_threshold)
- __intel_cqm_threshold--;
- }
-
- rotated = true;
-
-stabilize:
- /*
- * We now need to stablize the RMID we freed above (if any) to
- * ensure that the next time we rotate we have an RMID with zero
- * occupancy value.
- *
- * Alternatively, if we didn't need to perform any rotation,
- * we'll have a bunch of RMIDs in limbo that need stabilizing.
- */
- threshold_limit = __intel_cqm_max_threshold / cqm_l3_scale;
-
- while (intel_cqm_rmid_stabilize(&nr_available) &&
- __intel_cqm_threshold < threshold_limit) {
- unsigned int steal_limit;
-
- /*
- * Don't spin if nobody is actively waiting for an RMID,
- * the rotation worker will be kicked as soon as an
- * event needs an RMID anyway.
- */
- if (!nr_needed)
- break;
-
- /* Allow max 25% of RMIDs to be in limbo. */
- steal_limit = (cqm_max_rmid + 1) / 4;
-
- /*
- * We failed to stabilize any RMIDs so our rotation
- * logic is now stuck. In order to make forward progress
- * we have a few options:
- *
- * 1. rotate ("steal") another RMID
- * 2. increase the threshold
- * 3. do nothing
- *
- * We do both of 1. and 2. until we hit the steal limit.
- *
- * The steal limit prevents all RMIDs ending up on the
- * limbo list. This can happen if every RMID has a
- * non-zero occupancy above threshold_limit, and the
- * occupancy values aren't dropping fast enough.
- *
- * Note that there is prioritisation at work here - we'd
- * rather increase the number of RMIDs on the limbo list
- * than increase the threshold, because increasing the
- * threshold skews the event data (because we reuse
- * dirty RMIDs) - threshold bumps are a last resort.
- */
- if (nr_available < steal_limit)
- goto again;
-
- __intel_cqm_threshold++;
- }
-
-out:
- mutex_unlock(&cache_mutex);
- return rotated;
-}
-
-static void intel_cqm_rmid_rotate(struct work_struct *work);
-
-static DECLARE_DELAYED_WORK(intel_cqm_rmid_work, intel_cqm_rmid_rotate);
-
static struct pmu intel_cqm_pmu;

-static void intel_cqm_rmid_rotate(struct work_struct *work)
-{
- unsigned long delay;
-
- __intel_cqm_rmid_rotate();
-
- delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms);
- schedule_delayed_work(&intel_cqm_rmid_work, delay);
-}
-
/*
* Find a group and setup RMID.
*
@@ -937,7 +580,6 @@ static void intel_cqm_event_destroy(struct perf_event *event)
static int intel_cqm_event_init(struct perf_event *event)
{
struct perf_event *group = NULL;
- bool rotate = false;

if (event->attr.type != intel_cqm_pmu.type)
return -ENOENT;
@@ -971,23 +613,10 @@ static int intel_cqm_event_init(struct perf_event *event)
} else {
list_add_tail(&event->hw.cqm_groups_entry,
&cache_groups);
-
- /*
- * All RMIDs are either in use or have recently been
- * used. Kick the rotation worker to clean/free some.
- *
- * We only do this for the group leader, rather than for
- * every event in a group to save on needless work.
- */
- if (!__rmid_valid(event->hw.cqm_rmid))
- rotate = true;
}

mutex_unlock(&cache_mutex);

- if (rotate)
- schedule_delayed_work(&intel_cqm_rmid_work, 0);
-
return 0;
}

--
2.8.0.rc3.226.g39d4020