Re: [PATCH 2/3] perf/arm-cmn: Rework DTC counters (again)

From: Will Deacon
Date: Mon Oct 23 2023 - 06:27:02 EST


On Fri, Oct 20, 2023 at 06:51:26PM +0100, Robin Murphy wrote:
> The bitmap-based scheme for tracking DTC counter usage turns out to be a
> complete dead-end for its imagined purpose, since by the time we have to
> keep track of a per-DTC counter index anyway, we already have enough
> information to make the bitmap itself redundant. Revert the remains of
> it back to almost the original scheme, but now expanded to track per-DTC
> indices, in preparation for making use of them in anger.
>
> Note that since cycle count events always use a dedicated counter on a
> single DTC, we reuse the field to encode their DTC index directly.
>
> Signed-off-by: Robin Murphy <robin.murphy@xxxxxxx>
> ---
> drivers/perf/arm-cmn.c | 126 +++++++++++++++++++++--------------------
> 1 file changed, 64 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> index f1ac8d0cdb3b..675f1638013e 100644
> --- a/drivers/perf/arm-cmn.c
> +++ b/drivers/perf/arm-cmn.c
> @@ -281,16 +281,13 @@ struct arm_cmn_node {
> u16 id, logid;
> enum cmn_node_type type;
>
> - int dtm;
> - union {
> - /* DN/HN-F/CXHA */
> - struct {
> - u8 val : 4;
> - u8 count : 4;
> - } occupid[SEL_MAX];
> - /* XP */
> - u8 dtc;
> - };
> + u8 dtm;
> + s8 dtc;
> + /* DN/HN-F/CXHA */
> + struct {
> + u8 val : 4;
> + u8 count : 4;
> + } occupid[SEL_MAX];
> union {
> u8 event[4];
> __le32 event_sel;
> @@ -540,12 +537,12 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
>
> seq_puts(s, "\n |");
> for (x = 0; x < cmn->mesh_x; x++) {
> - u8 dtc = cmn->xps[xp_base + x].dtc;
> + s8 dtc = cmn->xps[xp_base + x].dtc;
>
> - if (dtc & (dtc - 1))
> + if (dtc < 0)
> seq_puts(s, " DTC ?? |");
> else
> - seq_printf(s, " DTC %ld |", __ffs(dtc));
> + seq_printf(s, " DTC %d |", dtc);
> }
> seq_puts(s, "\n |");
> for (x = 0; x < cmn->mesh_x; x++)
> @@ -589,8 +586,7 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
> struct arm_cmn_hw_event {
> struct arm_cmn_node *dn;
> u64 dtm_idx[4];
> - unsigned int dtc_idx;
> - u8 dtcs_used;
> + s8 dtc_idx[CMN_MAX_DTCS];
> u8 num_dns;
> u8 dtm_offset;
> bool wide_sel;
> @@ -600,6 +596,10 @@ struct arm_cmn_hw_event {
> #define for_each_hw_dn(hw, dn, i) \
> for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)
>
> +/* @i is the DTC number, @idx is the counter index on that DTC */
> +#define for_each_hw_dtc_idx(hw, i, idx) \
> + for (int i = 0, idx; i < CMN_MAX_DTCS; i++) if ((idx = hw->dtc_idx[i]) >= 0)

This macro is pretty hideous ;) The kbuild robot complained as well, but
given that it's internal to the driver and it does make the call sites
quite a bit simpler, I'm inclined to stick with it for now. At least, I
couldn't come up with anything else that was just as succinct.

Will