Re: [PATCH 2/3] perf tools: Make the JSON parser more conformant when in strict mode

From: Jiri Olsa
Date: Thu Oct 07 2021 - 13:52:41 EST


On Thu, Oct 07, 2021 at 12:05:41PM +0100, James Clark wrote:
> Return an error when a trailing comma is found or a new item is
> encountered before a comma or an opening brace. This ensures that the
> perf json files conform more closely to the spec at https://www.json.org
>
> Signed-off-by: James Clark <james.clark@xxxxxxx>
> ---
> tools/perf/pmu-events/jsmn.c | 42 ++++++++++++++++++++++++++++++++++--
> 1 file changed, 40 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/pmu-events/jsmn.c b/tools/perf/pmu-events/jsmn.c
> index 11d1fa18bfa5..8124d2d3ff0c 100644
> --- a/tools/perf/pmu-events/jsmn.c
> +++ b/tools/perf/pmu-events/jsmn.c
> @@ -176,6 +176,14 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
> jsmnerr_t r;
> int i;
> jsmntok_t *token;
> +#ifdef JSMN_STRICT

I might have missed some discussion on this, but do we need the
JSMN_STRICT define, if you enable it in the next patch?
why can't we be more strict by default.. do you plan to disable
it in future?

thanks,
jirka

> + /*
> + * Keeps track of whether a new object/list/primitive is expected. New items are only
> + * allowed after an opening brace, comma or colon. A closing brace after a comma is not
> + * valid JSON.
> + */
> + int expecting_item = 1;
> +#endif
>
> for (; parser->pos < len; parser->pos++) {
> char c;
> @@ -185,6 +193,10 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
> switch (c) {
> case '{':
> case '[':
> +#ifdef JSMN_STRICT
> + if (!expecting_item)
> + return JSMN_ERROR_INVAL;
> +#endif
> token = jsmn_alloc_token(parser, tokens, num_tokens);
> if (token == NULL)
> return JSMN_ERROR_NOMEM;
> @@ -196,6 +208,10 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
> break;
> case '}':
> case ']':
> +#ifdef JSMN_STRICT
> + if (expecting_item)
> + return JSMN_ERROR_INVAL;
> +#endif
> type = (c == '}' ? JSMN_OBJECT : JSMN_ARRAY);
> for (i = parser->toknext - 1; i >= 0; i--) {
> token = &tokens[i];
> @@ -219,6 +235,11 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
> }
> break;
> case '\"':
> +#ifdef JSMN_STRICT
> + if (!expecting_item)
> + return JSMN_ERROR_INVAL;
> + expecting_item = 0;
> +#endif
> r = jsmn_parse_string(parser, js, len, tokens,
> num_tokens);
> if (r < 0)
> @@ -229,11 +250,15 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
> case '\t':
> case '\r':
> case '\n':
> - case ':':
> - case ',':
> case ' ':
> break;
> #ifdef JSMN_STRICT
> + case ':':
> + case ',':
> + if (expecting_item)
> + return JSMN_ERROR_INVAL;
> + expecting_item = 1;
> + break;
> /*
> * In strict mode primitives are:
> * numbers and booleans.
> @@ -253,6 +278,9 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
> case 'f':
> case 'n':
> #else
> + case ':':
> + case ',':
> + break;
> /*
> * In non-strict mode every unquoted value
> * is a primitive.
> @@ -260,6 +288,12 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
> /*FALL THROUGH */
> default:
> #endif
> +
> +#ifdef JSMN_STRICT
> + if (!expecting_item)
> + return JSMN_ERROR_INVAL;
> + expecting_item = 0;
> +#endif
> r = jsmn_parse_primitive(parser, js, len, tokens,
> num_tokens);
> if (r < 0)
> @@ -282,7 +316,11 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
> return JSMN_ERROR_PART;
> }
>
> +#ifdef JSMN_STRICT
> + return expecting_item ? JSMN_ERROR_INVAL : JSMN_SUCCESS;
> +#else
> return JSMN_SUCCESS;
> +#endif
> }
>
> /*
> --
> 2.28.0
>