Re: [PATCH] Fix invalid escape sequence warnings

From: Andrii Nakryiko
Date: Tue Aug 22 2023 - 19:31:12 EST


On Wed, Aug 16, 2023 at 5:22 AM Vishal Chourasia <vishalc@xxxxxxxxxxxxx> wrote:
>
> The Python script `bpf_doc.py` uses regular expressions with
> backslashes in string literals, which results in SyntaxWarnings
> during its execution.
>
> This patch addresses these warnings by converting relevant string
> literals to raw strings, which interpret backslashes as literal
> characters. This ensures that the regular expressions are parsed
> correctly without causing any warnings.
>
> Signed-off-by: Vishal Chourasia <vishalc@xxxxxxxxxxxxx>
> Reported-by: Srikar Dronamraju <srikar@xxxxxxxxxxxxxxxxxx>
>
> ---
> scripts/bpf_doc.py | 34 +++++++++++++++++-----------------
> 1 file changed, 17 insertions(+), 17 deletions(-)
>
> diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
> index eaae2ce78381..dfd819c952b2 100755
> --- a/scripts/bpf_doc.py
> +++ b/scripts/bpf_doc.py
> @@ -59,9 +59,9 @@ class Helper(APIElement):
> Break down helper function protocol into smaller chunks: return type,
> name, distincts arguments.
> """
> - arg_re = re.compile('((\w+ )*?(\w+|...))( (\**)(\w+))?$')
> + arg_re = re.compile(r'((\w+ )*?(\w+|...))( (\**)(\w+))?$')
> res = {}
> - proto_re = re.compile('(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$')
> + proto_re = re.compile(r'(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$')
>
> capture = proto_re.match(self.proto)
> res['ret_type'] = capture.group(1)
> @@ -114,11 +114,11 @@ class HeaderParser(object):
> return Helper(proto=proto, desc=desc, ret=ret)
>
> def parse_symbol(self):
> - p = re.compile(' \* ?(BPF\w+)$')
> + p = re.compile(r' \* ?(BPF\w+)$')
> capture = p.match(self.line)
> if not capture:
> raise NoSyscallCommandFound
> - end_re = re.compile(' \* ?NOTES$')
> + end_re = re.compile(r' \* ?NOTES$')
> end = end_re.match(self.line)
> if end:
> raise NoSyscallCommandFound
> @@ -133,7 +133,7 @@ class HeaderParser(object):
> # - Same as above, with "const" and/or "struct" in front of type
> # - "..." (undefined number of arguments, for bpf_trace_printk())
> # There is at least one term ("void"), and at most five arguments.
> - p = re.compile(' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$')
> + p = re.compile(r' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$')
> capture = p.match(self.line)
> if not capture:
> raise NoHelperFound
> @@ -141,7 +141,7 @@ class HeaderParser(object):
> return capture.group(1)
>
> def parse_desc(self, proto):
> - p = re.compile(' \* ?(?:\t| {5,8})Description$')
> + p = re.compile(r' \* ?(?:\t| {5,8})Description$')
> capture = p.match(self.line)
> if not capture:
> raise Exception("No description section found for " + proto)
> @@ -154,7 +154,7 @@ class HeaderParser(object):
> if self.line == ' *\n':
> desc += '\n'
> else:
> - p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
> + p = re.compile(r' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
> capture = p.match(self.line)
> if capture:
> desc_present = True
> @@ -167,7 +167,7 @@ class HeaderParser(object):
> return desc
>
> def parse_ret(self, proto):
> - p = re.compile(' \* ?(?:\t| {5,8})Return$')
> + p = re.compile(r' \* ?(?:\t| {5,8})Return$')
> capture = p.match(self.line)
> if not capture:
> raise Exception("No return section found for " + proto)
> @@ -180,7 +180,7 @@ class HeaderParser(object):
> if self.line == ' *\n':
> ret += '\n'
> else:
> - p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
> + p = re.compile(r' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
> capture = p.match(self.line)
> if capture:
> ret_present = True
> @@ -219,12 +219,12 @@ class HeaderParser(object):
> self.seek_to('enum bpf_cmd {',
> 'Could not find start of bpf_cmd enum', 0)
> # Searches for either one or more BPF\w+ enums
> - bpf_p = re.compile('\s*(BPF\w+)+')
> + bpf_p = re.compile(r'\s*(BPF\w+)+')
> # Searches for an enum entry assigned to another entry,
> # for e.g. BPF_PROG_RUN = BPF_PROG_TEST_RUN, which is
> # not documented hence should be skipped in check to
> # determine if the right number of syscalls are documented
> - assign_p = re.compile('\s*(BPF\w+)\s*=\s*(BPF\w+)')
> + assign_p = re.compile(r'\s*(BPF\w+)\s*=\s*(BPF\w+)')
> bpf_cmd_str = ''
> while True:
> capture = assign_p.match(self.line)
> @@ -239,7 +239,7 @@ class HeaderParser(object):
> break
> self.line = self.reader.readline()
> # Find the number of occurences of BPF\w+
> - self.enum_syscalls = re.findall('(BPF\w+)+', bpf_cmd_str)
> + self.enum_syscalls = re.findall(r'(BPF\w+)+', bpf_cmd_str)
>
> def parse_desc_helpers(self):
> self.seek_to(helpersDocStart,
> @@ -263,7 +263,7 @@ class HeaderParser(object):
> self.seek_to('#define ___BPF_FUNC_MAPPER(FN, ctx...)',
> 'Could not find start of eBPF helper definition list')
> # Searches for one FN(\w+) define or a backslash for newline
> - p = re.compile('\s*FN\((\w+), (\d+), ##ctx\)|\\\\')
> + p = re.compile(r'\s*FN\((\w+), (\d+), ##ctx\)|\\\\')
> fn_defines_str = ''
> i = 0
> while True:
> @@ -278,7 +278,7 @@ class HeaderParser(object):
> break
> self.line = self.reader.readline()
> # Find the number of occurences of FN(\w+)
> - self.define_unique_helpers = re.findall('FN\(\w+, \d+, ##ctx\)', fn_defines_str)
> + self.define_unique_helpers = re.findall(r'FN\(\w+, \d+, ##ctx\)', fn_defines_str)
>
> def validate_helpers(self):
> last_helper = ''
> @@ -425,7 +425,7 @@ class PrinterRST(Printer):
> try:
> cmd = ['git', 'log', '-1', '--pretty=format:%cs', '--no-patch',
> '-L',
> - '/{}/,/\*\//:include/uapi/linux/bpf.h'.format(delimiter)]
> + r'/{}/,/\*\//:include/uapi/linux/bpf.h'.format(delimiter)]

This one is not a regex; do we still need to change it?

> date = subprocess.run(cmd, cwd=linuxRoot,
> capture_output=True, check=True)
> return date.stdout.decode().rstrip()
> @@ -496,7 +496,7 @@ HELPERS
> date=lastUpdate))
>
> def print_footer(self):
> - footer = '''
> + footer = r'''

Same here: this is not a regex string.

> EXAMPLES
> ========
>
> @@ -598,7 +598,7 @@ SEE ALSO
> one_arg = '{}{}'.format(comma, a['type'])
> if a['name']:
> if a['star']:
> - one_arg += ' {}**\ '.format(a['star'].replace('*', '\\*'))
> + one_arg += r' {}**\ '.format(a['star'].replace('*', '\\*'))

And this one as well? It is not a regex either.

> else:
> one_arg += '** '
> one_arg += '*{}*\\ **'.format(a['name'])
> --
> 2.41.0
>