[PATCH] get_maintainer: add --keywords-in-file option

From: Joe Perches
Date: Thu Oct 05 2023 - 17:35:29 EST


There were some recent attempts [1] [2] to make the K: field less noisy
and its behavior more obvious. Ultimately, a shift in the default
behavior and an associated command line flag is the best choice.

Currently, K: will match keywords found in both patches and files.

Matching content from entire files is (while documented) not obvious
behavior and is usually not wanted by maintainers.

Now only patch content will be matched against unless --keywords-in-file
is also provided as an argument to get_maintainer.

Add the actual keyword matched to the role or rolestats as well.

For instance given the diff below that removes clang:

diff --git a/drivers/hid/bpf/entrypoints/README b/drivers/hid/bpf/entrypoints/README
index 147e0d41509f..f88eb19e8ef2 100644
--- a/drivers/hid/bpf/entrypoints/README
+++ b/drivers/hid/bpf/entrypoints/README
@@ -1,4 +1,4 @@
WARNING:
If you change "entrypoints.bpf.c" do "make -j" in this directory to rebuild "entrypoints.skel.h".
-Make sure to have clang 10 installed.
+Make sure to have 10 installed.
See Documentation/bpf/bpf_devel_QA.rst

The new role/rolestats output includes ":Keyword:\b(?i:clang|llvm)\b"

$ git diff drivers/hid/bpf/entrypoints/README | .scripts/get_maintainer.pl
Jiri Kosina <jikos@xxxxxxxxxx> (maintainer:HID CORE LAYER,commit_signer:1/1=100%)
Benjamin Tissoires <benjamin.tissoires@xxxxxxxxxx> (maintainer:HID CORE LAYER,commit_signer:1/1=100%,authored:1/1=100%,added_lines:4/4=100%)
Nathan Chancellor <nathan@xxxxxxxxxx> (supporter:CLANG/LLVM BUILD SUPPORT:Keyword:\b(?i:clang|llvm)\b)
Nick Desaulniers <ndesaulniers@xxxxxxxxxx> (supporter:CLANG/LLVM BUILD SUPPORT:Keyword:\b(?i:clang|llvm)\b)
Tom Rix <trix@xxxxxxxxxx> (reviewer:CLANG/LLVM BUILD SUPPORT:Keyword:\b(?i:clang|llvm)\b)
Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> (commit_signer:1/1=100%)
linux-input@xxxxxxxxxxxxxxx (open list:HID CORE LAYER)
linux-kernel@xxxxxxxxxxxxxxx (open list)
llvm@xxxxxxxxxxxxxxx (open list:CLANG/LLVM BUILD SUPPORT:Keyword:\b(?i:clang|llvm)\b)

Link: https://lore.kernel.org/r/20231004-get_maintainer_change_k-v1-1-ac7ced18306a@xxxxxxxxxx
Link: https://lore.kernel.org/all/20230928-get_maintainer_add_d-v2-0-8acb3f394571@xxxxxxxxxx
Link: https://lore.kernel.org/all/3dca40b677dd2fef979a5a581a2db91df2c21801.camel@xxxxxxxxxxx
Original-patch-by: Justin Stitt <justinstitt@xxxxxxxxxx>
Signed-off-by: Joe Perches <joe@xxxxxxxxxxx>
---
scripts/get_maintainer.pl | 38 ++++++++++++++++++++------------------
1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index ab123b498fd9..16d8ac6005b6 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -57,6 +57,7 @@ my $subsystem = 0;
my $status = 0;
my $letters = "";
my $keywords = 1;
+my $keywords_in_file = 0;
my $sections = 0;
my $email_file_emails = 0;
my $from_filename = 0;
@@ -272,6 +273,7 @@ if (!GetOptions(
'letters=s' => \$letters,
'pattern-depth=i' => \$pattern_depth,
'k|keywords!' => \$keywords,
+ 'kf|keywords-in-file!' => \$keywords_in_file,
'sections!' => \$sections,
'fe|file-emails!' => \$email_file_emails,
'f|file' => \$from_filename,
@@ -318,6 +320,7 @@ if ($sections || $letters ne "") {
$subsystem = 0;
$web = 0;
$keywords = 0;
+ $keywords_in_file = 0;
$interactive = 0;
} else {
my $selections = $email + $scm + $status + $subsystem + $web;
@@ -548,16 +551,14 @@ foreach my $file (@ARGV) {
$file =~ s/^\Q${cur_path}\E//; #strip any absolute path
$file =~ s/^\Q${lk_path}\E//; #or the path to the lk tree
push(@files, $file);
- if ($file ne "MAINTAINERS" && -f $file && $keywords) {
+ if ($file ne "MAINTAINERS" && -f $file && $keywords && $keywords_in_file) {
open(my $f, '<', $file)
or die "$P: Can't open $file: $!\n";
my $text = do { local($/) ; <$f> };
close($f);
- if ($keywords) {
- foreach my $line (keys %keyword_hash) {
- if ($text =~ m/$keyword_hash{$line}/x) {
- push(@keyword_tvi, $line);
- }
+ foreach my $line (keys %keyword_hash) {
+ if ($text =~ m/$keyword_hash{$line}/x) {
+ push(@keyword_tvi, $line);
}
}
}
@@ -919,7 +920,7 @@ sub get_maintainers {
}

foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
- add_categories($line);
+ add_categories($line, "");
if ($sections) {
my $i;
my $start = find_starting_index($line);
@@ -947,7 +948,7 @@ sub get_maintainers {
if ($keywords) {
@keyword_tvi = sort_and_uniq(@keyword_tvi);
foreach my $line (@keyword_tvi) {
- add_categories($line);
+ add_categories($line, ":Keyword:$keyword_hash{$line}");
}
}

@@ -1076,6 +1077,7 @@ Output type options:
Other options:
--pattern-depth => Number of pattern directory traversals (default: 0 (all))
--keywords => scan patch for keywords (default: $keywords)
+ --keywords-in-file => scan file for keywords (default: $keywords_in_file)
--sections => print all of the subsystem sections with pattern matches
--letters => print all matching 'letter' types from all matching sections
--mailmap => use .mailmap file (default: $email_use_mailmap)
@@ -1086,7 +1088,7 @@ Other options:

Default options:
[--email --tree --nogit --git-fallback --m --r --n --l --multiline
- --pattern-depth=0 --remove-duplicates --rolestats]
+ --pattern-depth=0 --remove-duplicates --rolestats --keywords]

Notes:
Using "-f directory" may give unexpected results:
@@ -1312,7 +1314,7 @@ sub get_list_role {
}

sub add_categories {
- my ($index) = @_;
+ my ($index, $suffix) = @_;

my $i;
my $start = find_starting_index($index);
@@ -1342,7 +1344,7 @@ sub add_categories {
if (!$hash_list_to{lc($list_address)}) {
$hash_list_to{lc($list_address)} = 1;
push(@list_to, [$list_address,
- "subscriber list${list_role}"]);
+ "subscriber list${list_role}" . $suffix]);
}
}
} else {
@@ -1352,12 +1354,12 @@ sub add_categories {
if ($email_moderated_list) {
$hash_list_to{lc($list_address)} = 1;
push(@list_to, [$list_address,
- "moderated list${list_role}"]);
+ "moderated list${list_role}" . $suffix]);
}
} else {
$hash_list_to{lc($list_address)} = 1;
push(@list_to, [$list_address,
- "open list${list_role}"]);
+ "open list${list_role}" . $suffix]);
}
}
}
@@ -1365,19 +1367,19 @@ sub add_categories {
} elsif ($ptype eq "M") {
if ($email_maintainer) {
my $role = get_maintainer_role($i);
- push_email_addresses($pvalue, $role);
+ push_email_addresses($pvalue, $role . $suffix);
}
} elsif ($ptype eq "R") {
if ($email_reviewer) {
my $subsystem = get_subsystem_name($i);
- push_email_addresses($pvalue, "reviewer:$subsystem");
+ push_email_addresses($pvalue, "reviewer:$subsystem" . $suffix);
}
} elsif ($ptype eq "T") {
- push(@scm, $pvalue);
+ push(@scm, $pvalue . $suffix);
} elsif ($ptype eq "W") {
- push(@web, $pvalue);
+ push(@web, $pvalue . $suffix);
} elsif ($ptype eq "S") {
- push(@status, $pvalue);
+ push(@status, $pvalue . $suffix);
}
}
}