[RFC patch] checkpatch: test identifier lengths

From: Joe Perches
Date: Fri Feb 16 2018 - 12:13:37 EST


On Fri, 2018-02-16 at 15:55 +0300, Dan Carpenter wrote:
> On Fri, Feb 16, 2018 at 05:06:34PM +0530, Yash Omer wrote:
> > This patch fix line should not end with open parenthesis found by checkpatch.plscript.
> >
> > Signed-off-by: Yash Omer <yashomer0007@xxxxxxxxx>
> > ---
> > drivers/staging/nvec/nvec.c | 4 ++--
> > 1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/staging/nvec/nvec.c b/drivers/staging/nvec/nvec.c
> > index 52054a528723..39fb737543b5 100644
> > --- a/drivers/staging/nvec/nvec.c
> > +++ b/drivers/staging/nvec/nvec.c
> > @@ -383,8 +383,8 @@ static void nvec_request_master(struct work_struct *work)
> > msg = list_first_entry(&nvec->tx_data, struct nvec_msg, node);
> > spin_unlock_irqrestore(&nvec->tx_lock, flags);
> > nvec_gpio_set_value(nvec, 0);
> > - err = wait_for_completion_interruptible_timeout(
> > - &nvec->ec_transfer, msecs_to_jiffies(5000));
> > + err = wait_for_completion_interruptible_timeout
> > + (&nvec->ec_transfer, msecs_to_jiffies(5000));
>
> The original code is basically fine... It's OK to ignore checkpatch in
> this situation.

Right.

Please remember checkpatch is basically stupid as it is
a trivial pattern matcher.

It is very hard to use 80 column line lengths with very
long identifiers.

Any time identifiers are very long (here 40+ characters),
checkpatch long line and line format messages should be
taken with extremely large grains of salt.

Dan/Andrew, what do you think of adding some --strict only
'very_long_identifier' message to checkpatch?

Something like the below: (perhaps using 25 chars lengths?)

This also excludes any long identifier found in include/...
like the camelcase tests.

---
scripts/checkpatch.pl | 102 +++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 101 insertions(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 3d4040322ae1..3659ed0988c4 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -59,6 +59,7 @@ my $conststructsfile = "$D/const_structs.checkpatch";
my $typedefsfile = "";
my $color = "auto";
my $allow_c99_comments = 1;
+my $long_identifier_len = 25;

sub help {
my ($exitcode) = @_;
@@ -541,12 +542,13 @@ our @typeListWithAttr = (
qr{struct\s+$InitAttribute\s+$Ident},
qr{union\s+$InitAttribute\s+$Ident},
);
-
our @modifierList = (
qr{fastcall},
);
our @modifierListFile = ();

+our %long_identifiers = ();
+
our @mode_permission_funcs = (
["module_param", 3],
["module_param_(?:array|named|string)", 4],
@@ -834,6 +836,29 @@ sub seed_camelcase_file {
}
}

+sub seed_long_identifier_file {
+ my ($file) = @_;
+
+ return if (!(-f $file));
+
+ local $/;
+
+ open(my $include_file, '<', "$file")
+ or warn "$P: Can't read '$file' $!\n";
+ my $text = <$include_file>;
+ close($include_file);
+
+ my @lines = split('\n', $text);
+
+ foreach my $line (@lines) {
+ while ($line =~ /($Ident)/g) {
+ my $ident = $1;
+ next if (length($ident) <= $long_identifier_len);
+ $long_identifiers{$ident} = 1;
+ }
+ }
+}
+
sub is_maintained_obsolete {
my ($filename) = @_;

@@ -902,6 +927,64 @@ sub seed_camelcase_includes {
}
}

+my $long_identifiers_seeded = 0;
+sub seed_long_identifiers {
+ return if ($long_identifiers_seeded);
+
+ my $files;
+ my $long_identifier_cache = "";
+ my @include_files = ();
+
+ $long_identifiers_seeded = 1;
+
+ if (-e ".git") {
+ my $git_last_include_commit = `git log --no-merges --pretty=format:"%h%n" -1 -- include`;
+ chomp $git_last_include_commit;
+ $long_identifier_cache = ".checkpatch-long_identifier.git.$git_last_include_commit";
+ } else {
+ my $last_mod_date = 0;
+ $files = `find $root/include -name "*.h"`;
+ @include_files = split('\n', $files);
+ foreach my $file (@include_files) {
+ my $date = POSIX::strftime("%Y%m%d%H%M",
+ localtime((stat $file)[9]));
+ $last_mod_date = $date if ($last_mod_date < $date);
+ }
+ $long_identifier_cache = ".checkpatch-long_identifier.date.$last_mod_date";
+ }
+
+ if ($long_identifier_cache ne "" && -f $long_identifier_cache) {
+ open(my $long_identifier_file, '<', "$long_identifier_cache")
+ or warn "$P: Can't read '$long_identifier_cache' $!\n";
+ while (<$long_identifier_file>) {
+ chomp;
+ $long_identifiers{$_} = 1;
+ }
+ close($long_identifier_file);
+
+ return;
+ }
+
+ if (-e ".git") {
+ $files = `git ls-files "include/*.h"`;
+ @include_files = split('\n', $files);
+ }
+
+ foreach my $file (@include_files) {
+ seed_long_identifier_file($file);
+ }
+
+ if ($long_identifier_cache ne "") {
+ unlink glob ".checkpatch-long_identifier.*";
+ open(my $long_identifier_file, '>', "$long_identifier_cache")
+ or warn "$P: Can't write '$long_identifier_cache' $!\n";
+ foreach (sort { lc($a) cmp lc($b) } keys(%long_identifiers)) {
+ print $long_identifier_file ("$_\n");
+ }
+ close($long_identifier_file);
+ }
+}
+
sub git_commit_info {
my ($commit, $id, $desc) = @_;

@@ -1017,6 +1100,7 @@ for my $filename (@ARGV) {
@modifierListFile = ();
@typeListFile = ();
build_types();
+ seed_long_identifiers();
}

if (!$quiet) {
@@ -2256,6 +2340,7 @@ sub process {
my $setup_docs = 0;

my $camelcase_file_seeded = 0;
+ my $long_identifier_file_seeded = 0;

sanitise_line_reset();
my $line;
@@ -3554,6 +3639,21 @@ sub process {
#ignore lines not being added
next if ($line =~ /^[^\+]/);

+# check for new long identifiers
+ while ($sline =~ /($Ident)/g) {
+ my $ident = $1;
+ if ($check && show_type("LONG_IDENTIFIER") &&
+ $ident !~ /^X+$/ && #not a string
+ length($ident) > $long_identifier_len) {
+ seed_long_identifiers();
+ if (!defined($long_identifiers{$ident})) {
+ CHK("LONG_IDENTIFIER",
+ "long identifier '$ident' - consider shortening\n" . $herecurr);
+ $long_identifiers{$ident} = 1;
+ }
+ }
+ }
+
# check for dereferences that span multiple lines
if ($prevline =~ /^\+.*$Lval\s*(?:\.|->)\s*$/ &&
$line =~ /^\+\s*(?!\#\s*(?!define\s+|if))\s*$Lval/) {