[RFC PATCH v1 03/23] objtool: Move decode_instructions() to a separate file

From: Youling Tang
Date: Tue Jun 20 2023 - 03:47:38 EST


From: "Madhavan T. Venkataraman" <madvenka@xxxxxxxxxxxxxxxxxxx>

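check.c implements static stack validation, but decode_instructions(),
which resides in it, can be shared with other types of validation, e.g.,
dynamic FP validation. Move the function to its own file, decode.c.

Since decode_instructions() updates nr_insns, make that counter
non-static and declare it in check.h so that decode.c can access it.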

Signed-off-by: Madhavan T. Venkataraman <madvenka@xxxxxxxxxxxxxxxxxxx>
---
tools/objtool/Build | 1 +
tools/objtool/check.c | 127 +-----------------------
tools/objtool/decode.c | 136 ++++++++++++++++++++++++++
tools/objtool/include/objtool/check.h | 2 +
tools/objtool/include/objtool/insn.h | 2 +
5 files changed, 142 insertions(+), 126 deletions(-)
create mode 100644 tools/objtool/decode.c

diff --git a/tools/objtool/Build b/tools/objtool/Build
index c04e36267379..64ccae49cd5f 100644
--- a/tools/objtool/Build
+++ b/tools/objtool/Build
@@ -7,6 +7,7 @@ objtool-y += special.o
objtool-y += builtin-check.o
objtool-y += cfi.o
objtool-y += insn.o
+objtool-y += decode.o
objtool-y += elf.o
objtool-y += objtool.o

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 619f7467e39c..26ed9b0b8f49 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -167,134 +167,9 @@ static bool dead_end_function(struct objtool_file *file, struct symbol *func)
return __dead_end_function(file, func, 0);
}

-static unsigned long nr_insns;
+unsigned long nr_insns;
static unsigned long nr_insns_visited;

-/*
- * Call the arch-specific instruction decoder for all the instructions and add
- * them to the global instruction list.
- */
-static int decode_instructions(struct objtool_file *file)
-{
- struct section *sec;
- struct symbol *func;
- unsigned long offset;
- struct instruction *insn;
- int ret;
-
- for_each_sec(file, sec) {
- struct instruction *insns = NULL;
- u8 prev_len = 0;
- u8 idx = 0;
-
- if (!(sec->sh.sh_flags & SHF_EXECINSTR))
- continue;
-
- if (strcmp(sec->name, ".altinstr_replacement") &&
- strcmp(sec->name, ".altinstr_aux") &&
- strncmp(sec->name, ".discard.", 9))
- sec->text = true;
-
- if (!strcmp(sec->name, ".noinstr.text") ||
- !strcmp(sec->name, ".entry.text") ||
- !strcmp(sec->name, ".cpuidle.text") ||
- !strncmp(sec->name, ".text.__x86.", 12))
- sec->noinstr = true;
-
- /*
- * .init.text code is ran before userspace and thus doesn't
- * strictly need retpolines, except for modules which are
- * loaded late, they very much do need retpoline in their
- * .init.text
- */
- if (!strcmp(sec->name, ".init.text") && !opts.module)
- sec->init = true;
-
- for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
- if (!insns || idx == INSN_CHUNK_MAX) {
- insns = calloc(sizeof(*insn), INSN_CHUNK_SIZE);
- if (!insns) {
- WARN("malloc failed");
- return -1;
- }
- idx = 0;
- } else {
- idx++;
- }
- insn = &insns[idx];
- insn->idx = idx;
-
- INIT_LIST_HEAD(&insn->call_node);
- insn->sec = sec;
- insn->offset = offset;
- insn->prev_len = prev_len;
-
- ret = arch_decode_instruction(file, sec, offset,
- sec->sh.sh_size - offset,
- insn);
- if (ret)
- return ret;
-
- prev_len = insn->len;
-
- /*
- * By default, "ud2" is a dead end unless otherwise
- * annotated, because GCC 7 inserts it for certain
- * divide-by-zero cases.
- */
- if (insn->type == INSN_BUG)
- insn->dead_end = true;
-
- hash_add(file->insn_hash, &insn->hash, sec_offset_hash(sec, insn->offset));
- nr_insns++;
- }
-
-// printf("%s: last chunk used: %d\n", sec->name, (int)idx);
-
- list_for_each_entry(func, &sec->symbol_list, list) {
- if (func->type != STT_NOTYPE && func->type != STT_FUNC)
- continue;
-
- if (func->offset == sec->sh.sh_size) {
- /* Heuristic: likely an "end" symbol */
- if (func->type == STT_NOTYPE)
- continue;
- WARN("%s(): STT_FUNC at end of section",
- func->name);
- return -1;
- }
-
- if (func->return_thunk || func->alias != func)
- continue;
-
- if (!find_insn(file, sec, func->offset)) {
- WARN("%s(): can't find starting instruction",
- func->name);
- return -1;
- }
-
- sym_for_each_insn(file, func, insn) {
- insn->sym = func;
- if (func->type == STT_FUNC &&
- insn->type == INSN_ENDBR &&
- list_empty(&insn->call_node)) {
- if (insn->offset == func->offset) {
- list_add_tail(&insn->call_node, &file->endbr_list);
- file->nr_endbr++;
- } else {
- file->nr_endbr_int++;
- }
- }
- }
- }
- }
-
- if (opts.stats)
- printf("nr_insns: %lu\n", nr_insns);
-
- return 0;
-}
-
/*
* Read the pv_ops[] .data table to find the static initialized values.
*/
diff --git a/tools/objtool/decode.c b/tools/objtool/decode.c
new file mode 100644
index 000000000000..f369412ffce1
--- /dev/null
+++ b/tools/objtool/decode.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
+ */
+#include <linux/objtool.h>
+
+#include <objtool/builtin.h>
+#include <objtool/check.h>
+#include <objtool/insn.h>
+#include <objtool/warn.h>
+
+/*
+ * Call the arch-specific instruction decoder for all the instructions and add
+ * them to the global instruction list.
+ */
+int decode_instructions(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *func;
+ unsigned long offset;
+ struct instruction *insn;
+ int ret;
+
+ for_each_sec(file, sec) {
+ struct instruction *insns = NULL;
+ u8 prev_len = 0;
+ u8 idx = 0;
+
+ if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+ continue;
+
+ if (strcmp(sec->name, ".altinstr_replacement") &&
+ strcmp(sec->name, ".altinstr_aux") &&
+ strncmp(sec->name, ".discard.", 9))
+ sec->text = true;
+
+ if (!strcmp(sec->name, ".noinstr.text") ||
+ !strcmp(sec->name, ".entry.text") ||
+ !strcmp(sec->name, ".cpuidle.text") ||
+ !strncmp(sec->name, ".text.__x86.", 12))
+ sec->noinstr = true;
+
+ /*
+ * .init.text code is ran before userspace and thus doesn't
+ * strictly need retpolines, except for modules which are
+ * loaded late, they very much do need retpoline in their
+ * .init.text
+ */
+ if (!strcmp(sec->name, ".init.text") && !opts.module)
+ sec->init = true;
+
+ for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
+ if (!insns || idx == INSN_CHUNK_MAX) {
+ insns = calloc(sizeof(*insn), INSN_CHUNK_SIZE);
+ if (!insns) {
+ WARN("malloc failed");
+ return -1;
+ }
+ idx = 0;
+ } else {
+ idx++;
+ }
+ insn = &insns[idx];
+ insn->idx = idx;
+
+ INIT_LIST_HEAD(&insn->call_node);
+ insn->sec = sec;
+ insn->offset = offset;
+ insn->prev_len = prev_len;
+
+ ret = arch_decode_instruction(file, sec, offset,
+ sec->sh.sh_size - offset,
+ insn);
+ if (ret)
+ return ret;
+
+ prev_len = insn->len;
+
+ /*
+ * By default, "ud2" is a dead end unless otherwise
+ * annotated, because GCC 7 inserts it for certain
+ * divide-by-zero cases.
+ */
+ if (insn->type == INSN_BUG)
+ insn->dead_end = true;
+
+ hash_add(file->insn_hash, &insn->hash, sec_offset_hash(sec, insn->offset));
+ nr_insns++;
+ }
+
+// printf("%s: last chunk used: %d\n", sec->name, (int)idx);
+
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->type != STT_NOTYPE && func->type != STT_FUNC)
+ continue;
+
+ if (func->offset == sec->sh.sh_size) {
+ /* Heuristic: likely an "end" symbol */
+ if (func->type == STT_NOTYPE)
+ continue;
+ WARN("%s(): STT_FUNC at end of section",
+ func->name);
+ return -1;
+ }
+
+ if (func->return_thunk || func->alias != func)
+ continue;
+
+ if (!find_insn(file, sec, func->offset)) {
+ WARN("%s(): can't find starting instruction",
+ func->name);
+ return -1;
+ }
+
+ sym_for_each_insn(file, func, insn) {
+ insn->sym = func;
+ if (func->type == STT_FUNC &&
+ insn->type == INSN_ENDBR &&
+ list_empty(&insn->call_node)) {
+ if (insn->offset == func->offset) {
+ list_add_tail(&insn->call_node, &file->endbr_list);
+ file->nr_endbr++;
+ } else {
+ file->nr_endbr_int++;
+ }
+ }
+ }
+ }
+ }
+
+ if (opts.stats)
+ printf("nr_insns: %lu\n", nr_insns);
+
+ return 0;
+}
+
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index 450ebc092b1f..34898364bf03 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -31,4 +31,6 @@ struct alt_group {
#define VISITED_BRANCH_MASK 0x03
#define VISITED_ENTRY 0x04

+extern unsigned long nr_insns;
+
#endif /* _CHECK_H */
diff --git a/tools/objtool/include/objtool/insn.h b/tools/objtool/include/objtool/insn.h
index edd46b5ea1e4..92f8f1ff6c09 100644
--- a/tools/objtool/include/objtool/insn.h
+++ b/tools/objtool/include/objtool/insn.h
@@ -127,6 +127,8 @@ bool is_first_func_insn(struct objtool_file *file,
struct instruction *insn, struct symbol *sym);


+int decode_instructions(struct objtool_file *file);
+
#define for_each_insn(file, insn) \
for (struct section *__sec, *__fake = (struct section *)1; \
__fake; __fake = NULL) \
--
2.39.2