tracing: gcc for x86 calling mcount with -fomit-frame-pointer

From: John Reiser
Date: Thu Dec 17 2009 - 12:22:05 EST


On x86 and x86_64, current "gcc -pg -fomit-frame-pointer" is not allowed.
This experimental patch against:
http://mirrors.kernel.org/fedora/releases/12/Fedora/source/SRPMS/gcc-4.4.2-7.fc12.src.rpm
allows such a combination, via the command line options:
gcc -fprofile-before-prolog -fomit-frame-pointer
This turns on profiling (as if -pg), moves the "call mcount" to be the
very first instruction of a profiled routine, and omits the frame pointer
(unless some condition other than profiling requires a frame pointer.)
Placing the "call mcount" first, before any other code, has some advantages.
For instance, a postprocessor easily can modify a CALL whose destination
is known, to skip past the "call mcount" at the entry point.

The current glibc implementation of mcount relies on a frame pointer.
At least one recent change to Linux kernel traceback for tracing also relies
on a frame pointer. So there are still conflicts, but they are different ones.

diff --git a/gcc/c-opts.c b/gcc/c-opts.c
index 28bdc31..aa8df9e 100644
--- a/gcc/c-opts.c
+++ b/gcc/c-opts.c
@@ -746,6 +746,10 @@ c_common_handle_option (size_t scode, const char *arg, int value)
cpp_opts->preprocessed = value;
break;

+ case OPT_fprofile_before_prolog:
+ flag_profile_before_prolog = value;
+ break;
+
case OPT_freplace_objc_classes:
flag_replace_objc_classes = value;
break;
diff --git a/gcc/c.opt b/gcc/c.opt
index 711710b..63cd8b6 100644
--- a/gcc/c.opt
+++ b/gcc/c.opt
@@ -720,6 +720,10 @@ fpreprocessed
C ObjC C++ ObjC++
Treat the input file as already preprocessed

+fprofile-before-prolog
+C ObjC C++ ObjC++
+Generate profiling code before the function prolog
+
freplace-objc-classes
ObjC ObjC++
Used in Fix-and-Continue mode to indicate that object files may be swapped in at runtime
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c7a36f4..80bac96 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -7496,7 +7496,7 @@ ix86_frame_pointer_required (void)
|| ix86_current_function_calls_tls_descriptor))
return 1;

- if (crtl->profile)
+ if (crtl->profile && !crtl->profile_before_prolog)
return 1;

return 0;
diff --git a/gcc/config/i386/linux.h b/gcc/config/i386/linux.h
index 39b8746..11008e8 100644
--- a/gcc/config/i386/linux.h
+++ b/gcc/config/i386/linux.h
@@ -55,7 +55,7 @@ along with GCC; see the file COPYING3. If not see
frame, so we cannot allow profiling without a frame pointer. */

#undef SUBTARGET_FRAME_POINTER_REQUIRED
-#define SUBTARGET_FRAME_POINTER_REQUIRED crtl->profile
+#define SUBTARGET_FRAME_POINTER_REQUIRED (crtl->profile && !crtl->profile_before_prolog)

#undef SIZE_TYPE
#define SIZE_TYPE "unsigned int"
diff --git a/gcc/final.c b/gcc/final.c
index 5d717e1..8479245 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -1512,12 +1512,8 @@ final_start_function (rtx first ATTRIBUTE_UNUSED, FILE *file,
leaf_renumber_regs (first);
#endif

- /* The Sun386i and perhaps other machines don't work right
- if the profiling code comes after the prologue. */
-#ifdef PROFILE_BEFORE_PROLOGUE
- if (crtl->profile)
+ if (crtl->profile && crtl->profile_before_prolog)
profile_function (file);
-#endif /* PROFILE_BEFORE_PROLOGUE */

#if defined (DWARF2_UNWIND_INFO) && defined (HAVE_prologue)
if (dwarf2out_do_frame ())
@@ -1559,10 +1555,8 @@ final_start_function (rtx first ATTRIBUTE_UNUSED, FILE *file,
static void
profile_after_prologue (FILE *file ATTRIBUTE_UNUSED)
{
-#ifndef PROFILE_BEFORE_PROLOGUE
- if (crtl->profile)
+ if (crtl->profile && !crtl->profile_before_prolog)
profile_function (file);
-#endif /* not PROFILE_BEFORE_PROLOGUE */
}

static void
@@ -3897,7 +3891,7 @@ leaf_function_p (void)
rtx insn;
rtx link;

- if (crtl->profile || profile_arc_flag)
+ if ((crtl->profile && !crtl->profile_before_prolog) || profile_arc_flag)
return 0;

for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
diff --git a/gcc/flags.h b/gcc/flags.h
index e406bf1..9dbc63f 100644
--- a/gcc/flags.h
+++ b/gcc/flags.h
@@ -189,6 +189,9 @@ extern int flag_permissive;
/* Nonzero if we are compiling code for a shared library, zero for
executable. */

extern int flag_shlib;

+/* Nonzero means generate profiling code before the function prolog. */
+extern int flag_profile_before_prolog;
+
/* -dA causes debug information to be produced in
diff --git a/gcc/function.c b/gcc/function.c
index 2eb3d3b..08ed9a0 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -4362,9 +4362,13 @@ expand_function_start (tree subr)
valid operands of arithmetic insns. */
init_recog_no_volatile ();

+ /* flag_profile_before_prolog: also set ->profile as temporary hack
+ to evade conflict between -pg and -fomit-frame-pointer. */
crtl->profile
- = (profile_flag
+ = ((profile_flag || flag_profile_before_prolog)
&& ! DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (subr));
+ crtl->profile_before_prolog
+ = (crtl->profile && flag_profile_before_prolog);

crtl->limit_stack
= (stack_limit_rtx != NULL_RTX && ! DECL_NO_LIMIT_STACK (subr));
@@ -5016,13 +5020,11 @@ thread_prologue_and_epilogue_insns (void)
record_insns (seq, NULL, &prologue_insn_hash);
emit_note (NOTE_INSN_PROLOGUE_END);
-#ifndef PROFILE_BEFORE_PROLOGUE
/* Ensure that instructions are not moved into the prologue when
profiling is on. The call to the profiling routine can be
emitted within the live range of a call-clobbered register. */
- if (crtl->profile)
+ if (crtl->profile && !crtl->profile_before_prolog)
emit_insn (gen_blockage ());
-#endif

seq = get_insns ();
end_sequence ();
diff --git a/gcc/function.h b/gcc/function.h
index 5658e9e..9c97aeb 100644
--- a/gcc/function.h
+++ b/gcc/function.h
@@ -396,6 +396,9 @@ struct rtl_data GTY(())
/* Nonzero if profiling code should be generated. */
bool profile;

+ /* Nonzero if profiling code should be generated before prolog. */
+ bool profile_before_prolog;
+
/* Nonzero if the current function uses the constant pool. */
bool uses_const_pool;

diff --git a/gcc/toplev.c b/gcc/toplev.c
index b379f8e..e7300ce 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -290,6 +290,9 @@ int flag_pedantic_errors = 0;

int flag_permissive = 0;

+/* Nonzero means generate profiling code before the function prolog. */
+int flag_profile_before_prolog = 0;
+
/* -dA causes debug commentary information to be produced in
the generated assembly code (to make it more readable). This option
is generally only of use to those who actually need to read the
@@ -1687,6 +1690,14 @@ process_options (void)
if (warn_unused_value == -1)
warn_unused_value = warn_unused;

+#ifdef PROFILE_BEFORE_PROLOGUE
+ /* Targets that define PROFILE_BEFORE_PROLOGUE need the profiling call
+ ahead of the prologue. Force the flag only when profiling was requested:
+ expand_function_start enables crtl->profile whenever the flag is set. */
+ if (profile_flag)
+ flag_profile_before_prolog = 1;
+#endif
+
/* Allow the front end to perform consistency checks and do further
initialization based on the command line options. This hook also
sets the original filename if appropriate (e.g. foo.i -> foo.c)

--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/