Re: [PATCH RFC 00/10] perf: user space sframe unwinding

From: Josh Poimboeuf
Date: Wed Nov 08 2023 - 19:45:51 EST


On Wed, Nov 08, 2023 at 04:41:05PM -0800, Josh Poimboeuf wrote:
> Some distros have started compiling frame pointers into all their
> packages to enable the kernel to do system-wide profiling of user space.
> Unfortunately that creates a runtime performance penalty across the
> entire system. Using DWARF (or .eh_frame) instead isn't feasible
> because of complexity and slowness.
>
> For in-kernel unwinding we solved this problem with the creation of the
> ORC unwinder for x86_64. Similarly, for user space the GNU assembler
> has created the SFrame ("Simple Frame") format starting with binutils
> 2.40.
>
> These patches add support for unwinding user space from the kernel using
> SFrame with perf. It should be easy to add user unwinding support for
> other components like ftrace.
>
> I tested it on Gentoo by recompiling everything with -Wa,-gsframe and
> using a custom glibc patch (which I'll send in a reply to this email).

Here's my glibc patch:

diff --git a/elf/dl-load.c b/elf/dl-load.c
index 2923b1141d..333d7c39fd 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -29,6 +29,7 @@
#include <bits/wordsize.h>
#include <sys/mman.h>
#include <sys/param.h>
+#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <gnu/lib-names.h>
@@ -88,6 +89,10 @@ struct filebuf

#define STRING(x) __STRING (x)

+#ifndef PT_GNU_SFRAME
+#define PT_GNU_SFRAME 0x6474e554
+#endif
+

int __stack_prot attribute_hidden attribute_relro
#if _STACK_GROWS_DOWN && defined PROT_GROWSDOWN
@@ -1213,6 +1218,10 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
l->l_relro_addr = ph->p_vaddr;
l->l_relro_size = ph->p_memsz;
break;
+
+ case PT_GNU_SFRAME:
+ l->l_sframe_addr = ph->p_vaddr;
+ break;
}

if (__glibc_unlikely (nloadcmds == 0))
@@ -1263,6 +1272,8 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
l->l_map_start = l->l_map_end = 0;
goto lose;
}
+
+
}

if (l->l_ld != 0)
@@ -1376,6 +1387,13 @@ cannot enable executable stack as shared object requires");
break;
}

+#define PR_ADD_SFRAME 71
+ if (l->l_sframe_addr != 0)
+ {
+ l->l_sframe_addr += l->l_addr;
+ __prctl(PR_ADD_SFRAME, l->l_sframe_addr, NULL, NULL, NULL);
+ }
+
/* We are done mapping in the file. We no longer need the descriptor. */
if (__glibc_unlikely (__close_nocancel (fd) != 0))
{
diff --git a/include/link.h b/include/link.h
index c6af095d87..36ac75680f 100644
--- a/include/link.h
+++ b/include/link.h
@@ -348,6 +348,8 @@ struct link_map
ElfW(Addr) l_relro_addr;
size_t l_relro_size;

+ ElfW(Addr) l_sframe_addr;
+
unsigned long long int l_serial;
};