[PATCH 4/4] oprofile update: kernel/user addresses fix

From: John Levon (levon@movementarian.org)
Date: Tue Feb 11 2003 - 06:45:37 EST


This patch removes the assumption that any address above PAGE_OFFSET is a
kernel address. Instead, each sample is classified with user_mode(regs), and
kernel enter/exit switch codes are inserted into the buffer whenever that
state changes.

diff -Naur -X dontdiff linux/arch/i386/oprofile/op_model_athlon.c linux2/arch/i386/oprofile/op_model_athlon.c
--- linux/arch/i386/oprofile/op_model_athlon.c 2003-01-03 03:15:26.000000000 +0000
+++ linux2/arch/i386/oprofile/op_model_athlon.c 2003-01-23 20:24:53.000000000 +0000
@@ -96,10 +96,13 @@
 {
         unsigned int low, high;
         int i;
+ unsigned long eip = instruction_pointer(regs);
+ int is_kernel = !user_mode(regs);
+
         for (i = 0 ; i < NUM_COUNTERS; ++i) {
                 CTR_READ(low, high, msrs, i);
                 if (CTR_OVERFLOWED(low)) {
- oprofile_add_sample(instruction_pointer(regs), i, cpu);
+ oprofile_add_sample(eip, is_kernel, i, cpu);
                         CTR_WRITE(reset_value[i], msrs, i);
                         return 1;
                 }
diff -Naur -X dontdiff linux/arch/i386/oprofile/op_model_p4.c linux2/arch/i386/oprofile/op_model_p4.c
--- linux/arch/i386/oprofile/op_model_p4.c 2003-01-15 19:26:32.000000000 +0000
+++ linux2/arch/i386/oprofile/op_model_p4.c 2003-01-31 03:57:28.000000000 +0000
@@ -569,6 +569,8 @@
 {
         unsigned long ctr, low, high, stag, real;
         int i;
+ unsigned long eip = instruction_pointer(regs);
+ int is_kernel = !user_mode(regs);
 
         stag = get_stagger();
 
@@ -599,7 +601,7 @@
                 CCCR_READ(low, high, real);
                  CTR_READ(ctr, high, real);
                 if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
- oprofile_add_sample(regs->eip, i, cpu);
+ oprofile_add_sample(eip, is_kernel, i, cpu);
                          CTR_WRITE(reset_value[i], real);
                         CCCR_CLEAR_OVF(low);
                         CCCR_WRITE(low, high, real);
@@ -624,7 +626,8 @@
         stag = get_stagger();
 
         for (i = 0; i < num_counters; ++i) {
- if (!reset_value[i]) continue;
+ if (!reset_value[i])
+ continue;
                 CCCR_READ(low, high, VIRT_CTR(stag, i));
                 CCCR_SET_ENABLE(low);
                 CCCR_WRITE(low, high, VIRT_CTR(stag, i));
diff -Naur -X dontdiff linux/arch/i386/oprofile/op_model_ppro.c linux2/arch/i386/oprofile/op_model_ppro.c
--- linux/arch/i386/oprofile/op_model_ppro.c 2003-01-03 03:15:26.000000000 +0000
+++ linux2/arch/i386/oprofile/op_model_ppro.c 2003-01-23 20:24:53.000000000 +0000
@@ -90,11 +90,13 @@
 {
         unsigned int low, high;
         int i;
+ unsigned long eip = instruction_pointer(regs);
+ int is_kernel = !user_mode(regs);
  
         for (i = 0 ; i < NUM_COUNTERS; ++i) {
                 CTR_READ(low, high, msrs, i);
                 if (CTR_OVERFLOWED(low)) {
- oprofile_add_sample(instruction_pointer(regs), i, cpu);
+ oprofile_add_sample(eip, is_kernel, i, cpu);
                         CTR_WRITE(reset_value[i], msrs, i);
                         return 1;
                 }
diff -Naur -X dontdiff linux/arch/i386/oprofile/timer_int.c linux2/arch/i386/oprofile/timer_int.c
--- linux/arch/i386/oprofile/timer_int.c 2003-02-10 19:40:25.000000000 +0000
+++ linux2/arch/i386/oprofile/timer_int.c 2003-01-23 20:24:53.000000000 +0000
@@ -20,8 +20,9 @@
 {
         struct pt_regs * regs = (struct pt_regs *)data;
         int cpu = smp_processor_id();
+ unsigned long eip = instruction_pointer(regs);
  
- oprofile_add_sample(instruction_pointer(regs), 0, cpu);
+ oprofile_add_sample(eip, !user_mode(regs), 0, cpu);
         return 0;
 }
  
diff -Naur -X dontdiff linux/arch/parisc/oprofile/timer_int.c linux2/arch/parisc/oprofile/timer_int.c
--- linux/arch/parisc/oprofile/timer_int.c 2003-02-11 10:53:30.000000000 +0000
+++ linux2/arch/parisc/oprofile/timer_int.c 2003-01-23 20:30:56.000000000 +0000
@@ -19,8 +19,10 @@
 {
         struct pt_regs * regs = (struct pt_regs *)data;
         int cpu = smp_processor_id();
+ unsigned long pc = regs->iaoq[0];
+ int is_kernel = !user_mode(regs);
  
- oprofile_add_sample(regs->iaoq[0], 0, cpu);
+ oprofile_add_sample(pc, is_kernel, 0, cpu);
         return 0;
 }
  
diff -Naur -X dontdiff linux/arch/ppc64/oprofile/timer_int.c linux2/arch/ppc64/oprofile/timer_int.c
--- linux/arch/ppc64/oprofile/timer_int.c 2003-02-10 19:40:25.000000000 +0000
+++ linux2/arch/ppc64/oprofile/timer_int.c 2003-01-23 20:28:39.000000000 +0000
@@ -19,8 +19,10 @@
 {
         struct pt_regs * regs = (struct pt_regs *)data;
         int cpu = smp_processor_id();
+ unsigned long pc = instruction_pointer(regs);
+ int is_kernel = !user_mode(regs);
  
- oprofile_add_sample(instruction_pointer(regs), 0, cpu);
+ oprofile_add_sample(pc, is_kernel, 0, cpu);
         return 0;
 }
  
diff -Naur -X dontdiff linux/arch/sparc64/oprofile/timer_int.c linux2/arch/sparc64/oprofile/timer_int.c
--- linux/arch/sparc64/oprofile/timer_int.c 2003-02-10 19:40:25.000000000 +0000
+++ linux2/arch/sparc64/oprofile/timer_int.c 2003-01-23 20:27:54.000000000 +0000
@@ -19,8 +19,10 @@
 {
         struct pt_regs * regs = (struct pt_regs *)data;
         int cpu = smp_processor_id();
+ unsigned long pc = instruction_pointer(regs);
+ int is_kernel = !user_mode(regs);
  
- oprofile_add_sample(instruction_pointer(regs), 0, cpu);
+ oprofile_add_sample(pc, is_kernel, 0, cpu);
         return 0;
 }
  
diff -Naur -X dontdiff linux/drivers/oprofile/buffer_sync.c linux2/drivers/oprofile/buffer_sync.c
--- linux/drivers/oprofile/buffer_sync.c 2003-01-11 20:04:17.000000000 +0000
+++ linux2/drivers/oprofile/buffer_sync.c 2003-02-10 19:47:30.000000000 +0000
@@ -199,8 +199,16 @@
         last_cookie = ~0UL;
 }
 
+/* Log a kernel entry (in_kernel != 0) or kernel exit in the event buffer. */
+static void add_kernel_ctx_switch(unsigned int in_kernel)
+{
+	unsigned long code = in_kernel ? KERNEL_ENTER_SWITCH_CODE
+				       : KERNEL_EXIT_SWITCH_CODE;
+	add_event_entry(ESCAPE_CODE);
+	add_event_entry(code);
+}
  
-static void add_ctx_switch(pid_t pid, unsigned long cookie)
+static void add_user_ctx_switch(pid_t pid, unsigned long cookie)
 {
         add_event_entry(ESCAPE_CODE);
         add_event_entry(CTX_SWITCH_CODE);
@@ -243,19 +251,13 @@
 }
 
  
-static inline int is_kernel(unsigned long val)
-{
- return val > PAGE_OFFSET;
-}
-
-
 /* Add a sample to the global event buffer. If possible the
  * sample is converted into a persistent dentry/offset pair
  * for later lookup from userspace.
  */
-static void add_sample(struct mm_struct * mm, struct op_sample * s)
+static void add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
 {
- if (is_kernel(s->eip)) {
+ if (in_kernel) {
                 add_sample_entry(s->eip, s->event);
         } else if (mm) {
                 add_us_sample(mm, s);
@@ -319,26 +321,34 @@
         struct mm_struct * mm = 0;
         struct task_struct * new;
         unsigned long cookie;
+ int in_kernel = 1;
         int i;
  
         for (i=0; i < cpu_buf->pos; ++i) {
                 struct op_sample * s = &cpu_buf->buffer[i];
  
                 if (is_ctx_switch(s->eip)) {
- new = (struct task_struct *)s->event;
-
- release_mm(mm);
- mm = take_task_mm(new);
-
- cookie = get_exec_dcookie(mm);
- add_ctx_switch(new->pid, cookie);
+ if (s->event <= 1) {
+ /* kernel/userspace switch */
+ in_kernel = s->event;
+ add_kernel_ctx_switch(s->event);
+ } else {
+ /* userspace context switch */
+ new = (struct task_struct *)s->event;
+
+ release_mm(mm);
+ mm = take_task_mm(new);
+
+ cookie = get_exec_dcookie(mm);
+ add_user_ctx_switch(new->pid, cookie);
+ }
                 } else {
- add_sample(mm, s);
+ add_sample(mm, s, in_kernel);
                 }
         }
         release_mm(mm);
 
- cpu_buf->pos = 0;
+ cpu_buffer_reset(cpu_buf);
 }
  
  
@@ -364,10 +374,12 @@
                  * lockers only, and this region is already
                  * protected by buffer_sem. It's raw to prevent
                  * the preempt bogometer firing. Fruity, huh ? */
- _raw_spin_lock(&cpu_buf->int_lock);
- add_cpu_switch(i);
- sync_buffer(cpu_buf);
- _raw_spin_unlock(&cpu_buf->int_lock);
+ if (cpu_buf->pos > 0) {
+ _raw_spin_lock(&cpu_buf->int_lock);
+ add_cpu_switch(i);
+ sync_buffer(cpu_buf);
+ _raw_spin_unlock(&cpu_buf->int_lock);
+ }
         }
 
         up(&buffer_sem);
@@ -393,3 +405,4 @@
         schedule_work(&sync_wq);
         /* timer is re-added by the scheduled task */
 }
+
diff -Naur -X dontdiff linux/drivers/oprofile/cpu_buffer.c linux2/drivers/oprofile/cpu_buffer.c
--- linux/drivers/oprofile/cpu_buffer.c 2002-12-17 16:59:03.000000000 +0000
+++ linux2/drivers/oprofile/cpu_buffer.c 2003-01-23 20:24:53.000000000 +0000
@@ -62,6 +62,7 @@
                 spin_lock_init(&b->int_lock);
                 b->pos = 0;
                 b->last_task = 0;
+ b->last_is_kernel = -1;
                 b->sample_received = 0;
                 b->sample_lost_locked = 0;
                 b->sample_lost_overflow = 0;
@@ -84,12 +85,20 @@
  * be safe from any context. Instead we trylock the CPU's int_lock.
  * int_lock is taken by the processing code in sync_cpu_buffers()
  * so we avoid disturbing that.
+ *
+ * is_kernel is needed because on some architectures you cannot
+ * tell whether you are in kernel or user space simply by looking
+ * at eip. We tag this in the buffer by generating kernel enter/exit
+ * events whenever is_kernel changes.
  */
-void oprofile_add_sample(unsigned long eip, unsigned long event, int cpu)
+void oprofile_add_sample(unsigned long eip, unsigned int is_kernel,
+ unsigned long event, int cpu)
 {
         struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[cpu];
         struct task_struct * task;
 
+ is_kernel = !!is_kernel;
+
         cpu_buf->sample_received++;
  
         if (!spin_trylock(&cpu_buf->int_lock)) {
@@ -101,9 +110,17 @@
                 cpu_buf->sample_lost_overflow++;
                 goto out;
         }
-
+
         task = current;
 
+ /* notice a switch from user->kernel or vice versa */
+ if (cpu_buf->last_is_kernel != is_kernel) {
+ cpu_buf->last_is_kernel = is_kernel;
+ cpu_buf->buffer[cpu_buf->pos].eip = ~0UL;
+ cpu_buf->buffer[cpu_buf->pos].event = is_kernel;
+ cpu_buf->pos++;
+ }
+
         /* notice a task switch */
         if (cpu_buf->last_task != task) {
                 cpu_buf->last_task = task;
@@ -130,3 +147,19 @@
 out:
         spin_unlock(&cpu_buf->int_lock);
 }
+
+/* Empty a cpu buffer and invalidate its cached sample state. The
+ * caller must hold cpu_buf->int_lock.
+ */
+void cpu_buffer_reset(struct oprofile_cpu_buffer *cpu_buf)
+{
+	/* Forget the cached task and kernel/user state: -1 never
+	 * equals a normalised is_kernel value, so the next sample
+	 * collected re-emits its switch notes into the buffer.
+	 */
+	cpu_buf->last_task = 0;
+	cpu_buf->last_is_kernel = -1;
+
+	cpu_buf->pos = 0;
+}
+
diff -Naur -X dontdiff linux/drivers/oprofile/cpu_buffer.h linux2/drivers/oprofile/cpu_buffer.h
--- linux/drivers/oprofile/cpu_buffer.h 2002-12-16 03:53:07.000000000 +0000
+++ linux2/drivers/oprofile/cpu_buffer.h 2003-01-23 20:24:53.000000000 +0000
@@ -20,7 +20,7 @@
 int alloc_cpu_buffers(void);
 
 void free_cpu_buffers(void);
-
+
 /* CPU buffer is composed of such entries (which are
  * also used for context switch notes)
  */
@@ -34,6 +34,7 @@
         /* protected by int_lock */
         unsigned long pos;
         struct task_struct * last_task;
+ int last_is_kernel;
         struct op_sample * buffer;
         unsigned long sample_received;
         unsigned long sample_lost_locked;
@@ -43,4 +44,6 @@
 
 extern struct oprofile_cpu_buffer cpu_buffer[];
 
+void cpu_buffer_reset(struct oprofile_cpu_buffer *cpu_buf);
+
 #endif /* OPROFILE_CPU_BUFFER_H */
diff -Naur -X dontdiff linux/drivers/oprofile/event_buffer.h linux2/drivers/oprofile/event_buffer.h
--- linux/drivers/oprofile/event_buffer.h 2002-12-16 03:53:07.000000000 +0000
+++ linux2/drivers/oprofile/event_buffer.h 2003-01-23 20:24:53.000000000 +0000
@@ -25,9 +25,11 @@
  * relevant data.
  */
 #define ESCAPE_CODE ~0UL
-#define CTX_SWITCH_CODE 1
-#define CPU_SWITCH_CODE 2
-#define COOKIE_SWITCH_CODE 3
+#define CTX_SWITCH_CODE 1
+#define CPU_SWITCH_CODE 2
+#define COOKIE_SWITCH_CODE 3
+#define KERNEL_ENTER_SWITCH_CODE 4
+#define KERNEL_EXIT_SWITCH_CODE 5
  
 /* add data to the event buffer */
 void add_event_entry(unsigned long data);
diff -Naur -X dontdiff linux/include/linux/oprofile.h linux2/include/linux/oprofile.h
--- linux/include/linux/oprofile.h 2003-02-10 19:40:25.000000000 +0000
+++ linux2/include/linux/oprofile.h 2003-01-23 20:24:53.000000000 +0000
@@ -49,7 +49,8 @@
  * Add a sample. This may be called from any context. Pass
  * smp_processor_id() as cpu.
  */
-extern void oprofile_add_sample(unsigned long eip, unsigned long event, int cpu);
+extern void oprofile_add_sample(unsigned long eip, unsigned int is_kernel,
+ unsigned long event, int cpu);
 
 /**
  * Create a file of the given name as a child of the given root, with
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sat Feb 15 2003 - 22:00:33 EST