diff --git a/kernel/Makefile b/kernel/Makefile
index 4cb8e8b23c6ecbcfbf12fda4980a0e9957b94346..9aa7973798eae1177b88a7aeea4aa30489250712 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -122,3 +122,5 @@ $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
 targets += config_data.h
 $(obj)/config_data.h: $(obj)/config_data.gz FORCE
 	$(call filechk,ikconfiggz)
+
+obj-y += behave.o
\ No newline at end of file
diff --git a/kernel/behave.c b/kernel/behave.c
new file mode 100644
index 0000000000000000000000000000000000000000..c8fa9d32eb2d80315572c0c4be0a20a73e078fb7
--- /dev/null
+++ b/kernel/behave.c
@@ -0,0 +1,180 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+
+#include "behave.h"
+
+int is_initialized = 0;
+
+// =============================================================================
+#define PROC_BUF_SIZE 512
+
+/*
+ * PID -> perf_event* hashmap
+ * Only insertion is supported currently (since memory efficiency is not our
+ * current concern)
+ */
+typedef struct {
+    // primitive pid_t -> perf_event* "hashmap" (not really a hash map):
+    // two parallel arrays store the pid and the corresponding perf_event* at the same index
+    pid_t index_to_pid[PROC_BUF_SIZE];
+    struct perf_event* index_to_pevent[PROC_BUF_SIZE];
+    size_t last_proc_index; // index of the currently last process in the arrays
+} task_pevent_map;
+
+task_pevent_map task_2_pevent;
+
+void init_task_pevent_map(void) {
+    int i;
+    for (i = 0; i < PROC_BUF_SIZE; ++i) {
+        task_2_pevent.index_to_pid[i] = 0;
+        task_2_pevent.index_to_pevent[i] = NULL;
+    }
+    task_2_pevent.last_proc_index = 0;
+    printk(KERN_WARNING "Task map initialized by TASK: %u\n", current->pid);
+}
+
+/*
+ * Returns the perf_event* of the process with the given PID if it exists,
+ * otherwise NULL (assumption: PID 0 is not a normal process, so index 0 can
+ * serve as the "not found" marker)
+ *
+ * This is a primitive "hashmap" retrieval implementation (O(n))
+ */
+struct perf_event* get_pevent_by_pid(pid_t pid) {
+    size_t proc_index;
+    size_t i;
+    // to get the perf_event by PID, find the index that corresponds to the
+    // given PID and use it to retrieve the entry of the second array
+    // directly by index
+    proc_index = 0;
+    for(i = 0; i <= task_2_pevent.last_proc_index; i++) {
+        if (task_2_pevent.index_to_pid[i] == pid) {
+            proc_index = i;
+            break;
+        }
+    }
+    return proc_index != 0 ? task_2_pevent.index_to_pevent[proc_index] : NULL;
+}
+
+/*
+ * Adds a process (pid, perf_event) to the pid->perf_event hashmap (NON-idempotently!)
+ *
+ * Returns whether the process has been successfully inserted into the hashmap
+ */
+int add_proc_to_map(pid_t pid, struct perf_event* pevent) {
+    if (task_2_pevent.last_proc_index >= PROC_BUF_SIZE - 1) { // index is pre-incremented below, so keep one slot of headroom
+        printk(KERN_WARNING "PROC MAP ADD: last_proc_index too large: %zu\n", task_2_pevent.last_proc_index);
+        return 0;
+    }
+    printk(KERN_WARNING "i: %zu, pid: %u\n", task_2_pevent.last_proc_index, pid);
+    task_2_pevent.last_proc_index++;
+    task_2_pevent.index_to_pid[task_2_pevent.last_proc_index] = pid;
+    task_2_pevent.index_to_pevent[task_2_pevent.last_proc_index] = pevent;
+    return 1;
+}
+// =============================================================================
+
+int start_counting(struct task_struct *p) {
+    unsigned long irq_flags;
+    struct perf_event_attr pe;
+    struct perf_event *pevent;
+    u64 cpu;
+
+    if (!is_initialized) {
+        init_task_pevent_map();
+        is_initialized = 1;
+    }
+
+    memset(&pe, 0, sizeof(struct perf_event_attr));
+    pe.type = PERF_TYPE_HARDWARE;
+    pe.size = sizeof(struct perf_event_attr);
+    pe.config = PERF_COUNT_HW_INSTRUCTIONS;
+    pe.sample_period = 400800;
+    pe.disabled = 0; // start the counter as soon as we're in userland
+    pe.pinned = 1; // ?
+    pe.exclude_kernel = 1;
+    pe.exclude_hv = 1;
+
+    // Not needed on 3.2?
+    pe.wakeup_events = 1;
+
+    cpu = smp_processor_id();
+    printk(KERN_WARNING "TASK: %u, CPU: %llu, PTR: %llu\n", p->pid, cpu, (u64)p);
+
+    // disable irqs to make 'perf_event_ctx_activate' in 'kernel/events/core.c' happy
+    local_irq_save(irq_flags);
+    pevent = perf_event_create_kernel_counter(
+        &pe,
+        -1, // measure on all cores (in case the process runs on different ones)
+        p, // exclusively measure the forked process (BEWARE: a process can only measure itself!)
+        NULL, //&overflow_handler,
+        NULL
+    );
+    local_irq_restore(irq_flags);
+
+    if (IS_ERR(pevent)) {
+        printk(KERN_WARNING "TASK: %u | PB ERROR INITIALISING PERF EVENT: %li\n", p->pid, PTR_ERR(pevent));
+        return 0;
+    }
+    if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
+        printk(KERN_WARNING "TASK: %u | Event is inactive\n", p->pid);
+    }
+
+    add_proc_to_map(p->pid, pevent);
+
+    printk(KERN_WARNING "TASK: %u | Counting started...\n", p->pid);
+    return 1;
+}
+
+/*
+ * handle the perf overflow event -> the task needed more instructions than planned
+ */
+void overflow_handler(
+    struct perf_event *event,
+    struct perf_sample_data *data,
+    struct pt_regs *regs)
+{
+    printk(KERN_WARNING "PB TASK RAN TOO LONG\n");
+    //perf_event_get
+    //->unlocked_ioctl(event, reset);
+}
+
+int stop_counting(void) {
+    unsigned long irq_flags;
+    int read_error;
+    struct perf_event *pevent;
+    u64 perf_counter = 0; // stays 0 if the read below fails
+
+    if (!is_initialized) {
+        printk(KERN_WARNING "TASK: %u | Pevent map not initialized!\n", current->pid);
+        return 0;
+    }
+
+    printk(KERN_WARNING "TASK: %u | Stopping counting...\n", current->pid);
+    pevent = get_pevent_by_pid(current->pid);
+    if (!pevent) {
+        printk(KERN_WARNING "TASK: %u | ERROR: Could not find perf_event!\n", current->pid);
+        return 0;
+    }
+    if (IS_ERR(pevent)) {
+        printk(KERN_WARNING "TASK: %u | PEVENT INVALID\n", current->pid);
+        return 0;
+    }
+
+    read_error = perf_event_read_local(pevent, &perf_counter);
+    if (read_error) {
+        printk(KERN_WARNING "TASK: %u | FETCHING PERFORMANCE COUNTER IN stop_counting FAILED WITH %i\n", current->pid, read_error);
+    }
+
+    local_irq_save(irq_flags);
+    perf_event_disable(pevent);
+    perf_event_release_kernel(pevent);
+    local_irq_restore(irq_flags);
+    pevent = NULL;
+
+    printk(KERN_WARNING "TASK: %u | ...Counting stopped: %llu instr.\n", current->pid, perf_counter);
+    return 0;
+}
diff --git a/kernel/behave.h b/kernel/behave.h
new file mode 100644
index 0000000000000000000000000000000000000000..4e83c96cfcce2d12280fcbf663620b557f3f1e8f
--- /dev/null
+++ b/kernel/behave.h
@@ -0,0 +1,14 @@
+#ifndef PLAN_BASED_LINUX_SCHEDULER_BEHAVE_H
+#define PLAN_BASED_LINUX_SCHEDULER_BEHAVE_H
+
+#include <linux/perf_event.h>
+
+int start_counting(struct task_struct *p);
+int stop_counting(void);
+
+void overflow_handler(
+    struct perf_event *,
+    struct perf_sample_data *,
+    struct pt_regs *regs);
+
+#endif //PLAN_BASED_LINUX_SCHEDULER_BEHAVE_H
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e886b2593e3ad11414e674b52879c1ef7f79eff8..cffe84d8f4dd465de40fee874d7f5c4e47bb674d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3702,6 +3702,8 @@ int perf_event_read_local(struct perf_event *event, u64 *value)
 	/* If this is a per-task event, it must be for current */
 	if ((event->attach_state & PERF_ATTACH_TASK) &&
 	    event->hw.target != current) {
+		printk(KERN_EMERG "ERROR: HW-target: %llu [pid: %u], current: %llu [pid: %u]\n",
+		       (u64)event->hw.target, event->hw.target->pid, (u64)current, current->pid);
 		ret = -EINVAL;
 		goto out;
 	}
diff --git a/kernel/exit.c b/kernel/exit.c
index c5548faa9f377c5bf01f4a4db8e3020448565469..830ac3bcc9cd93064cb675c2ed052481258ea0ee 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -68,6 +68,8 @@
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
 
+#include "behave.h"
+
 static void __unhash_process(struct task_struct *p, bool group_dead)
 {
 	nr_threads--;
@@ -764,6 +766,13 @@ void __noreturn do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
+
+	// call the readout before the process is terminated
+	if (strcmp(tsk->real_parent->comm, "bash") == 0) {
+		stop_counting();
+		printk(KERN_EMERG "EXIT: %u, CMD: '%s', PTR: %llu\n", tsk->pid, tsk->comm, (u64)tsk);
+	}
+
 	TASKS_RCU(int tasks_rcu_i);
 
 	profile_task_exit(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index b7e9e57b71eaef65bd56b409b32b582e9b16ed4a..fd4167b88d3b43719e07ad65794518b284ecac5c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -101,6 +101,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/task.h>
 
+#include "behave.h"
+
 /*
  * Minimum number of threads to boot the kernel
  */
@@ -2014,6 +2016,7 @@
 	int trace = 0;
 	long nr;
 
+	printk(KERN_EMERG "DO FORK CALLED by: %u\n", current->pid);
 	/*
 	 * Determine whether and which event to report to ptracer. When
 	 * called from kernel_thread or CLONE_UNTRACED is explicitly
@@ -2069,6 +2072,12 @@
 		}
 
 		put_pid(pid);
+		printk(KERN_EMERG "FORKED!!!!: %u\n", p->pid);
+		//FIXME At this point p->comm is not up to date but shows the command of the parent!
+		// (This may not be a problem since the names of the forked processes are not needed?)
+		if (strcmp(p->comm, "bash") == 0) {
+			start_counting(p);
+		}
 	} else {
 		nr = PTR_ERR(p);
 	}
diff --git a/pb_utils/run_qemu.sh b/pb_utils/run_qemu.sh
index d426580d65bc2283e9457774f177d0c289bb45f2..cbce27e9053e9fc09e3b3a6802a99eb2cb913ab5 100755
--- a/pb_utils/run_qemu.sh
+++ b/pb_utils/run_qemu.sh
@@ -9,7 +9,8 @@ COMMAND="qemu-system-x86_64"
 [ "$MODE" == "gdb" ] && COMMAND+=$DEBUG_OPTIONS
 COMMAND+=" -kernel $KERNEL -cpu host"
 COMMAND+=$IMAGE_OPTIONS
-COMMAND+=" -append \"root=/dev/sda rootwait rw single console=ttyS0 nokaslr\"" # disable kaslr for better gdb debugging
+# linux kernel parameters: single user mode, disable kaslr for better gdb debugging
+COMMAND+=" -append \"root=/dev/sda rootwait rw single console=ttyS0 nokaslr\""
 COMMAND+=" --enable-kvm"
 COMMAND+=" --nographic"
 COMMAND+=" --smp 1"
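For a quick userspace sanity check of the counter configuration used in kernel/behave.c:start_counting(), the sketch below counts PERF_COUNT_HW_INSTRUCTIONS for the calling process via the perf_event_open(2) syscall, mirroring the type/config/exclude_kernel/exclude_hv fields set above. It is a hypothetical, standalone test program and not part of this patch; the file name count_instructions.c and the workload loop are illustrative only, and sample_period is omitted, so the event is a plain counter rather than a sampling event.

/* count_instructions.c - illustrative userspace check, not part of the patch.
 * Mirrors the perf_event_attr fields set in kernel/behave.c:start_counting(). */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>

/* glibc has no wrapper for perf_event_open, so call the raw syscall */
static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
    return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
    struct perf_event_attr pe;
    uint64_t count = 0;
    volatile uint64_t i, sum = 0;
    int fd;

    memset(&pe, 0, sizeof(pe));
    pe.type = PERF_TYPE_HARDWARE;
    pe.size = sizeof(pe);
    pe.config = PERF_COUNT_HW_INSTRUCTIONS; /* same event as start_counting() */
    pe.disabled = 1;                        /* start stopped, enable explicitly */
    pe.exclude_kernel = 1;                  /* count userland instructions only */
    pe.exclude_hv = 1;

    fd = perf_event_open(&pe, 0 /* this process */, -1 /* any CPU */, -1, 0);
    if (fd == -1) {
        perror("perf_event_open");
        return 1;
    }

    ioctl(fd, PERF_EVENT_IOC_RESET, 0);
    ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);

    for (i = 0; i < 1000000; i++)           /* some userland work to count */
        sum += i;

    ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
    if (read(fd, &count, sizeof(count)) != sizeof(count))
        perror("read");
    printf("instructions retired: %llu (sum=%llu)\n",
           (unsigned long long)count, (unsigned long long)sum);

    close(fd);
    return 0;
}

Build with e.g. gcc -O0 -o count_instructions count_instructions.c and run it with kernel.perf_event_paranoid permitting self-monitoring; the restriction that a task can only read its own counter locally is the same one perf_event_read_local() enforces in the kernel/events/core.c hunk above.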