From 6cb2451153260e75edbc5be5a0ee093581122ab5 Mon Sep 17 00:00:00 2001
From: FKHals <5229803-FKHals@users.noreply.gitlab.com>
Date: Wed, 27 Sep 2023 15:53:29 +0200
Subject: [PATCH] Add measuring multiple different tasks concurrently

by using a pid->perf_event "hashmap" to access the corresponding
perf_event for the current task.
---
 kernel/Makefile      |   2 +
 kernel/behave.c      | 180 +++++++++++++++++++++++++++++++++++++++++++
 kernel/behave.h      |  14 ++++
 kernel/events/core.c |   2 +
 kernel/exit.c        |   9 +++
 kernel/fork.c        |   9 +++
 pb_utils/run_qemu.sh |   3 +-
 7 files changed, 218 insertions(+), 1 deletion(-)
 create mode 100644 kernel/behave.c
 create mode 100644 kernel/behave.h

diff --git a/kernel/Makefile b/kernel/Makefile
index 4cb8e8b23c6e..9aa7973798ea 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -122,3 +122,5 @@ $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
 targets += config_data.h
 $(obj)/config_data.h: $(obj)/config_data.gz FORCE
 	$(call filechk,ikconfiggz)
+
+obj-y += behave.o
\ No newline at end of file
diff --git a/kernel/behave.c b/kernel/behave.c
new file mode 100644
index 000000000000..c8fa9d32eb2d
--- /dev/null
+++ b/kernel/behave.c
@@ -0,0 +1,180 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+
+#include "behave.h"
+
+int is_initialized = 0; // set to 1 by start_counting() after the first map initialization
+
+// =============================================================================
+#define PROC_BUF_SIZE 512 // number of slots in each of the two map arrays
+
+/*
+ * PID -> perf_event* lookup table ("hashmap" in name only: lookups scan it
+ * linearly). Only insertion is supported currently (since memory efficiency
+ * is not our current concern).
+ */
+typedef struct {
+    // two parallel arrays: the pid stored at an index belongs to the perf_event*
+    // stored at the same index of the other array
+    pid_t index_to_pid[PROC_BUF_SIZE];
+    struct perf_event* index_to_pevent[PROC_BUF_SIZE];
+    size_t last_proc_index; // index of currently last process in the arrays
+} task_pevent_map;
+
+task_pevent_map task_2_pevent; // the single, global map instance
+
+/*
+ * Resets the global map to its empty state: every pid 0, every perf_event*
+ * NULL and last_proc_index 0, then logs which task triggered the reset.
+ */
+void init_task_pevent_map(void) {
+	// all-zero bytes are exactly the "unused" values the slots start with
+	memset(&task_2_pevent, 0, sizeof(task_2_pevent));
+	printk(KERN_WARNING "Task map initialized by TASK: %u\n", current->pid);
+}
+
+/*
+ * Looks up the perf_event* that was registered for the given PID.
+ *
+ * Returns the stored perf_event*, or NULL if no slot holds that PID. Slot 0
+ * counts as "no entry", so a PID that matches it (e.g. PID 0 after
+ * initialization) also yields NULL (assumption: process with PID=0 is not
+ * considered a normal process).
+ *
+ * This is a primitive "Hashmap"-retrieval implementation (O(n))
+ */
+struct perf_event* get_pevent_by_pid(pid_t pid) {
+	size_t slot;
+
+	// both arrays share their indices, so the slot whose PID matches also
+	// locates the perf_event*
+	for (slot = 0; slot <= task_2_pevent.last_proc_index; slot++) {
+		if (task_2_pevent.index_to_pid[slot] == pid) {
+			return slot == 0 ? NULL : task_2_pevent.index_to_pevent[slot];
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Adds a process (pid, perf_event) to the pid->perf_event hashmap (NON-idempotently!)
+ * Returns 1 if the entry was stored, 0 if the map is full (slot 0 is never used).
+ */
+int add_proc_to_map(pid_t pid, struct perf_event* pevent) {
+	// the slot written below is last_proc_index + 1, so that is the index to check
+	if (task_2_pevent.last_proc_index + 1 >= PROC_BUF_SIZE) {
+		printk(KERN_WARNING "PROC MAP ADD: last_proc_index too large: %zu\n", task_2_pevent.last_proc_index);
+		return 0;
+	}
+	printk(KERN_WARNING "i: %zu, pid: %u\n", task_2_pevent.last_proc_index, pid);
+	task_2_pevent.last_proc_index++;
+	task_2_pevent.index_to_pid[task_2_pevent.last_proc_index] = pid;
+	task_2_pevent.index_to_pevent[task_2_pevent.last_proc_index] = pevent;
+	return 1;
+}
+// =============================================================================
+
+/*
+ * Creates a perf_event counting the user-space instructions of task p and
+ * registers it in the pid->perf_event map (initializing the map on first use).
+ * Returns 1 on success, 0 if the event could not be created or registered.
+ */
+int start_counting(struct task_struct *p) {
+	unsigned long irq_flags;
+	struct perf_event_attr pe;
+	struct perf_event *pevent;
+	u64 cpu;
+
+	if (!is_initialized) {
+		init_task_pevent_map();
+		is_initialized = 1;
+	}
+
+	memset(&pe, 0, sizeof(struct perf_event_attr));
+	pe.type = PERF_TYPE_HARDWARE;
+	pe.size = sizeof(struct perf_event_attr);
+	pe.config = PERF_COUNT_HW_INSTRUCTIONS;
+	pe.sample_period = 400800;
+	pe.disabled = 0;		// start the counter as soon as we're in userland
+	pe.pinned = 1;			// ?
+	pe.exclude_kernel = 1;
+	pe.exclude_hv = 1;
+	pe.wakeup_events = 1;	// Not needed on 3.2?
+
+	cpu = smp_processor_id();
+	printk(KERN_WARNING "TASK: %u, CPU: %llu, PTR: %llu\n", p->pid, cpu, (u64)p);
+	// disable irqs to make 'perf_event_ctx_activate' in 'kernel/events/core.c' happy
+	local_irq_save(irq_flags);
+	// cpu -1: measure on all cores (in case the process runs on different ones)
+	// task p: exclusively measure the forked process (BEWARE: a process can only measure itself!)
+	pevent = perf_event_create_kernel_counter(&pe, -1, p, NULL /* &overflow_handler */, NULL);
+	local_irq_restore(irq_flags);
+
+	if (IS_ERR(pevent)) {
+		printk(KERN_WARNING "TASK: %u | PB ERROR INITIALISING PERF EVENT: %li\n", p->pid, PTR_ERR(pevent));
+		return 0;
+	}
+	if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
+		printk(KERN_WARNING "TASK: %u | Event is inactive\n", p->pid);
+	}
+	if (!add_proc_to_map(p->pid, pevent)) {
+		// map is full: an unregistered event could never be found and released again
+		perf_event_release_kernel(pevent);
+		return 0;
+	}
+	printk(KERN_WARNING "TASK: %u | Counting started...\n", p->pid);
+	return 1;
+}
+
+/*
+ *	handle the perf overflow event -> task needed more instructions than planned
+ *	(only logs for now; start_counting() does not register this handler)
+ */
+void overflow_handler(
+		struct perf_event *event,
+		struct perf_sample_data *data,
+		struct pt_regs *regs)
+{
+	printk(KERN_WARNING "PB TASK RAN TOO LONG\n");
+	// TODO: reset the counter (idea: perf_event_get + ->unlocked_ioctl(event, reset))
+}
+
+// Reads current's instruction counter, then unmaps, disables and releases its
+// perf_event. Always returns 0, whether or not an event was found.
+int stop_counting(void) {
+	unsigned long irq_flags;
+	int read_error;
+	struct perf_event *pevent;
+	u64 perf_counter = 0; // still printed below when the read fails
+	size_t i;
+
+	if (!is_initialized) {
+		printk(KERN_WARNING "TASK: %u | Pevent map not initialized!\n", current->pid);
+		return 0;
+	}
+	printk(KERN_WARNING "TASK: %u | Stopping counting...\n", current->pid);
+	pevent = get_pevent_by_pid(current->pid);
+	if (IS_ERR_OR_NULL(pevent)) {
+		printk(KERN_WARNING "TASK: %u | ERROR: Could not find perf_event!\n", current->pid);
+		return 0;
+	}
+
+	read_error = perf_event_read_local(pevent, &perf_counter);
+	if (read_error) {
+		printk(KERN_WARNING "TASK: %u | FETCHING PERFORMANCE COUNTER IN stop_counting FAILED WITH %i\n", current->pid, read_error);
+	}
+	// unmap first, the stored pointer dangles after release; pid 0 marks the slot as dead for lookups
+	for (i = 1; i < PROC_BUF_SIZE && i <= task_2_pevent.last_proc_index; i++) {
+		if (task_2_pevent.index_to_pevent[i] == pevent)
+			task_2_pevent.index_to_pid[i] = 0;
+	}
+	local_irq_save(irq_flags);
+	perf_event_disable(pevent);
+	perf_event_release_kernel(pevent);
+	local_irq_restore(irq_flags);
+	printk(KERN_WARNING "TASK: %u | ...Counting stopped: %llu instr.\n", current->pid, perf_counter);
+	return 0;
+}
diff --git a/kernel/behave.h b/kernel/behave.h
new file mode 100644
index 000000000000..4e83c96cfcce
--- /dev/null
+++ b/kernel/behave.h
@@ -0,0 +1,14 @@
+#ifndef PLAN_BASED_LINUX_SCHEDULER_BEHAVE_H
+#define PLAN_BASED_LINUX_SCHEDULER_BEHAVE_H
+
+#include <linux/perf_event.h>
+
+/* Attach an instruction counter to task p; returns 1 on success, 0 on failure. */
+int start_counting(struct task_struct *p);
+/* Read and release the counter of the current task; always returns 0. */
+int stop_counting(void);
+
+/* perf overflow callback; only logs that the task ran too long. */
+void overflow_handler(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs);
+
+#endif //PLAN_BASED_LINUX_SCHEDULER_BEHAVE_H
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e886b2593e3a..cffe84d8f4dd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3702,6 +3702,8 @@ int perf_event_read_local(struct perf_event *event, u64 *value)
 	/* If this is a per-task event, it must be for current */
 	if ((event->attach_state & PERF_ATTACH_TASK) &&
 	    event->hw.target != current) {
+		printk(KERN_EMERG "ERROR: HW-target: %llu [pid: %u], current: %llu [pid: %u]\n",
+				(u64)event->hw.target, event->hw.target->pid, (u64)current, current->pid);
 		ret = -EINVAL;
 		goto out;
 	}
diff --git a/kernel/exit.c b/kernel/exit.c
index c5548faa9f37..830ac3bcc9cd 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -68,6 +68,8 @@
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
 
+#include "behave.h"
+
 static void __unhash_process(struct task_struct *p, bool group_dead)
 {
 	nr_threads--;
@@ -764,6 +766,13 @@ void __noreturn do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
+
+	// call the readout before the process is terminated
+	if (strcmp(tsk->real_parent->comm, "bash") == 0) {
+		stop_counting();
+        printk(KERN_EMERG "EXIT: %u, CMD: '%s', PTR: %llu\n", tsk->pid, tsk->comm, (u64)tsk);
+	}
+
 	TASKS_RCU(int tasks_rcu_i);
 
 	profile_task_exit(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index b7e9e57b71ea..fd4167b88d3b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -101,6 +101,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/task.h>
 
+#include "behave.h"
+
 /*
  * Minimum number of threads to boot the kernel
  */
@@ -2014,6 +2016,7 @@ long _do_fork(unsigned long clone_flags,
 	int trace = 0;
 	long nr;
 
+    printk(KERN_EMERG "DO FORK CALLED by: %u\n", current->pid);
 	/*
 	 * Determine whether and which event to report to ptracer.  When
 	 * called from kernel_thread or CLONE_UNTRACED is explicitly
@@ -2069,6 +2072,12 @@ long _do_fork(unsigned long clone_flags,
 		}
 
 		put_pid(pid);
+        printk(KERN_EMERG "FORKED!!!!: %u\n", p->pid);
+		//FIXME At this point p->comm is not up to date but shows the command of the parent!
+		//      (This may not be a problem since the name of the forked processes are not needed?)
+		if (strcmp(p->comm, "bash") == 0) {
+			start_counting(p);
+		}
 	} else {
 		nr = PTR_ERR(p);
 	}
diff --git a/pb_utils/run_qemu.sh b/pb_utils/run_qemu.sh
index d426580d65bc..cbce27e9053e 100755
--- a/pb_utils/run_qemu.sh
+++ b/pb_utils/run_qemu.sh
@@ -9,7 +9,8 @@ COMMAND="qemu-system-x86_64"
 [ "$MODE" == "gdb" ] &&  COMMAND+=$DEBUG_OPTIONS
 COMMAND+=" -kernel $KERNEL -cpu host"
 COMMAND+=$IMAGE_OPTIONS
-COMMAND+=" -append \"root=/dev/sda rootwait rw single console=ttyS0 nokaslr\"" # disable kaslr for better gdb debugging
+# linux kernel parameters: single user mode, disable kaslr for better gdb debugging
+COMMAND+=" -append \"root=/dev/sda rootwait rw single console=ttyS0 nokaslr\""
 COMMAND+=" --enable-kvm"
 COMMAND+=" --nographic"
 COMMAND+=" --smp 1"
-- 
GitLab