From ba767dc29f4593a7a44909abcf39c4d4bd50b6b0 Mon Sep 17 00:00:00 2001
From: FKHals <5229803-FKHals@users.noreply.gitlab.com>
Date: Mon, 17 Apr 2023 09:25:37 +0200
Subject: [PATCH] Use syscall to set process to measure by PID

instead of measuring programs implicitly by name (task_struct->comm)
---
 arch/x86/entry/syscalls/syscall_64.tbl |  3 +-
 include/linux/syscalls.h               |  1 +
 include/uapi/asm-generic/unistd.h      |  2 +
 kernel/behave.c                        | 57 ++++++++++++++-------
 kernel/behave.h                        |  4 +-
 pb_utils/pb_submitter/build.sh         |  3 +-
 pb_utils/pb_submitter/measure.c        | 68 ++++++++++++++++++++++++++
 7 files changed, 117 insertions(+), 21 deletions(-)
 create mode 100644 pb_utils/pb_submitter/measure.c

diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index a074966d507e..513a33675e61 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -381,4 +381,5 @@
 546	x32	preadv2			compat_sys_preadv64v2
 547	x32	pwritev2		compat_sys_pwritev64v2
 
-4919 64 pb_set_plan     sys_pb_set_plan
\ No newline at end of file
+4919 64 pb_set_plan     sys_pb_set_plan
+3141 64 pbm_set_root_proc   sys_pbm_set_root_proc
\ No newline at end of file
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index da9e09de0bc2..b7adbd31d6b2 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -209,6 +209,7 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 
 
 asmlinkage long sys_pb_set_plan(struct pb_plan __user *plan);
+asmlinkage long sys_pbm_set_root_proc(void);
 asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
 			       qid_t id, void __user *addr);
 asmlinkage long sys_time(time_t __user *tloc);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 0ae0e0d00890..d392394d575c 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -68,6 +68,8 @@ __SYSCALL(__NR_fremovexattr, sys_fremovexattr)
 
 #define __NR_pb_set_plan 0x1337
 __SYSCALL(__NR_pb_set_plan, sys_pb_set_plan)
+#define __NR_pbm_set_root_proc 3141
+__SYSCALL(__NR_pbm_set_root_proc, sys_pbm_set_root_proc)
 
 /* fs/dcache.c */
 #define __NR_getcwd 17
diff --git a/kernel/behave.c b/kernel/behave.c
index 2d86ea3f22dc..b3cb71774f54 100644
--- a/kernel/behave.c
+++ b/kernel/behave.c
@@ -3,10 +3,33 @@
 #include <linux/perf_event.h>
 #include <linux/kthread.h>
 #include <linux/slab.h>
+#include <linux/syscalls.h>
 
 #include "behave.h"
 
-int is_initialized = 0;
+bool is_initialized = false;
+pid_t root_proc;
+
+/**
+ * Set the calling process as the root task of the PBM and initialize it
+ */
+SYSCALL_DEFINE0(pbm_set_root_proc) {
+	unsigned long irq_flags;
+    long res;
+
+    // avoid context switching during initialization by disabling interrupts
+	local_irq_save(irq_flags);
+	if (!is_initialized) {
+		pbm_init();
+		is_initialized = 1;
+	}
+	local_irq_restore(irq_flags);
+
+    root_proc = current->pid;
+    res = pbm_fork(current, current->real_parent->pid, NULL);
+
+    return 0;
+}
 
 /******************************************************************************
 * Based on "libpbm" (see header file for more info)
@@ -406,7 +429,6 @@ int pbm_task_end(PBM* pbm) {
  * run (current == child) otherwise the perf counting will fail!
  */
 int pbm_fork(struct task_struct* proc, pid_t parent_pid, pbm_NODE* fork_node) {
-	unsigned long irq_flags;
 
 	PBM* parent_pbm;
     PBM* child_pbm;
@@ -416,13 +438,6 @@ int pbm_fork(struct task_struct* proc, pid_t parent_pid, pbm_NODE* fork_node) {
      * pbm_fork_parent_new_task() to not be called but pbm_fork() since the child is "mpirun" so we
      * put the initialization in here instead of into pbm_fork_parent_new_task().
      */
-	// avoid context switching during initialization by disabling interrupts
-	local_irq_save(irq_flags);
-	if (!is_initialized) {
-		pbm_init();
-		is_initialized = 1;
-	}
-	local_irq_restore(irq_flags);
 
 	child_pbm = get_pbm_by_pid(proc->pid);
 	parent_pbm = get_pbm_by_pid(parent_pid);
@@ -602,6 +617,8 @@ void pbm_join_and_print_graph_self(pid_t pid) {
     } else {
         printk(KERN_WARNING "JOIN: PBM not found for: %u\n", pid);
     }
+    // reset so that is_relevant_process() can return early
+    is_initialized = 0;
 }
 
 /* Crude recursive ADG printer, starts with given node */
@@ -680,19 +697,25 @@ void _pbm_unvisit_node(pbm_NODE* node) {
 
 /******************************************************************************/
 
-int is_root_process(struct task_struct* p) {
-    return strcmp(p->comm, "mpirun") == 0;
+bool is_root_process(struct task_struct* p) {
+    return p->pid == root_proc
+            // make sure that no process is accidentally (before init) declared root
+            && is_initialized;
 }
 
-int is_relevant_process(struct task_struct* p) {
+bool is_relevant_process(struct task_struct* p) {
     struct task_struct* proc = p;
-    // check if mpirun is a parent, super-parent, ... until the root-parent ("swapper") is found
-    while (proc && !(strcmp(proc->comm, "swapper") == 0)) {
+    // return early if init is not done since there can be no relevant processes
+    if (!is_initialized) {
+        return false;
+    }
+    // check if the root process is a parent, super-parent, ... until the linux root/idle-process
+    // (comm = "swapper", pid = 0) is found
+    while (proc->pid != 0) {
         if (is_root_process(proc)) {
-            return 1;
+            return true;
         }
-        //printk(KERN_WARNING "Searching relevant process: %s\n", proc->comm);
         proc = proc->real_parent;
     }
-    return 0;
+    return false;
 }
\ No newline at end of file
diff --git a/kernel/behave.h b/kernel/behave.h
index 58b243424eda..ec0439290ddd 100644
--- a/kernel/behave.h
+++ b/kernel/behave.h
@@ -168,8 +168,8 @@ void pbm_uninit(void);
 
 /******************************************************************************/
 
-int is_root_process(struct task_struct* p);
-int is_relevant_process(struct task_struct *p);
+bool is_root_process(struct task_struct* p);
+bool is_relevant_process(struct task_struct *p);
 
 int start_counting(struct task_struct *p);
 int stop_counting(void);
diff --git a/pb_utils/pb_submitter/build.sh b/pb_utils/pb_submitter/build.sh
index 56d3f6d82157..cacedf2ed07b 100755
--- a/pb_utils/pb_submitter/build.sh
+++ b/pb_utils/pb_submitter/build.sh
@@ -10,7 +10,8 @@ gcc -static -o pb_submitter pb_submitter.c
 gcc -static -o test_prog test_prog.c
 gcc -static -o task_long task_long_test.c
 gcc -static -o sysc_long syscall_long_test.c
+gcc -static -o measure measure.c
 
-cp pb_submitter test_prog task_long sysc_long example_run.sh example_plan /root
+cp pb_submitter measure test_prog task_long sysc_long example_run.sh example_plan /root
 
 echo "All done. Run '/root/example_run.sh' within ./run_qemu.sh now"
diff --git a/pb_utils/pb_submitter/measure.c b/pb_utils/pb_submitter/measure.c
new file mode 100644
index 000000000000..c9607ae3025e
--- /dev/null
+++ b/pb_utils/pb_submitter/measure.c
@@ -0,0 +1,68 @@
+#include <stdio.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <libgen.h>
+#include <string.h>
+
+// number of the syscall in the kernel syscall table
+#define BEHAVE_SET_ROOT_PROC 3141
+
+static void usage(void)
+{
+    fputs("Usage: ./measure <prog_name> <argument_1> ...", stderr);
+}
+
+int main(int argc, char** argv)
+{
+    int ret;
+
+    if (argc < 2) {
+        usage();
+        return -1;
+    }
+
+    // start the perf measuring
+    ret = syscall(BEHAVE_SET_ROOT_PROC);
+    if (ret != 0) {
+        fputs("BEHAVE_SET_ROOT_PROC failed", stderr);
+        return -1;
+    }
+
+    /**
+     * BEWARE:
+     * We also measure the execve call etc. which does not really
+     * belong to the program but as long as we do it in the forecast model
+     * (here) as well as in the runtime model the resulting models should be
+     * rather similar.
+     */
+
+    // write the root proc pid to STDOUT so that it can be read using e.g. a bash pipe
+    char buffer[16];
+    snprintf(buffer, sizeof(buffer), "%d", getpid());
+    write(1 /*STDOUT*/, buffer, strlen(buffer));
+
+    /**
+     * BEWARE:
+     * Since we use STDOUT to pass arguments to a following program we
+     * must make sure that the measured program itself does NOT write to STDOUT
+     * but only to STDERR!
+     */
+
+    // drop the first element of argv (the name of this program) so that only
+    // the name and arguments of the program to run remain
+    char ** args = &argv[1];
+
+    // replace this process with the chosen program since we want to keep using
+    // the same PID for simplicity purposes (forking a new process would have
+    // worked as well but this process is not needed anymore anyways)
+    ret = execvp(argv[1], args);
+
+    if (ret < 0) {
+        perror("execve");
+        return -1;
+    }
+
+    return 0;
+}
-- 
GitLab