diff --git a/kernel/behave.c b/kernel/behave.c
index c8fa9d32eb2d80315572c0c4be0a20a73e078fb7..f90b5a503ffbcccdc34faf8c12db6fc4bda73972 100644
--- a/kernel/behave.c
+++ b/kernel/behave.c
@@ -8,153 +8,322 @@
 
 int is_initialized = 0;
 
-// =============================================================================
+/******************************************************************************
+* Based on "libpbm" (see header file for more info)
+*/
+
+#define TASK_BUF_SIZE 4096
 #define PROC_BUF_SIZE 512
 
-/*
- * PID -> perf_event* Hashmap
- * Only insertion is supported currently (since memory efficiency is not our
+/* -----------------------------------------------------------------------------
+ * PID -> PBM* Hashmap
+ * Only insertion is needed currently (since memory efficiency is not our
  * current concern)
  */
 typedef struct {
-    // primitive int -> pid_t and int -> perf_event* "hashmap" (not really) combination implemented
-    // using two arrays to store the pid and corresponding perf_event* at the same index
+    // primitive int -> int and int -> PBM* hashmap combination
+    // the two arrays store the pid and corresponding PBM* at the same index
     pid_t index_to_pid[PROC_BUF_SIZE];
-    struct perf_event* index_to_pevent[PROC_BUF_SIZE];
-    size_t last_proc_index; // index of currently last process in the arrays
-} task_pevent_map;
+    PBM* index_to_pbm[PROC_BUF_SIZE];
+    // index of currently last process in the arrays
+    size_t last_proc_index;
+    //pthread_mutex_t lock;
+} shared_pbm_int_map;
+
+shared_pbm_int_map _index_2_pbm;
+shared_pbm_int_map* index_2_pbm = &_index_2_pbm;
 
-task_pevent_map task_2_pevent;
 
-void init_task_pevent_map(void) {
-	int i;
+static void init_pbm_int_map(void) {
+    int i;
+    //index_2_pbm = init_shared_memory(index_2_pbm, sizeof(shared_pbm_int_map));
+
     for (i = 0; i < PROC_BUF_SIZE; ++i) {
-        task_2_pevent.index_to_pid[i] = 0;
-        task_2_pevent.index_to_pevent[i] = NULL;
+        index_2_pbm->index_to_pid[i] = 0;
+        index_2_pbm->index_to_pbm[i] = NULL;
     }
-    task_2_pevent.last_proc_index = 0;
-	printk(KERN_WARNING "Task map initialized by TASK: %u\n", current->pid);
+    index_2_pbm->last_proc_index = 0;
+    //init_shared_lock(&index_2_pbm->lock);
+}
+
+static void uninit_pbm_int_map(void) {
+    //munmap(index_2_pbm, sizeof(index_2_pbm));
 }
 
 /*
- * Returns the perf_event* of the corresponding given process by PID if it
+ * Returns the PBM-pointer of the corresponding given process by PID if it
  * exists otherwise it returns NULL (assumption: process with PID=0 is not
  * considered a normal process)
  *
  * This is a primitive "Hashmap"-retrieval implementation (O(n))
  */
-struct perf_event* get_pevent_by_pid(pid_t pid) {
-	size_t proc_index;
-	size_t i;
+PBM* get_pbm_by_pid(pid_t pid) {
+    size_t i;
     // to get the PBM by PID one needs to find the index that corresponds to
     // the given PID and use that to retrieve the PBM of the second array
     // directly by index
-    proc_index = 0;
-    for(i = 0; i <= task_2_pevent.last_proc_index; i++) {
-        if (task_2_pevent.index_to_pid[i] == pid) {
+    size_t proc_index = 0;
+    for(i = 0; i < PROC_BUF_SIZE; i++) {
+        if (index_2_pbm->index_to_pid[i] == pid) {
             proc_index = i;
             break;
         }
     }
-    return proc_index != 0 ? task_2_pevent.index_to_pevent[proc_index] : NULL;
+    return proc_index != 0 ? index_2_pbm->index_to_pbm[proc_index] : NULL;
 }
 
 /*
- * Adds a process (pid, perf_event) to the pid->perf_event hashmap (NON-idempotently!)
+ * Adds a process (pid, pbm) to the pid->pbm hashmap (NON-idempotently!)
  *
  * Returns if the process has been successfully inserted into the hashmap
  */
-int add_proc_to_map(pid_t pid, struct perf_event* pevent) {
-    if (PROC_BUF_SIZE <= task_2_pevent.last_proc_index) {
-        printk(KERN_WARNING "PROC MAP ADD: last_proc_index too large: %lu\n", task_2_pevent.last_proc_index);
+int add_proc_to_map(pid_t pid, PBM* pbm) {
+    //pthread_mutex_lock(&index_2_pbm->lock);
+    if (PROC_BUF_SIZE - 1 <= index_2_pbm->last_proc_index) { // map arrays hold PROC_BUF_SIZE entries and the index is pre-incremented below
+        printk(KERN_WARNING "PROC MAP ADD: last_proc_index too large: %lu\n", index_2_pbm->last_proc_index);
         return 0;
     }
-    printk(KERN_WARNING "i: %lu, pid: %u\n", task_2_pevent.last_proc_index, pid);
-    task_2_pevent.last_proc_index++;
-    task_2_pevent.index_to_pid[task_2_pevent.last_proc_index] = pid;
-    task_2_pevent.index_to_pevent[task_2_pevent.last_proc_index] = pevent;
+    printk(KERN_WARNING "i: %lu, pid: %u\n", index_2_pbm->last_proc_index, pid);
+    index_2_pbm->last_proc_index++; // slot 0 stays unused: get_pbm_by_pid() treats index 0 as "not found"
+    index_2_pbm->index_to_pid[index_2_pbm->last_proc_index] = pid;
+    index_2_pbm->index_to_pbm[index_2_pbm->last_proc_index] = pbm;
+    //pthread_mutex_unlock(&index_2_pbm->lock);
     return 1;
 }
-// =============================================================================
 
-int start_counting(struct task_struct *p) {
+static void debug_print_map(void) {
+    size_t i;
+    // lock to make the map printing sequential without interleaving other
+    // outputs
+    //pthread_mutex_lock(&index_2_pbm->lock);
+    printk(KERN_WARNING "MAP\n-----\n");
+    for(i = 1; i < PROC_BUF_SIZE; i++) {
+        if (0 == index_2_pbm->index_to_pid[i])
+            break;
+        printk(KERN_WARNING "  %u\n", index_2_pbm->index_to_pid[i]);
+    }
+    printk(KERN_WARNING "-----\n");
+    //pthread_mutex_unlock(&index_2_pbm->lock);
+}
+
+/* -----------------------------------------------------------------------------
+ * Task buffer which holds the nodes of the task graph
+ */
+typedef struct {
+    // buffer that holds the nodes of the task graph
+    pbm_NODE task_buffer[TASK_BUF_SIZE];
+    // index of current task in task_buffer[]
+    uint32_t curr_task_index;
+    //pthread_mutex_t lock;
+} task_buf;
+
+task_buf _tasks;
+task_buf* tasks = &_tasks;
+
+void init_task_buf(void) {
+    //tasks = init_shared_memory(tasks, sizeof(task_buf));
+    memset(tasks, 0, sizeof(task_buf));
+    tasks->curr_task_index = 0;
+    //init_shared_lock(&tasks->lock);
+}
+
+void uninit_task_buf(void) {
+    //munmap(tasks, sizeof(tasks));
+}
+
+/* Hands out the next unused node of the static task buffer, or NULL if it is exhausted. */
+pbm_NODE* task_alloc(void) {
+    uint32_t slot;
+    //pthread_mutex_lock(&tasks->lock);
+    slot = tasks->curr_task_index;
+    if (slot >= TASK_BUF_SIZE) {
+        printk(KERN_WARNING "ERROR: Tried to alloc more tasks than available!\n");
+        return NULL;
+    }
+    tasks->curr_task_index = slot + 1; // reserve the slot by moving the cursor past it
+    //pthread_mutex_unlock(&tasks->lock);
+    return tasks->task_buffer + slot;
+}
+
+/* Debug helper: dumps every task node allocated so far to the kernel log. */
+void debug_print_tasks(void) {
+    size_t i;
+    printk(KERN_WARNING "-----\nTASKS:\n");
+    for(i = 0; i < tasks->curr_task_index; i++) { // curr_task_index is the next FREE slot, so it is excluded
+        const pbm_NODE* t = &tasks->task_buffer[i]; // read in place instead of copying the node
+        printk(KERN_WARNING "type: %u, thread_id: %u, count: %llu,  children: %llx, next_sib: %llx\n", t->type, t->thread_id, t->count, (uint64_t)t->children, (uint64_t)t->next_sib);
+    }
+    printk(KERN_WARNING "^^^^^\n");
+}
+
+ /* -----------------------------------------------------------------------------
+ * PBM buffer which holds the nodes of the process graph
+ */
+typedef struct {
+    // buffer that holds the nodes of the process graph
+    PBM process_buffer[PROC_BUF_SIZE];
+    // index of current process in proc_buffer[]
+    uint32_t curr_proc_index;
+    //pthread_mutex_t lock;
+} proc_buf;
+
+proc_buf _procs;
+proc_buf* procs = &_procs;
+
+static void init_proc_buf(void) {
+    //procs = init_shared_memory(procs, sizeof(proc_buf));
+    memset(procs, 0, sizeof(proc_buf));
+    procs->curr_proc_index = 0;
+    //init_shared_lock(&procs->lock);
+}
+
+static void uninit_proc_buf(void) {
+    //munmap(procs, sizeof(procs));
+}
+
+PBM* proc_alloc(void) {
+    PBM* new_pbm;
+    //pthread_mutex_lock(&procs->lock);
+    if (PROC_BUF_SIZE <= procs->curr_proc_index) {
+        printk(KERN_WARNING "ERROR: Tried to alloc more processes than available!\n");
+        return NULL;
+    }
+    printk(KERN_WARNING "alloc proc index: %u\n", procs->curr_proc_index);
+    // get the memory address of the next free process node space
+    new_pbm = &(procs->process_buffer[procs->curr_proc_index]);
+    procs->curr_proc_index++;
+    //pthread_mutex_unlock(&procs->lock);
+    return new_pbm;
+}
+
+/* Debug helper: dumps every PBM allocated so far to the kernel log. */
+void debug_print_procs(void) {
+    size_t i;
+    printk(KERN_WARNING "-----\nPROCS:\n");
+    for(i = 0; i < procs->curr_proc_index; i++) { // curr_proc_index is the next FREE slot, so it is excluded
+        const PBM* p = &procs->process_buffer[i]; // a PBM embeds a perf_event_attr: do not copy it onto the stack
+        printk(KERN_WARNING "root: %llx, last: %llx, children: %llx, next_sib: %llx\n", (uint64_t)p->root, (uint64_t)p->last, (uint64_t)p->children, (uint64_t)p->next_sib);
+    }
+    printk(KERN_WARNING "^^^^^\n");
+}
+
+/* -----------------------------------------------------------------------------
+* General function for management and creation of program behavior models (PBMs)
+*/
+
+void pbm_init(void) {
+    init_pbm_int_map();
+    init_task_buf();
+    init_proc_buf();
+}
+
+void pbm_uninit(void) {
+    uninit_pbm_int_map();
+    uninit_task_buf();
+    uninit_proc_buf();
+}
+
+/* Insert a task node into the PBM of given type (only COMP for now) */
+int pbm_task_start(PBM* pbm, uint8_t type, struct task_struct* proc) {
+    pbm_NODE* node;
+
 	unsigned long irq_flags;
-    struct perf_event_attr pe;
 	struct perf_event *pevent;
-	u64 cpu;
 
-	if (!is_initialized) {
-		init_task_pevent_map();
-		is_initialized = 1;
+	// only continue for COMP-nodes since other types are not implemented yet
+    if(!(pbm && type == COMP)) {
+        return 0;
 	}
 
-	memset(&pe, 0, sizeof(struct perf_event_attr));
-	pe.type = PERF_TYPE_HARDWARE;
-	pe.size = sizeof(struct perf_event_attr);
-	pe.config = PERF_COUNT_HW_INSTRUCTIONS;
-	pe.sample_period = 400800;
-	pe.disabled = 0;		// start the counter as soon as we're in userland
-	pe.pinned = 1;			// ?
-	pe.exclude_kernel = 1;
-	pe.exclude_hv = 1;
+	/*
+    * Append a new node to the task graph
+    */
+    node = _pbm_create_node(type, proc->pid);
+    if(!node) {
+        printk(KERN_WARNING "ERROR: Could not create node!\n");
+        return 0;
+    }
+    if(!pbm->root) {
+        pbm->root = node;
+	} else {
+        pbm->last->children = node; // append the new node
+	}
+	// finally the new node becomes the last inserted one
+    pbm->last = node;
 
-	// Not needed on 3.2?
-	pe.wakeup_events = 1;
+    /*
+    * Configure the performance counter
+    */
+    memset(&(pbm->pea), 0, sizeof(struct perf_event_attr));
+	pbm->pea.type = PERF_TYPE_HARDWARE;
+	pbm->pea.size = sizeof(struct perf_event_attr);
+	pbm->pea.config = PERF_COUNT_HW_INSTRUCTIONS;
+	pbm->pea.sample_period = 400800;
+	pbm->pea.disabled = 0; // start the counter as soon as we're in userland
+	pbm->pea.pinned = 1;
+	pbm->pea.exclude_kernel = 1;
+	pbm->pea.exclude_hv = 1;
+	pbm->pea.wakeup_events = 1; // Not needed on 3.2?
 
-	cpu = smp_processor_id();
-	printk(KERN_WARNING "TASK: %u, CPU: %llu, PTR: %llu\n", p->pid, cpu, (u64)p);
+	printk(KERN_WARNING "TASK: %u, CPU: %i, PTR: %llu\n", proc->pid, smp_processor_id(), (u64)proc);
 
+    /*
+    * Try to enable the performance counter
+    */
 	// disable irqs to make 'perf_event_ctx_activate' in 'kernel/events/core.c' happy
 	local_irq_save(irq_flags);
 	pevent = perf_event_create_kernel_counter(
-        &pe,
+        &(pbm->pea),
         -1,   // measure on all cores (in case the process runs on different ones)
-        p,    // exclusively measure the forked process (BEWARE: a process can only measure itself!)
+        proc, // exclusively measure the forked process (BEWARE: a process can only measure itself!)
         NULL, //&overflow_handler,
         NULL
     );
 	local_irq_restore(irq_flags);
-
 	if (IS_ERR(pevent)) {
-			printk(KERN_WARNING "TASK: %u | PB ERROR INITIALISING PERF EVENT: %li\n", p->pid, PTR_ERR(pevent));
+			printk(KERN_WARNING "TASK: %u | PB ERROR INITIALISING PERF EVENT: %li\n", proc->pid, PTR_ERR(pevent));
+            // cast to prevent compiler warnings
+            if (-EOPNOTSUPP == (int64_t)pevent) {
+                printk(KERN_WARNING
+                    "TASK: %u | EOPNOTSUPP (-95): The hardware does not support certain attributes! "
+                    "E.g. perf_event_attr.precise_ip > 0 may not be supported.\n", proc->pid);
+            }
+            if (-EINVAL == (int64_t)pevent) {
+                printk(KERN_WARNING
+                    "TASK: %u | EINVAL (-22): Invalid argument!"
+                    "E.g. CPU with given index does not exist.\n", proc->pid);
+            }
             return 0;
 	}
     if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
-		printk(KERN_WARNING "TASK: %u | Event is inactive", p->pid);
+		printk(KERN_WARNING "TASK: %u | Event is inactive", proc->pid);
 	}
+	pbm->pevent = pevent;
 
-	add_proc_to_map(p->pid, pevent);
+    printk(KERN_WARNING "TASK: %u | Counting started...\n", proc->pid);
 
-    printk(KERN_WARNING "TASK: %u | Counting started...\n", p->pid);
-	return 1;
+    return 2;
 }
 
-/*
- *	handle the perf overflow event -> task needed more instructions than planed
- */
-void overflow_handler(
-		struct perf_event *event,
-		struct perf_sample_data *data,
-		struct pt_regs *regs)
-{
-	printk(KERN_WARNING "PB TASK RAN TOO LONG\n");
-	//perf_event_get
-	//->unlocked_ioctl(event, reset);
-}
-
-int stop_counting(void) {
+/* Conclude the last task of the given PBM */
+int pbm_task_end(PBM* pbm) {
 	unsigned long irq_flags;
     int read_error;
 	struct perf_event *pevent;
 	u64 perf_counter;
 
+    if (!pbm) {
+        printk(KERN_WARNING "Error: Could not end given task due to invalid PBM!\n");
+        return 0;
+    }
+
+    // record performance results
 	if (!is_initialized) {
 		printk(KERN_WARNING "TASK: %u | Pevent map not initialized!\n", current->pid);
 		return 0;
 	}
-
 	printk(KERN_WARNING "TASK: %u | Stopping counting...\n", current->pid);
-	pevent = get_pevent_by_pid(current->pid);
+	pevent = pbm->pevent;
 	if (!pevent) {
 		printk(KERN_WARNING "TASK: %u | ERROR: Could not find perf_event!\n", current->pid);
 		return 0;
@@ -163,12 +332,21 @@ int stop_counting(void) {
 		printk(KERN_WARNING "TASK: %u | PEVENT INVALID\n", current->pid);
 		return 0;
 	}
-
     read_error = perf_event_read_local(pevent, &perf_counter);
 	if (read_error) {
 		printk(KERN_WARNING "TASK: %u | FETCHING PERFORMANCE COUNTER IN stop_counting FAILED WITH %i\n", current->pid, read_error);
+            if (-EINVAL == (int64_t)read_error) {
+                // If this is a per-task event, it must be for current.
+                // If this is a per-CPU event, it must be for this CPU.
+                printk(KERN_WARNING
+                    "TASK: %u | EINVAL (-22): Invalid argument! "
+                    "E.g. trying to measure a different task than itself.\n", current->pid);
+            }
+	} else {
+		pbm->last->count = perf_counter;
 	}
 
+	// disable performance counter while preventing context switching
 	local_irq_save(irq_flags);
 	perf_event_disable(pevent);
 	perf_event_release_kernel(pevent);
@@ -176,5 +354,368 @@ int stop_counting(void) {
     pevent = NULL;
 
     printk(KERN_WARNING "TASK: %u | ...Counting stopped: %llu instr.\n", current->pid, perf_counter);
-	return 0;
+
+    return 0;
 }
+
+/**
+ * Why is the code concerning the forking separated into the two functions
+ * pbm_fork_parent_new_task() and pbm_fork() instead of simply putting it at the end of _do_fork?
+ *
+ * The separation is necessary since in the _do_fork a context switch from the parent to the
+ * child process takes place which is problematic since we want to end (and restart) perf-measuring
+ * the parent as well as the child process and the measurements (in pbm_task_end()) can only happen
+ * from the process itself. But in the beginning of _do_fork the child process does not exist yet.
+ * Therefore we have to split the code into the two functions to be able to measure the parent
+ * before the context switch as well as initialize the child-measuring after switching to the child.
+ */
+
+/* Stop previous task and start new task for the parent process and also reset the perf counter
+ * Returns a pointer to the fork-task-node which the forked process can use as a time information.
+ *
+ * BEWARE:
+ * Inside the _do_fork routine the context gets switched from the parent to the child process.
+ * This function must get called in the _do_fork() routine BEFORE (!) the child process starts to
+ * run (current == parent) otherwise the perf counting will fail!
+ */
+ pbm_NODE* pbm_fork_parent_new_task(struct task_struct* parent_proc) {
+    PBM* parent_pbm;
+    pbm_NODE* fork_node;
+
+    // end task of parent process
+	parent_pbm = get_pbm_by_pid(parent_proc->pid);
+	if(!parent_pbm) {
+		printk(KERN_WARNING "COULD NOT FIND PARENT-PBM!\n");
+		//TODO Since this will happen right at the first call to pbm_fork()
+		//TODO since no parent process has been initialized (since this is
+		//TODO the first relevant parent process)
+        return NULL;
+	}
+	pbm_task_end(parent_pbm);
+
+	/*
+    * Before starting the new task, append the fork-node to the task graph to maintain the correct order
+    */ 
+    fork_node = _pbm_create_node(FORK, parent_proc->pid);
+    if(!fork_node) {
+        printk(KERN_WARNING "COULD NOT CREATE NEW FORK NODE!\n");
+        return NULL;
+    }
+    if(!parent_pbm->root) {
+        parent_pbm->root = fork_node;
+    } else {
+        parent_pbm->last->children = fork_node; // append the new node
+    }
+    parent_pbm->last = fork_node; // the new node becomes the last inserted one
+
+    // start the new task
+	pbm_task_start(parent_pbm, COMP, parent_proc);
+
+    return fork_node;
+ }
+
+//TODO Consider the difference between calling it from the root-process (that has no registered parent process) and from "normal" child processes (that are registered in the maps)
+/* Insert a FORK node into the given PBM for up to 'num_thr' child threads
+ *
+ * BEWARE:
+ * Inside fork.c:_do_fork() the context gets switched from the parent to the child process.
+ * This function must get called in the _do_fork() routine AFTER (!) the child process starts to
+ * run (current == child) otherwise the perf counting will fail!
+ */
+int pbm_fork(struct task_struct* proc, pid_t parent_pid, pbm_NODE* fork_date) {
+	unsigned long irq_flags;
+
+	PBM* parent_pbm;
+    PBM* child_pbm;
+
+    /* NOTE:
+     * since the first time that _do_fork() is called the parent is "bash" which causes the
+     * pbm_fork_parent_new_task() to not be called but pbm_fork() since the child is "mpirun" so we
+     * put the initialization in here instead of into pbm_fork_parent_new_task().
+     */
+	// avoid context switching during initialization by disabling interrupts
+	local_irq_save(irq_flags);
+	if (!is_initialized) {
+		pbm_init();
+		is_initialized = 1;
+	}
+	local_irq_restore(irq_flags);
+
+	child_pbm = get_pbm_by_pid(proc->pid);
+	parent_pbm = get_pbm_by_pid(parent_pid);
+
+    printk(KERN_WARNING "FORK: %u from parent %u\n", proc->pid, parent_pid);
+
+    // check if the child already exists (if and only if the "forked" process
+    // is the process itself which happens in this case because OpenMP also
+    // uses the parent process for parallel calculations)
+    if (!child_pbm) {
+        // Create and initialize a new PBM for the child
+        {
+            child_pbm = proc_alloc();
+            if(!child_pbm) {
+                printk(KERN_WARNING "ERROR: Could not alloc child-PBM! %llx\n", (uint64_t)&child_pbm);
+                return 0;
+            }
+
+            // general configurations for perf_event interface
+            child_pbm->pea.size = sizeof(struct perf_event_attr);
+
+            child_pbm->root = NULL;
+            child_pbm->last = NULL;
+            child_pbm->children = NULL;
+            child_pbm->next_sib = NULL;
+            child_pbm->fork_date = NULL; // this gets updated later in the function // TODO Remove initialization here since it is done later?
+            child_pbm->exit_date = NULL;
+        }
+        if (!add_proc_to_map(proc->pid, child_pbm)) {
+            printk(KERN_WARNING "FORK ERROR: Could not add process to map: %u\n", proc->pid);
+            return 0;
+            // TODO Reverse previous allocation of child pbm?
+        } else {
+            printk(KERN_WARNING "Added process to map: %u\n", proc->pid);
+            debug_print_map(); //FIXME
+        }
+    } else {
+        printk(KERN_WARNING "Process already exists: %u\n", proc->pid);
+    }
+
+    // add child pbm to parents children
+	if(parent_pbm) { // checking this is only important in case of the root task which has no (recorded) parent
+		if(parent_pbm->children) {
+			// prepend the child to the list of children so we don't have to
+			// modify the sibling (which works since we use a singly-linked
+			// list)
+			child_pbm->next_sib = parent_pbm->children;
+		}
+		parent_pbm->children = child_pbm;
+	}
+
+    // We have to know WHEN the exit happens relative to the parent. So every child remembers the
+    // current fork-task-node of the parent on exit (so that the join can happen at the correct
+    // position (more or less, may be imperfect due to parallelism))
+    child_pbm->fork_date = fork_date;
+
+    // start performance counting for the child (counting for the parent has already been restarted)
+    pbm_task_start(child_pbm, COMP, proc);
+
+    return 1;
+}
+
+// This should get called by the child at sysexit()
+int pbm_exit(pid_t pid, pid_t parent_pid) {
+    PBM* pbm;
+    PBM* parent_pbm;
+
+    printk(KERN_WARNING "EXIT: %u\n", pid);
+    pbm = get_pbm_by_pid(pid);
+    if(!pbm) {
+        printk(KERN_WARNING "COULD NOT FIND PBM!\n");
+        debug_print_map();
+        return 0;
+    }
+    pbm_task_end(pbm);
+
+    parent_pbm = get_pbm_by_pid(parent_pid);
+    // set current parent task as the exit task of this child where the join
+    // gets inserted
+    if(parent_pbm) {
+        pbm->exit_date = parent_pbm->last;
+    }
+
+    return 1;
+}
+
+/* -----------------------------------------------------------------------------
+* PBM graph post-processing functions
+*/
+
+/* Insert a JOIN node into the given PBM and merge the forked child PBMs into this PBM */
+int pbm_join(PBM* child_pbm) {
+    pbm_NODE* fork_node;
+    pbm_NODE* join_node;
+    pid_t join_label;
+
+    // A child without tasks, fork date or exit date cannot be placed in the
+    // parent graph: report failure (0) instead of dereferencing NULL below.
+    if(!child_pbm || !child_pbm->root || !child_pbm->last
+            || !child_pbm->fork_date || !child_pbm->exit_date)
+        return 0;
+
+    fork_node = child_pbm->fork_date;
+
+    // label the join with the child's id: the parent's id would be ambiguous
+    // because one parent can spawn several children
+    join_label = child_pbm->last->thread_id;
+    join_node = _pbm_create_node(JOIN, join_label);
+    if(!join_node) {
+        printk(KERN_WARNING "ERROR: Could not create node!\n");
+        return 0;
+    }
+
+    // any fork-node has exactly two children because a fork creates only one
+    // copy of an existing process (1 (copy) + 1 (existing) = 2)
+    fork_node->count = 2;
+    join_node->count = 2;
+
+    // insert front of child task graph into parent task graph:
+    // prepend child-task-tree to list of child-nodes in the fork-node
+    {
+        // assumption: child_pbm->root->next_sib == NULL (since any fork-node
+        // has at most two children and only one before the join-operation)
+        child_pbm->root->next_sib = fork_node->children;
+        fork_node->children = child_pbm->root;
+    }
+
+    // insert back of child task graph (with appended join-node) into parent graph
+    {
+        // assumption: child_pbm->last->children == NULL (since it should be
+        // the last task the child did before exit)
+        child_pbm->last->children = join_node; // append join node to child task-graph
+        // splice the join node in directly after the exit-date-node of the parent
+        join_node->children = child_pbm->exit_date->children;
+        child_pbm->exit_date->children = join_node; // must link the join node itself, not its successor
+    }
+
+    return 1;
+}
+
+// recursively traverse all PBMs and insert the child task-graphs
+void pbm_post_processing(PBM* pbm) {
+    PBM* sib_pbm;
+    PBM* child_pbm = pbm->children;
+    if(child_pbm) {
+        pbm_post_processing(child_pbm);
+        pbm_join(child_pbm);
+        // TODO Remove from list of childs or just mark as visited?
+    }
+
+    sib_pbm = pbm->next_sib;
+    if(sib_pbm) {
+        pbm_post_processing(sib_pbm);
+        // TODO Remove from list of siblings or just mark as visited?
+    }
+}
+
+/* -----------------------------------------------------------------------------
+* PBM graph output functions
+*/
+
+void pbm_join_and_print_graph_self(pid_t pid) {
+    PBM* pbm;
+
+    debug_print_map();
+    debug_print_procs();
+    debug_print_tasks();
+    printk(KERN_WARNING "indices: %lu, %u, %u\n", index_2_pbm->last_proc_index, procs->curr_proc_index, tasks->curr_task_index);
+    pbm = get_pbm_by_pid(pid);
+    if (pbm) {
+        pbm_post_processing(pbm);
+        pbm_print_graph(pbm, pbm->root);
+    } else {
+        printk(KERN_WARNING "JOIN: PBM not found for: %u\n", pid);
+    }
+}
+
+/* Crude recursive ADG printer, starts with given node */
+void pbm_print_graph(PBM* pbm, pbm_NODE* node) {
+    pbm_NODE* root;
+    char types[5][5] = {"", "FORK", "JOIN", "COMP", "COMM"};
+
+    if(!node)
+        return;
+
+    if(node->visited)
+        return;
+
+    root = node;
+
+    printk(KERN_WARNING "Node %p: (%s, count = %llu), children:\n", node, types[node->type], node->count);
+
+    if(node->children)
+    {
+        node = node->children;
+        while(node)
+        {
+            printk(KERN_WARNING "  -- Node %p: (%s, count = %llu), next sibling: %p\n", node, types[node->type], node->count, node->next_sib);
+            node = node->next_sib;
+        }
+    }
+
+    if(root->children)
+        pbm_print_graph(pbm, root->children);
+
+    if(root->next_sib)
+        pbm_print_graph(pbm, root->next_sib);
+
+    root->visited = 1;
+    if(root == pbm->root)
+        _pbm_unvisit_node(pbm->root);
+}
+
+/* -----------------------------------------------------------------------------
+* Auxiliary functions, not for public use.
+*/
+
+/* Allocates a task node and initializes it as an unlinked node of 'type' owned by 'pid'. */
+pbm_NODE* _pbm_create_node(uint8_t type, pid_t pid) {
+    pbm_NODE* fresh = task_alloc();
+    if(fresh) {
+        fresh->type = type;
+        fresh->thread_id = pid;
+        // no measurement and no graph links yet
+        fresh->count = 0;
+        fresh->visited = 0;
+        fresh->next_sib = fresh->children = NULL;
+    }
+    return fresh; // NULL when task_alloc() could not provide a node
+}
+
+// recursive
+void _pbm_unvisit_node(pbm_NODE* node) {
+    // nothing to do for a missing node or one that is already unmarked;
+    // the latter also stops the descent at sub-graphs that were reset before
+    if(node == NULL || node->visited == 0) {
+        return;
+    }
+
+    // descend first (NULL links are rejected by the guard above) ...
+    _pbm_unvisit_node(node->children);
+    _pbm_unvisit_node(node->next_sib);
+
+    // ... and clear this node's marker only afterwards, on the way back up
+    // of the walk
+    node->visited = 0;
+}
+
+/******************************************************************************/
+
+/*
+ *	handle the perf overflow event -> task needed more instructions than planned
+ */
+/*
+static void overflow_handler(
+		struct perf_event *event,
+		struct perf_sample_data *data,
+		struct pt_regs *regs)
+{
+	printk(KERN_WARNING "PB TASK RAN TOO LONG\n");
+}
+*/
+
+int is_root_process(struct task_struct* p) {
+    return strcmp(p->comm, "mpirun") == 0;
+}
+
+int is_relevant_process(struct task_struct* p) {
+    struct task_struct* proc = p;
+    // Returns 1 if p or one of its ancestors is the root process ("mpirun"), else 0.
+    // The walk ends at the root-parent: the idle task (PID 0) is its own real_parent
+    // and is named "swapper/<cpu>" on SMP kernels, so the PID is checked as well.
+    while (proc && proc->pid != 0 && strcmp(proc->comm, "swapper") != 0) {
+        if (is_root_process(proc)) {
+            return 1;
+        }
+        proc = proc->real_parent;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/kernel/behave.h b/kernel/behave.h
index 4e83c96cfcce2d12280fcbf663620b557f3f1e8f..881970f662dedbf241261bb474334ec4deb5c1cd 100644
--- a/kernel/behave.h
+++ b/kernel/behave.h
@@ -3,12 +3,176 @@
 
 #include <linux/perf_event.h>
 
+/******************************************************************************
+* Based on "libpbm":
+*  Program Behaviour Model (PBM) as a Task Precedence Graph (TPG),
+*  implemented as an Acyclic Directed Graph (ADG) structure. Using
+*  Linux' perf_event interface for task performance measurement.
+*  Author: Michael Zent
+*  Context: Softwareproject 'Cluster Management', Lecturer: Barry Linnert, SS 2022 FU Berlin
+*/
+
+// node types
+#define FORK 1
+#define JOIN 2
+#define COMP 3 // computation
+#define COMM 4 // communication (not supported yet)
+
+/*
+* PBM node, describing a program task
+*/
+typedef struct _pbm_NODE
+{
+    uint8_t type;      // FORK, JOIN, or COMP (COMM not supported yet)
+    int32_t thread_id; // ID of the current thread within its thread group
+
+    /*
+    * Performance count value, interpretation depends on type
+    * FORK - Number of forked threads
+    * JOIN - Number of joined threads
+    * COMP - Number of instructions needed to complete the task
+    * COMM - Total length of all messages sent, in byte (not supported yet)
+    */
+    uint64_t count;
+
+    /*
+    * Inter-node connectors
+    * children - First child in a list of children. There should be >= 1 children
+    *            only if type == FORK, else a node has just one child.
+    * next_sib - Next sibling of a node. Should be != NULL only if the node is a
+    *            child of a FORK node, with exception of the last child.
+    */
+    struct _pbm_NODE* children; // first child (in a list of children)
+    struct _pbm_NODE* next_sib; // next sibling
+
+    // marker for graph traversal
+    uint8_t visited;
+} pbm_NODE;
+
+/*
+* Program Behavior Model (PBM)
+*/
+typedef struct _PBM
+{
+    pbm_NODE* root; // first task of a thread // TODO Rename to first or first_task?
+    pbm_NODE* last; // current last task
+
+    // the task nodes of the parent which are used as time markers/dates to
+    // know where the task graph must be inserted in the parent task-graph in
+    // the post-processing stage
+    pbm_NODE* fork_date; // fork task of the parent process
+    pbm_NODE* exit_date; // current/last task of the parent while this child exited
+
+    /*
+    * The Fork Buffer contains pointers to PBMs which describe the forked
+    * child threads. Should be != NULL with size > 0 only after forking.
+    */
+    struct _PBM* children; // first child (in a list of forked children)
+    struct _PBM* next_sib; // next sibling
+
+    /*
+    * Performance measurement and recording
+    */
+    struct perf_event_attr pea; // config info for the perf_event interface
+    struct perf_event* pevent;
+} PBM;
+
+/*
+* Creates a new PBM and initializes it.
+* Should be the first PBM-function called.
+*
+* Returns a pointer to that PBM, or NULL on error.
+*/
+PBM* pbm_create(void);
+
+/*
+* Deletes the given PBM and frees the associated resources.
+* Should only be called if pbm_create() was called prior, and
+* if no pbm_task_start() or pbm_fork() remained unclosed.
+*/
+void pbm_destroy(PBM* pbm);
+
+/*
+* Inserts into the given PBM a new node of given type (for now only
+* COMP, as COMM is not supported yet) and starts performance counting.
+* Should be called immediately before the task starts, and be closed
+* by pbm_task_end().
+*
+* Returns 0 on error (including when the performance counter could not be
+* created) and 2 once the node is inserted and counting has been set up.
+*/
+int pbm_task_start(PBM* pbm, uint8_t type, struct task_struct* proc);
+
+/*
+* Ends performance counting for the last node of the given PBM and
+* records the results.
+* Should be called immediately after the task ends, and as the next
+* PBM-method after pbm_task_start().
+*
+* Returns 0 on failure, i.e. the performance counts could not be
+* recorded, otherwise returns 1.
+*/
+int pbm_task_end(PBM* pbm);
+
+ pbm_NODE* pbm_fork_parent_new_task(struct task_struct* parent_proc);
+
+/*
+* Inserts into the given PBM a FORK node which may have up to
+* 'num_thr' children, describing the forked child threads. If
+* num_thr == 0 the max number of threads is determined auto-
+* matically via OpenMP's omp_get_max_threads().
+* Should be called immediately before the thread is forked, and be
+* closed by pbm_join().
+*
+* Returns 0 on error, or 1 on success.
+*
+* The process as an explicit argument is needed since the fork is called by the parent.
+*/
+int pbm_fork(struct task_struct* proc, pid_t parent_pid, pbm_NODE* fork_date);
+
+int pbm_exit(pid_t pid, pid_t parent_pid);
+
+/*
+* Inserts into the given PBM a JOIN node. Records the actual number of
+* forked threads. Merges the sub-PBMs, describing the child threads, from
+* the fork buffer into the parent PBM and releases the fork-buffer.
+* Should be called immediately after the child threads are joined, and as
+* the next PBM-method after pbm_fork().
+*
+* Returns 0 on error, or 1 on success.
+*/
+int pbm_join(PBM* child_pbm);
+
+/*
+* Crude recursive ADG printer. Starts with the given node, which
+* should be the root node of the given PBM.
+*/
+void pbm_print_graph(PBM* pbm, pbm_NODE* node);
+
+void pbm_join_and_print_graph_self(pid_t pid);
+
+/*
+* Auxiliary methods, not for public usage. Nomen est omen.
+*/
+pbm_NODE* _pbm_create_node(uint8_t type, pid_t pid);
+void _pbm_destroy_node(pbm_NODE* node);
+void _pbm_unvisit_node(pbm_NODE* node);
+
+/*
+* Initialize the map necessary to locate the PBM by given PID
+* 
+* BEWARE: Forgetting to call this before calling fork() will trigger
+* Segmentation faults!
+*/
+void pbm_init(void);
+void pbm_uninit(void);
+
+/******************************************************************************/
+
+int is_root_process(struct task_struct* p);
+int is_relevant_process(struct task_struct *p);
+
 int start_counting(struct task_struct *p);
 int stop_counting(void);
 
-void overflow_handler(
-		struct perf_event *,
-		struct perf_sample_data *,
-		struct pt_regs *regs);
-
 #endif //PLAN_BASED_LINUX_SCHEDULER_BEHAVE_H
diff --git a/kernel/exit.c b/kernel/exit.c
index 97470434dad1b7dd2884c5bfe33a4d259590b21e..1d7f88f7aec2e1e55e042ce72a034f57a3d9a4e2 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -768,11 +768,11 @@ void __noreturn do_exit(long code)
 	int group_dead;
 
 	// call the readout before the process is terminated
-	if (strcmp(tsk->real_parent->comm, "bash") == 0
-			|| strcmp(tsk->real_parent->real_parent->comm, "bash") == 0
-			|| strcmp(tsk->real_parent->real_parent->real_parent->comm, "bash") == 0) {
-		stop_counting();
-		printk(KERN_EMERG "EXIT: %u, CMD: '%s', PARENT-CMD: '%s', PTR: %llu\n", tsk->pid, tsk->comm, tsk->real_parent->comm, (u64)tsk);
+	if (is_relevant_process(tsk)) {
+		pbm_exit(tsk->pid, tsk->real_parent->pid);
+	}
+	if (is_root_process(tsk)) {
+		pbm_join_and_print_graph_self(tsk->pid);
 	}
 
 	TASKS_RCU(int tasks_rcu_i);
diff --git a/kernel/fork.c b/kernel/fork.c
index 2c2aab1bbb6aa3e7201febd4f1fe03bf83a8f0af..b3ec7595bd1cc3df74a9fca6d8e205f92eff06fa 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2016,7 +2016,16 @@ long _do_fork(unsigned long clone_flags,
 	int trace = 0;
 	long nr;
 
-    printk(KERN_EMERG "DO FORK CALLED by: %u\n", current->pid);
+	pbm_NODE* fork_date;
+	pid_t parent_pid = current->pid;
+	fork_date = NULL;
+
+	// FIXME: This will not get called for mpirun, since in that case bash is the parent here
+	if (is_relevant_process(current)) {
+		printk(KERN_EMERG "DO FORK CALLED by: '%s' %u\n", current->comm ,parent_pid);
+		fork_date = pbm_fork_parent_new_task(current);
+	}
+
 	/*
 	 * Determine whether and which event to report to ptracer.  When
 	 * called from kernel_thread or CLONE_UNTRACED is explicitly
@@ -2060,6 +2069,16 @@ long _do_fork(unsigned long clone_flags,
 			get_task_struct(p);
 		}
 
+		// this must be called before the new task wakes up to make sure that
+		// initialization of the perf event is done at that point
+		if (is_relevant_process(p)) {
+			// BEWARE: At this point p->comm is not yet up to date but shows the
+			// command of the parent!
+			printk(KERN_EMERG "FORKED!!!!: %u, Parent: %s, Super-Parent:%s\n",
+					p->pid, p->comm, p->real_parent->real_parent->comm);
+			pbm_fork(p, parent_pid, fork_date);
+		}
+
 		wake_up_new_task(p);
 
 		/* forking complete and child started to run, tell ptracer */
@@ -2072,15 +2091,6 @@ long _do_fork(unsigned long clone_flags,
 		}
 
 		put_pid(pid);
-        printk(KERN_EMERG "FORKED!!!!: %u, Parent: %s, Super-Parent:%s\n",
-				p->pid, p->comm, p->real_parent->real_parent->comm);
-		//FIXME At this point p->comm is not up to date but shows the command of the parent!
-		//      (This may not be a problem since the name of the forked processes are not needed?)
-		if (strcmp(p->comm, "bash") == 0
-				|| strcmp(p->real_parent->comm, "bash") == 0
-				|| strcmp(p->real_parent->real_parent->comm, "bash") == 0) {
-			start_counting(p);
-		}
 	} else {
 		nr = PTR_ERR(p);
 	}