Skip to content
Snippets Groups Projects
Commit be4746ed authored by FKHals's avatar FKHals
Browse files

Fix pbm_join kernel panic caused by early parent exit

which exposed the following precondition for calling
pbm_join_and_print_graph_self(): ALL children, grandchildren etc. of the
root process of a measured program must have exited, since otherwise the
information in the model to modify and print is not complete.
parent cc757cb4
Branches libpbm_kernel_fix
No related tags found
No related merge requests found
...@@ -390,10 +390,12 @@ int pbm_task_end(PBM* pbm) { ...@@ -390,10 +390,12 @@ int pbm_task_end(PBM* pbm) {
// end task of parent process // end task of parent process
parent_pbm = get_pbm_by_pid(parent_proc->pid); parent_pbm = get_pbm_by_pid(parent_proc->pid);
if(!parent_pbm) { if(!parent_pbm) {
printk(KERN_WARNING "COULD NOT FIND PARENT-PBM!\n"); // this should only happen right at the first call to pbm_fork() since at this point no
//TODO Since this will happen right at the first call to pbm_fork() // parent process has been initialized yet since this is the first relevant parent process
//TODO since no parent process has been initialized (since this is if (current->pid != root_proc) {
//TODO the first relevant parent process) printk(KERN_WARNING "ERROR: COULD NOT FIND PARENT-PBM!\n");
BUG();
}
return NULL; return NULL;
} }
pbm_task_end(parent_pbm); pbm_task_end(parent_pbm);
...@@ -488,7 +490,9 @@ int pbm_fork(struct task_struct* proc, pid_t parent_pid, pbm_NODE* fork_node) { ...@@ -488,7 +490,9 @@ int pbm_fork(struct task_struct* proc, pid_t parent_pid, pbm_NODE* fork_node) {
child_pbm->next_sib = parent_pbm->children; child_pbm->next_sib = parent_pbm->children;
} }
parent_pbm->children = child_pbm; parent_pbm->children = child_pbm;
} } else {
BUG_ON(current->pid != root_proc);
}
// We have to know WHEN the exit happens relative to the parent. So every child remembers the // We have to know WHEN the exit happens relative to the parent. So every child remembers the
// current fork-task-node of the parent on exit (so that the join can happen at the correct // current fork-task-node of the parent on exit (so that the join can happen at the correct
...@@ -531,7 +535,15 @@ int pbm_exit(pid_t pid, pid_t parent_pid) { ...@@ -531,7 +535,15 @@ int pbm_exit(pid_t pid, pid_t parent_pid) {
// set current parent task as the exit task of this child where the join // set current parent task as the exit task of this child where the join
// gets inserted // gets inserted
if(parent_pbm) { if(parent_pbm) {
if (parent_pbm->last == NULL) {
printk(KERN_WARNING "ERROR: ParentPBM->LAST is NULL! ppid: %u\n", parent_pid);
}
pbm->exit_date = parent_pbm->last; pbm->exit_date = parent_pbm->last;
} else {
if (current->pid != root_proc) {
printk(KERN_WARNING "ERROR: COULD NOT FIND PARENT! child: %u, parent: %u\n", pid, parent_pid);
BUG();
}
} }
return 1; return 1;
...@@ -550,6 +562,10 @@ int pbm_join(PBM* child_pbm) { ...@@ -550,6 +562,10 @@ int pbm_join(PBM* child_pbm) {
if(!child_pbm) if(!child_pbm)
return 0; return 0;
if (!child_pbm->exit_date) {
printk(KERN_WARNING "Exit date missing for pid %u. Has the process really been exited?\n", child_pbm->last->thread_id);
}
printk(KERN_WARNING "Joining thread %u with fork_node: %lli\n", child_pbm->last->thread_id, task_2_index(child_pbm->fork_date)); printk(KERN_WARNING "Joining thread %u with fork_node: %lli\n", child_pbm->last->thread_id, task_2_index(child_pbm->fork_date));
fork_node = child_pbm->fork_date; fork_node = child_pbm->fork_date;
...@@ -605,6 +621,14 @@ void pbm_post_processing(PBM* pbm) { ...@@ -605,6 +621,14 @@ void pbm_post_processing(PBM* pbm) {
* PBM graph output functions * PBM graph output functions
*/ */
/**
* BEWARE:
* This function must not be called before ALL children, grandchildren etc. of the root process of
* a measured program have exited, since otherwise the information in the model is not complete and
* this function will therefore throw an error!
* In that case pbm_join() will most probably generate the error, since the exit_date of the
* not-yet-exited process will not be set and the perf counter values will probably be wrong too.
*/
void pbm_join_and_print_graph_self(pid_t pid) { void pbm_join_and_print_graph_self(pid_t pid) {
PBM* pbm; PBM* pbm;
......
...@@ -771,6 +771,13 @@ void __noreturn do_exit(long code) ...@@ -771,6 +771,13 @@ void __noreturn do_exit(long code)
if (is_relevant_process(tsk)) { if (is_relevant_process(tsk)) {
pbm_exit(tsk->pid, tsk->real_parent->pid); pbm_exit(tsk->pid, tsk->real_parent->pid);
} }
/**
* BEWARE:
* The root process of a measured program must not exit before ALL its children, grandchildren
* etc. have exited, since otherwise the information in the model is not complete and
* pbm_join_and_print_graph_self() will therefore throw an error!
*/
if (is_root_process(tsk)) { if (is_root_process(tsk)) {
pbm_join_and_print_graph_self(tsk->pid); pbm_join_and_print_graph_self(tsk->pid);
} }
......
...@@ -17,10 +17,11 @@ int main(void) ...@@ -17,10 +17,11 @@ int main(void)
// grandchild. // grandchild.
pid_t n2 = fork(); pid_t n2 = fork();
if (n1 > 0 && n2 > 0) { // barrier: make all fathers wait for all its child processes (since all processes use that line)
// barrier: let the father wait for all the child processes while ((wpid = wait(&status)) > 0) {};
while ((wpid = wait(&status)) > 0) {};
if (n1 > 0 && n2 > 0)
{
fprintf(stderr, "Parent %d of %d %d\n", getpid(), n1, n2); fprintf(stderr, "Parent %d of %d %d\n", getpid(), n1, n2);
// check if program runs && syscall to switch tasks // check if program runs && syscall to switch tasks
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment