pb.c

    #include <linux/seq_file.h>
    #include <linux/proc_fs.h>
    #include "perf_error_detection.h"
    #include <linux/syscalls.h>
    #include <linux/spinlock.h>
    #include <linux/perf_event.h>
    #include <linux/kthread.h>
    
    typedef struct pb_plan pb_plan_t;
    
    SYSCALL_DEFINE1(pb_set_plan, pb_plan_t __user*, plan) {
    	pb_plan_t _plan;
    	struct task_struct *task;
    	struct rq *rq;
    	struct pb_rq *pb_rq;
    	size_t expected;
    	uint64_t *inst_cnt;

    	unsigned long copied;
    	unsigned int i;
    	int res;
    	
    	copied = copy_from_user(&_plan, plan, sizeof(pb_plan_t));
    	
    	if (copied != 0) {
    		return -EFAULT;
    	}
    
    	expected = _plan.num_tasks * sizeof(*_plan.inst_cnt);

    	// kcalloc checks the num_tasks * size multiplication for overflow
    	inst_cnt = kcalloc(_plan.num_tasks, sizeof(*inst_cnt), GFP_KERNEL);

    	if (inst_cnt == NULL) {
    		return -ENOMEM;
    	}
    
    	copied = copy_from_user(inst_cnt, _plan.inst_cnt, expected);

    	if (copied != 0) {
    		printk(KERN_WARNING "copy_from_user for inst_cnt failed\n");
    		kfree(inst_cnt);
    		return -EFAULT;
    	}

    	task = find_task_by_vpid(_plan.pid);

    	if (!task) {
    		kfree(inst_cnt);
    		return -ESRCH;
    	}
    
    	rq = this_rq();
    
    	task->sched_class = &pb_sched_class;
    
    	pb_rq = &rq->pb;
    
    	set_pb_plan_size(pb_rq, _plan.num_tasks);
    
    	for (i = 0; i < _plan.num_tasks; i++) {
    		set_pb_plan_entry(
    			pb_rq,
    			i,
    			inst_cnt[i],
    			i,
    			task
    		);
    	}
    
    	kfree(inst_cnt);
    
    	res = pb_submit_plan(rq);

    	if (res < 0) {
    		printk(KERN_WARNING "pb_submit_plan failed: a plan is already active\n");
    		return res;
    	}
    
    	return 0;
    }
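
    /*
     * Usage sketch (not part of this file): how a userspace program might
     * invoke pb_set_plan. The struct layout mirrors the fields used above
     * (pid, num_tasks, inst_cnt) but is an assumption, as is the syscall
     * number __NR_pb_set_plan, which depends on the patched syscall table.
     *
     *	#include <stdint.h>
     *	#include <unistd.h>
     *	#include <sys/syscall.h>
     *
     *	struct pb_plan {
     *		pid_t pid;          // task to run under the pb scheduler
     *		size_t num_tasks;   // number of plan entries
     *		uint64_t *inst_cnt; // expected instructions per entry
     *	};
     *
     *	int main(void)
     *	{
     *		uint64_t counts[2] = { 1000000, 2000000 };
     *		struct pb_plan plan = {
     *			.pid = getpid(),
     *			.num_tasks = 2,
     *			.inst_cnt = counts,
     *		};
     *		// hypothetical syscall number; see the patched syscall table
     *		return (int)syscall(__NR_pb_set_plan, &plan);
     *	}
     */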
    
    /*
     * A plan can only be submitted if no plan is currently being executed
     */
    int pb_submit_plan(struct rq *rq)
    {
    	/*
    	 * Must be volatile so the compiler cannot reorder or elide the stores
    	 * below; is_initialized has to be written last
    	 */
    	volatile struct pb_rq *pb = (volatile struct pb_rq *)(&(rq->pb));
    	int perf_init_res;
    	int i = 0;
    
    	if (pb->mode != PB_DISABLED_MODE) {
    		return -EBUSY;
    	}
    
    	perf_init_res = init_perf_event(&pb->plan[i], &pb->pevent);
    	if (perf_init_res < 0) {
    		// initialization error detection/handling could happen here
    		printk(KERN_WARNING "PB INIT,%d: FAILED TO OPEN PERF EVENT\n", i);
    	} else {
    		printk(KERN_DEBUG "PB INIT,%d\n", i);
    	}
    
    	pb->c_entry = 0;
    	pb->count_pb_cycles = 0;
    	pb->count_admin_cycles = 0;
    	pb->total_instr = 0;
    
    	pb->is_initialized = 1;	// must be initialized last
    
    	resched_curr(rq);	// reschedule ASAP
    
    	return 0;
    }
    EXPORT_SYMBOL(pb_submit_plan);

    /*
     * Kelvin's test code
     */
    void set_pb_plan_size(struct pb_rq *pb_rq, unsigned int size)
    {
    	pb_rq->size = size;
    	pb_rq->plan = kzalloc(sizeof(struct plan_entry) * size, GFP_KERNEL);
    }
    EXPORT_SYMBOL(set_pb_plan_size);
    
    /*
     * Kelvin's test code
     */
    // insert into the pb queue (analogous to enqueue)
    void set_pb_plan_entry(struct pb_rq *pb_rq, unsigned int i, u64 n_instr, u64 task_id, struct task_struct *task_struct)
    {
    	pb_rq->plan[i].n_instr = n_instr;
    	pb_rq->plan[i].task_id = task_id;
    	pb_rq->plan[i].task_struct = task_struct;
    }
    EXPORT_SYMBOL(set_pb_plan_entry);
    
    // called by core.c sched_init
    void init_pb_rq(struct pb_rq *pb_rq)
    {
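    	// Assumption: the cycle budgets below are consumed by
    	// determine_next_mode_pb() (defined elsewhere) to alternate between
    	// plan execution and admin mode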
    	pb_rq->n_pb_cycles = 100;
    	pb_rq->count_pb_cycles = 0;
    	pb_rq->n_admin_cycles = 20;
    	pb_rq->count_admin_cycles = 0;
    	pb_rq->mode = PB_DISABLED_MODE;
    	pb_rq->c_entry = 0;
    	pb_rq->size = 0;
    	pb_rq->pevent = NULL;
    	pb_rq->is_initialized = 0;
    	pb_rq->waiting_on_io = 0;
    }
    EXPORT_SYMBOL(init_pb_rq);
    
    // IO has finished, we can schedule the next task
    static void enqueue_task_pb(struct rq *rq, struct task_struct *p, int flags)
    {
    	struct pb_rq *pb = &(rq->pb);
    
    	pb->waiting_on_io = 0;
    }
    
    // the task has started IO, so its current plan entry is finished
    static void dequeue_task_pb(struct rq *rq, struct task_struct *p, int flags)
    {
    	struct pb_rq *pb = &(rq->pb);
    	unsigned int c_entry_curr;
    	u64 perf_counter;
    	u64 counter_diff;
    	u64 read_error;
    	bool premature_finish = false;

    	if (pb->waiting_on_io) {
    		return;
    	}
    	pb->waiting_on_io = 1;
    	c_entry_curr = pb->c_entry;
    	
    	if (!pb->pevent) {
    		printk(KERN_WARNING "PB: PERF EVENT IS NULL\n");
    	}
    
    	// printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
    	read_error = get_perf_counter(pb->pevent, &perf_counter);
    	if (read_error) {
    		printk(KERN_WARNING "FETCHING PERFORMANCE COUNTER IN PB SCHEDULER FAILED WITH %llu\n", read_error);
    	}
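    	// instructions retired in this plan slice = current counter value
    	// minus the running total recorded at the previous dequeue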
    	counter_diff = perf_counter - pb->total_instr;
    	pb->plan[c_entry_curr].n_instr_counted = counter_diff;
    	pb->total_instr = perf_counter;
    	if (counter_diff < pb->plan[c_entry_curr].n_instr) {
    		u64 under_time = pb->plan[c_entry_curr].n_instr - counter_diff;

    		printk(KERN_WARNING "PB TASK %llu RAN %llu INSTRUCTIONS TOO SHORT\n", pb->plan[c_entry_curr].task_id, under_time);
    	} else if (counter_diff > pb->plan[c_entry_curr].n_instr) {
    		// TODO: Check whether an overflow actually occurred and another calculation is necessary
    		// (setting a flag in the perf overflow_handler could be a solution)
    		u64 over_time = counter_diff - pb->plan[c_entry_curr].n_instr;

    		printk(KERN_WARNING "PB TASK %llu RAN %llu INSTRUCTIONS TOO LONG\n", pb->plan[c_entry_curr].task_id, over_time);
    	}

    	pb->c_entry++;
    
    	/*
    	 * Don't schedule a task that is dead (e.g. the plan was incorrect and the
    	 * program finished earlier than planned).
    	 * TODO: if we have multiple task structs, try the next plan entry
    	 */
    	if (pb->c_entry < pb->size && pb->plan[pb->c_entry].task_struct->state == TASK_DEAD) {
    		premature_finish = true;
    	}
    
    	if (pb->c_entry >= pb->size || premature_finish) {
    		if (premature_finish) {
    			printk(KERN_WARNING "PLAN TERMINATED PREMATURELY\n");
    		} else {
    			printk(KERN_WARNING "PLAN DONE\n");
    		}
    
    		// hand the task back to CFS so it can run to completion
    		pb->is_initialized = 0;
    		pb->plan[0].task_struct->sched_class = &fair_sched_class;
    		resched_curr(rq);
    	}
    }
    
    static void yield_task_pb(struct rq *rq)
    {
    	// NOP
    }
    
    static void check_preempt_curr_pb(struct rq *rq, struct task_struct *p, int flags)
    {
    	// NOP
    }
    
    static struct task_struct * pick_next_task_pb(struct rq *rq,
    		struct task_struct *prev, struct rq_flags *rf)
    {
    	// contains task to be executed
    	struct task_struct *picked = NULL;
    	enum pb_mode current_mode, next_mode;
    	struct pb_rq *pb = &(rq->pb);
    	
    	current_mode = pb->mode;
    	next_mode = determine_next_mode_pb(rq);
    	pb->mode = next_mode;
    
    	if (next_mode == PB_DISABLED_MODE && current_mode == PB_EXEC_MODE) {
    		// After the plan is done, do the cleanup
    		terminate_perf_event(pb->pevent);
    		pb->pevent = NULL;
    		// TODO: Check if we have to free the memory or if perf takes care of it
    		// see 'perf_event_release_kernel(struct perf_event *event)' in core.c
    	}
    	/*
    	 * This handles the case where the program to be run is dead before the
    	 * pb scheduler starts executing
    	 */
    	if (current_mode == PB_DISABLED_MODE && current_mode != next_mode) {
    		if (pb->c_entry < pb->size && pb->plan[pb->c_entry].task_struct->state == TASK_DEAD) {
    			pb->mode = PB_DISABLED_MODE;
    			next_mode = PB_DISABLED_MODE;
    			picked = NULL;
    			pb->is_initialized = 0;
    			printk(KERN_WARNING "PLAN TERMINATED PREMATURELY \n");
    		}
    	}
    
    	if (current_mode != next_mode) {
    		printk(KERN_DEBUG "SWITCHING MODES\n");
    		pb->count_admin_cycles = 0;
    		pb->count_pb_cycles = 0;
    		// Push the last non-plan task back into its corresponding runqueue
    		if (next_mode == PB_EXEC_MODE) {
    			// Necessary to manage the preempted task
    			printk(KERN_DEBUG "PUT OLD TASK BACK IN RQ\n");
    			put_prev_task(rq, prev);
    		}
    	}
    
    	// EXEC mode is next, so we return the next task to be executed
    	if (next_mode == PB_EXEC_MODE) {
    		// printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
    		if (current_mode == PB_ADMIN_MODE) {
    			printk(KERN_DEBUG "PB ADMIN,STOP,%u,%llu\n", pb->c_entry, sched_clock());
    		} else if (current_mode == PB_DISABLED_MODE) {
    			printk(KERN_DEBUG "Switching from disabled to EXEC\n");
    		}
    		picked = pb->plan[pb->c_entry].task_struct;
    	}
    
    	return picked;
    }
    
    static void put_prev_task_pb(struct rq *rq, struct task_struct *p)
    {
    	// NOP
    }
    
    static void set_curr_task_pb(struct rq *rq)
    {
    	// NOP
    }
    
    /*
     * TODO: Make sure this doesn't interrupt determine_next_mode_pb() and pick_next_task_pb()
     */
    static void task_tick_pb(struct rq *rq, struct task_struct *p, int queued)
    {
    	struct pb_rq *pb = &(rq->pb);
    
    	if (pb->mode != PB_EXEC_MODE) {
    		return;
    	}
    
    	pb->count_pb_cycles++;
    
    	// printk("TICK #%d\n",pb->count_pb_cycles);
    
    	if (determine_next_mode_pb(rq) != PB_EXEC_MODE && pb->mode == PB_EXEC_MODE) {
    		//printk("Reschudling in task_tick_pb");
    		resched_curr(rq);
    	}
    }
    
    static unsigned int get_rr_interval_pb(struct rq *rq, struct task_struct *task)
    {
    	return 0;
    }
    
    static void prio_changed_pb(struct rq *rq, struct task_struct *p, int oldprio)
    {
    	// NOP
    }
    
    static void switched_to_pb(struct rq *rq, struct task_struct *p)
    {
    	// NOP
    }
    
    static void update_curr_pb(struct rq *rq)
    {
    	// NOP
    }
    
    const struct sched_class pb_sched_class = {
    	.next			= &dl_sched_class,
    	.enqueue_task		= enqueue_task_pb,
    	.dequeue_task		= dequeue_task_pb,
    	.yield_task		= yield_task_pb,
    
    	.check_preempt_curr	= check_preempt_curr_pb, // NOP
    
    	.pick_next_task		= pick_next_task_pb,
    	.put_prev_task		= put_prev_task_pb, // NOP
    
    	.set_curr_task		= set_curr_task_pb, // NOP
    	.task_tick		= task_tick_pb,
    
    	.get_rr_interval	= get_rr_interval_pb, // NOP (return 0)
    
    	.prio_changed		= prio_changed_pb, // NOP
    	.switched_to		= switched_to_pb, // NOP
    
    	.update_curr		= update_curr_pb, // NOP
    };
    EXPORT_SYMBOL(pb_sched_class);
    
    
    /***********************************************************************
     * /proc filesystem entry
     * use 'cat /proc/pbsched' to read
     **********************************************************************/
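
    /*
     * Example output (values illustrative; the format comes from the
     * seq_printf calls in show_pbsched() below):
     *
     *	$ cat /proc/pbsched
     *	cpuid mode curr_entry curr_pb_cycles curr_admin_cycles
     *	cpu0 E 1 42 0
     *
     *	task_id n_instr n_instr_counted
     *	0 1000000 1000123
     *	1 2000000 queued
     */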
    
    static int show_pbsched(struct seq_file *seq, void *v)
    {
    	int cpu;

    	if (v == (void *)1) {
    		seq_printf(seq, "cpuid mode curr_entry curr_pb_cycles curr_admin_cycles\n");
    	} else {
    		char mode;
    		struct rq *rq;
    		struct pb_rq *pb;

    		int i;
    		struct plan_entry *plan;

    		cpu = (unsigned long)(v - 2);
    		rq = cpu_rq(cpu);
    		pb = &(rq->pb);

    		switch (pb->mode) {
    		case PB_DISABLED_MODE: mode = 'D'; break;
    		case PB_EXEC_MODE: mode = 'E'; break;
    		case PB_ADMIN_MODE: mode = 'A'; break;
    		default: mode = 'U'; break;
    		}
    
    		/* runqueue-specific stats */
    		seq_printf(seq,
    		    "cpu%d %c %u %llu %llu\n",
    		    cpu,
    		    mode,
    		    pb->c_entry,
    		    pb->count_pb_cycles,
    		    pb->count_admin_cycles
    		);

    		/* plan stats */
    		if (pb->size) {
    			seq_printf(seq, "\ntask_id n_instr n_instr_counted\n");

    			plan = pb->plan;
    			for (i = 0; i < pb->size; i++) {
    				// only print completed entries; after the plan completes,
    				// is_initialized is 0 and the last entry can be printed too
    				if (i < pb->c_entry || !pb->is_initialized) {
    					seq_printf(seq,
    						"%llu %llu %llu\n",
    						plan[i].task_id,
    						plan[i].n_instr,
    						plan[i].n_instr_counted
    					);
    				} else {
    					seq_printf(seq,
    						"%llu %llu queued\n",
    						plan[i].task_id,
    						plan[i].n_instr
    					);
    				}
    			}
    		}
    	}
    	return 0;
    }
    
    /*
     * This iterator needs some explanation.
     * It returns 1 for the header position.
     * This means 2 is cpu 0.
     * In a hotplugged system some cpus, including cpu 0, may be missing so we have
     * to use cpumask_* to iterate over the cpus.
     */
    static void *pbsched_start(struct seq_file *file, loff_t *offset)
    {
    	unsigned long n = *offset;
    
    	if (n == 0)
    		return (void *) 1;
    
    	n--;
    
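    	// n is now a 0-based cpu slot; cpumask_next(n - 1, cpu_online_mask)
    	// returns the first online cpu numbered >= n, skipping hotplug holes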
    	if (n > 0)
    		n = cpumask_next(n - 1, cpu_online_mask);
    	else
    		n = cpumask_first(cpu_online_mask);
    
    	*offset = n + 1;
    
    	if (n < nr_cpu_ids)
    		return (void *)(unsigned long)(n + 2);
    	return NULL;
    }
    
    static void *pbsched_next(struct seq_file *file, void *data, loff_t *offset)
    {
    	(*offset)++;
    	return pbsched_start(file, offset);
    }

    static void pbsched_stop(struct seq_file *file, void *data)
    {
    	// NOP
    }
    
    static const struct seq_operations pbsched_sops = {
    	.start = pbsched_start,
    	.next  = pbsched_next,
    	.stop  = pbsched_stop,
    	.show  = show_pbsched,
    };
    
    static int pbsched_open(struct inode *inode, struct file *file)
    {
    	return seq_open(file, &pbsched_sops);
    }
    
    static const struct file_operations proc_pbsched_operations = {
    	.open    = pbsched_open,
    	.read    = seq_read,
    	.llseek  = seq_lseek,
    	.release = seq_release,
    };
    
    static int __init proc_pbsched_init(void)
    {
    	proc_create("pbsched", 0, NULL, &proc_pbsched_operations);
    	return 0;
    }
    subsys_initcall(proc_pbsched_init);