#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include "perf_error_detection.h"
#include <linux/syscalls.h>
#include <linux/spinlock.h>
#include <linux/perf_event.h>
#include <linux/kthread.h>
#include <linux/slab.h>
typedef struct pb_plan pb_plan_t;
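/*
* pb_set_plan: userspace entry point for installing an instruction-count plan
* on the pb scheduler. It copies the plan descriptor and the per-entry
* instruction counts from userspace, moves the target task to the pb
* scheduling class and submits the plan on the current runqueue.
* (struct pb_plan is defined elsewhere; it is assumed here to expose at least
* .pid, .num_tasks and .inst_cnt, which are the fields used below.)
*/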
SYSCALL_DEFINE1(pb_set_plan, pb_plan_t __user*, plan) {
pb_plan_t _plan;
struct task_struct* task;
struct rq* rq;
struct pb_rq* pb_rq;
size_t expected;
uint64_t* inst_cnt;
unsigned long copied;
unsigned int i;
int res;
copied = copy_from_user(&_plan, plan, sizeof(pb_plan_t));
if (copied != 0) {
return -EFAULT;
}
expected = _plan.num_tasks * sizeof(*_plan.inst_cnt);
// kcalloc checks num_tasks * sizeof(...) for overflow and fails instead of under-allocating
inst_cnt = kcalloc(_plan.num_tasks, sizeof(*_plan.inst_cnt), GFP_KERNEL);
if (inst_cnt == NULL) {
return -ENOMEM;
}
copied = copy_from_user(inst_cnt, _plan.inst_cnt, expected);
if (copied != 0) {
printk(KERN_WARNING "pb_set_plan: copy_from_user for inst_cnt failed\n");
kfree(inst_cnt); // don't leak the plan buffer on the error path
return -EFAULT;
}
task = find_task_by_vpid(_plan.pid);
if (!task) {
kfree(inst_cnt); // don't leak the plan buffer on the error path
return -ESRCH;
}
rq = this_rq();
task->sched_class = &pb_sched_class;
pb_rq = &rq->pb;
set_pb_plan_size(pb_rq, _plan.num_tasks);
for (i = 0; i < _plan.num_tasks; i++ ) {
set_pb_plan_entry(
pb_rq,
i,
inst_cnt[i],
i,
task
);
}
kfree(inst_cnt);
res = pb_submit_plan(rq);
if (res == -1) {
printk("pb_submit_plan == -1\n");
return res;
}
return 0;
}
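/*
* Minimal userspace sketch of how this syscall might be invoked. This is an
* illustrative assumption, not part of the kernel build: the syscall number
* __NR_pb_set_plan and the exact field types of struct pb_plan depend on how
* the syscall is wired up and where the struct is actually defined.
*
*   #include <stdint.h>
*   #include <unistd.h>
*   #include <sys/syscall.h>
*
*   struct pb_plan { pid_t pid; uint64_t num_tasks; uint64_t *inst_cnt; };
*
*   uint64_t counts[2] = { 1000000, 2000000 };
*   struct pb_plan plan = { .pid = getpid(), .num_tasks = 2, .inst_cnt = counts };
*   long ret = syscall(__NR_pb_set_plan, &plan);  // 0 on success, negative errno on failure
*/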
/*
* It is possible to submit a plan only if no plan is currently being executed
*/
int pb_submit_plan(struct rq *rq)
{
/*
* Must be volatile to ensure correct initialization order
*/
volatile struct pb_rq * pb = (volatile struct pb_rq*)(&(rq->pb));
int perf_init_res;
int i = 0;
if (pb->mode != PB_DISABLED_MODE) {
return -1;
}
perf_init_res = init_perf_event(&pb->plan[i], &pb->pevent);
if(perf_init_res < 0) {
//initialization error detection/handling could happen here
printk(KERN_WARNING "PB INIT,%u: FAILED OPEN PERF EVENT\n", i);
} else {
printk(KERN_DEBUG "PB INIT,%u\n", i);
}
pb->c_entry = 0;
pb->count_pb_cycles = 0;
pb->count_admin_cycles = 0;
pb->total_instr = 0;
pb->is_initialized = 1; // must be initialized last
resched_curr(rq); // reschedule ASAP
return 0;
}
EXPORT_SYMBOL(pb_submit_plan);
/*
* Kelvin's Testcodes
*/
void set_pb_plan_size(struct pb_rq *pb_rq, unsigned int size)
{
pb_rq->plan = kcalloc(size, sizeof(struct plan_entry), GFP_KERNEL);
pb_rq->size = pb_rq->plan ? size : 0; // don't advertise entries we failed to allocate
}
EXPORT_SYMBOL(set_pb_plan_size);
/*
* Kelvin's Testcode
*/
// insert an entry into the pb plan (analogous to enqueue)
void set_pb_plan_entry(struct pb_rq *pb_rq, unsigned int i, u64 n_instr, u64 task_id, struct task_struct *task_struct)
{
pb_rq->plan[i].n_instr = n_instr;
pb_rq->plan[i].task_id = task_id;
pb_rq->plan[i].task_struct = task_struct;
}
EXPORT_SYMBOL(set_pb_plan_entry);
// called by core.c sched_init
void init_pb_rq(struct pb_rq *pb_rq)
{
pb_rq->n_pb_cycles = 100;
pb_rq->count_pb_cycles = 0;
pb_rq->n_admin_cycles = 20;
pb_rq->count_admin_cycles = 0;
pb_rq->mode = PB_DISABLED_MODE;
pb_rq->c_entry = 0;
pb_rq->size = 0;
pb_rq->pevent = NULL;
pb_rq->is_initialized = 0;
pb_rq->waiting_on_io = 0;
}
EXPORT_SYMBOL(init_pb_rq);
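/*
* Note on the fields initialized above (inferred from their use in
* task_tick_pb() and from determine_next_mode_pb(), which is defined
* elsewhere): the scheduler appears to alternate between plan execution
* (PB_EXEC_MODE, budgeted by n_pb_cycles ticks) and an administrative window
* (PB_ADMIN_MODE, budgeted by n_admin_cycles ticks), with the count_* fields
* tracking the ticks spent in the current mode.
*/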
// IO has finished, we can schedule the next task
static void enqueue_task_pb(struct rq *rq, struct task_struct *p, int flags)
{
struct pb_rq *pb = &(rq->pb);
pb->waiting_on_io = 0;
}
// the task has started IO; treat the current plan entry as finished
static void dequeue_task_pb(struct rq *rq, struct task_struct *p, int flags)
{
struct pb_rq *pb = &(rq->pb);
unsigned int c_entry_curr;
u64 perf_counter;
u64 counter_diff;
u64 read_error;
bool premature_finish = false;
if (pb->waiting_on_io) {
return;
}
pb->waiting_on_io = 1;
c_entry_curr = pb->c_entry;
if (!pb->pevent) {
printk(KERN_WARNING "PERF EVENT IS NULL\n");
}
// printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
read_error = get_perf_counter(pb->pevent, &perf_counter);
if (read_error) {
printk(KERN_WARNING "FETCHING PERFORMANCE COUNTER IN PB SCHEDULER FAILED WITH %llu\n", read_error);
}
counter_diff = perf_counter - pb->total_instr;
pb->plan[c_entry_curr].n_instr_counted = counter_diff;
pb->total_instr = perf_counter;
if (counter_diff < pb->plan[c_entry_curr].n_instr) {
u64 under_time = pb->plan[c_entry_curr].n_instr - counter_diff;
printk(KERN_WARNING "PB TASK %llu RAN %llu INSTRUCTIONS TOO SHORT\n", pb->plan[pb->c_entry].task_id, under_time);
} else if (counter_diff > pb->plan[c_entry_curr].n_instr) {
//TODO: Check whether an overflow actually occurred and whether another calculation is necessary
// (setting a flag in the perf overflow_handler could be a solution)
u64 over_time = counter_diff - pb->plan[c_entry_curr].n_instr;
printk(KERN_WARNING "PB TASK %llu RAN %llu INSTRUCTIONS TOO LONG\n", pb->plan[pb->c_entry].task_id, over_time);
}
pb->c_entry++;
/**
* Don't schedule a task that is dead (e.g. the plan was incorrect and the
* program finished earlier than expected).
* TODO: if we have multiple task structs, try the next plan entry
*/
if (pb->c_entry < pb->size && pb->plan[pb->c_entry].task_struct->state == TASK_DEAD) {
premature_finish = true;
}
if (pb->c_entry >= pb->size || premature_finish) {
if (premature_finish) {
printk(KERN_WARNING "PLAN TERMINATED PREMATURELY\n");
} else {
printk(KERN_WARNING "PLAN DONE\n");
}
// set back to cfs for completion of task
pb->is_initialized = 0;
pb->plan[0].task_struct->sched_class = &fair_sched_class;
resched_curr(rq);
}
}
static void yield_task_pb(struct rq *rq)
{
// NOP
}
static void check_preempt_curr_pb(struct rq *rq, struct task_struct *p, int flags)
{
// NOP
}
static struct task_struct * pick_next_task_pb(struct rq *rq,
struct task_struct *prev, struct rq_flags *rf)
{
// contains task to be executed
struct task_struct *picked = NULL;
enum pb_mode current_mode, next_mode;
struct pb_rq *pb = &(rq->pb);
current_mode = pb->mode;
next_mode = determine_next_mode_pb(rq);
pb->mode = next_mode;
if (next_mode == PB_DISABLED_MODE && current_mode == PB_EXEC_MODE) {
// After the plan is done, do the cleanup
terminate_perf_event(pb->pevent);
pb->pevent = NULL;
// TODO: Check if we have to free the memory or if perf takes care of it
// see 'perf_event_release_kernel(struct perf_event *event)' in core.c
}
/**
* This handles the case where the program to be run is dead before the
* pb scheduler starts executing
*/
if (current_mode == PB_DISABLED_MODE && current_mode != next_mode) {
if (pb->c_entry < pb->size && pb->plan[pb->c_entry].task_struct->state == TASK_DEAD) {
pb->mode = PB_DISABLED_MODE;
next_mode = PB_DISABLED_MODE;
picked = NULL;
pb->is_initialized = 0;
printk(KERN_WARNING "PLAN TERMINATED PREMATURELY \n");
}
}
if (current_mode != next_mode) {
printk("SWITCHING MODES\n");
pb->count_admin_cycles = 0;
pb->count_pb_cycles = 0;
// Push last non-plan task back in its corresponding runqueue
if (next_mode == PB_EXEC_MODE) {
// Necessary to manage the preempted task
printk("PUT OLD TASK BACK IN RQ\n");
put_prev_task(rq, prev);
}
}
// EXEC Mode is next, so we return our next task to be executed
if (next_mode == PB_EXEC_MODE) {
// printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
if(current_mode == PB_ADMIN_MODE) {
printk(KERN_DEBUG "PB ADMIN,STOP,%u,%llu\n", pb->c_entry, sched_clock());
} else if (current_mode == PB_DISABLED_MODE) {
printk("Switching from disabled to EXEC\n");
}
picked = pb->plan[pb->c_entry].task_struct;
}
return picked;
}
static void put_prev_task_pb(struct rq *rq, struct task_struct *p)
{
// NOP
}
static void set_curr_task_pb(struct rq *rq)
{
// NOP
}
/*
* TODO: Make sure this doesn't interrupt determine_next_mode_pb() and pick_next_task_pb()
*/
static void task_tick_pb(struct rq *rq, struct task_struct *p, int queued)
{
struct pb_rq *pb = &(rq->pb);
if (pb->mode != PB_EXEC_MODE) {
return;
}
pb->count_pb_cycles++;
// printk("TICK #%d\n",pb->count_pb_cycles);
if (determine_next_mode_pb(rq) != PB_EXEC_MODE && pb->mode == PB_EXEC_MODE) {
//printk("Reschudling in task_tick_pb");
resched_curr(rq);
}
}
static unsigned int get_rr_interval_pb(struct rq *rq, struct task_struct *task)
{
return 0;
}
static void prio_changed_pb(struct rq *rq, struct task_struct *p, int oldprio)
{
// NOP
}
static void switched_to_pb(struct rq *rq, struct task_struct *p)
{
// NOP
}
static void update_curr_pb(struct rq *rq)
{
// NOP
}
const struct sched_class pb_sched_class = {
.next = &dl_sched_class,
.enqueue_task = enqueue_task_pb,
.dequeue_task = dequeue_task_pb,
.yield_task = yield_task_pb,
.check_preempt_curr = check_preempt_curr_pb, // NOP
.pick_next_task = pick_next_task_pb,
.put_prev_task = put_prev_task_pb, // NOP
.set_curr_task = set_curr_task_pb, // NOP
.task_tick = task_tick_pb,
.get_rr_interval = get_rr_interval_pb, // NOP (return 0)
.prio_changed = prio_changed_pb, // NOP
.switched_to = switched_to_pb, // NOP
.update_curr = update_curr_pb, // NOP
};
EXPORT_SYMBOL(pb_sched_class);
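/*
* The pb class chains to the deadline class via .next, i.e. in the patched
* scheduler class list pb is ordered directly above dl (assuming the core
* walks the classes through the usual .next links). Most callbacks are NOPs;
* the scheduling decisions happen in pick_next_task_pb(), task_tick_pb() and
* dequeue_task_pb().
*/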
/***********************************************************************
* /proc filesystem entry
* use 'cat /proc/pbsched' to read
**********************************************************************/
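/*
* Example output (illustrative values only):
*
*   cpuid mode curr_entry curr_pb_cycles curr_admin_cycles
*   cpu0 E 2 37 0
*
*   task_id n_instr n_instr_counted
*   0 1000000 1000173
*   1 2000000 queued
*
* The seq_printf() format strings below are the authoritative definition.
*/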
static int show_pbsched(struct seq_file *seq, void *v)
{
int cpu;
if (v == (void *)1) {
seq_printf(seq, "cpuid mode curr_entry curr_pb_cycles curr_admin_cycles\n");
} else {
char mode;
struct rq *rq;
struct pb_rq *pb;
int i;
struct plan_entry *plan;
cpu = (unsigned long)(v - 2);
rq = cpu_rq(cpu);
pb = &(rq->pb);
switch(pb->mode) {
case PB_DISABLED_MODE: mode='D'; break;
case PB_EXEC_MODE: mode='E'; break;
case PB_ADMIN_MODE: mode='A'; break;
default: mode='U'; break;
}
/* runqueue-specific stats */
seq_printf(seq,
"cpu%d %c %u %llu %llu\n",
cpu,
mode,
pb->c_entry,
pb->count_pb_cycles,
pb->count_admin_cycles
);
/* plan stats */
if(pb->size){
seq_printf(seq, "\ntask_id n_instr n_instr_counted\n");
plan = pb->plan;
for (i=0; i < pb->size; i++){
// only print completed entries; once the plan has finished, is_initialized is 0 and the last entry can be printed as well
if(i<pb->c_entry || !pb->is_initialized){
seq_printf(seq,
"%llu %llu %llu\n",
plan[i].task_id,
plan[i].n_instr,
plan[i].n_instr_counted
);
}else{
seq_printf(seq,
"%llu %llu queued\n",
plan[i].task_id,
plan[i].n_instr
);
}
}
}
}
return 0;
}
/*
* This iterator needs some explanation.
* It returns 1 for the header position.
* This means 2 is cpu 0.
* In a hotplugged system some cpus, including cpu 0, may be missing so we have
* to use cpumask_* to iterate over the cpus.
*/
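/*
* In short: an *offset of 0 yields the header token (void *)1; the following
* positions walk cpu_online_mask and encode cpu N as (void *)(N + 2), which
* show_pbsched() decodes again by subtracting 2.
*/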
static void *pbsched_start(struct seq_file *file, loff_t *offset)
{
unsigned long n = *offset;
if (n == 0)
return (void *) 1;
n--;
if (n > 0)
n = cpumask_next(n - 1, cpu_online_mask);
else
n = cpumask_first(cpu_online_mask);
*offset = n + 1;
if (n < nr_cpu_ids)
return (void *)(unsigned long)(n + 2);
return NULL;
}
static void *pbsched_next(struct seq_file *file, void *data, loff_t *offset)
{
(*offset)++;
return pbsched_start(file, offset);
}
static void pbsched_stop(struct seq_file *file, void *data)
{
// NOP
}
static const struct seq_operations pbsched_sops = {
.start = pbsched_start,
.next = pbsched_next,
.stop = pbsched_stop,
.show = show_pbsched,
};
static int pbsched_open(struct inode *inode, struct file *file)
{
return seq_open(file, &pbsched_sops);
}
static const struct file_operations proc_pbsched_operations = {
.open = pbsched_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
static int __init proc_pbsched_init(void)
{
proc_create("pbsched", 0, NULL, &proc_pbsched_operations);
return 0;
}
subsys_initcall(proc_pbsched_init);