Skip to content
Snippets Groups Projects
Commit 91b0d99c authored by FKHals's avatar FKHals
Browse files

Move perf related code to perf_error_detection

so that it can be used by behave.c and pb.c and to reduce duplicated
code.
parent 2b85ac6d
No related branches found
No related tags found
No related merge requests found
......@@ -16,9 +16,6 @@ int is_initialized = 0;
* Based on "libpbm" (see header file for more info)
*/
#define TASK_BUF_SIZE 4096
#define PROC_BUF_SIZE 512
/* -----------------------------------------------------------------------------
* PID -> PBM* Hashmap
* Only insertion is needed currently (since memory efficiency is not our
......@@ -37,6 +34,15 @@ typedef struct {
shared_pbm_int_map _index_2_pbm;
shared_pbm_int_map* index_2_pbm = &_index_2_pbm;
/**
* Why not use a generic map data structure using void*, you ask?
*
* Because we do not only need _access_ to the data (PBM) but also need _memory-management_ for it.
* We can not use the generic kernel memory management (e.g. kalloc) since that might need certain
* tasks to be scheduled which in turn could lead to deadlocks.
* Therefore memory management/storage and access is implemented by this map in a simple and
* effective (but certainly inefficient) way.
*/
static void init_pbm_int_map(void) {
int i;
......@@ -259,9 +265,8 @@ void pbm_uninit(void) {
/* Insert a task node into the PBM of given type (only COMP for now) */
int pbm_task_start(PBM* pbm, uint8_t type, struct task_struct* proc) {
pbm_NODE* node;
unsigned long irq_flags;
struct perf_event *pevent;
int error;
// only continue for COMP-nodes since other types are not implemented yet
if(!(pbm && type == COMP)) {
......@@ -284,63 +289,19 @@ int pbm_task_start(PBM* pbm, uint8_t type, struct task_struct* proc) {
// finally the new node becomes the last inserted one
pbm->last = node;
/*
* Configure the performance counter
*/
memset(&(pbm->pea), 0, sizeof(struct perf_event_attr));
pbm->pea.type = PERF_TYPE_HARDWARE;
pbm->pea.size = sizeof(struct perf_event_attr);
pbm->pea.config = PERF_COUNT_HW_INSTRUCTIONS;
pbm->pea.sample_period = 400800;
pbm->pea.disabled = 0; // start the counter as soon as we're in userland
pbm->pea.pinned = 1;
pbm->pea.exclude_kernel = 1;
pbm->pea.exclude_hv = 1;
pbm->pea.wakeup_events = 1; // Not needed on 3.2?
printk(KERN_WARNING "TASK: %u, CPU: %i, PTR: %llu\n", proc->pid, smp_processor_id(), (u64)proc);
/*
* Try to enable the performance counter
*/
// disable irqs to make 'perf_event_ctx_activate' in 'kernel/events/core.c' happy
local_irq_save(irq_flags);
pevent = perf_event_create_kernel_counter(
&(pbm->pea),
-1, // measure on all cores (in case the process runs on different ones)
proc, // exclusively measure the forked process (BEWARE: a process can only measure itself!)
NULL, //&overflow_handler,
NULL
);
local_irq_restore(irq_flags);
if (IS_ERR(pevent)) {
printk(KERN_WARNING "TASK: %u | PB ERROR INITIALISING PERF EVENT: %li\n", proc->pid, PTR_ERR(pevent));
// cast to prevent compiler warnings
if (-EOPNOTSUPP == (int64_t)pevent) {
printk(KERN_WARNING
"TASK: %u | EOPNOTSUPP (-95): The hardware does not support certain attributes! "
"E.g. perf_event_attr.precise_ip > 0 may not be supported.\n", proc->pid);
}
if (-EINVAL == (int64_t)pevent) {
printk(KERN_WARNING
"TASK: %u | EINVAL (-22): Invalid argument!"
"E.g. CPU with given index does not exist.\n", proc->pid);
}
return 0;
}
if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
printk(KERN_WARNING "TASK: %u | Event is inactive", proc->pid);
error = init_perf_event(proc, 400800, &pevent);
if (error) {
printk(KERN_WARNING "TASK: %u | Counting NOT started due to error\n", proc->pid);
return -1;
}
pbm->pevent = pevent;
pbm->pevent = pevent;
printk(KERN_WARNING "TASK: %u | Counting started...\n", proc->pid);
return 2;
return 0;
}
/* Conclude the last task of the given PBM */
int pbm_task_end(PBM* pbm) {
unsigned long irq_flags;
int read_error;
struct perf_event *pevent;
u64 perf_counter;
......@@ -365,26 +326,14 @@ int pbm_task_end(PBM* pbm) {
printk(KERN_WARNING "TASK: %u | PEVENT INVALID\n", current->pid);
return 0;
}
read_error = perf_event_read_local(pevent, &perf_counter);
read_error = get_perf_counter(pevent, &perf_counter);
if (read_error) {
printk(KERN_WARNING "TASK: %u | FETCHING PERFORMANCE COUNTER IN stop_counting FAILED WITH %i\n", current->pid, read_error);
if (-EINVAL == (int64_t)read_error) {
// If this is a per-task event, it must be for current.
// If this is a per-CPU event, it must be for this CPU.
printk(KERN_WARNING
"TASK: %u | EINVAL (-22): Invalid argument! "
"E.g. trying to measure a different task than itself.\n", current->pid);
printk(KERN_WARNING "TASK: %u | ...something went wront while stopping counting\n", current->pid);
return -1;
}
} else {
pbm->last->count = perf_counter;
}
// disable performance counter while preventing context switching
local_irq_save(irq_flags);
perf_event_disable(pevent);
perf_event_release_kernel(pevent);
local_irq_restore(irq_flags);
pevent = NULL;
terminate_perf_event(pevent);
printk(KERN_WARNING "TASK: %u | ...Counting stopped: %llu instr.\n", current->pid, perf_counter);
......@@ -496,9 +445,6 @@ int pbm_fork(struct task_struct* proc, pid_t parent_pid, pbm_NODE* fork_node) {
return 0;
}
// general configurations for perf_event interface
child_pbm->pea.size = sizeof(struct perf_event_attr);
child_pbm->root = NULL;
child_pbm->last = NULL;
child_pbm->children = NULL;
......@@ -738,19 +684,6 @@ void _pbm_unvisit_node(pbm_NODE* node) {
/******************************************************************************/
/*
 * handle the perf overflow event -> task needed more instructions than planned
*/
/*
static void overflow_handler(
struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
printk(KERN_WARNING "PB TASK RAN TOO LONG\n");
}
*/
/*
 * Decide whether the given task is the root process of a plan run.
 * A task counts as the root process iff its command name is "mpirun".
 */
int is_root_process(struct task_struct* p) {
    int comm_is_mpirun = (strcmp(p->comm, "mpirun") == 0);
    return comm_is_mpirun;
}
......
#ifndef PLAN_BASED_LINUX_SCHEDULER_BEHAVE_H
#define PLAN_BASED_LINUX_SCHEDULER_BEHAVE_H
#include <linux/perf_event.h>
#include "sched/perf_error_detection.h"
/******************************************************************************
* Based on "libpbm":
......@@ -73,7 +73,6 @@ typedef struct _PBM
/*
* Performance measurement and recording
*/
struct perf_event_attr pea; // config info for the perf_event interface
struct perf_event* pevent;
} PBM;
......
......@@ -5,6 +5,7 @@
#include <linux/spinlock.h>
#include <linux/perf_event.h>
#include <linux/kthread.h>
#include "sched.h"
typedef struct pb_plan pb_plan_t;
......@@ -93,7 +94,7 @@ int pb_submit_plan(struct rq *rq)
return -1;
}
perf_init_res = init_perf_event(&pb->plan[i], &pb->pevent);
perf_init_res = init_perf_event(pb->plan[i].task_struct, pb->plan[i].n_instr, &(pb->pevent));
if(perf_init_res < 0) {
//initialization error detection/handling could happen here
printk(KERN_WARNING "PB INIT,%u: FAILED OPEN PERF EVENT\n", i);
......
......@@ -12,28 +12,58 @@
/*
* initialize perf event for new task
*/
/*
 * Initialize and enable a hardware instruction counter for a task.
 *
 * proc:      task to be measured (BEWARE: a process can only measure itself!)
 * num_instr: sample period in instructions
 * pevent:    out parameter; on success receives the created perf event
 *
 * Returns 0 on success, -1 if the perf event could not be created.
 */
int init_perf_event(struct task_struct* proc, u64 num_instr,
                    struct perf_event** pevent)
{
    unsigned long irq_flags;
    struct perf_event_attr pea; // config info for the perf_event interface

    /*
     * Configure the performance counter
     */
    memset(&pea, 0, sizeof(struct perf_event_attr));
    pea.type = PERF_TYPE_HARDWARE;
    pea.size = sizeof(struct perf_event_attr);
    pea.config = PERF_COUNT_HW_INSTRUCTIONS;
    pea.sample_period = num_instr;
    pea.disabled = 0; // start the counter as soon as we're in userland
    pea.exclude_kernel = 1; // only count user space
    pea.exclude_hv = 1; // excluding events that happen in the hypervisor

    printk(KERN_WARNING "TASK: %u, CPU: %i, PTR: %llu\n", proc->pid, smp_processor_id(), (u64)proc);

    /*
     * Try to enable the performance counter
     */
    // disable irqs to make 'perf_event_ctx_activate' in 'kernel/events/core.c' happy
    local_irq_save(irq_flags);
    *pevent = perf_event_create_kernel_counter(
        &pea,
        -1, // measure on all cores (in case the process runs on different ones)
        proc, // exclusively measure the forked process (BEWARE: a process can only measure itself!)
        NULL, // overflow_handler disabled, because we count within the scheduler
        NULL
    );
    local_irq_restore(irq_flags);

    if (IS_ERR(*pevent)) {
        // BUG FIX: PTR_ERR must decode the error pointer itself (*pevent),
        // not the address of the out parameter (pevent)
        printk(KERN_WARNING "TASK: %u | PB ERROR INITIALISING PERF EVENT: %li\n", proc->pid, PTR_ERR(*pevent));
        // cast to prevent compiler warnings
        if (-EOPNOTSUPP == (int64_t)*pevent) {
            printk(KERN_WARNING
                "TASK: %u | EOPNOTSUPP (-95): The hardware does not support certain attributes! "
                "E.g. perf_event_attr.precise_ip > 0 may not be supported.\n", proc->pid);
        }
        if (-EINVAL == (int64_t)*pevent) {
            printk(KERN_WARNING
                "TASK: %u | EINVAL (-22): Invalid argument!"
                "E.g. CPU with given index does not exist.\n", proc->pid);
        }
        return -1;
    }
    // not an error: the event may simply not be scheduled in yet
    if ((*pevent)->state != PERF_EVENT_STATE_ACTIVE) {
        printk(KERN_WARNING "TASK: %u | Event is inactive", proc->pid);
    }
    return 0;
}
......@@ -44,7 +74,18 @@ int init_perf_event(struct plan_entry *plan_entry, struct perf_event **pevent){
*/
/*
 * Read the current value of the performance counter into *perf_counter.
 *
 * Returns 0 on success, or the error code reported by
 * perf_event_read_local (logged with a diagnostic for -EINVAL).
 * NOTE(review): the return type is u64 although the underlying call returns a
 * (possibly negative) int — callers compare against 0, but the declared type
 * should probably be int; confirm before changing the header.
 */
u64 get_perf_counter(struct perf_event *pevent, u64 *perf_counter)
{
    // BUG FIX: removed a leftover pre-image line that returned immediately,
    // which made all of the error reporting below unreachable dead code
    int read_error = perf_event_read_local(pevent, perf_counter);
    if (read_error) {
        printk(KERN_WARNING "TASK: %u | FETCHING PERFORMANCE COUNTER IN stop_counting FAILED WITH %i\n", current->pid, read_error);
        if (-EINVAL == (int64_t)read_error) {
            // If this is a per-task event, it must be for current.
            // If this is a per-CPU event, it must be for this CPU.
            printk(KERN_WARNING
                "TASK: %u | EINVAL (-22): Invalid argument! "
                "E.g. trying to measure a different task than itself.\n", current->pid);
        }
    }
    return read_error;
}
u64 terminate_perf_event(struct perf_event *pevent)
......@@ -52,40 +93,12 @@ u64 terminate_perf_event(struct perf_event *pevent)
u64 result;
unsigned long irq_flags;
// disable performance counter while preventing context switching
local_irq_save(irq_flags);
perf_event_disable(pevent);
result = perf_event_release_kernel(pevent);
local_irq_restore(irq_flags);
pevent = NULL;
return result;
}
// /*
// * handle the perf overflow event -> task needed more instructions than planned
// */
// void overflow_handler(
// struct perf_event *event,
// struct perf_sample_data *data,
// struct pt_regs *regs)
// {
// struct pb_rq *pb_rq;
// int cpu;
// cpu = smp_processor_id();
// pb_rq = &cpu_rq(cpu)->pb;
// if(pb_rq->is_initialized)
// printk(KERN_WARNING "OH: PB TASK %llu RAN TOO LONG\n",pb_rq->plan[pb_rq->c_entry].task_id);
// else
// printk(KERN_WARNING "OH: PB TASK RAN TOO LONG\n");
// }
/*
 * Thin convenience wrapper around perf_event_create_kernel_counter that
 * installs no overflow handler, because counting is done within the scheduler.
 */
struct perf_event* perf_event_create(struct perf_event_attr *hw_event_uptr, int cpu, struct task_struct *task_struct)
{
    struct perf_event *event;

    event = perf_event_create_kernel_counter(hw_event_uptr,
                                             cpu,
                                             task_struct,
                                             NULL, /* &overflow_handler disabled, because we count within the scheduler */
                                             NULL);
    return event;
}
......@@ -2,19 +2,24 @@
#define __PERF_ERROR_DETECTION_H
#include <linux/perf_event.h>
#include "sched.h"
int init_perf_event(struct plan_entry*, struct perf_event**);
#define TASK_BUF_SIZE 4096
#define PROC_BUF_SIZE 512
int init_perf_event(struct task_struct* proc, u64 num_instr, struct perf_event **pevent);
u64 get_perf_counter(struct perf_event *pevent, u64 *perf_counter);
u64 terminate_perf_event(struct perf_event *pevent);
void overflow_handler(
struct perf_event *,
struct perf_sample_data *,
struct pt_regs *regs);
struct perf_event *perf_event_create(struct perf_event_attr *hw_event_uptr, int cpu, struct task_struct *task_struct);
/*
 * Fixed-capacity pid -> perf_event* map backed by two parallel arrays
 * (capacity PROC_BUF_SIZE). Entries are appended; lookup is linear.
 */
typedef struct {
// primitive int -> int and int -> pevent* hashmap combination
// the two arrays store a pid and the corresponding perf_event* at the same
// index (an earlier comment said PBM*, but the value array holds perf_event*)
pid_t index_to_pid[PROC_BUF_SIZE];
struct perf_event* index_to_pevent[PROC_BUF_SIZE];
// index of currently last process in the arrays
size_t last_proc_index;
//pthread_mutex_t lock; // locking intentionally left out for now — TODO confirm single-writer assumption
} pid_2_pevent_map;
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment