Commit 486cc8f3 authored by alexander06

Expand cleanup

parent df342fdf
1 merge request: !3 Expand cleanup
out = clean_data
clean:
	@$(RM) -rf ./$(out)/*
%% Cell type:markdown id: tags:
# Cleanup
This notebook is dedicated to data cleanup. See the corresponding markdown headers for more information on each step.
%% Cell type:code id: tags:
``` python
# Imports and constants
import pandas, os, re, requests, json, math, numbers
from lib.scraping import get_course, get_complete_catalogue_for
import lib.constants as CONSTANTS
```
%% Cell type:code id: tags:
``` python
def normalize_type(type: str) -> str:
    normalized_identifiers = {
        "V": "VL",
        "Vl": "VL",
        "VL+Ü": "VL",
        "VLeKo": "VL",
        "SLeKo": "S"
    }
    # Using get prevents KeyErrors for types that are already correct
    return normalized_identifiers.get(type, type)

def make_plural(category: str) -> str:
    plural_list = {
        "Vorlesung": "Vorlesungen",
        "Übung": "Übungen",
        "Seminar": "Seminare"
    }
    return plural_list.get(category, category)

def request_questions():
    res = requests.get("http://localhost:50000/v1/questions")
    return json.loads(res.text)

def choose(a, b):
    if isinstance(a, str):
        return b if a == "" else a
    elif isinstance(a, numbers.Number):
        return b if math.isnan(a) else a
    else:
        return b if a is None else a
```
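%% Cell type:markdown id: tags:
A quick, hypothetical illustration of `choose` (these values are made up and not part of the pipeline): the first argument is kept whenever it carries information, and the second one is used for empty strings, `NaN`, or `None`.
%% Cell type:code id: tags:
``` python
# Hypothetical values, for illustration only
assert choose("stimme zu", "") == "stimme zu"      # non-empty text wins
assert choose("", "stimme zu") == "stimme zu"      # empty text falls back to b
assert choose(float("nan"), 3) == 3                # NaN falls back to b
assert choose(2, 3) == 2                           # a regular number wins
assert choose(None, "fallback") == "fallback"      # None falls back to b
```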
%% Cell type:markdown id: tags:
## Generate Paths
Walks the `raw_data` folder and reads every `daten.csv` it finds into a pandas dataframe, keeping the containing directory next to each frame.
%% Cell type:code id: tags:
``` python
paths = []
for (root, dirs, files) in os.walk(CONSTANTS.RAW_DATA_PATH):
    if 'daten.csv' in files:
        paths.append((root, pandas.read_csv(os.path.join(root, 'daten.csv'), sep=";", encoding="ISO-8859-1")))
```
%% Cell type:markdown id: tags:
## Remove unnecessary columns
Some columns (like text answers) are not needed for the pipeline and get excluded here.
%% Cell type:code id: tags:
``` python
to_remove = ["language", "Zulassungsbeschr.", "Bearbeitungsstand", "zeit", "Ausfülldauer", "Ausfülldauer (s)"]
multiples = ["Was hat Ihnen an dieser Lehrveranstaltung gut gefallen?", "Was könnte der Dozent/die Dozentin an dieser Lehrveranstaltung verbessern?"]
# Also remove the numbered duplicates of the free-text questions (suffixes ".1" to ".7")
for i in range(1, 8):
    for j in multiples:
        to_remove.append(j + "." + str(i))
to_remove.extend(multiples)

for (path, df) in paths:
    to_drop = [column for column in to_remove if column in df]
    df.drop(columns=to_drop, inplace=True)
```
%% Cell type:markdown id: tags:
## Normalize Course Type
The pipeline expects four questionnaire types: `VL` for lectures, `S` for seminars, `Ü` for tutorials and `LeKo` for teaching competence. The provided data uses several notations for these.
- Lectures commonly appear as `VL`, `Vl` or `V`, and in a few cases even as `VL+Ü`. All of these are unified to `VL`.
- Teaching competence is categorized in several ways. Older data may use `VLeKo` or `SLeKo` for lectures or seminars evaluated for teaching competence. These are mapped back to `VL` and `S`, so the original type of the course is preserved in the course-type column.
%% Cell type:code id: tags:
``` python
for (path, df) in paths:
    df["Veranstaltung-Typ"] = df["Veranstaltung-Typ"].map(normalize_type)
```
%% Cell type:markdown id: tags:
## Normalize Course Category
The pipeline handles three course categories: lectures (`Vorlesungen`), tutorials (`Übungen`) and seminars (`Seminare`). The provided data contains a few more.
- Tutorials are mostly recorded with the singular `Übung`. This is converted to the plural form to keep it in line with the other categories, which are all plural.
- The data has a category called `Projekte / Praktika`, which does not have a questionnaire type of its own, so such a course is assigned the category belonging to its questionnaire type.
### Problems:
- If a course has `LeKo` as both questionnaire type and course category, it cannot be handled here. Possible solution: the VV web scraper.
- If a course has `Prak` as questionnaire type and `Projekte / Praktika` as course category, it cannot be categorized. Possible solution: always count it as a seminar.
%% Cell type:code id: tags:
``` python
normalized_categories = {
    "VL": "Vorlesungen",
    "Ü": "Übungen",
    "S": "Seminare"
}
for (path, df) in paths:
    # Iterates over dataframe rows
    for i in df.index:
        value = df.loc[i]
        # Already normalized courses can be skipped
        if value.loc["Veranstaltung-Kategorie"] in ["Vorlesungen", "Übungen", "Seminare"]: continue
        evaluation_type = value.loc["Veranstaltung-Typ"]
        if evaluation_type in normalized_categories:
            df.at[i, "Veranstaltung-Kategorie"] = normalized_categories[evaluation_type]
        else:
            # Fall back to pluralizing the existing category
            df.at[i, "Veranstaltung-Kategorie"] = make_plural(value.loc["Veranstaltung-Kategorie"])
```
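%% Cell type:markdown id: tags:
To make the two branches concrete (hypothetical values, not real data): a row whose questionnaire type is `Ü` gets its category from `normalized_categories`, while a row with an unknown questionnaire type such as `Prak` only has its existing category pluralized.
%% Cell type:code id: tags:
``` python
# Hypothetical examples of both branches
assert normalized_categories["Ü"] == "Übungen"                       # questionnaire type decides the category
assert make_plural("Übung") == "Übungen"                             # fallback: pluralize the existing category
assert make_plural("Projekte / Praktika") == "Projekte / Praktika"   # unknown categories pass through unchanged
```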
%% Cell type:markdown id: tags:
## Fix Course Numbers
Course numbers sometimes contain a trailing space, which breaks comparisons with the same number written without one. This step strips leading and trailing whitespace.
%% Cell type:code id: tags:
``` python
for (path, df) in paths:
    df["Veranstaltung-Nr."] = df["Veranstaltung-Nr."].map(lambda x: str(x).strip())
```
%% Cell type:markdown id: tags:
## Merge questions
All questions from all questionnaires end up in one header row, so some questions appear more than once. Because column headers must be unique, pandas appends a numeric suffix (e.g. `.1`) to the duplicates. A duplicated question is, however, very unlikely to be answered twice in the same row, so the duplicated columns can be merged into a single one, where filled cells take precedence over empty ones.
%% Cell type:code id: tags:
``` python
end_regex = re.compile(r'\.\d+$')
for (path, df) in paths:
    for column in df:
        # A numeric suffix like ".1" is a sufficient indicator of a duplicated question
        if (match := end_regex.search(column)) is not None:
            clean_name = column[:-(len(match.group()))]
            # Merges the duplicated column into the original and deletes the duplicate
            df[clean_name] = df[clean_name].combine(df[column], choose)
            df.drop(columns=[column], inplace=True)
```
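%% Cell type:markdown id: tags:
A small sketch of the merge on a made-up dataframe (not part of the pipeline): pandas would expose a duplicated question as `Frage` and `Frage.1`, and `combine` with `choose` collapses the two columns into one.
%% Cell type:code id: tags:
``` python
# Hypothetical toy data: the same question appears twice, answered at most once per row
toy = pandas.DataFrame({"Frage": ["stimme zu", ""], "Frage.1": ["", "stimme nicht zu"]})
toy["Frage"] = toy["Frage"].combine(toy["Frage.1"], choose)
toy.drop(columns=["Frage.1"], inplace=True)
# toy["Frage"] now contains ["stimme zu", "stimme nicht zu"]
```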
%% Cell type:markdown id: tags:
## Replace Questions with IDs
Fetches the question IDs from the API and replaces the question texts in the column headers with them, so that questions can be referenced programmatically.
%% Cell type:code id: tags:
``` python
questions = request_questions()
not_indexable = []
for (path, df) in paths:
    for column in df:
        # Strip is needed because some questions have trailing whitespace
        # The regex is documented with examples below
        prepared_string = re.sub(r"(^\.+)|(\.+$)|(\s?\?$)|(^\?)", "", column.strip())
        # In a few files a question appears with two spaces; this collapses them to one
        prepared_string = ' '.join(prepared_string.split())
        matches = [item for item in questions if item["content"].replace("Der/die Lehrende ", "") == prepared_string]
        if len(matches) > 0:
            name = matches[0]["id"]
            df.rename(columns={column: name}, inplace=True)
```
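%% Cell type:markdown id: tags:
To document the regex used above: it strips leading and trailing dots, a leading question mark, and a trailing question mark (optionally preceded by a space); the following line then collapses repeated spaces. A few hypothetical header strings, not taken from the real data:
%% Cell type:code id: tags:
``` python
# Hypothetical header strings, for illustration only
prepare = lambda s: ' '.join(re.sub(r"(^\.+)|(\.+$)|(\s?\?$)|(^\?)", "", s.strip()).split())
assert prepare("Der Stoffumfang ist für mich ...") == "Der Stoffumfang ist für mich"
assert prepare("Hat Ihnen die Lehrveranstaltung gefallen ?") == "Hat Ihnen die Lehrveranstaltung gefallen"
assert prepare("Ich habe  viel dazugelernt. ") == "Ich habe viel dazugelernt"
```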
%% Cell type:markdown id: tags:
## Split into course types
Splits the data into the four questionnaire types and writes each subset to its own file.
%% Cell type:code id: tags:
``` python
for (path, df) in paths:
    information = os.path.basename(path)
    (institute, semester) = information.split("_")
    institute_path = os.path.join(CONSTANTS.CLEAN_DATA_PATH, institute)
    semester_path = os.path.join(institute_path, semester)
    if not os.path.isdir(institute_path):
        os.mkdir(institute_path)
    if not os.path.isdir(semester_path):
        os.mkdir(semester_path)
    questionnaire_types = ["VL", "Ü", "S", "LeKo"]
    # Write one file per questionnaire type
    for q_type in questionnaire_types:
        res = df[df["Veranstaltung-Typ"] == q_type]
        result_path = os.path.join(semester_path, q_type + ".csv")
        res.to_csv(result_path, encoding="utf-8", index=False)
    # Rows whose type is none of the four questionnaire types go into rest.csv
    rest = df[~df["Veranstaltung-Typ"].isin(questionnaire_types)]
    if len(rest.index) > 0:
        rest.to_csv(os.path.join(semester_path, "rest.csv"), encoding="utf-8", index=False)
    # A combined file in case everything needs to be handled at once
    df.to_csv(os.path.join(semester_path, "all.csv"), encoding="utf-8", index=False)
```
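%% Cell type:markdown id: tags:
The resulting layout under `clean_data` (institute and semester names below are placeholders derived from the raw folder name, e.g. `Informatik_SoSe2023`) is one folder per institute and semester containing `VL.csv`, `Ü.csv`, `S.csv` and `LeKo.csv`, an optional `rest.csv` for rows with other types, and `all.csv` with everything combined.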
@@ -99,7 +99,7 @@
 ["Das Thema der Lehrveranstaltung hat mich schon vorher interessiert", "scale_applies_alt"],
 ["Ich verfügte bereits vor dem Lehrveranstaltungsbesuch über umfangreiches Wissen zu den in der LV behandelten Themengebieten", "scale_applies_alt"],
 ["Ich verfügte bereits vor der Lehrveranstaltung über viel Erfahrung mit E-Learning", "scale_applies_alt"],
-["Ich habe an mindestens zwei Drittel der Termine dieser Lehrveranstaltung teilgenommen bzw. die Inhalte Lerneinheiten bearbeitet", "y_n"],
+["Ich habe an mindestens zwei Drittel der Termine dieser Lehrveranstaltung teilgenommen bzw. die Inhalte der Lerneinheiten bearbeitet", "y_n"],
 ["Der Stoffumfang, der in der Veranstaltung behandelt wird, ist für mich", "scale_height"],
 ["Der Schwierigkeitsgrad der Veranstaltung ist für mich", "scale_height"],
 ["Der/die Lehrende hat die gesamte Lehrveranstaltung gut strukturiert und nachvollziehbar gegliedert", "scale_applies_alt"],
@@ -112,10 +112,10 @@
 ["Der/die Lehrende fasst regelmäßig die wichtigsten Inhalte der LV zusammen", "scale_applies_alt"],
 ["Der/die Lehrende stellt zu Beginn eines Termins/einer Lerneinheit den Zusammenhang zu dem Termin/der Lerneinheit davor her", "scale_applies_alt"],
 ["Der/die Lehrende stellt immer wieder Bezüge zu bereits behandelten Lerninhalten her", "scale_applies_alt"],
-["Der/die Lehrende stellt Fragen, die den Studierenden die Gelegenheit geben, zu überprüfen, ob sie den Inhalt verstanden haben", "scale_applies_alt"],
+["Der/die Lehrende stellt Fragen, die den Studierenden die Gelegenheit geben zu überprüfen, ob sie den Inhalt verstanden haben", "scale_applies_alt"],
 ["Der/die Lehrende vergewissert sich, dass die Studierenden zentrale Aspekte verstanden haben, bevor er/sie im Stoff weitergeht", "scale_applies_alt"],
 ["Der/die Lehrende ist bei Fragen gut erreichbar", "scale_applies_alt"],
-["Der/die Lehrende gibt mir ein konstruktives Feedback zu meinen Beitragen/Lösungen", "scale_applies_alt"],
+["Der/die Lehrende gibt mir ein konstruktives Feedback zu meinen Beiträgen/Lösungen", "scale_applies_alt"],
 ["Der/die Lehrende gibt mir zu wenige Rückmeldungen zu meinen Beiträgen/Lösungen", "scale_applies_alt"],
 ["Der/die Lehrende gibt mir konkrete Hinweise zur Verbesserung meiner Leistungen", "scale_applies_alt"],
 ["Der/die Lehrende setzt Modelle, Graphiken oder Schemata so ein, dass sie das Verständnis komplexer Sachverhalte erleichtern", "scale_applies_alt"],
@@ -137,7 +137,7 @@
 ["Der/die Lehrende hat klare Verhaltensregeln für unsere Zusammenarbeit in dieser Lehrveranstaltung kommuniziert", "scale_applies_alt"],
 ["Der/die Lehrende achtet darauf, dass wir die aufgestellten Verhaltensregeln einhalten", "scale_applies_alt"],
 ["Der/die Lehrende bringt wichtige Inhalte gut auf den Punkt", "scale_applies_alt"],
-["Der/die Lehrende nutzt die Verfügbare Zeit effektiv", "scale_applies_alt"],
+["Der/die Lehrende nutzt die verfügbare Zeit effektiv", "scale_applies_alt"],
 ["In dieser Lehrveranstaltung habe ich viel dazugelernt", "scale_applies_alt"],
 ["Insgesamt bin ich mit dieser Lehrveranstaltung zufrieden", "scale_applies_alt"]
 ],
@@ -172,10 +172,10 @@ INSERT INTO "question" ("id", "content", "possibilities", "dimension") VALUES
 ('fb98204a-e7c6-11ee-9404-00620b2c9060', 'Der Umgang mit rein digitalen Kursinhalten im Vergleich zu regelmäßigen wöchentlichen Präsenzveranstaltungen ist', 'scale_difficulty', NULL),
 ('fb984109-e7c6-11ee-9404-00620b2c9060', 'Ich war an mindestens zwei Drittel der Termine dieser Lehrveranstaltung anwesend', 'y_n', NULL),
 ('fb985e1a-e7c6-11ee-9404-00620b2c9060', 'Der/die Lehrende trägt anregend und engagiert vor', 'scale_applies', 'B2'),
-('fb987b71-e7c6-11ee-9404-00620b2c9060', 'Der/die Lehrende stellt Fragen, die den Studierenden die Gelegenheit geben, zu überprüfen, ob sie den Inhalt verstanden haben', 'scale_applies', 'A2'),
+('fb987b71-e7c6-11ee-9404-00620b2c9060', 'Der/die Lehrende stellt Fragen, die den Studierenden die Gelegenheit geben zu überprüfen, ob sie den Inhalt verstanden haben', 'scale_applies', 'A2'),
 ('fb989861-e7c6-11ee-9404-00620b2c9060', 'Ich verfügte bereits vor dem Veranstaltungsbesuch über umfangreiches Wissen zu den in der Lehrveranstaltung behandelten Themengebieten', 'scale_applies', NULL),
 ('fb98b420-e7c6-11ee-9404-00620b2c9060', 'Der/die Lehrende stellt zu Beginn einer Sitzung den Zusammenhang zur letzen Sitzung her', 'scale_applies', 'A1'),
-('fb98d60e-e7c6-11ee-9404-00620b2c9060', 'Der/die Lehrende gibt mir ein konstruktives Feedback zu meinen Beitragen/Lösungen', 'scale_applies', 'A2'),
+('fb98d60e-e7c6-11ee-9404-00620b2c9060', 'Der/die Lehrende gibt mir ein konstruktives Feedback zu meinen Beiträgen/Lösungen', 'scale_applies', 'A2'),
 ('fb98f563-e7c6-11ee-9404-00620b2c9060', 'Der/die Lehrende stellt immer wieder Bezüge zu bereits behandelten Lerninhalten her', 'scale_applies', 'A1'),
 ('fb9915da-e7c6-11ee-9404-00620b2c9060', 'Der/die Lehrende fesselt die Studierenden durch eine anregende und engagierte Vortragsweise', 'scale_applies', 'B2'),
 ('fb993295-e7c6-11ee-9404-00620b2c9060', 'Der/die Lehrende setzt Modelle, Graphiken oder Schemata so ein, dass sie das Verständnis komplexer Sachverhalte erleichtern', 'scale_applies', 'A1'),
@@ -202,8 +202,8 @@ INSERT INTO "question" ("id", "content", "possibilities", "dimension") VALUES
 ('fb9bd3e1-e7c6-11ee-9404-00620b2c9060', 'Der/die Lehrende ist in der Lage, die Studierenden für die in der Lehrveranstaltung behandelten Inhalte zu interessieren', 'scale_applies', 'B2'),
 ('fb9bf803-e7c6-11ee-9404-00620b2c9060', 'Der/die Lehrende gibt den Studierenden zu wenige Rückmeldungen zu ihren Beiträgen/Antworten', 'scale_applies', 'A2'),
 ('fb9c1da7-e7c6-11ee-9404-00620b2c9060', 'Der/die Lehrende ist bei Fragen gut erreichbar', 'scale_applies', 'A2'),
-('fb9c4149-e7c6-11ee-9404-00620b2c9060', 'Der/die Lehrende nutzt die Verfügbare Zeit effektiv', 'scale_applies', 'C2'),
-('fb9c6523-e7c6-11ee-9404-00620b2c9060', 'Ich habe an mindestens zwei Drittel der Termine dieser Lehrveranstaltung teilgenommen bzw. die Inhalte Lerneinheiten bearbeitet', 'y_n', NULL),
+('fb9c4149-e7c6-11ee-9404-00620b2c9060', 'Der/die Lehrende nutzt die verfügbare Zeit effektiv', 'scale_applies', 'C2'),
+('fb9c6523-e7c6-11ee-9404-00620b2c9060', 'Ich habe an mindestens zwei Drittel der Termine dieser Lehrveranstaltung teilgenommen bzw. die Inhalte der Lerneinheiten bearbeitet', 'y_n', NULL),
 ('fb9c8f5a-e7c6-11ee-9404-00620b2c9060', 'Der/die Lehrende verdeutlicht den Anwendungsbezug der Lerninhalte', 'scale_applies', 'B2'),
 ('fb9cb6cb-e7c6-11ee-9404-00620b2c9060', 'Der/die Lehrende regt die Studierenden dazu an, die Richtigkeit ihrer Beitrage/Antworten selbst zu überprüfen', 'scale_applies', 'B2'),
 ('fb9cdee4-e7c6-11ee-9404-00620b2c9060', 'Der Schwierigkeitsgrad der Veranstaltung ist für mich', 'scale_height', 'A1'),
@@ -8,4 +8,8 @@ MATHEMATICS_VALUES = os.path.join(RAW_DATA_PATH, "Mathematik")
 CLEAN_DATA_PATH = os.path.normpath(os.path.join(os.getcwd(), "..", "clean_data"))
 OUT_PATH = os.path.normpath(os.path.join(os.getcwd(), '..', "outputs"))
-INSTITUTES = ["cs", "math", "bio"]
\ No newline at end of file
+INSTITUTE_MAP = {
+    "Bioinformatik": "bio",
+    "Mathematik": "math",
+    "Informatik": "cs"
+}
\ No newline at end of file
 import requests, re
 from bs4 import BeautifulSoup
+from typing import Union
 # In the VV the Ids for the Institutes are fixed every semester
 INSTITUTE_IDS = {
@@ -134,6 +135,7 @@ def append_dict(dictionary, key, value) -> dict:
     res = dictionary
     res[key] = value
     return res
+
 def get_catalogue_for(semester: str) -> dict:
     cs = get_complete_catalogue_for(semester, "cs")
     cs = {k: append_dict(v, "institute", "cs") for k, v in cs.items()}
@@ -147,3 +149,21 @@ def get_catalogue_for(semester: str) -> dict:
     merge = math | bio | cs
     return merge
+
+def get_course(id: str) -> Union[dict, None]:
+    search_url = "https://www.fu-berlin.de/vv/de/search?utf8=✓&query={0}".format(id)
+    res = requests.get(search_url)
+    soup = BeautifulSoup(res.text, 'html.parser')
+    main_content = soup.select('#main_content .well')[0]
+    name = main_content.select("h1")[0].text
+    if name == "Erweiterte Suche": return None
+    ret = {
+        "id": id,
+        "name": main_content.select("h1")[0].text,
+        "type": main_content.select(".label_container span")[0].text.strip()
+    }
+    return ret
\ No newline at end of file
%% Cell type:code id: tags:
``` python
# Imports and constants
import os, pandas
from lib.scraping import get_catalogue_for
import lib.constants as CONSTANTS
# Add semester here in the form of "Sommersemester YYYY" or "Wintersemester YYYY/YY"
# e.g.: Sommersemester 2023 or Wintersemester 2022/23
SEMESTER = "Sommersemester 2023"
# Generates the semester name based on the constant set above
SEMESTER_NAME = SEMESTER[0:2] + "Se"
split = SEMESTER.split(" ")[1].split("/")
SEMESTER_NAME += '-'.join(split)
```
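%% Cell type:markdown id: tags:
Worked through by hand for the two example formats from the comment above: `"Sommersemester 2023"` becomes `"SoSe2023"` and `"Wintersemester 2022/23"` becomes `"WiSe2022-23"`. The small helper below is only a sketch for checking the naming scheme and is not used by the pipeline.
%% Cell type:code id: tags:
``` python
# Hypothetical check of the naming scheme, not used by the pipeline
def semester_name(semester: str) -> str:
    name = semester[0:2] + "Se"
    return name + '-'.join(semester.split(" ")[1].split("/"))

assert semester_name("Sommersemester 2023") == "SoSe2023"
assert semester_name("Wintersemester 2022/23") == "WiSe2022-23"
```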
%% Cell type:code id: tags:
``` python
# Helper Functions
def norm_name(name: str) -> str:
    if ', ' not in name: return name
    split = name.split(", ")
    return split[1] + " " + split[0]

def get_percentage_for_institute(all: pandas.DataFrame, min_five: pandas.DataFrame, institute: str) -> float:
    return (len(min_five[min_five["institute"] == institute]) / len(all[all["institute"] == institute])) * 100

def get_percentages_for(all: pandas.DataFrame, min_five: pandas.DataFrame, category: str) -> list:
    category_five = min_five[min_five["Veranstaltung-Kategorie"] == category]
    category_all = all[all["Veranstaltung-Kategorie"] == category]
    percentage_all = (len(category_five) / len(category_all)) * 100
    percentage_cs = get_percentage_for_institute(category_all, category_five, "cs")
    percentage_bio = get_percentage_for_institute(category_all, category_five, "bio")
    percentage_math = get_percentage_for_institute(category_all, category_five, "math")
    return [round(percentage_all, 2), round(percentage_bio, 2), round(percentage_cs, 2), round(percentage_math, 2)]

def get_median_for(df: pandas.DataFrame, category: str, institute: str) -> float:
    min_five_category = df[df["Veranstaltung-Kategorie"] == category]
    return min_five_category[min_five_category["institute"] == institute]["size"].median()
```
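%% Cell type:markdown id: tags:
A quick illustration of `norm_name` with a hypothetical name (not taken from the data): names given as "Surname, Name" are flipped, everything else passes through unchanged.
%% Cell type:code id: tags:
``` python
# Hypothetical names, for illustration only
assert norm_name("Mustermann, Erika") == "Erika Mustermann"
assert norm_name("Erika Mustermann") == "Erika Mustermann"
```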
%% Cell type:markdown id: tags:
### Get registered courses
This section generates the data that corresponds to the first column of the result matrix.
%% Cell type:code id: tags:
``` python
all_registered = pandas.read_csv(os.path.join(CONSTANTS.RAW_DATA_PATH, "participation", (SEMESTER_NAME + '.csv')), sep=";")
# Names are written "Surname, Name" here, but the other files use "Name Surname"; this normalizes that
names = all_registered["Person-Name"]
all_registered["Person-Name"] = names.map(norm_name)
```
%% Cell type:markdown id: tags:
### Retrieve data
Uses the semester set above to load the corresponding data.
%% Cell type:code id: tags:
``` python
# Build paths
cs_path = os.path.join(CONSTANTS.COMPUTER_SCIENCE_VALUES, ("Informatik_" + SEMESTER_NAME), "daten.csv")
bio_path = os.path.join(CONSTANTS.BIOINFORMATICS_VALUES, ("Bioinformatik_" + SEMESTER_NAME), "daten.csv")
math_path = os.path.join(CONSTANTS.MATHEMATICS_VALUES, ("Mathematik_" + SEMESTER_NAME), "daten.csv")
# Guard: the semester must exist for every institute
assert os.path.exists(cs_path) and os.path.exists(bio_path) and os.path.exists(math_path), "Semester doesn't exist for every institute"
# Read data and append institute
cs = pandas.read_csv(cs_path, sep=";", encoding="ISO-8859-1")
bio = pandas.read_csv(bio_path, sep=";", encoding="ISO-8859-1")
math = pandas.read_csv(math_path, sep=";", encoding="ISO-8859-1")
cs = cs.assign(institute="cs")
bio = bio.assign(institute="bio")
math = math.assign(institute="math")
# Ignoring the index prevents multiple courses from sharing the same index
all = pandas.concat([bio, cs, math], ignore_index=True)
# TODO: Remove this when it's in cleanup
numbers = all["Veranstaltung-Nr."]
all["Veranstaltung-Nr."] = numbers.map(lambda x: str(x).strip())
# TODO: export to cleanup
# Replaces keys with values in the dataframe
to_clean = {
    "Vorlesung": "Vorlesungen",
    "Übung": "Übungen",
    "Seminar": "Seminare",
    "V": "Vorlesungen",
    "Ü": "Übungen",
    "S": "Seminare",
}
to_replace = all["Veranstaltung-Kategorie"]
all["Veranstaltung-Kategorie"] = to_replace.map(lambda x: to_clean.get(x, x))
outliers = all[all["Veranstaltung-Kategorie"] == "Projekte / Praktika"]
for i in all.index:
    value = all.loc[i]
    evaluation_type = value.loc["Veranstaltung-Typ"]
    if value.loc["Veranstaltung-Kategorie"] in ["Vorlesungen", "Übungen", "Seminare"]: continue
    if evaluation_type == "Ü":
        all.at[i, 'Veranstaltung-Kategorie'] = "Übungen"
    elif evaluation_type == "V" or evaluation_type == "Vl" or evaluation_type == "VL":
        all.at[i, "Veranstaltung-Kategorie"] = "Vorlesungen"
    elif evaluation_type == "S":
        all.at[i, "Veranstaltung-Kategorie"] = "Seminare"
```
%% Cell type:markdown id: tags:
### Group evaluations by course
Grouping by these four columns is the only way to obtain truly unique groups, because
- tutorials and lectures of the same course share the same course number,
- two people could hold the same lecture but still count as different lectures, so grouping by the person's name ensures a split,
- grouping by course category splits tutorials, lectures and seminars,
- considering the course name ensures that two individual tutorials held by the same person within a course can be told apart.

A toy example of the grouping follows the code cell below.
%% Cell type:code id: tags:
``` python
# The institute column is only included so the relation to the institute is not lost
grouped = all.groupby(["Veranstaltung-Nr.", "Person-Name", "Veranstaltung-Kategorie", "Veranstaltung-Name", "institute"])
# reset_index() turns the group keys back into regular columns, so 'size' doesn't stand alone
all_size = grouped.size().to_frame('size').reset_index()
```
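%% Cell type:markdown id: tags:
A toy sketch of the grouping (hypothetical course number and names, not real data): the same lecture held by two different people ends up as two groups, each with its own evaluation count.
%% Cell type:code id: tags:
``` python
# Hypothetical toy data: three evaluations for one course number, held by two people
toy = pandas.DataFrame({
    "Veranstaltung-Nr.": ["19000001"] * 3,
    "Person-Name": ["Erika Mustermann", "Erika Mustermann", "Max Mustermann"],
    "Veranstaltung-Kategorie": ["Vorlesungen"] * 3,
    "Veranstaltung-Name": ["Beispielvorlesung"] * 3,
    "institute": ["cs"] * 3
})
toy_size = toy.groupby(["Veranstaltung-Nr.", "Person-Name", "Veranstaltung-Kategorie",
                        "Veranstaltung-Name", "institute"]).size().to_frame('size').reset_index()
# toy_size has two rows: size 2 for Erika Mustermann and size 1 for Max Mustermann
```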
%% Cell type:markdown id: tags:
### Get collegially held courses
%% Cell type:code id: tags:
``` python
collegial = all_registered[all_registered["Anzahl Lehrende"] > 1]
```
%% Cell type:markdown id: tags:
### Courses with fewer than 5 registered students
%% Cell type:code id: tags:
``` python
less_five_registered = all_registered[all_registered["Anzahl Teilnehmende"] < 5]
```
%% Cell type:markdown id: tags:
### Courses with at least five evaluations
%% Cell type:code id: tags:
``` python
min_five_eval = all_size[all_size["size"] > 4]
```
%% Cell type:markdown id: tags:
### % of evaluated courses
%% Cell type:code id: tags:
``` python
vl = get_percentages_for(all_size, min_five_eval, "Vorlesungen")
ueb = get_percentages_for(all_size, min_five_eval, "Übungen")
sem = get_percentages_for(all_size, min_five_eval, "Seminare")
percentages_eval = {
    "Vorlesung": {
        "all": vl[0],
        "Bioinformatik": vl[1],
        "Informatik": vl[2],
        "Mathematik": vl[3]
    },
    "Seminar, SWP": {
        "all": sem[0],
        "Bioinformatik": sem[1],
        "Informatik": sem[2],
        "Mathematik": sem[3]
    },
    "Übung/Tutorium": {
        "all": ueb[0],
        "Bioinformatik": ueb[1],
        "Informatik": ueb[2],
        "Mathematik": ueb[3]
    },
    "Gesamt": len(min_five_eval) / len(all_size) * 100
}
```
%% Cell type:code id: tags:
``` python
join = min_five_eval.merge(all_registered[['Veranstaltung-Nr.', 'Anzahl Teilnehmende', 'Person-Name', 'Veranstaltung-Name']], on=["Veranstaltung-Nr.", "Person-Name", "Veranstaltung-Name"], how="left")
bio = join[(join["institute"] == "bio")]
cs = join[(join["institute"] == "cs")]
math = join[(join["institute"] == "math")]
# The median differs from the evaluation report, because the report calculates it wrong
```
%% Cell type:markdown id: tags:
### Median number of students per course
%% Cell type:code id: tags:
``` python
median_stud = {
    "Vorlesung": {
        "Bioinformatik": (bio[bio["Veranstaltung-Kategorie"] == "Vorlesungen"])["Anzahl Teilnehmende"].median(),
        "Informatik": (cs[cs["Veranstaltung-Kategorie"] == "Vorlesungen"])["Anzahl Teilnehmende"].median(),
        "Mathematik": (math[math["Veranstaltung-Kategorie"] == "Vorlesungen"])["Anzahl Teilnehmende"].median()
    },
    "Seminar, SWP": {
        "Bioinformatik": (bio[bio["Veranstaltung-Kategorie"] == "Seminare"])["Anzahl Teilnehmende"].median(),
        "Informatik": (cs[cs["Veranstaltung-Kategorie"] == "Seminare"])["Anzahl Teilnehmende"].median(),
        "Mathematik": (math[math["Veranstaltung-Kategorie"] == "Seminare"])["Anzahl Teilnehmende"].median()
    },
    "Übung/Tutorium": {
        "Bioinformatik": (bio[bio["Veranstaltung-Kategorie"] == "Übungen"])["Anzahl Teilnehmende"].median(),
        "Informatik": (cs[cs["Veranstaltung-Kategorie"] == "Übungen"])["Anzahl Teilnehmende"].median(),
        "Mathematik": (math[math["Veranstaltung-Kategorie"] == "Übungen"])["Anzahl Teilnehmende"].median()
    }
}
```
%% Cell type:markdown id: tags:
### Median number of filled-out questionnaires per course
%% Cell type:code id: tags:
``` python
median_questionnaire = {
    "Vorlesung": {
        "Bioinformatik": get_median_for(min_five_eval, "Vorlesungen", "bio"),
        "Informatik": get_median_for(min_five_eval, "Vorlesungen", "cs"),
        "Mathematik": get_median_for(min_five_eval, "Vorlesungen", "math")
    },
    "Seminar, SWP": {
        "Bioinformatik": get_median_for(min_five_eval, "Seminare", "bio"),
        "Informatik": get_median_for(min_five_eval, "Seminare", "cs"),
        "Mathematik": get_median_for(min_five_eval, "Seminare", "math")
    },
    "Übung/Tutorium": {
        "Bioinformatik": get_median_for(min_five_eval, "Übungen", "bio"),
        "Informatik": get_median_for(min_five_eval, "Übungen", "cs"),
        "Mathematik": get_median_for(min_five_eval, "Übungen", "math")
    }
}
```
%% Cell type:markdown id: tags:
### Result generation
%% Cell type:code id: tags:
``` python
institute_map = {
    "Bioinformatik": "bio",
    "Informatik": "cs",
    "Mathematik": "math"
}
institutes = ["Bioinformatik", "Informatik", "Mathematik"]
type_map = {
    "Vorlesung": "Vorlesungen",
    "Seminar, SWP": "Seminare",
    "Übung/Tutorium": "Übungen"
}
schema = {
    "Vorlesung": institutes,
    "Seminar, SWP": institutes,
    "Übung/Tutorium": institutes
}
header = ["Veranstaltungstyp", "Angemeldete LVen dieses Typs", "Kollegial gehaltene LVen", "LVen <5 Anmeldungen", "Bewertete LVen", "Davon LVen mit mind. 5 Bewertungen", "Evaluierte LVen* in %", "Anzahl angemeldeter Studierender je evaluierter LV* (Median)", "Anzahl ausgefüllter Fragebögen in evaluierten LVen* (Median)"]
result_matrix = []
for k, v in type_map.items():
    registered = all_registered[all_registered["Kategorie"] == v]
    collegial_num = collegial[collegial["Kategorie"] == v]
    less_five = less_five_registered[less_five_registered["Kategorie"] == v]
    # Summary row for this course type across all institutes
    result_matrix.append([k, len(registered), len(collegial_num), len(less_five),
        len(all_size[all_size["Veranstaltung-Kategorie"] == v]),
        len(min_five_eval[min_five_eval["Veranstaltung-Kategorie"] == v]),
        percentages_eval[k]["all"],
        '-', '-'])
    for institute in institutes:
        short = institute_map[institute]
        inst_size = all_size[(all_size["Veranstaltung-Kategorie"] == v) & (all_size["institute"] == short)]
        inst_five = min_five_eval[(min_five_eval["Veranstaltung-Kategorie"] == v) & (min_five_eval["institute"] == short)]
        inst_reg = registered[registered["Bereich"] == institute]
        col = collegial_num[collegial_num["Bereich"] == institute]
        l_five = less_five[less_five["Bereich"] == institute]
        result_matrix.append([institute, len(inst_reg), len(col),
            len(l_five), len(inst_size), len(inst_five),
            percentages_eval[k][institute], median_stud[k][institute],
            median_questionnaire[k][institute]])
together = ["Gesamt"] + [result_matrix[0][i] + result_matrix[4][i] + result_matrix[8][i] for i in range(1, 6)]
together.append(round(together[5] / together[4] * 100))
together += ['-'] * 2
result_matrix.append(together)
res = pandas.DataFrame(data=result_matrix, columns=header)
out_file = os.path.join(CONSTANTS.OUT_PATH, SEMESTER_NAME + ".md")
res.to_markdown(out_file, index=False)
```
import os, sys
from schema import Schema, Regex

# Make the project root importable so that src.lib can be found
path = os.path.abspath(__file__)
steps = 4
for i in range(steps):
    path = os.path.dirname(path)
sys.path.append(path)

import src.lib.scraping as scraping

def test_valid_id():
    id = "19201401"
    result_schema = Schema({
        'id': str,
        'name': str,
        'type': str
    })
    res = scraping.get_course(id)
    assert result_schema.is_valid(res)

def test_invalid_id():
    res = scraping.get_course("0000000")
    assert res is None