diff --git a/.gitignore b/.gitignore index fb3b2f4fce5c8e3cc99010a6b393a5a86fc3a782..1f5a397680368c58834a90432c01b42d72d2044d 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,12 @@ raw_data/participation/* **/node_modules/ -.env \ No newline at end of file +.env + +# Pyenv +/lib +/etc +/bin +/include +pyvenv.cfg +/share \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..e3b445b9fa9aaa0653fb1381754653f05230172d --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,29 @@ +stages: + - lint + +.install_make: + before_script: + - apt-get update && apt-get install make + +lint_jupyter: + stage: lint + extends: .install_make + image: python:3.9 + script: + - make install_python + - make jupyter_lint +lint_python: + stage: lint + extends: .install_make + image: python:3.9 + script: + - ls ./src + - make install_python + - make python_lint +lint_javascript: + stage: lint + extends: .install_make + image: node:lts + script: + - make install_javascript + - make javascript_lint \ No newline at end of file diff --git a/Makefile b/Makefile index 0ee469108d4d00429d5f07d1919031fa752a87bb..8535f33592bf10312e9f439352c4ef088d8003c1 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,45 @@ out = clean_data +# Cleaning clean: @$(RM) -rf ./$(out)/* +# Installing +install_javascript: + cd ./src/db/api && npm i + +install_python: + pip install -r requirements.txt + +install: install_javascript install_python + +# Doing stuff streamlit: streamlit run src/dashboard/dashboard.py docker_rebuild: - cd ./src/db && docker compose up -d --build \ No newline at end of file + cd ./src/db && docker compose up -d --build + +# Linting +jupyter_lint: + pynblint . 
+
+# E501 is line too long
+python_lint:
+	flake8 --extend-ignore=E501 ./src
+
+javascript_lint:
+	cd ./src/db/api && npx prettier -c ./
+
+lint: jupyter_lint python_lint javascript_lint
+
+# Code
+jupyter:
+	jupyter lab ./src
+
+run_pipeline_full:
+	make install && make docker_rebuild && jupyter execute ./src/cleanup.ipynb ./src/old-report.ipynb ./src/pipeline.ipynb && make streamlit
+
+# Expects that install and docker rebuild has already been called
+run_pipeline:
+	jupyter execute ./src/cleanup.ipynb ./src/old-report.ipynb ./src/pipeline.ipynb && make streamlit
\ No newline at end of file
diff --git a/README.md b/README.md
index fb4e61a26522fe6d0d802421bc8c614d45d1f264..72977cb8afd98addc8c987f223b1ed5f8403e1c3 100644
--- a/README.md
+++ b/README.md
@@ -1,26 +1,9 @@
 # Evaluation Report Generator
-This project corresponds to the bachelor's thesis "Creating a Pipeline for Reproducible Evaluation Report Generation" written at the Human-Centered Computing research group.
-First Examiner: Prof. Dr. Claudia Müller-Birn
-Second Examiner: TBA
-
-## Description
+This project corresponds to the bachelor's thesis "Creating a Pipeline for Reproducible Evaluation Report Generation" written at the Human-Centered Computing research group.
 
-This repository contains two pipelines `pipeline.ipynb` and `new_pipeline.ipynb` which generate evaluation reports. `pipeline.ipynb` automates the current evaluation report, while `new_pipeline.ipynb` uses suggestions from different people across the department for Mathematics, Computer Science and Bioinformatics to improve the evaluation report.
-To work the pipeline needs data in the `raw_data/` folder in the following structure:
+First Examiner: Prof. Dr. 
Claudia Müller-Birn -```shell -├── Bioinformatik -│ └── Bioinformatik_SoSe2019 -│ └── daten.csv -├── Informatik -│ └── Informatik_SoSe2019 -│ └── daten.csv -├── Mathematik -│ └── Mathematik_SoSe2019 -│ └── daten.csv -└── participation - └── SoSe2019.csv -``` +Second Examiner: Barry Linnert -`daten.csv` comes from an export in [Zensus](https://www.zedat.fu-berlin.de/Benutzerservice/UniZensus). The `data.example.csv` shows the schema the data needs to have. To be able to work the pipeline also needs data for all three institutes per semester. Folders with semester names should always look like this: `[Institute]_[WiSeYYYY/YY|SoSeYYYY]`. +You can find documentation in [the Wiki](https://git.imp.fu-berlin.de/alexander06/evaluation-report-generator/-/wikis/home) \ No newline at end of file diff --git a/outputs/SoSe2021.md b/outputs/SoSe2021.md index 679442ae2acf6f53881f540e05ef8a32ad4cdabe..4e411e94c7e6a0e7c7a801cd071bd5a8e17e5613 100644 --- a/outputs/SoSe2021.md +++ b/outputs/SoSe2021.md @@ -1,8 +1,8 @@ | Veranstaltungstyp | Angemeldete LVen dieses Typs | Kollegial gehaltene LVen | LVen <5 Anmeldungen | Bewertete LVen | Davon LVen mit mind. 
5 Bewertungen | Evaluierte LVen* in % | Anzahl angemeldeter Studierender je evaluierter LV* (Median) | Anzahl ausgefüllter Fragebögen in evaluierten LVen* (Median) | |:--------------------|-------------------------------:|---------------------------:|----------------------:|-----------------:|-------------------------------------:|------------------------:|:---------------------------------------------------------------|:---------------------------------------------------------------| -| Vorlesung | 68 | 0 | 8 | 60 | 34 | 56.67 | - | - | +| Vorlesung | 68 | 0 | 8 | 61 | 34 | 55.74 | - | - | | Bioinformatik | 9 | 0 | 8 | 7 | 1 | 14.29 | 3.0 | 11.0 | -| Informatik | 26 | 0 | 0 | 21 | 17 | 80.95 | 148.5 | 12.0 | +| Informatik | 26 | 0 | 0 | 22 | 17 | 77.27 | 148.5 | 12.0 | | Mathematik | 33 | 0 | 0 | 32 | 16 | 50 | nan | 13.5 | | Seminar, SWP | 85 | 0 | 3 | 64 | 14 | 21.88 | - | - | | Bioinformatik | 17 | 0 | 0 | 9 | 5 | 55.56 | 35.0 | 6.0 | @@ -12,4 +12,4 @@ | Bioinformatik | 2 | 0 | 1 | 2 | 1 | 50 | 11.0 | 6.0 | | Informatik | 31 | 0 | 0 | 30 | 25 | 83.33 | 53.0 | 10.0 | | Mathematik | 55 | 0 | 0 | 53 | 35 | 66.04 | nan | 7.0 | -| Gesamt | 241 | 0 | 12 | 209 | 109 | 52 | - | - | \ No newline at end of file +| Gesamt | 241 | 0 | 12 | 210 | 109 | 52 | - | - | \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 4e995add34fe6c73b0eba63a2839b839a1f2dc6b..a2e2668302931b3746089b66058966e2b937f7a2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,92 @@ -beautifulsoup4==4.12.2 -pytest==8.1.1 -regex==2022.10.31 +altair==5.3.0 +appnope==0.1.4 +attrs==23.2.0 +backcall==0.2.0 +beautifulsoup4==4.12.3 +bleach==6.1.0 +blinker==1.8.2 +cachetools==5.3.3 +certifi==2024.2.2 +charset-normalizer==3.3.2 +click==8.1.7 +colorama==0.4.6 +comm==0.2.2 +commonmark==0.9.1 +debugpy==1.8.1 +decorator==5.1.1 +defusedxml==0.7.1 +entrypoints==0.4 +fastjsonschema==2.19.1 +flake8==7.0.0 +gitdb==4.0.11 +GitPython==3.1.43 +idna==3.7 +importlib_metadata==7.1.0 
+ipykernel==6.29.4 +ipython==7.34.0 +jedi==0.19.1 +Jinja2==3.1.4 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +jupyter_client==8.6.1 +jupyter_core==5.7.2 +jupyterlab_pygments==0.3.0 +lxml==5.2.2 +lxml_html_clean==0.1.1 +markdown-it-py==3.0.0 +MarkupSafe==2.1.5 +matplotlib-inline==0.1.7 +mccabe==0.7.0 +mdurl==0.1.2 +mistune==0.8.4 +nbclient==0.10.0 +nbconvert==6.5.4 +nbformat==5.10.4 +nest-asyncio==1.6.0 +numpy==1.26.4 +packaging==24.0 +pandas==2.2.2 +pandocfilters==1.5.1 +parso==0.8.4 +pexpect==4.9.0 +pickleshare==0.7.5 +pillow==10.3.0 +platformdirs==4.2.2 +prompt-toolkit==3.0.43 +protobuf==4.25.3 +psutil==5.9.8 +ptyprocess==0.7.0 +pyarrow==16.1.0 +pycodestyle==2.11.1 +pydantic==1.10.15 +pydeck==0.9.1 +pyflakes==3.2.0 +Pygments==2.18.0 +pynblint==0.1.5 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +pytz==2024.1 +pyzmq==26.0.3 +referencing==0.35.1 +regex==2024.5.15 requests==2.31.0 -schema==0.7.5 \ No newline at end of file +rich==11.2.0 +rpds-py==0.18.1 +six==1.16.0 +smmap==5.0.1 +soupsieve==2.5 +streamlit==1.34.0 +tabulate==0.9.0 +tenacity==8.3.0 +tinycss2==1.3.0 +toml==0.10.2 +toolz==0.12.1 +tornado==6.4 +traitlets==5.14.3 +typer==0.4.2 +typing_extensions==4.11.0 +tzdata==2024.1 +urllib3==2.2.1 +wcwidth==0.2.13 +webencodings==0.5.1 +zipp==3.18.1 diff --git a/src/cleanup.ipynb b/src/cleanup.ipynb index 0e8a6c6d0aedddd348895a4116e89cc01b98c27b..1be5f4ca881cfcb046e0d76be1a079fbd266ff5f 100644 --- a/src/cleanup.ipynb +++ b/src/cleanup.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -22,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -65,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -86,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 16, "metadata": {}, 
"outputs": [], "source": [ @@ -116,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -139,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -175,7 +175,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -193,7 +193,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -219,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -245,7 +245,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -255,7 +255,11 @@ "\n", "\tfor column in df:\n", "\t\t# Strip is needed, because some questions have a trailing whitespace\n", - "\t\t# TODO: Document the regex somewhere\n", + "\t\t\n", + "\t\t# (^\\.+) Removes all dots (.) at the start of the string\n", + "\t\t# (\\.+$) Removes all dots (.) at the end of the string\n", + "\t\t# (\\s?\\?$) Removes a question mark (and whitespaces before it) at the end of the string\n", + "\t\t# (^\\?) 
Removes a question mark at the beginning of the string\n", "\t\tprepared_string = re.sub(r\"(^\\.+)|(\\.+$)|(\\s?\\?$)|(^\\?)\", \"\", column.strip())\n", "\t\t# In a few files a single question exists with two spaces, this unifies them to one\n", "\t\tprepared_string = ' '.join(prepared_string.split())\n", @@ -276,12 +280,14 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "for (path, df) in paths:\n", - "\tinformation = path.split(\"/\")[-1]\n", + "\t# normpath and os.sep provide cross plattform functionality\n", + "\tpath = os.path.normpath(path)\n", + "\tinformation = path.split(os.sep)[-1]\n", "\t(institute, semester) = information.split(\"_\")\n", "\tinstitute_path = os.path.join(CONSTANTS.CLEAN_DATA_PATH, institute)\n", "\tsemester_path = os.path.join(institute_path, semester)\n", @@ -320,7 +326,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ diff --git a/src/dashboard/dashboard.py b/src/dashboard/dashboard.py index 5d79e69b3ea10eca58e935d9dfc288ce0e2d8828..6e46119f23badad1e7f715356327e5b4efa97247 100644 --- a/src/dashboard/dashboard.py +++ b/src/dashboard/dashboard.py @@ -2,25 +2,24 @@ import streamlit as st import pandas as pd import altair as alt -import os, re, sys +import os +import re +import sys from functools import cmp_to_key - # Allows access to lib folder path = os.path.abspath(__file__) steps = 2 for i in range(steps): - path = os.path.dirname(path) + path = os.path.dirname(path) sys.path.append(path) -import lib.api as api -import lib.constants as CONSTANTS - -# TODO: Institute Colors +import lib.constants as CONSTANTS # noqa: E402 (prevents error, since import is supposed to be here) +import lib.api as api # noqa: E402 (prevents error, since import is supposed to be here) -## Globals +# Globals # Allows generation of streamlit interaction elements with unique IDs global curr_key curr_key = 0 @@ 
-28,369 +27,427 @@ curr_key = 0 # Value cache value_cache = {} -uuid_regex = re.compile('\\b[0-9a-f]{8}\\b-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-\\b[0-9a-f]{12}\\b') +uuid_regex = re.compile( + '\\b[0-9a-f]{8}\\b-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-\\b[0-9a-f]{12}\\b') -dimensions = { - "A": "Vermittlung von Wissen und Unterstützen von Verstehen", - "B": "Motivieren und lerndienliche Atmosphäre herstellen", - "C": "Steuerung und Interaktion in der Lerngruppe" +colors = { + "Bioinformatik": "#31CB00", + "Mathematik": "#008DD5", + "Informatik": "#A13D63" } +dimensions = { + "A": "Vermittlung von Wissen und Unterstützen von Verstehen", + "B": "Motivieren und lerndienliche Atmosphäre herstellen", + "C": "Steuerung und Interaktion in der Lerngruppe" +} map = { - "cs": "Informatik", - "bio": "Bioinformatik", - "math": "Mathematik" + "cs": "Informatik", + "bio": "Bioinformatik", + "math": "Mathematik" } -## Functions +# Functions + def api_response_to_dataframe_compatibility(response): - res = {} - for course in response: - course_obj = { - "Veranstaltungsnummer": course["course_number"], - "Veranstaltungsname": course["course_name"], - "Veranstaltungstyp": course["course_type"], - "Lehrperson": course["lecturer"], - "Semester": course["semester"], - "Institut": map[course["institute"]], - "Antworten": course["answers"], - "Teilnehmende": course["participants"] - } - - for question in course["scores"]: - course_obj[question["question"]] = question["score"] - res[course["id"]] = course_obj - return res + res = {} + for course in response: + course_obj = { + "Veranstaltungsnummer": course["course_number"], + "Veranstaltungsname": course["course_name"], + "Veranstaltungstyp": course["course_type"], + "Lehrperson": course["lecturer"], + "Semester": course["semester"], + "Institut": map[course["institute"]], + "Antworten": course["answers"], + "Teilnehmende": course["participants"] + } + + for question in course["scores"]: + course_obj[question["question"]] = question["score"] + 
res[course["id"]] = course_obj + return res + def get_interval(percentage): - initial = [pd.Interval(left=0, right=(1/7), closed="both")] - if percentage in initial: return 1 - left = (1 / 7) - for i in range(2, 8): - interval = pd.Interval(left=left, right=left + (1 / 7), closed="right") - if percentage in interval: return i - left += (1 / 7) + initial = [pd.Interval(left=0, right=(1/7), closed="both")] + if percentage in initial: + return 1 + left = (1 / 7) + for i in range(2, 8): + interval = pd.Interval(left=left, right=left + (1 / 7), closed="right") + if percentage in interval: + return i + left += (1 / 7) + def cache_question(question): - scale = api.get_scale_for(question) - dimension = api.get_dimension_for(question)["id"] - if dimension == None: - dimension = "no_dim" - possibilities = api.get_answers_for(scale["id"]) - weights = [answer["value"] for answer in possibilities] - value_cache[question] = { - "value": max(weights), - "dimension": dimension - } + scale = api.get_scale_for(question) + dimension = api.get_dimension_for(question)["id"] + if dimension is None: + dimension = "no_dim" + possibilities = api.get_answers_for(scale["id"]) + weights = [answer["value"] for answer in possibilities] + value_cache[question] = { + "value": max(weights), + "dimension": dimension + } + def convert_scores(scores): - result = {} - for course in scores: - id = course["id"] - dimensions = [] - result[id] = { - "Typ": course["course_type"], - "Institut": map[course["institute"]], - "Semester": course["semester"], - } - - maximum = {} - - for score in course["scores"]: - question = score["question"] - - if question not in value_cache: - cache_question(question) - dimension = value_cache[question]["dimension"] - - # Creates initial values - if dimension not in result[id]: - result[id][dimension] = 0 - dimensions.append(dimension) - if dimension not in maximum: - maximum[dimension] = 0 - - result[id][dimension] += score["score"] - maximum[dimension] += 
value_cache[question]["value"] * course["answers"] - - # All over score - for dimension in dimensions: - if dimension.endswith("1"): - counterpart = dimension.replace("1", "2") - if counterpart in dimensions: - combined_score = result[id][dimension] + result[id][counterpart] - combined_maximum = maximum[dimension] + maximum[counterpart] - result[id][dimension[0]] = round(((combined_score + combined_maximum) / (combined_maximum * 2)), 2) - else: - result[id][dimension[0]] = round(((result[id][dimension] + maximum[dimension]) / (maximum[dimension] * 2)), 2) - elif dimension.endswith("2") and dimension.replace("2", "1") not in dimensions: - result[id][dimension[0]] = round(((result[id][dimension] + maximum[dimension]) / (maximum[dimension] * 2)), 2) - - # Dimension was not evaluated - if maximum[dimension] == 0 and result[id][dimension] == 0: - continue - # Maximum and minimum possible Score based on answers numbers - # Moves all values into positive integers, simplifying percentage calculation - result[id][dimension] = round(((result[id][dimension] + maximum[dimension]) / (maximum[dimension] * 2)), 2) - - return result + result = {} + for course in scores: + id = course["id"] + dimensions = [] + result[id] = { + "Typ": course["course_type"], + "Institut": map[course["institute"]], + "Semester": course["semester"], + } + + maximum = {} + + for score in course["scores"]: + question = score["question"] + + if question not in value_cache: + cache_question(question) + dimension = value_cache[question]["dimension"] + + # Creates initial values + if dimension not in result[id]: + result[id][dimension] = 0 + dimensions.append(dimension) + if dimension not in maximum: + maximum[dimension] = 0 + + result[id][dimension] += score["score"] + maximum[dimension] += value_cache[question]["value"] * \ + course["answers"] + + # All over score + for dimension in dimensions: + if dimension.endswith("1"): + counterpart = dimension.replace("1", "2") + if counterpart in dimensions: + 
combined_score = result[id][dimension] + \ + result[id][counterpart] + combined_maximum = maximum[dimension] + \ + maximum[counterpart] + result[id][dimension[0]] = round( + ((combined_score + combined_maximum) / (combined_maximum * 2)), 2) + else: + result[id][dimension[0]] = round( + ((result[id][dimension] + maximum[dimension]) / (maximum[dimension] * 2)), 2) + elif dimension.endswith("2") and dimension.replace("2", "1") not in dimensions: + result[id][dimension[0]] = round( + ((result[id][dimension] + maximum[dimension]) / (maximum[dimension] * 2)), 2) + + # Dimension was not evaluated + if maximum[dimension] == 0 and result[id][dimension] == 0: + continue + # Maximum and minimum possible Score based on answers numbers + # Moves all values into positive integers, simplifying percentage calculation + result[id][dimension] = round( + ((result[id][dimension] + maximum[dimension]) / (maximum[dimension] * 2)), 2) + + return result + def percentages_to_interval(scores): - res = {} - dimensions = ["A", "A1", "A2", "B", "B1", "B2", "C", "C1", "C2"] - for course in scores: - for dimension in dimensions: - updated = scores[course] - updated[dimension] = get_interval(updated[dimension]) - res[course] = updated - return res + res = {} + dimensions = ["A", "A1", "A2", "B", "B1", "B2", "C", "C1", "C2"] + for course in scores: + for dimension in dimensions: + updated = scores[course] + updated[dimension] = get_interval(updated[dimension]) + res[course] = updated + return res + def compare_semesters(semester1, semester2): - if semester1 == semester2: - return 0 - + if semester1 == semester2: + return 0 - year1 = int(semester1[4:8]) - year2 = int(semester2[4:8]) + year1 = int(semester1[4:8]) + year2 = int(semester2[4:8]) + + if year1 == year2: + return 1 if semester1.startswith("Wi") else -1 + elif year1 > year2: + return 1 + else: + return -1 - if year1 == year2: - return 1 if semester1.startswith("Wi") else -1 - elif year1 > year2: - return 1 - else: - return -1 def 
get_min_max_semester(df: pd.DataFrame): - semester_list = df["Semester"].to_list() - semester_sorted = sorted(semester_list, key=cmp_to_key(compare_semesters)) - # Oldest, Newsest - return (semester_sorted[0], semester_sorted[-1]) + semester_list = df["Semester"].to_list() + semester_sorted = sorted(semester_list, key=cmp_to_key(compare_semesters)) + # Oldest, Newsest + return (semester_sorted[0], semester_sorted[-1]) + def generate_semester_interval(left, right): - interval = [] - start_year = int(left[4:8]) - gen_sose = left.startswith("SoSe") - year = start_year - semester = f"SoSe{str(start_year)}" if gen_sose else f"WiSe{str(start_year)}/{str(start_year + 1)[2:4]}" - while semester != right: - interval.append(semester) - gen_sose = not gen_sose - if gen_sose: - year += 1 - semester = f"SoSe{str(year)}" if gen_sose else f"WiSe{str(year)}/{str(year + 1)[2:4]}" - interval.append(semester) - - return interval + interval = [] + start_year = int(left[4:8]) + gen_sose = left.startswith("SoSe") + year = start_year + semester = f"SoSe{str(start_year)}" if gen_sose else f"WiSe{str(start_year)}/{str(start_year + 1)[2:4]}" + while semester != right: + interval.append(semester) + gen_sose = not gen_sose + if gen_sose: + year += 1 + semester = f"SoSe{str(year)}" if gen_sose else f"WiSe{str(year)}/{str(year + 1)[2:4]}" + interval.append(semester) + + return interval # Genrate initial values + # Used by graphs with no names scores = pd.DataFrame(convert_scores(api.get_all_scores())).T # For graphs with full names -full_dataset = pd.DataFrame(api_response_to_dataframe_compatibility(api.get_all_scores())).T +full_dataset = pd.DataFrame( + api_response_to_dataframe_compatibility(api.get_all_scores())).T # Basic Page config st.set_page_config( - page_title="Evaluationsbericht", - layout="wide" + page_title="Evaluationsbericht", + layout="wide" ) with st.sidebar: - st.header("Inhaltsverzeichnis") - st.markdown(''' - - [Scatter Plot](#scatter-plot) - - [Boxplot](#boxplot) - - 
[Violinplot](#violinplot) - - [Linechart](#linechart) - - [Barchart](#barchart) - - [Tabelle](#tabelle) - ''') + st.header("Inhaltsverzeichnis") + st.markdown(''' + - [Scatter Plot](#scatter-plot) + - [Boxplot](#boxplot) + - [Violinplot](#violinplot) + - [Linechart](#linechart) + - [Barchart](#barchart) + - [Tabelle](#tabelle) + ''') st.title("Evaluationsbericht Grafiken") with st.expander(":orange[ :warning: Achtung!]", expanded=True): - st.write(''' - Dieses Dashboard ist nur ein triviales Beispiel dafür, wie die Daten die während der Pipeline generiert werden, auf einem Dashboard angezeigt werden können. Dieses Dahboard ist keineswegs ein komplett fertiges Produkt, funktioniert aber minimal genug für die Demonstration. - ''') -present_mode = st.toggle("Datenschutz Modus (Versteckt Abschnitte mit Personenbezogenen Daten)") + st.write(''' + Dieses Dashboard ist nur ein triviales Beispiel dafür, wie die Daten die während der Pipeline generiert werden, auf einem Dashboard angezeigt werden können. Dieses Dahboard ist keineswegs ein komplett fertiges Produkt, funktioniert aber minimal genug für die Demonstration. 
+ ''') +present_mode = st.toggle( + "Datenschutz Modus (Versteckt Abschnitte mit Personenbezogenen Daten)") if not present_mode: - st.sidebar.markdown("- [Ranking](#ranking)") + st.sidebar.markdown("- [Ranking](#ranking)") # If both is chosen, append scale for every semester -semester_select = st.multiselect("Generierung für folgende Semester:", ["Sommersemester", "Wintersemester"], ["Sommersemester"]) +semester_select = st.multiselect("Generierung für folgende Semester:", [ + "Sommersemester", "Wintersemester"], ["Sommersemester"]) + def generate_plot_section__with_subdimensions(title, markdown, chart): - global curr_key - st.header(title, divider="gray") - with st.expander(":information_source: Hinweis", expanded=True): - st.write(markdown) - column_multiselect, column_slider = st.columns(2) - - with column_multiselect: - selected_dimensions = st.multiselect(key=curr_key, label="Dimensionen, die generiert werden sollen",options=["A", "B", "C"], default=["A", "B", "C"]) - curr_key += 1 - with column_slider: - min_semester, max_semester = get_min_max_semester(chart.data) - interval = generate_semester_interval(min_semester, max_semester) - interval = [sem for sem in interval if (sem.startswith("So") and "Sommersemester" in semester_select) or (sem.startswith("Wi") and "Wintersemester" in semester_select)] - - semester_start, semester_end = st.select_slider(key=curr_key, - label="Generation für Semester zwischen", - options=interval, - value=(interval[0], interval[-1]) - ) - curr_key += 1 - semester_range = interval[interval.index(semester_start):interval.index(semester_end) + 1] - - chart_columns = st.columns(len(selected_dimensions)) - - for (i, dimension) in enumerate(selected_dimensions): - with chart_columns[i]: - chart_copy = (chart - .transform_filter(alt.FieldOneOfPredicate(field="Semester", oneOf=semester_range)) - .encode( - x=alt.X(f"{dimension}1").scale(domain=(.20,1.05)).axis(format="%"), - 
y=alt.Y(f"{dimension}2").scale(domain=(0,1.05)).axis(format="%"), - tooltip=[alt.Tooltip(f"{dimension}1", format=".2%"), alt.Tooltip(f"{dimension}2", format=".2%")] - ) - .properties( - title=f"Dimension {dimension} - {dimensions[dimension]}" - ) - ) - st.altair_chart(chart_copy, use_container_width=True) + global curr_key + st.header(title, divider="gray") + with st.expander(":information_source: Hinweis", expanded=True): + st.write(markdown) + column_multiselect, column_slider = st.columns(2) + + with column_multiselect: + selected_dimensions = st.multiselect(key=curr_key, label="Dimensionen, die generiert werden sollen", options=[ + "A", "B", "C"], default=["A", "B", "C"]) + curr_key += 1 + with column_slider: + min_semester, max_semester = get_min_max_semester(chart.data) + interval = generate_semester_interval(min_semester, max_semester) + interval = [sem for sem in interval if (sem.startswith("So") and "Sommersemester" in semester_select) or ( + sem.startswith("Wi") and "Wintersemester" in semester_select)] + + semester_start, semester_end = st.select_slider(key=curr_key, + label="Generation für Semester zwischen", + options=interval, + value=( + interval[0], interval[-1]) + ) + curr_key += 1 + semester_range = interval[interval.index( + semester_start):interval.index(semester_end) + 1] + + chart_columns = st.columns(len(selected_dimensions)) + + for (i, dimension) in enumerate(selected_dimensions): + with chart_columns[i]: + chart_copy = (chart + .transform_filter(alt.FieldOneOfPredicate(field="Semester", oneOf=semester_range)) + .encode( + x=alt.X(f"{dimension}1").scale( + domain=(.20, 1.05)).axis(format="%"), + y=alt.Y(f"{dimension}2").scale( + domain=(0, 1.05)).axis(format="%"), + tooltip=[alt.Tooltip(f"{dimension}1", format=".2%"), alt.Tooltip( + f"{dimension}2", format=".2%")] + ) + .properties( + title=f"Dimension {dimension} - {dimensions[dimension]}" + ) + ) + st.altair_chart(chart_copy, use_container_width=True) def 
generate_section_dimensions(title, markdown, chart, type): - global curr_key - st.header(title, divider="gray") - with st.expander(":information_source: Hinweis", expanded=True): - st.write(markdown) - column_multiselect, column_slider, column_institute = st.columns(3) - - with column_multiselect: - selected_dimensions = st.multiselect(key=curr_key, label="Dimensionen, die generiert werden sollen",options=["A", "B", "C"], default=["A", "B", "C"]) - curr_key += 1 - with column_slider: - min_semester, max_semester = get_min_max_semester(chart.data) - interval = generate_semester_interval(min_semester, max_semester) - interval = [sem for sem in interval if (sem.startswith("So") and "Sommersemester" in semester_select) or (sem.startswith("Wi") and "Wintersemester" in semester_select)] - semester_start, semester_end = st.select_slider(key=curr_key, - label="Generation für Semester zwischen", - options=interval, - value=(interval[0], interval[-1]) - ) - curr_key += 1 - semester_range = interval[interval.index(semester_start):interval.index(semester_end) + 1] - - with column_institute: - selected_institutes = st.multiselect(key=curr_key, label="Generieren für die Institute:", options=["Bioinformatik", "Informatik", "Mathematik"], default=["Bioinformatik", "Informatik", "Mathematik"]) - curr_key += 1 - for dimension in selected_dimensions: - st.subheader("Dimension {0}".format(dimension)) - if type == "box": - smallest = pd.to_numeric(chart.data[dimension]).nsmallest(1).values[0] - largest = pd.to_numeric(chart.data[dimension]).nlargest(1).values[0] - chart_dim = chart.encode(y=alt.Y(dimension).scale(domain=(smallest, largest)).title("Bewertung")) - - columns = st.columns(len(selected_institutes)) - for (i, institute) in enumerate(selected_institutes): - chart_inst = chart_dim.transform_filter({ - "and": [ - alt.FieldOneOfPredicate(field="Semester", oneOf=semester_range), - alt.FieldEqualPredicate(field="Institut", equal=institute) - ] - }) - with columns[i]: - 
st.altair_chart(chart_inst, use_container_width=True) - - elif type == "violin": - smallest = pd.to_numeric(chart.data[dimension]).nsmallest(1).values[0] - largest = pd.to_numeric(chart.data[dimension]).nlargest(1).values[0] - chart_dim = chart.transform_density( - dimension, - as_=[dimension, 'density'], - groupby=["Semester", "Dimension", "Institut"], - extent=[0, 7] - ).mark_area(orient="horizontal").encode( - x=alt.X("density:Q").stack("center").impute(None).title(None).axis(labels=False, values=[0], grid=False, ticks=True), - y=alt.Y('{0}:Q'.format(dimension)).scale(domain=(int(smallest - 1), int(largest + 1))).title("Bewertung"), - tooltip=[alt.Tooltip("density:Q", title="Dichte", format=".2")], - color=alt.Color("Semester").legend(None), - # width 1000 / len ensures that every facet is 200 in width - ).properties(width=1000 / len(semester_range)).facet(column="Semester", row="Institut") - - chart_inst = chart_dim.transform_filter({ - "and": [ - alt.FieldOneOfPredicate(field="Semester", oneOf=semester_range), - alt.FieldOneOfPredicate(field="Institut", oneOf=selected_institutes) - ] - }) - st.altair_chart(chart_inst) + global curr_key + st.header(title, divider="gray") + with st.expander(":information_source: Hinweis", expanded=True): + st.write(markdown) + column_multiselect, column_slider, column_institute = st.columns(3) + + with column_multiselect: + selected_dimensions = st.multiselect(key=curr_key, label="Dimensionen, die generiert werden sollen", options=[ + "A", "B", "C"], default=["A", "B", "C"]) + curr_key += 1 + with column_slider: + min_semester, max_semester = get_min_max_semester(chart.data) + interval = generate_semester_interval(min_semester, max_semester) + interval = [sem for sem in interval if (sem.startswith("So") and "Sommersemester" in semester_select) or ( + sem.startswith("Wi") and "Wintersemester" in semester_select)] + semester_start, semester_end = st.select_slider(key=curr_key, + label="Generation für Semester zwischen", + 
options=interval, + value=( + interval[0], interval[-1]) + ) + curr_key += 1 + semester_range = interval[interval.index( + semester_start):interval.index(semester_end) + 1] + + with column_institute: + selected_institutes = st.multiselect(key=curr_key, label="Generieren für die Institute:", options=[ + "Bioinformatik", "Informatik", "Mathematik"], default=["Bioinformatik", "Informatik", "Mathematik"]) + curr_key += 1 + for dimension in selected_dimensions: + st.subheader("Dimension {0}".format(dimension)) + if type == "box": + smallest = pd.to_numeric( + chart.data[dimension]).nsmallest(1).values[0] + largest = pd.to_numeric( + chart.data[dimension]).nlargest(1).values[0] + chart_dim = chart.encode(y=alt.Y(dimension).scale( + domain=(smallest, largest)).title("Bewertung")) + + columns = st.columns(len(selected_institutes)) + for (i, institute) in enumerate(selected_institutes): + chart_inst = chart_dim.transform_filter({ + "and": [ + alt.FieldOneOfPredicate( + field="Semester", oneOf=semester_range), + alt.FieldEqualPredicate( + field="Institut", equal=institute) + ] + }).encode(color=alt.Color("Institut").scale(domain=[institute], range=[colors[institute]])) + with columns[i]: + st.altair_chart(chart_inst, use_container_width=True) + + elif type == "violin": + smallest = pd.to_numeric( + chart.data[dimension]).nsmallest(1).values[0] + largest = pd.to_numeric( + chart.data[dimension]).nlargest(1).values[0] + chart_dim = chart.transform_density( + dimension, + as_=[dimension, 'density'], + groupby=["Semester", "Dimension", "Institut"], + extent=[0, 7] + ).mark_area(orient="horizontal").encode( + x=alt.X("density:Q").stack("center").impute(None).title( + None).axis(labels=False, values=[0], grid=False, ticks=True), + y=alt.Y('{0}:Q'.format(dimension)).scale( + domain=(int(smallest - 1), int(largest + 1))).title("Bewertung"), + tooltip=[alt.Tooltip( + "density:Q", title="Dichte", format=".2")], + color=alt.Color("Semester").legend(None), + # width 1000 / len ensures 
that every facet is 200 in width + ).properties(width=1000 / len(semester_range)).facet(column="Semester", row="Institut") + + chart_inst = chart_dim.transform_filter({ + "and": [ + alt.FieldOneOfPredicate( + field="Semester", oneOf=semester_range), + alt.FieldOneOfPredicate( + field="Institut", oneOf=selected_institutes) + ] + }) + st.altair_chart(chart_inst) + def generate_semester_slider_with_chart(title, markdown, chart): - global curr_key - st.header(title, divider="gray") - with st.expander(":information_source: Hinweis", expanded=True): - st.write(markdown) - - min_semester, max_semester = get_min_max_semester(chart.data) - interval = generate_semester_interval(min_semester, max_semester) - interval = [sem for sem in interval if (sem.startswith("So") and "Sommersemester" in semester_select) or (sem.startswith("Wi") and "Wintersemester" in semester_select)] - semester_start, semester_end = st.select_slider(key=curr_key, - label="Generation für Semester zwischen", - options=interval, - value=(interval[0], interval[-1]) - ) - curr_key += 1 - semester_range = interval[interval.index(semester_start):interval.index(semester_end) + 1] - - chart = chart.transform_filter(alt.FieldOneOfPredicate(field="Semester", oneOf=semester_range)) - - st.altair_chart(chart, use_container_width=True) + global curr_key + st.header(title, divider="gray") + with st.expander(":information_source: Hinweis", expanded=True): + st.write(markdown) + + min_semester, max_semester = get_min_max_semester(chart.data) + interval = generate_semester_interval(min_semester, max_semester) + interval = [sem for sem in interval if (sem.startswith("So") and "Sommersemester" in semester_select) or ( + sem.startswith("Wi") and "Wintersemester" in semester_select)] + semester_start, semester_end = st.select_slider(key=curr_key, + label="Generation für Semester zwischen", + options=interval, + value=( + interval[0], interval[-1]) + ) + curr_key += 1 + semester_range = interval[interval.index( + 
semester_start):interval.index(semester_end) + 1] + + chart = chart.transform_filter(alt.FieldOneOfPredicate( + field="Semester", oneOf=semester_range)) + + st.altair_chart(chart, use_container_width=True) + # Scatterplot selection = alt.selection_point(fields=["Typ"], bind="legend") c = (alt.Chart(scores) - .mark_circle(size=100) - .encode( - color=alt.Color("Typ").legend(orient="bottom"), - opacity=alt.condition(selection, alt.value(1.0), alt.value(0.1)) - ) - .interactive() - .add_params( - selection - ) - ) -generate_plot_section__with_subdimensions("Scatter Plot", "Dieser Graph zeigt die Verteilung der Lehrveranstaltungen entlang der entsprechenden Unterdimensionen. Die abgegebenen Punkte werden für jede Frage in der dazugehörigen Dimension addiert und dann mit der maximal erreichbaren Punktzahl verrechnet um den Prozentsatz zu ermitteln. Jeder Punkt repräsentiert eine Lehrveranstaltung.", c) + .mark_circle(size=100) + .encode( + color=alt.Color("Typ").legend(orient="bottom"), + opacity=alt.condition(selection, alt.value(1.0), alt.value(0.1)) +) + .interactive() + .add_params( + selection +) +) +generate_plot_section__with_subdimensions( + "Scatter Plot", "Dieser Graph zeigt die Verteilung der Lehrveranstaltungen entlang der entsprechenden Unterdimensionen. Die abgegebenen Punkte werden für jede Frage in der dazugehörigen Dimension addiert und dann mit der maximal erreichbaren Punktzahl verrechnet um den Prozentsatz zu ermitteln. 
Jeder Punkt repräsentiert eine Lehrveranstaltung.", c) dimension_explanation_columns = st.columns(3) dimension_explanation = api.get_dimension_description() for j in range(2): - for (i, dimension) in enumerate(["A", "B", "C"]): - subdimension = "{0}{1}".format(dimension, str(j + 1)) - desc = [dim for dim in dimension_explanation if subdimension in dim][0] - with dimension_explanation_columns[i]: - st.markdown(''' - ### Dimension {0} - {1} - '''.format(subdimension, desc[subdimension]["de"])) - - -## Boxplot + for (i, dimension) in enumerate(["A", "B", "C"]): + subdimension = "{0}{1}".format(dimension, str(j + 1)) + desc = [dim for dim in dimension_explanation if subdimension in dim][0] + with dimension_explanation_columns[i]: + st.markdown(''' + ### Dimension {0} + {1} + '''.format(subdimension, desc[subdimension]["de"])) + + +# Boxplot box = pd.DataFrame(percentages_to_interval(scores.T.to_dict())).T c = (alt.Chart(box) - .mark_boxplot(extent='min-max') - .encode( - x="Semester", - color=alt.Color("Institut").legend(orient="bottom"), - ) - .interactive() - ) + .mark_boxplot(extent='min-max') + .encode( + x="Semester", + color=alt.Color("Institut").legend(orient="bottom"), +) + .interactive() +) generate_section_dimensions("Boxplot", "Dieser Graph zeigt für die Dimensionen des Fragebogens die historische Entwicklung der Fachbereiche. Jeweils eine Spalte bildet einen Fachbereich ab. Die Bewertung entsteht durch aufsummieren aller Studierendenantworten in der jeweiligen Dimension und anschließende Umrechnung in einen Prozentwert mithilfe der maximal erreichbaren Punktzahl. Danach wird der Wert in ein Intervall ähnlich zur Likert Scale von 1-7 eingeordnet. Die Boxen erstecken sich von den unteren 25% bis zu den oberen 75% der Antworten. Alle davor und danach bilden die Schnurrhaare der Box. 
Der Median ist mit einem Querstrich hervorgehoben.", c, "box") # Violin Plots @@ -399,167 +456,190 @@ c = (alt.Chart(box).encode(tooltip="Institut").interactive()) generate_section_dimensions("Violinplot", "Dieser Graph zeigt für die Dimensionen des Fragebogens die historische Entwicklung der Fachbereiche. Jeweils eine Spalte bildet einen Fachbereich ab. Die Bewertung entsteht durch aufsummieren aller Studierendenantworten in der jeweiligen Dimension und anschließende Umrechnung in einen Prozentwert mithilfe der maximal erreichbaren Punktzahl. Danach wird der Wert in ein Intervall ähnlich zur Likert Scale von 1-7 eingeordnet. Die Violine erstreckt sich über alle Antworten. Die Breite an den Bewertungen geben an, wie oft diese Bewertung vorkam. Je dicker die Violine an einem Punkt, desto öfter kam die Bewertung vor.", c, "violin") -## Line chart +# Line chart groups = full_dataset.groupby(by=["Semester", "Institut"]) results = { - "Semester": [], - "Institut": [], - "Beteiligung": [] + "Semester": [], + "Institut": [], + "Beteiligung": [] } for (header, group) in groups: - percentage_value = group["Antworten"].sum() - # TODO: TELL ABOUT THE EXCEPTION with - 1 - basic_value = group[group["Teilnehmende"] > 0]["Teilnehmende"].sum() - # TODO: write about how this happens: - # (empty dataframe due to missing data) - if basic_value <= 0: - continue - results["Semester"].append(header[0]) - results["Institut"].append(header[1]) - results["Beteiligung"].append(round((percentage_value / basic_value), 2)) + percentage_value = group["Antworten"].sum() + + # Filters out courses without participation data + basic_value = group[group["Teilnehmende"] > 0]["Teilnehmende"].sum() + + # (empty dataframe due to missing data) + if basic_value <= 0: + continue + results["Semester"].append(header[0]) + results["Institut"].append(header[1]) + results["Beteiligung"].append(round((percentage_value / basic_value), 2)) participation_data = pd.DataFrame(results) linechart = ( - 
alt.Chart(participation_data) - .mark_line(point=True) - .encode(x="Semester", y=alt.Y("Beteiligung").axis(format="%"), color="Institut") - .interactive() + alt.Chart(participation_data) + .mark_line(point=True) + .encode(x="Semester", y=alt.Y("Beteiligung").axis(format="%"), color=alt.Color("Institut").scale(domain=["Bioinformatik", "Mathematik", "Informatik"], range=["#31CB00", "#008DD5", "#A13D63"])) + .interactive() ) generate_semester_slider_with_chart("Linechart", "Dieser Graph zeigt den Verlauf der Beteiligung der Studierenden an den Fachbereichen. Die Punkte auf dem Graph sind feste Werte, die nach der Erhebung der Evaluation berechnet werden können, die Verbindungslinien stellen keinen zeitlichen verlauf da, sondern vereinfachen die Visualisierung des An- oder Abstiegs.", linechart) barchart = ( - alt.Chart(participation_data) - .mark_bar() - .encode(x="Semester", y=alt.Y("Beteiligung").axis(format="%"), color="Institut", xOffset="Institut") - .interactive() + alt.Chart(participation_data) + .mark_bar() + .encode(x="Semester", y=alt.Y("Beteiligung").axis(format="%"), color=alt.Color("Institut").scale(domain=["Bioinformatik", "Mathematik", "Informatik"], range=["#31CB00", "#008DD5", "#A13D63"]), xOffset="Institut") + .interactive() ) -generate_semester_slider_with_chart("Barchart", "Dieser Graph zeigt die historische Beteiligung an Lehrevaluationen im direkten Vergleich der Fachbereiche pro Semester an.", barchart) +generate_semester_slider_with_chart( + "Barchart", "Dieser Graph zeigt die historische Beteiligung an Lehrevaluationen im direkten Vergleich der Fachbereiche pro Semester an.", barchart) # Table st.header("Tabelle", divider="gray") with st.expander(":information_source: Hinweis", expanded=True): - st.write(''' - Diese Tabelle zeigt den historischen Verlauf der Beteiligung an Lehrevaluationen über alle Semester (unabhängig vom Filter ganz oben auf der Seite). 
- Eine Zelle besteht aus dem aktuellen Beteiligungswert (links) und wie er im historischen Verlauf seit Datenbeginn sich verändert hat (recht). - Die Richtung des Pfeils indiziert das Wachstum, die Prozentzahl darunter zeigt es genau an. - Die Farbe des Pfeils zeigt an wie die Entwicklung einzuschätzen ist, nach einem Ampelsystem. - ''') + st.write(''' + Diese Tabelle zeigt den historischen Verlauf der Beteiligung an Lehrevaluationen über alle Semester (unabhängig vom Filter ganz oben auf der Seite). + Eine Zelle besteht aus dem aktuellen Beteiligungswert (links) und wie er im historischen Verlauf seit Datenbeginn sich verändert hat (recht). + Die Richtung des Pfeils indiziert das Wachstum, die Prozentzahl darunter zeigt es genau an. + Die Farbe des Pfeils zeigt an wie die Entwicklung einzuschätzen ist, nach einem Ampelsystem. + ''') template = "" # This has as precondition that the code is run by the makefile with open(os.path.join("src", "assets", "table_template.html"), "r") as file: - template = file.read() + template = file.read() groups = full_dataset.groupby(by=["Semester", "Institut", "Veranstaltungstyp"]) results = { - "Semester": [], - "Institut": [], - "Veranstaltungstyp": [], - "Beteiligung": [] + "Semester": [], + "Institut": [], + "Veranstaltungstyp": [], + "Beteiligung": [] } for (header, group) in groups: - percentage_value = group["Antworten"].sum() - # TODO: TELL ABOUT THE EXCEPTION with - 1 - basic_value = group[group["Teilnehmende"] > 0]["Teilnehmende"].sum() - # TODO: write how this happens: - # (empty dataframe due to missing data) - if basic_value <= 0: - continue - results["Semester"].append(header[0]) - results["Institut"].append(header[1]) - results["Veranstaltungstyp"].append(header[2]) - results["Beteiligung"].append(round((percentage_value / basic_value) * 100, 2)) + percentage_value = group["Antworten"].sum() + # Filter out courses with no participation data + basic_value = group[group["Teilnehmende"] > 0]["Teilnehmende"].sum() + + # 
(empty dataframe due to missing data) + if basic_value <= 0: + continue + results["Semester"].append(header[0]) + results["Institut"].append(header[1]) + results["Veranstaltungstyp"].append(header[2]) + results["Beteiligung"].append( + round((percentage_value / basic_value) * 100, 2)) participation_data = pd.DataFrame(results) for inst in ["Bioinformatik", "Informatik", "Mathematik"]: - inst_df = participation_data[participation_data["Institut"] == inst] - - semester_list = inst_df["Semester"].to_list() - oldest_semester = sorted(semester_list, key=cmp_to_key(compare_semesters))[0] - newest_semester = sorted(semester_list, key=cmp_to_key(compare_semesters))[-1] - - for category in ["Vorlesungen", "Seminare", "Übungen"]: - oldest_participation = inst_df[(inst_df["Semester"] == oldest_semester) & (inst_df["Veranstaltungstyp"] == category)]["Beteiligung"].values[0] - newest_participation = inst_df[(inst_df["Semester"] == newest_semester) & (inst_df["Veranstaltungstyp"] == category)]["Beteiligung"].values[0] - difference = round(newest_participation - oldest_participation, 2) - - # -difference ensures correct rotation - rotation = round(90 * (-difference / 100),2) - - color = "yellow" - - if difference in pd.Interval(left=5, right=100, closed="both"): - color = "green" - elif difference in pd.Interval(left=-100, right=-5, closed="both"): - color = "red" - - cat_map = { - "Vorlesungen": "VL", - "Seminare": "S", - "Übungen": "U" - } - shorthand_start = "%{0}-{1}".format(CONSTANTS.INSTITUTE_MAP[inst].upper(), cat_map[category]) - - template = template.replace(shorthand_start + "%", str(newest_participation) + "%") - template = template.replace(shorthand_start + "-ROT%", str(rotation)) - template = template.replace(shorthand_start + "-ARROW%", color) - template = template.replace(shorthand_start + "-IND%", str(difference) + "%") + inst_df = participation_data[participation_data["Institut"] == inst] + + semester_list = inst_df["Semester"].to_list() + oldest_semester = 
sorted( + semester_list, key=cmp_to_key(compare_semesters))[0] + newest_semester = sorted( + semester_list, key=cmp_to_key(compare_semesters))[-1] + + for category in ["Vorlesungen", "Seminare", "Übungen"]: + oldest_participation = inst_df[(inst_df["Semester"] == oldest_semester) & ( + inst_df["Veranstaltungstyp"] == category)]["Beteiligung"].values[0] + newest_participation = inst_df[(inst_df["Semester"] == newest_semester) & ( + inst_df["Veranstaltungstyp"] == category)]["Beteiligung"].values[0] + difference = round(newest_participation - oldest_participation, 2) + + # -difference ensures correct rotation + rotation = round(90 * (-difference / 100), 2) + + color = "yellow" + + if difference in pd.Interval(left=5, right=100, closed="both"): + color = "green" + elif difference in pd.Interval(left=-100, right=-5, closed="both"): + color = "red" + + cat_map = { + "Vorlesungen": "VL", + "Seminare": "S", + "Übungen": "U" + } + shorthand_start = "%{0}-{1}".format( + CONSTANTS.INSTITUTE_MAP[inst].upper(), cat_map[category]) + + template = template.replace( + shorthand_start + "%", str(newest_participation) + "%") + template = template.replace(shorthand_start + "-ROT%", str(rotation)) + template = template.replace(shorthand_start + "-ARROW%", color) + template = template.replace( + shorthand_start + "-IND%", str(difference) + "%") st.write(template, unsafe_allow_html=True) -## Ranking +# Ranking # Slider for no of ranks if not present_mode: - st.header("Ranking", divider="gray") - with st.expander(":information_source: Hinweis", expanded=True): - st.markdown("Der Ranking Score wird berechnet indem alle erreichten Punkte für jede Frage summiert werden, diese dann durch die Anzahl an Rückmeldungen geteilt wird und somit die Durchschnittspunkte pro Evaluation erhalten werden") - # Auto generate - semester_ranking_select = st.multiselect("Ranking generieren für Semester:", ["SoSe2019", "SoSe2020", "SoSe2021", "SoSe2022", "SoSe2023"], ["SoSe2023"]) - col1, col2 = st.columns(2) 
- col3, col4 = st.columns(2) - with col1: - type_select = st.multiselect("Generieren für Kategorien:", ["Vorlesungen", "Übungen", "Seminare"], ["Vorlesungen"]) - with col2: - inst_select = st.multiselect("Generieren für die Institute:", ["Bioinformatik", "Informatik", "Mathematik"], ["Informatik"]) - with col3: - combine_type = st.toggle("Kategorien kombinieren") - with col4: - combine_inst = st.toggle("Institute kombinieren") - - filtered = full_dataset[(full_dataset["Semester"].isin(semester_ranking_select)) & (full_dataset["Institut"].isin(inst_select)) & (full_dataset["Veranstaltungstyp"].isin(type_select))] - cols = [col for col in filtered.columns if uuid_regex.match(col) != None] - filtered["sum"] = filtered[cols].sum(axis=1) - dropped = filtered.drop(columns=cols) - dropped["Punkte"] = dropped["sum"] / dropped["Antworten"] - dropped["Punkte"] = dropped["Punkte"].apply(lambda x: round(x, 2)) - dropped = dropped.drop(columns=["Veranstaltungsnummer", "Antworten", "Teilnehmende", "sum"]) - if not combine_inst: - for institute in inst_select: - st.subheader(institute) - if not combine_type: - columns = st.columns(len(type_select)) - for i in range(len(type_select)): - with columns[i]: - st.subheader(type_select[i]) - frame = dropped[(dropped["Institut"] == institute) & (dropped["Veranstaltungstyp"] == type_select[i])] - frame.sort_values("Punkte", ascending=False, inplace=True) - st.dataframe(frame.head(10), hide_index=True, use_container_width=True) - else: - frame = dropped[dropped["Institut"] == institute] - frame.sort_values("Punkte", ascending=False, inplace=True) - st.dataframe(frame.head(10), hide_index=True, use_container_width=True) - else: - if not combine_type: - columns = st.columns(len(type_select)) - for i in range(len(type_select)): - with columns[i]: - st.header(type_select[i]) - frame = dropped[dropped["Veranstaltungstyp"] == type_select[i]] - frame.sort_values("Punkte", ascending=False, inplace=True) - st.dataframe(frame.head(10), 
hide_index=True, use_container_width=True) - else: - frame = dropped.sort_values("Punkte", ascending=False) - st.dataframe(frame.head(10), hide_index=True, use_container_width=True) \ No newline at end of file + st.header("Ranking", divider="gray") + with st.expander(":information_source: Hinweis", expanded=True): + st.markdown("Der Ranking Score wird berechnet indem alle erreichten Punkte für jede Frage summiert werden, diese dann durch die Anzahl an Rückmeldungen geteilt wird und somit die Durchschnittspunkte pro Evaluation erhalten werden") + # Auto generate + semester_ranking_select = st.multiselect("Ranking generieren für Semester:", [ + "SoSe2019", "SoSe2020", "SoSe2021", "SoSe2022", "SoSe2023"], ["SoSe2023"]) + col1, col2 = st.columns(2) + col3, col4 = st.columns(2) + with col1: + type_select = st.multiselect("Generieren für Kategorien:", [ + "Vorlesungen", "Übungen", "Seminare"], ["Vorlesungen"]) + with col2: + inst_select = st.multiselect("Generieren für die Institute:", [ + "Bioinformatik", "Informatik", "Mathematik"], ["Informatik"]) + with col3: + combine_type = st.toggle("Kategorien kombinieren") + with col4: + combine_inst = st.toggle("Institute kombinieren") + + filtered = full_dataset[(full_dataset["Semester"].isin(semester_ranking_select)) & ( + full_dataset["Institut"].isin(inst_select)) & (full_dataset["Veranstaltungstyp"].isin(type_select))] + cols = [col for col in filtered.columns if uuid_regex.match( + col) is not None] + filtered["sum"] = filtered[cols].sum(axis=1) + dropped = filtered.drop(columns=cols) + dropped["Punkte"] = dropped["sum"] / dropped["Antworten"] + dropped["Punkte"] = dropped["Punkte"].apply(lambda x: round(x, 2)) + dropped = dropped.drop( + columns=["Veranstaltungsnummer", "Antworten", "Teilnehmende", "sum"]) + if not combine_inst: + for institute in inst_select: + st.subheader(institute) + if not combine_type: + columns = st.columns(len(type_select)) + for i in range(len(type_select)): + with columns[i]: + 
st.subheader(type_select[i]) + frame = dropped[(dropped["Institut"] == institute) & ( + dropped["Veranstaltungstyp"] == type_select[i])] + frame.sort_values( + "Punkte", ascending=False, inplace=True) + st.dataframe(frame.head(10), hide_index=True, + use_container_width=True) + else: + frame = dropped[dropped["Institut"] == institute] + frame.sort_values("Punkte", ascending=False, inplace=True) + st.dataframe(frame.head(10), hide_index=True, + use_container_width=True) + else: + if not combine_type: + columns = st.columns(len(type_select)) + for i in range(len(type_select)): + with columns[i]: + st.header(type_select[i]) + frame = dropped[dropped["Veranstaltungstyp"] + == type_select[i]] + frame.sort_values("Punkte", ascending=False, inplace=True) + st.dataframe(frame.head(10), hide_index=True, + use_container_width=True) + else: + frame = dropped.sort_values("Punkte", ascending=False) + st.dataframe(frame.head(10), hide_index=True, + use_container_width=True) diff --git a/src/db/api/api-v1/paths/dimensions/questions/{dimension}.js b/src/db/api/api-v1/paths/dimensions/questions/{dimension}.js index 5ca1d4d26d40639ee706f48b9d093b4c7821cf08..580c939e8ae40217ba196a99212f31991d30fddb 100644 --- a/src/db/api/api-v1/paths/dimensions/questions/{dimension}.js +++ b/src/db/api/api-v1/paths/dimensions/questions/{dimension}.js @@ -5,7 +5,7 @@ export default function (dimensionsService) { async function GET(req, res, _next) { let answer = await dimensionsService.getQuestionsForDimension( - req.params.dimension + req.params.dimension, ); if (answer.length > 0) { res.status(200).json(answer); diff --git a/src/db/api/api-v1/paths/questions/{question}/dimension.js b/src/db/api/api-v1/paths/questions/{question}/dimension.js index 04e7a3f4a3bafbedf82937a958e5ffbd7355f3c0..f2cec4eefb80472972b3dbeb557423e63a626b1a 100644 --- a/src/db/api/api-v1/paths/questions/{question}/dimension.js +++ b/src/db/api/api-v1/paths/questions/{question}/dimension.js @@ -6,7 +6,9 @@ export default 
function (dimensionsService) { async function GET(req, res, _next) { res .status(200) - .json(await dimensionsService.getDimensionForQuestion(req.params.question)); + .json( + await dimensionsService.getDimensionForQuestion(req.params.question), + ); } GET.apiDoc = { @@ -26,13 +28,13 @@ export default function (dimensionsService) { 200: { description: "A list of questions that match the requested id", schema: { - type: "object", - properties: { - id: { - type: "string", - description: "Dimension Id" - } - } + type: "object", + properties: { + id: { + type: "string", + description: "Dimension Id", + }, + }, }, }, default: { diff --git a/src/db/api/api-v1/paths/scores/course/{question}.js b/src/db/api/api-v1/paths/scores/course/{question}.js index 82a4f085dd2c0ec8452287088379f3ca39429990..403ba7ffc6910853bf17585dd19679877655373b 100644 --- a/src/db/api/api-v1/paths/scores/course/{question}.js +++ b/src/db/api/api-v1/paths/scores/course/{question}.js @@ -11,8 +11,8 @@ export default function (scoresService) { .json( await scoresService.deleteScoreForQuestion( req.body, - req.params.question - ) + req.params.question, + ), ); } diff --git a/src/db/api/api-v1/paths/scores/{semester}/{dimension}.js b/src/db/api/api-v1/paths/scores/{semester}/{dimension}.js index 8d613d0e3e824eb500d8cccfef3fe3ff4785a9d3..5ac3fda93ad838917265f4ad3320d103c70f6139 100644 --- a/src/db/api/api-v1/paths/scores/{semester}/{dimension}.js +++ b/src/db/api/api-v1/paths/scores/{semester}/{dimension}.js @@ -9,8 +9,8 @@ export default function (scoresService) { .json( await scoresService.getAllScoresForDimensionInSemester( req.params.semester, - req.params.dimension - ) + req.params.dimension, + ), ); } diff --git a/src/db/api/api-v1/paths/scores/{semester}/{dimension}/{institute}.js b/src/db/api/api-v1/paths/scores/{semester}/{dimension}/{institute}.js index 775898b7a51896b49d798a2b13ea485434040bda..794dc8326ce661a54eb8311d15657dbaedcba4ab 100644 --- 
a/src/db/api/api-v1/paths/scores/{semester}/{dimension}/{institute}.js +++ b/src/db/api/api-v1/paths/scores/{semester}/{dimension}/{institute}.js @@ -10,8 +10,8 @@ export default function (scoresService) { await scoresService.getAllScoresForDimensionInInstitute( req.params.semester, req.params.dimension, - req.params.institute - ) + req.params.institute, + ), ); } diff --git a/src/db/api/api-v1/paths/{institute}/{semester}/score.js b/src/db/api/api-v1/paths/{institute}/{semester}/score.js index 1484b9de5746876374440714aaa33feb78ca23a8..1d8eedf789b0f820277bdb0ce6c600cee8e63c03 100644 --- a/src/db/api/api-v1/paths/{institute}/{semester}/score.js +++ b/src/db/api/api-v1/paths/{institute}/{semester}/score.js @@ -9,8 +9,8 @@ export default function (scoresService) { .json( await scoresService.getAllScoresForInstituteFromSemester( req.params.institute, - req.params.semester - ) + req.params.semester, + ), ); } diff --git a/src/db/api/api-v1/services/dimensionsService.js b/src/db/api/api-v1/services/dimensionsService.js index c32a7980d3e8ce3127495d1df3cd8eebb9766b81..eae1169afd5ed5bee6c681f5644c7c337460d373 100644 --- a/src/db/api/api-v1/services/dimensionsService.js +++ b/src/db/api/api-v1/services/dimensionsService.js @@ -1,4 +1,7 @@ -import { getQuestionsWithDimension, getDimensionForQuestion } from "../../postgres.js"; +import { + getQuestionsWithDimension, + getDimensionForQuestion, +} from "../../postgres.js"; const dimensions = [ { @@ -51,8 +54,8 @@ const dimensionsService = { }, async getDimensionForQuestion(question) { let res = await getDimensionForQuestion(question); - return {id: res.id} - } + return { id: res.id }; + }, }; export default dimensionsService; diff --git a/src/db/api/api-v1/services/scoresService.js b/src/db/api/api-v1/services/scoresService.js index 504ba0f80e3894361a095ef14fb0257aec871322..94bf9b0733cfacbb471c69477dd71ce7a2b98390 100644 --- a/src/db/api/api-v1/services/scoresService.js +++ b/src/db/api/api-v1/services/scoresService.js @@ -35,7 
+35,7 @@ const scoresService = { return await getAllScoresForDimensionInInstitute( semester, dimension, - institute + institute, ); }, async postScores(scores, force = false) { @@ -52,7 +52,7 @@ const scoresService = { score.semester, score.institute, score.answers, - score.participants + score.participants, ); for (let subpoints of score.scores) { diff --git a/src/db/api/index.js b/src/db/api/index.js index 89197b73a05a11aedd4279a30e2442b1e73314c9..965eff52dea5fb8780bc895555abe8e34093e3c1 100644 --- a/src/db/api/index.js +++ b/src/db/api/index.js @@ -12,7 +12,6 @@ import v1DimensionsService from "./api-v1/services/dimensionsService.js"; import v1ScoresService from "./api-v1/services/scoresService.js"; //TODO: examples? -//TODO: Documentation const app = express(); const port = 3000; @@ -41,7 +40,7 @@ app.use( swaggerOptions: { url: "/v1/api-docs", }, - }) + }), ); app.get("/", (_req, res) => { diff --git a/src/db/api/package-lock.json b/src/db/api/package-lock.json index 2bfa19e4faf6a2fdf62ef752c5c2967335095b35..8585d7e54bbcdd74552629c348c803eab78ad131 100644 --- a/src/db/api/package-lock.json +++ b/src/db/api/package-lock.json @@ -14,6 +14,7 @@ "express-openapi": "^12.1.3", "pg": "^8.11.3", "pg-format": "^1.0.4", + "prettier": "^3.2.5", "swagger-ui-express": "^5.0.0" } }, @@ -1064,6 +1065,20 @@ "node": ">=0.10.0" } }, + "node_modules/prettier": { + "version": "3.2.5", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.2.5.tgz", + "integrity": "sha512-3/GWa9aOC0YeD7LUfvOG2NiDyhOWRvt1k+rcKhOuYnMY24iiCphgneUfJDyFXd6rZCAnuLBv6UeAULtrhT/F4A==", + "bin": { + "prettier": "bin/prettier.cjs" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/prettier/prettier?sponsor=1" + } + }, "node_modules/proxy-addr": { "version": "2.0.7", "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", diff --git a/src/db/api/package.json b/src/db/api/package.json index 
e96b6b6d8ea32b18385dbb91221e30569eabeca8..75a6874620e4bda596e387543f6f563579fbfbb1 100644 --- a/src/db/api/package.json +++ b/src/db/api/package.json @@ -15,6 +15,7 @@ "express-openapi": "^12.1.3", "pg": "^8.11.3", "pg-format": "^1.0.4", + "prettier": "^3.2.5", "swagger-ui-express": "^5.0.0" } } diff --git a/src/db/api/postgres.js b/src/db/api/postgres.js index 6b7dcbd84fd4c74a1d44e4e1b60c282ccf16aa05..35b2c4d8daf761c86808000ab5e0038534f31652 100644 --- a/src/db/api/postgres.js +++ b/src/db/api/postgres.js @@ -20,7 +20,7 @@ export async function getAll(table) { export async function getQuestions(courseId) { const res = await client.query( 'SELECT id, content, scale, dimension FROM question INNER JOIN (SELECT question FROM "question-belongs-to" WHERE course = $1) ON question.id = question', - [courseId] + [courseId], ); return res.rows; } @@ -28,7 +28,7 @@ export async function getQuestions(courseId) { export async function getAnswers(scale) { const res = await client.query( 'SELECT id, content, value FROM answer INNER JOIN (SELECT answer FROM "answer-belongs-to" WHERE scale = $1) ON answer.id = answer', - [scale] + [scale], ); return res.rows; } @@ -38,7 +38,7 @@ export async function getQuestionsWithDimension(dimensionId) { if (["A", "B", "C"].includes(dimensionId)) { res = await client.query( "SELECT * FROM question WHERE dimension LIKE $1 || '%'", - [dimensionId] + [dimensionId], ); } else { res = await client.query("SELECT * FROM question WHERE dimension = $1", [ @@ -60,12 +60,13 @@ export async function getScaleForQuestion(questionId) { } export async function getDimensionForQuestion(questionId) { - let res = await client.query("SELECT dimension FROM question WHERE id = $1", [questionId]); - + let res = await client.query("SELECT dimension FROM question WHERE id = $1", [ + questionId, + ]); + if (res.rows.length > 0) { - return { id: res.rows[0].dimension } - } - else { + return { id: res.rows[0].dimension }; + } else { return null; } } @@ -109,7 +110,7 @@ 
export async function getCourse(id) { console.log(id); let res = await client.query( "SELECT number, name, type, lecturer, semester FROM course WHERE id = $1", - [id] + [id], ); return res.rows; @@ -117,7 +118,7 @@ export async function getCourse(id) { export async function getAllScores() { let res = await client.query( - "SELECT * FROM course INNER JOIN (SELECT * FROM score) on id = course" + "SELECT * FROM course INNER JOIN (SELECT * FROM score) on id = course", ); if (res.rows.length == 0) { return []; @@ -134,18 +135,27 @@ export async function createCourseIfNotExists( semester, institute, answers, - participants + participants, ) { // Exists check let course = await client.query( "SELECT id FROM course where number = $1 AND name = $2 AND type = $3 AND lecturer = $4 and semester = $5 AND institute = $6", - [number, name, type, lecturer, semester, institute] + [number, name, type, lecturer, semester, institute], ); if (course.rows.length == 0) { let res = await client.query( "INSERT INTO course(number, name, type, lecturer, semester, institute, answers, participants) VALUES($1, $2, $3, $4, $5, $6, $7, $8) RETURNING id", - [number, name, type, lecturer, semester, institute, answers, participants] + [ + number, + name, + type, + lecturer, + semester, + institute, + answers, + participants, + ], ); return res.rows[0].id; } else { @@ -169,19 +179,19 @@ export async function insertScores(scores, force = false) { await client.query( format( "INSERT INTO score VALUES %L ON CONFLICT (course, question) DO UPDATE SET course=EXCLUDED.course, question=EXCLUDED.question, score=EXCLUDED.score", - scores - ) + scores, + ), ); } export async function getAllScoresForDimensionInInstitute( semester, dimension, - institute + institute, ) { let res = await client.query( "SELECT * FROM course INNER JOIN (SELECT * FROM score INNER JOIN (SELECT * FROM question) ON question=id) ON course.id = course WHERE semester=$1 AND institute=$2 AND dimension LIKE $3 || '%'", - [semester, 
institute, dimension] + [semester, institute, dimension], ); return generateScoreStructure(res.rows); } @@ -189,7 +199,7 @@ export async function getAllScoresForDimensionInInstitute( export async function getAllScoresForDimensionInSemester(semester, dimension) { let res = await client.query( "SELECT * FROM course INNER JOIN (SELECT * FROM score INNER JOIN (SELECT * FROM question) ON question=id) ON course.id = course WHERE semester=$1 AND dimension LIKE $2 || '%'", - [semester, dimension] + [semester, dimension], ); return generateScoreStructure(res.rows); } @@ -197,7 +207,7 @@ export async function getAllScoresForDimensionInSemester(semester, dimension) { export async function getAllScoresForDimension(dimension) { let res = await client.query( "SELECT * FROM course INNER JOIN (SELECT * FROM score INNER JOIN (SELECT id as questionId, dimension FROM question) ON question=questionId) ON course.id = course WHERE dimension LIKE $1 || '%'", - [dimension] + [dimension], ); return generateScoreStructure(res.rows); } @@ -205,7 +215,7 @@ export async function getAllScoresForDimension(dimension) { export async function getAllScoresForSemester(semester) { let res = await client.query( "SELECT * FROM course INNER JOIN (SELECT * FROM score) on id = course WHERE semester = $1", - [semester] + [semester], ); return generateScoreStructure(res.rows); } @@ -213,18 +223,18 @@ export async function getAllScoresForSemester(semester) { export async function getAllScoresForInstitute(institute) { let res = await client.query( "SELECT * FROM course INNER JOIN (SELECT * FROM score) on id = course WHERE institute = $1", - [institute] + [institute], ); return generateScoreStructure(res.rows); } export async function getAllScoresForInstituteFromSemester( institute, - semester + semester, ) { let res = await client.query( "SELECT * FROM course INNER JOIN (SELECT * FROM score) on id = course WHERE institute = $1 AND semester = $2", - [institute, semester] + [institute, semester], ); return 
generateScoreStructure(res.rows); } @@ -244,7 +254,7 @@ export async function deleteScoreForQuestion(courses, question) { course.course_type, course.lecturer, course.semester, - ] + ], ); } } diff --git a/src/lib/api.py b/src/lib/api.py index 113feff2d4674d17352fc63b21e6a48d370afa10..8c07c0344f197c973049c801cba1134989827c28 100644 --- a/src/lib/api.py +++ b/src/lib/api.py @@ -1,173 +1,198 @@ import requests + BASE_URL = "http://localhost:50000" + def request_questions() -> list[dict]: - """Requests all questions that exist in the question API. - Calls the `/questions` Endpoint - - Returns - ------- - list[dict]: Array of questions, where the dict represents the question. - Contains `id` (uuid), `content` (string), `scale` (string), `dimension` (string or null) - """ - res = requests.get("{0}/v1/questions".format(BASE_URL)) - if res.status_code != 200: - raise requests.RequestException(f"Request was not fullfilled successfully. Server responded with {res.status_code}") - return res.json() + """Requests all questions that exist in the question API. + Calls the `/questions` Endpoint + + Returns + ------- + list[dict]: Array of questions, where the dict represents the question. + Contains `id` (uuid), `content` (string), `scale` (string), `dimension` (string or null) + """ + res = requests.get("{0}/v1/questions".format(BASE_URL)) + if res.status_code != 200: + raise requests.RequestException( + f"Request was not fullfilled successfully. Server responded with {res.status_code}") + return res.json() + def get_questions_for_dimension(dimension: str) -> list[dict]: - """Requests all questions that belong to the specified dimension in the question API - Calls the `/dimensions/questions/{dimension}` Endpoint - - Parameters - ---------- - dimension: str - Represents dimension. Can be `A`, `B`, `C` or any of these three in combination with `1` or `2` - - Returns - ------- - list[dict]: Array of questions, where the dict represents the question. 
- Contains `id` (uuid), `content` (string), `scale` (string), `dimension` (string or null) - """ - res = requests.get("{0}/v1/dimensions/questions/{1}".format(BASE_URL, dimension)) - if res.status_code != 200: - raise requests.RequestException(f"Request was not fullfilled successfully. Server responded with {res.status_code}") - return res.json() + """Requests all questions that belong to the specified dimension in the question API + Calls the `/dimensions/questions/{dimension}` Endpoint + + Parameters + ---------- + dimension: str + Represents dimension. Can be `A`, `B`, `C` or any of these three in combination with `1` or `2` + + Returns + ------- + list[dict]: Array of questions, where the dict represents the question. + Contains `id` (uuid), `content` (string), `scale` (string), `dimension` (string or null) + """ + res = requests.get( + "{0}/v1/dimensions/questions/{1}".format(BASE_URL, dimension)) + if res.status_code != 200: + raise requests.RequestException( + f"Request was not fullfilled successfully. Server responded with {res.status_code}") + return res.json() + def get_scales() -> list[dict]: - """Requests all scales in the question API - Calls the `/scales` Endpoint - - Returns - ------- - list[dict]: Array of objects, where the dict represents the scales. - Contains `id` (string) - """ - res = requests.get("{0}/v1/scales".format(BASE_URL)) - if res.status_code != 200: - raise requests.RequestException(f"Request was not fullfilled successfully. Server responded with {res.status_code}") - return res.json() + """Requests all scales in the question API + Calls the `/scales` Endpoint + + Returns + ------- + list[dict]: Array of objects, where the dict represents the scales. + Contains `id` (string) + """ + res = requests.get("{0}/v1/scales".format(BASE_URL)) + if res.status_code != 200: + raise requests.RequestException( + f"Request was not fullfilled successfully. 
Server responded with {res.status_code}") + return res.json() + def get_answers_for(scale: str) -> list[dict]: - """ Requests answers for the provided scale in the question API - Calls the `/answers/{scale}` Endpoint - - Parameters - ---------- - scale: str - The scale you want the answers for - - Returns - ------- - list[dict]: Array of objects, where the dict represents the answers. - Contains `id` (string), `content` (string), `value` (integer) - """ - res = requests.get("{0}/v1/answers/{1}".format(BASE_URL, scale)) - if res.status_code != 200: - raise requests.RequestException(f"Request was not fullfilled successfully. Server responded with {res.status_code}") - return res.json() + """ Requests answers for the provided scale in the question API + Calls the `/answers/{scale}` Endpoint + + Parameters + ---------- + scale: str + The scale you want the answers for + + Returns + ------- + list[dict]: Array of objects, where the dict represents the answers. + Contains `id` (string), `content` (string), `value` (integer) + """ + res = requests.get("{0}/v1/answers/{1}".format(BASE_URL, scale)) + if res.status_code != 200: + raise requests.RequestException( + f"Request was not fullfilled successfully. Server responded with {res.status_code}") + return res.json() + def get_scale_for(question: str) -> dict: - """ Requests scale for the provided question in question API - Calls the `/scales/{question}` Endpoint - - Parameters - ---------- - question: str - The question you want the scale for. Needs to be provided as an UUID - - Returns - ------- - dict: Object representing the scale - Contains `id` (string) - """ - res = requests.get("{0}/v1/scales/{1}".format(BASE_URL, question)) - if res.status_code != 200: - raise requests.RequestException(f"Request was not fullfilled successfully. 
Server responded with {res.status_code}") - return res.json() + """ Requests scale for the provided question in question API + Calls the `/scales/{question}` Endpoint + + Parameters + ---------- + question: str + The question you want the scale for. Needs to be provided as an UUID + + Returns + ------- + dict: Object representing the scale + Contains `id` (string) + """ + res = requests.get("{0}/v1/scales/{1}".format(BASE_URL, question)) + if res.status_code != 200: + raise requests.RequestException( + f"Request was not fullfilled successfully. Server responded with {res.status_code}") + return res.json() + def get_dimension_description() -> list[dict]: - """Requests the dimension objects from the question API - Calls the `/dimensions` Endpoint + """Requests the dimension objects from the question API + Calls the `/dimensions` Endpoint - Returns - ------- - list[dict]: A list with objects, where the dict represents the dimension - Contains `id` (string), `description` (object) containing `en` (string, english description) and `de` (string, german description) - """ - res = requests.get("{0}/v1/dimensions".format(BASE_URL)) - if res.status_code != 200: - raise requests.RequestException(f"Request was not fullfilled successfully. Server responded with {res.status_code}") + Returns + ------- + list[dict]: A list with objects, where the dict represents the dimension + Contains `id` (string), `description` (object) containing `en` (string, english description) and `de` (string, german description) + """ + res = requests.get("{0}/v1/dimensions".format(BASE_URL)) + if res.status_code != 200: + raise requests.RequestException( + f"Request was not fullfilled successfully. 
Server responded with {res.status_code}") - return res.json() + return res.json() -def get_dimension_for(question: str) -> dict: - """Requests the dimension for the provided question from the question API - Calls the `/questions/{question}/dimension` Endpoint - Parameters - ---------- - question: str - Id of the questions +def get_dimension_for(question: str) -> dict: + """Requests the dimension for the provided question from the question API + Calls the `/questions/{question}/dimension` Endpoint + + Parameters + ---------- + question: str + Id of the questions + + Returns + -------- + dict: Dictionary containing the id of the dimension or None + """ + res = requests.get( + "{0}/v1/questions/{1}/dimension".format(BASE_URL, question)) + if res.status_code != 200: + raise requests.RequestException( + f"Request was not fullfilled successfully. Server responded with {res.status_code}") + + return res.json() - Returns - -------- - dict: Dictionary containing the id of the dimension or None - """ - res = requests.get("{0}/v1/questions/{1}/dimension".format(BASE_URL, question)) - if res.status_code != 200: - raise requests.RequestException(f"Request was not fullfilled successfully. Server responded with {res.status_code}") +# Results - return res.json() -# Results def force_courses(courses): - """Inserts the provided courses with their scores into the score API. Overrides existing data - Calls the `/scores` Endpoint - - Parameters - ---------- - courses: list[dict] - List of objects where each object represents a course with it's scores. - """ - res = requests.put("{0}/v1/scores".format(BASE_URL), json=courses) - - # If requests are too large, split them - if res.status_code == 413: - head = courses[len(courses) // 2:] - tail = courses[:len(courses) // 2] - force_courses(head) - force_courses(tail) - elif res.status_code != 201: - raise requests.RequestException(f"Request was not fullfilled successfully. 
Server responded with {res.status_code}") + """Inserts the provided courses with their scores into the score API. Overrides existing data + Calls the `/scores` Endpoint + + Parameters + ---------- + courses: list[dict] + List of objects where each object represents a course with it's scores. + """ + res = requests.put("{0}/v1/scores".format(BASE_URL), json=courses) + + # If requests are too large, split them + if res.status_code == 413: + head = courses[len(courses) // 2:] + tail = courses[:len(courses) // 2] + force_courses(head) + force_courses(tail) + elif res.status_code != 201: + raise requests.RequestException( + f"Request was not fullfilled successfully. Server responded with {res.status_code}") + def get_all_scores() -> list[dict]: - """Returns all scores that exist in the scores API - Calls the `/scores` Endpoint - - Returns - ------- - list[dict]: A list of courses with their score for questions - """ - res = requests.get("{0}/v1/scores".format(BASE_URL)) - if res.status_code != 200: - raise requests.RequestException(f"Request was not fullfilled successfully. Server responded with {res.status_code}") - return res.json() + """Returns all scores that exist in the scores API + Calls the `/scores` Endpoint + + Returns + ------- + list[dict]: A list of courses with their score for questions + """ + res = requests.get("{0}/v1/scores".format(BASE_URL)) + if res.status_code != 200: + raise requests.RequestException( + f"Request was not fullfilled successfully. Server responded with {res.status_code}") + return res.json() + def get_scores_for_dimension(dimension: str) -> list[dict]: - """Returns all scores that exist in the scores API for the specified dimension - Calls the `/scores/dimension/{dimension}` Endpoint - - Parameters - ---------- - dimension: str - The desired dimension. 
Can be either `A`, `B`, `C` or any of the three in combination with `1` or `2` - - Returns - ------- - list[dict]: A list representing the courses with their scores - """ - res = requests.get("{0}/v1/scores/dimensions/{1}".format(BASE_URL, dimension)) - if res.status_code != 200: - raise requests.RequestException(f"Request was not fullfilled successfully. Server responded with {res.status_code}") - return res.json() \ No newline at end of file + """Returns all scores that exist in the scores API for the specified dimension + Calls the `/scores/dimension/{dimension}` Endpoint + + Parameters + ---------- + dimension: str + The desired dimension. Can be either `A`, `B`, `C` or any of the three in combination with `1` or `2` + + Returns + ------- + list[dict]: A list representing the courses with their scores + """ + res = requests.get( + "{0}/v1/scores/dimensions/{1}".format(BASE_URL, dimension)) + if res.status_code != 200: + raise requests.RequestException( + f"Request was not fullfilled successfully. 
Server responded with {res.status_code}") + return res.json() diff --git a/src/lib/constants.py b/src/lib/constants.py index e77b95b5ae6ae7171cdb6f4fcf074139dc4f74cc..6e3c7f09534dfefc6babb772469b5d5753f390a5 100644 --- a/src/lib/constants.py +++ b/src/lib/constants.py @@ -1,15 +1,16 @@ -# This file keeps commonly used constats throughout the project +# This file keeps commonly used constats throughout the project import os RAW_DATA_PATH = os.path.normpath(os.path.join(os.getcwd(), "..", "raw_data")) -CLEAN_DATA_PATH = os.path.normpath(os.path.join(os.getcwd(), "..", "clean_data")) +CLEAN_DATA_PATH = os.path.normpath( + os.path.join(os.getcwd(), "..", "clean_data")) COMPUTER_SCIENCE_VALUES = os.path.join(CLEAN_DATA_PATH, "Informatik") BIOINFORMATICS_VALUES = os.path.join(CLEAN_DATA_PATH, "Bioinformatik") MATHEMATICS_VALUES = os.path.join(CLEAN_DATA_PATH, "Mathematik") OUT_PATH = os.path.normpath(os.path.join(os.getcwd(), '..', "outputs")) INSTITUTE_MAP = { - "Bioinformatik": "bio", - "Mathematik": "math", - "Informatik": "cs" -} \ No newline at end of file + "Bioinformatik": "bio", + "Mathematik": "math", + "Informatik": "cs" +} diff --git a/src/old-report.ipynb b/src/old-report.ipynb index 53baa32317dc5a5e9897200a96d64578b210c3b9..ff4b5177acb0dd31ac60673a894f9503a0eeeb05 100644 --- a/src/old-report.ipynb +++ b/src/old-report.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Old Pipeline\n", + "This file contains the steps to generate the old report, in order to compare results.\n", + "\n", + "> To select a semester, simply replace `SEMESTER` on line 7" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -12,7 +22,7 @@ "\n", "# Add semester here in the form of \"Sommersemester YYYY\" or \"Wintersemester YYYY/YY\"\n", "# e.g.: Sommersemester 2023 or Wintersemester 2022/23\n", - "SEMESTER = \"Sommersemester 2023\"\n", + "SEMESTER = \"Sommersemester 2021\"\n", "\n", "# Generates Semester name based on set 
constant\n", "SEMESTER_NAME = SEMESTER[0:2] + \"Se\"\n", @@ -241,7 +251,20 @@ "cell_type": "code", "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/alex/.pyenv/versions/3.9.15/lib/python3.9/site-packages/numpy/lib/nanfunctions.py:1215: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/alex/.pyenv/versions/3.9.15/lib/python3.9/site-packages/numpy/lib/nanfunctions.py:1215: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/alex/.pyenv/versions/3.9.15/lib/python3.9/site-packages/numpy/lib/nanfunctions.py:1215: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n" + ] + } + ], "source": [ "median_stud = {\n", "\t\"Vorlesung\": {\n", @@ -298,7 +321,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Result generation" + "### Result generation\n", + "Combines all results into one DataFrame" ] }, { diff --git a/src/pipeline.ipynb b/src/pipeline.ipynb index 5050b045b58da1ced5b9a45918724ecbf8a20b82..cfb6c2f37d5e65aed693ccabce0e63f64e4ba70e 100644 --- a/src/pipeline.ipynb +++ b/src/pipeline.ipynb @@ -14,7 +14,7 @@ "metadata": {}, "outputs": [], "source": [ - "import os, pandas, math,json\n", + "import os, pandas, math\n", "import regex as re\n", "import lib.constants as CONSTANTS\n", "import lib.api as api\n", @@ -35,7 +35,9 @@ "\t\tif len(files) > 0 and 'all.csv' in files:\n", "\t\t\tfile_path = os.path.join(root, \"all.csv\")\n", "\t\t\tdf = pandas.read_csv(file_path)\n", - "\t\t\tyield root.split(\"/\")[-2:] + [df]\n", + "\t\t\t# normpath and os.sep provide cross platform functionality\n", + "\t\t\troot = os.path.normpath(root)\n", + "\t\t\tyield root.split(os.sep)[-2:] + [df]\n", "\n", "def generate_scales():\n", "\tresult = {}\n", @@ -75,12 +77,16 @@ "def calc_score(series: pandas.Series) -> 
float:\n", "\n", "\tif uuid_regex.match(series.name) != None:\n", - "\t\t# TODO: Negate that one question\n", "\t\tif series.name not in question_cache:\n", "\t\t\tquestion_cache[series.name] = intervals[api.get_scale_for(series.name)[\"id\"]]\n", "\n", - "\n", "\t\tfactor = question_cache[series.name]\n", + "\n", + "\t\t# A single questions exists that has a negated scale\n", + "\t\tif series.name == \"fb994be2-e7c6-11ee-9404-00620b2c9060\":\n", + "\t\t\t# Reverse factor\n", + "\t\t\tfactor = factor[::-1]\n", + "\n", "\t\t# Catches weightless questions\n", "\t\tif len([x for x in factor if x != 0]) == 0:\n", "\t\t\treturn 0\n", @@ -141,6 +147,14 @@ "\tresult_frames[\"{0}_{1}\".format(institute,semester)] = res_df" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insert into database\n", + "Formats the dataframe data into a schema, that can be interpreted by the API and \"forces\" (overwriting duplicates) them into it " + ] + }, { "cell_type": "code", "execution_count": 5, @@ -192,7 +206,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -210,5 +224,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 }