diff --git a/unisportomat/course_scraper/course_scraper.py b/unisportomat/quiz/course_scraper/course_scraper.py similarity index 93% rename from unisportomat/course_scraper/course_scraper.py rename to unisportomat/quiz/course_scraper/course_scraper.py index 4c4a7103633a2b69de4ccd6f760bbe57c843e6f3..855a951c21bbf77c6e02302e14f57c2992c50ab1 100644 --- a/unisportomat/course_scraper/course_scraper.py +++ b/unisportomat/quiz/course_scraper/course_scraper.py @@ -5,6 +5,7 @@ for http://www.buchsys.de for SWP UniSport-O-Mat. import requests from bs4 import BeautifulSoup +from collections import OrderedDict def fetch_website(url): @@ -27,14 +28,14 @@ def fetch_website(url): raise -def scraping(site=None) -> dict: +def scraping(site=None) -> OrderedDict: """ Returns a dictionary of the form {name: link}, containing the scraped content of https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/index.html, unless another URL is given as an argument. """ - courses = {} + courses = OrderedDict() if site is None: site = "https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/" diff --git a/unisportomat/course_scraper/test_course_scraper.py b/unisportomat/quiz/course_scraper/test_course_scraper.py similarity index 85% rename from unisportomat/course_scraper/test_course_scraper.py rename to unisportomat/quiz/course_scraper/test_course_scraper.py index 62909ec52d70d6e7cee664b28aaf2eedd181253f..b48a3f437b6e084b44687ad95a13ab848a7aa542 100644 --- a/unisportomat/course_scraper/test_course_scraper.py +++ b/unisportomat/quiz/course_scraper/test_course_scraper.py @@ -3,6 +3,7 @@ Testing module, yo. Just for the course_scraper.py. """ from django.test import TestCase from course_scraper import scraping # , fetch_website +from collections import OrderedDict class ScraperTestCase(TestCase): @@ -15,7 +16,7 @@ class ScraperTestCase(TestCase): """ Testing return type of scraping(). 
""" - self.assertIsInstance(scraping(), dict) + self.assertIsInstance(scraping(), OrderedDict) def test_dict_not_empty(self): """ diff --git a/unisportomat/quiz/models.py b/unisportomat/quiz/models.py index a1ae9d0a6e2bdb50207f811b9fdf5d0c14b2f1d3..4dc897fe18a8d06cf990c5fb15643d083b808c53 100644 --- a/unisportomat/quiz/models.py +++ b/unisportomat/quiz/models.py @@ -42,7 +42,7 @@ class SportManager(models.Manager): def create_sport(self, **kwargs): """ - Creates new Sport Object and every CriterionRating for it + Creates new sport object and every CriterionRating for it """ sport = self.create(**kwargs) sport.currently_active = True @@ -56,10 +56,10 @@ class SportManager(models.Manager): class Sport(models.Model): """ - Defines a Sport with name, url that leads to the booking page. + Defines a sport with name, url that leads to the booking page. A sport includes ratings for all criterions. (e.g. How much it corresponds to the criterion "Martial Arts") - TODO: last_used may be changed in the future to better work with other functionalities, eg Statistics. + TODO: last_used may be changed in the future to better work with other functionalities, eg. statistics. 
""" name = models.TextField() @@ -67,7 +67,7 @@ class Sport(models.Model): criteria_ratings = models.ManyToManyField("Criterion", through="CriterionRating") # The Date Field last_used is set to now-time everytime a sport is activated - # either through manual activation or activation through the scraper + # Either through manual activation or activation through the scraper last_used = models.DateField(default=timezone.localdate) # Boolean currently_active states whether the sport is in the archive or not diff --git a/unisportomat/quiz/serializers.py b/unisportomat/quiz/serializers.py index 7ff977894fb8ce4c0a50967f40b55a351648d329..8f4a71dcb43173ff09c9fbeef3e5358017919533 100644 --- a/unisportomat/quiz/serializers.py +++ b/unisportomat/quiz/serializers.py @@ -206,6 +206,32 @@ class CriteriaSerializer(serializers.BaseSerializer): return criteria_list +class ArchiveSerializer(serializers.BaseSerializer): + """ + Serializes Sports in an archive format + """ + + def to_representation(self, sport_list): + """ + Takes a list of Sport Objects and returns id, name, last_used, url serialized + """ + + json_sport_list = [] + + for sport in sport_list: + + json_sport_list.append( + { + "id": sport.pk, + "name": sport.name, + "last_used": sport.last_used, + "url": sport.url, + } + ) + + return json_sport_list + + class GreetingEndSerializer(serializers.BaseSerializer): """ Serializer for GreetingText and EndText diff --git a/unisportomat/quiz/tests.py b/unisportomat/quiz/tests.py index e6eef13f33c3ccd6078f35f493ccd923652d006c..60f3af7b4786d4e047403134a00428979e90d701 100644 --- a/unisportomat/quiz/tests.py +++ b/unisportomat/quiz/tests.py @@ -536,7 +536,7 @@ class APITest(APITestCase): def test_currently_active(self): """ - Tests if PATCHing the "currently_active" value to false correctly changes the sport + Tests if PATCHing the 'currently_active' value to false correctly changes the sport """ # Set Up Values @@ -550,6 +550,57 @@ class APITest(APITestCase): 
self.assertEqual(response.data["currently_active"], False) + def test_sport_archive(self): + """ + Tests whether a sport is correctly displayed in the archive + """ + + # Get initial State of Archive + response = self.client.get(reverse("archive")) + + self.assertEqual(response.data["results"], []) + + # Change State of Sport + response = self.client.patch( + reverse("small-sport-list-detail", kwargs={"pk": 1}), + data={"currently_active": False}, + format="json", + ) + + # Check whether the Archive changed + response = self.client.get(reverse("archive")) + + self.assertEqual(response.data["results"][0]["id"], 1) + self.assertEqual(response.data["results"][0]["name"], "Jiu Jitsu") + + def test_sport_scraper(self): + """ + Tests what the SportScrapingAPI returns + """ + + # Get small-sport-list + sport_response = self.client.get(reverse("small-sport-list-list")) + self.assertEqual(len(sport_response.data["results"]), 1) + + # Get Diff List + response = self.client.get(reverse("scraper")) + + # Check for Length + # At the moment of the test, 121 sports are in buchsys; plus the 1 active DB sport (not in buchsys), the diff has 122 entries + # (Might break in a new semester with new sports!)
+ self.assertEqual(len(response.data["results"]), 122) + + # Send Diff back to Server + response = self.client.post( + reverse("scraper"), response.data["results"], format="json" + ) + + sport_response = self.client.get(reverse("small-sport-list-list")) + self.assertEqual(len(sport_response.data["results"]), 121) + + sport_response = self.client.get(reverse("archive")) + self.assertEqual(len(sport_response.data["results"]), 1) + def test_greeting_view(self): """ Test whether the greeting behaves correctly diff --git a/unisportomat/quiz/views.py b/unisportomat/quiz/views.py index 0098ce623019185823fdc4b327cb153c036d6c65..79072a26f06b7f645bac192a81239aca8b95a534 100644 --- a/unisportomat/quiz/views.py +++ b/unisportomat/quiz/views.py @@ -2,7 +2,7 @@ Defines the views for the API """ -# from django.shortcuts import render +import copy from rest_framework import viewsets from rest_framework.views import APIView from rest_framework.response import Response @@ -10,6 +10,8 @@ from django.shortcuts import get_object_or_404 from django.http import HttpResponse from .pagination import PageNumberWithPageSizePagination +from .course_scraper.course_scraper import scraping + from .serializers import ( SmallSportListSerializer, SportListSerializer, @@ -18,6 +20,7 @@ from .serializers import ( SingleSportSerializer, CriteriaSerializer, IncompleteSportSerializer, + ArchiveSerializer, GreetingEndSerializer, ) from .models import Sport, Criterion, Question, GreetingText, EndText @@ -284,6 +287,178 @@ class CriteriaView(APIView): return Response(response.data) +class SportArchiveView(APIView): + """ + View for the list of all archived sports (so all sports with currently_active = False) + """ + + def get(self, request): + """ + GET for api/admin/archive/ + """ + + paginator = PageNumberWithPageSizePagination() + + archived_sports = paginator.paginate_queryset( + Sport.objects.filter(currently_active=False).order_by("name"), request + ) + + response = ArchiveSerializer(archived_sports) 
+ + return paginator.get_paginated_response(response.data) + + +class ScraperView(APIView): + """ + View for the scraper, including GET and POST + """ + + def get(self, request): # pylint: disable=too-many-locals + """ + Scrapes the sports currently on the default website, + and sends a diff of the current sports and the scraped ones + """ + + # Scrape sports from their website + scraped_sports = scraping() + + # Iterate through DB Sport entries to see which ones are in the new Sport list etc + diff_list = [] + + id_counter = 1 + + empty_diff_dict = { + "id": -1, + "kind_of_diff": "", + "old_sport": { + "id": -1, + "name": "", + "url": "", + "last_used": "", + }, + "new_sport": { + "name": "", + "url": "", + }, + } + + for old_sport in Sport.objects.filter(currently_active=True): + + diff_dict = copy.deepcopy(empty_diff_dict) + + diff_dict["id"] = id_counter + + diff_dict["old_sport"]["id"] = old_sport.pk + diff_dict["old_sport"]["name"] = old_sport.name + diff_dict["old_sport"]["url"] = old_sport.url + diff_dict["old_sport"]["last_used"] = old_sport.last_used + + if old_sport.name in scraped_sports.keys(): + # A Sport currently active is also found in the new scraped sports + # -> "same" Sport + + diff_dict["kind_of_diff"] = "same" + + new_sport_url = scraped_sports.pop(old_sport.name) + + diff_dict["new_sport"]["name"] = old_sport.name + diff_dict["new_sport"]["url"] = new_sport_url + + else: + # A Sport currently active is _not_ found in the new scraped sports + # -> "to_be_archived" Sport + + diff_dict["kind_of_diff"] = "to_be_archived" + + diff_list.append(diff_dict) + id_counter += 1 + + for new_sport_name, new_sport_url in scraped_sports.items(): + + # The query should only contain zero values, or one value. 
+ # The name is technically not a primary key candidate, but a name shouldn't be included more than once + query = Sport.objects.filter(name=new_sport_name) + + diff_dict = copy.deepcopy(empty_diff_dict) + + diff_dict["id"] = id_counter + + diff_dict["new_sport"]["name"] = new_sport_name + diff_dict["new_sport"]["url"] = new_sport_url + + if query.count() == 0: + # The new Sport is not found in the Archive, so it is completely new + # -> "new" Sport + + diff_dict["kind_of_diff"] = "new" + + else: + # The new Sport is in the Archive, so it needs to be resurrected + # -> "from_archive" Sport + + old_sport = query.get(name=new_sport_name) + + diff_dict["kind_of_diff"] = "from_archive" + + diff_dict["old_sport"]["id"] = old_sport.pk + diff_dict["old_sport"]["name"] = old_sport.name + diff_dict["old_sport"]["url"] = old_sport.url + diff_dict["old_sport"]["last_used"] = old_sport.last_used + + diff_list.append(diff_dict) + id_counter += 1 + + # Paginate it all! + + paginator = PageNumberWithPageSizePagination() + paginated_list = paginator.paginate_queryset(diff_list, request) + + return paginator.get_paginated_response(paginated_list) + + def post(self, request): + """ + Gets list of diffs from Frontend and writes them into the database + """ + + diff_data = request.data + + for diff in diff_data: + + if diff["kind_of_diff"] == "new": + + sport = Sport.objects.create_sport() + sport.name = diff["new_sport"]["name"] + sport.url = diff["new_sport"]["url"] + + elif diff["kind_of_diff"] == "same": + # In case of URL changes, "same" updates the URL + + sport = Sport.objects.get(pk=diff["old_sport"]["id"]) + + sport.url = diff["new_sport"]["url"] + + # To update last_used + sport.reactivate() + + elif diff["kind_of_diff"] == "to_be_archived": + + sport = Sport.objects.get(pk=diff["old_sport"]["id"]) + sport.currently_active = False + + else: + # -> from_archive (note: any other kind_of_diff value also ends up here) + + sport = Sport.objects.get(pk=diff["old_sport"]["id"]) + sport.reactivate() + + # Same as in "same", URL could be
different + sport.url = diff["new_sport"]["url"] + + sport.save() + + return Response(status=200) + + class GreetingEndView(APIView): """ View for handling the beginning sentence diff --git a/unisportomat/unisportomat/settings.py b/unisportomat/unisportomat/settings.py index e41e8870fef01befcb256039eca5ed9ace7164a1..b2d243ebf8673eac54e2635e40207fa0b74a99ea 100644 --- a/unisportomat/unisportomat/settings.py +++ b/unisportomat/unisportomat/settings.py @@ -60,7 +60,8 @@ MIDDLEWARE = [ # SOURCE: https://github.com/bmihelac/ra-data-django-rest-framework REST_FRAMEWORK = { "DEFAULT_PAGINATION_CLASS": "quiz.pagination.PageNumberWithPageSizePagination", - "PAGE_SIZE": 10, + # Page size can be chosen by Frontend, so we set it high internally to streamline testing + "PAGE_SIZE": 1000, } ROOT_URLCONF = "unisportomat.urls" diff --git a/unisportomat/unisportomat/urls.py b/unisportomat/unisportomat/urls.py index b83056ca42a46937eb4fdb2739d54ba30def99d7..b054d3f60dc8d0a4e089741ed245cc78e4398766 100644 --- a/unisportomat/unisportomat/urls.py +++ b/unisportomat/unisportomat/urls.py @@ -33,6 +33,8 @@ urlpatterns = [ name="incomplete", ), path("api/admin/criteria/", views.CriteriaView.as_view(), name="criteria"), + path("api/admin/sport/archive/", views.SportArchiveView.as_view(), name="archive"), + path("api/admin/sport/scraper/", views.ScraperView.as_view(), name="scraper"), path("api/admin/greeting/", views.GreetingView.as_view(), name="greeting"), path("api/admin/end/", views.EndView.as_view(), name="end"), ]